Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
perceptron-model-proto-writer.C
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
36 
37 #include <sstream>
38 
39 #include "../proto/dataio.h"
41 
42 namespace reranker {
43 
45 
46 using confusion_learning::FeatureMessage;
47 using confusion_learning::FeatureVecMessage;
48 using confusion_learning::SymbolTableMessage;
49 using confusion_learning::SymbolMessage;
50 
51 void
52 PerceptronModelProtoWriter::Write(const Model *model,
53  ModelMessage *model_message,
54  bool write_features) const {
55  const PerceptronModel *perceptron_model =
56  dynamic_cast<const PerceptronModel *>(model);
57  model_message->set_identifier(perceptron_model->name());
58  model_message->set_reader_spec(perceptron_model->proto_reader_spec());
59  model_message->set_num_iterations(perceptron_model->best_model_epoch());
60  model_message->set_training_errors(perceptron_model->num_training_errors());
61  model_message->set_model_spec(perceptron_model->model_spec());
62 
63 
64  if (write_features) {
65  // TODO(dbikel): Figure out exactly what quantity to serialize as
66  // model loss.
67  FeatureVecMessage *raw_feature_vector_message =
68  model_message->mutable_raw_parameters();
69  fv_writer_.Write(perceptron_model->best_models_.weights(),
70  FeatureMessage::BASIC,
71  raw_feature_vector_message);
72  FeatureVecMessage *avg_feature_vector_message =
73  model_message->mutable_avg_parameters();
74  fv_writer_.Write(perceptron_model->best_models_.average_weights(),
75  FeatureMessage::BASIC,
76  avg_feature_vector_message);
77 
78  // Don't write the symbol table if we are not writing the features.
79  if (perceptron_model->symbols_ != NULL) {
80  SymbolTableMessage *symbol_table_message =
81  model_message->mutable_symbols();
82  Symbols *symbols = perceptron_model->symbols_;
83  for (Symbols::const_iterator it = symbols->begin();
84  it != symbols->end();
85  ++it) {
86  SymbolMessage *symbol_message = symbol_table_message->add_symbol();
87  symbol_message->set_symbol(it->first);
88  symbol_message->set_index(it->second);
89  }
90  }
91  }
92 }
93 
94 void
96  ostream &os,
97  bool output_best_epoch,
98  double weight,
99  bool output_key,
100  const string separator)
101  const {
102  ConfusionProtoIO proto_writer;
103  const PerceptronModel *perceptron_model =
104  dynamic_cast<const PerceptronModel *>(model);
105  const FeatureVector<int, double> &avg_weights =
106  output_best_epoch ?
107  perceptron_model->best_models_.average_weights() :
108  perceptron_model->models_.average_weights();
109  const FeatureVector<int, double> &raw_weights =
110  perceptron_model->models_.weights();
111  FeatureMessage feature_message;
112  for (FeatureVector<int, double>::const_iterator it = raw_weights.begin();
113  it != raw_weights.end();
114  ++it) {
115  feature_message.Clear();
116  Symbols *symbols = perceptron_model->symbols();
117  fv_writer_.SerializeFeature(it->first, weight * it->second,
118  FeatureMessage::BASIC,
119  &feature_message,
120  symbols);
121  // Lookup the average weight.
122  if (!avg_weights.empty()) {
123  feature_message.set_avg_value(weight * avg_weights.GetValue(it->first));
124  }
125  string encoded_message;
126  proto_writer.EncodeBase64(feature_message, &encoded_message);
127  if (output_key) {
128  const string &feat_name = symbols->GetSymbol(it->first);
129  if (feat_name == "") {
130  stringstream ss;
131  ss << it->first;
132  os << ss.str() << separator;
133  } else {
134  os << feat_name << separator;
135  }
136  }
137  os << encoded_message;
138  }
139  os.flush();
140 }
141 
142 } // namespace reranker
virtual const string & model_spec() const
Returns the spec string for constructing a default instance of this model so it may be properly de-se...
Model is an interface for reranking models.
Definition: model.H:141
const string & name() const
Returns the unique name for this model instance.
Definition: model.H:281
TrainingVectorSet best_models_
The best models seen so far during training, according to evaluation on the held-out development test...
Serializer for reranker::PerceptronModel instances to ModelMessage instances.
Symbols * symbols() const
Returns the symbol table for this model.
Definition: model.H:284
virtual const_iterator end()=0
#define REGISTER_MODEL_PROTO_WRITER(TYPE)
Registers the ModelProtoWriter implementation with the specified subtype TYPE with the ModelProtoWri...
TrainingVectorSet models_
The feature vectors representing this model.
virtual const string & proto_reader_spec() const
Returns the spec string for constructing an instance of a ModelProtoReader capable of de-serializing t...
const_iterator end() const
Returns a const iterator pointing to the end of the feature-value pairs of this feature vector...
const_iterator begin() const
Returns a const iterator pointing to the first of the feature-value pairs of this feature vector...
virtual const string & GetSymbol(int index) const =0
Returns the unique symbol for the specified index, or the empty string if the specified index does no...
This class implements a perceptron model reranker.
virtual void WriteFeatures(const Model *model, ostream &os, bool output_best_epoch, double weight, bool output_key, const string separator) const
Writes out the features of this model to a series of FeatureMessage instances using the specified Con...
int num_training_errors() const
Returns the number of training errors made by this model.
Definition: model.H:307
Symbols * symbols_
The symbol table for this model (may be NULL).
Definition: model.H:557
virtual int best_model_epoch() const
Returns the epoch of the best models seen so far during training.
unordered_map< string, int >::const_iterator const_iterator
Definition: symbol-table.H:60
const FeatureVector< int, double > & average_weights() const
Returns the feature vector corresponding to the averaged perceptron.
An interface specifying a converter from symbols (strings) to int indices.
Definition: symbol-table.H:57
const FeatureVector< int, double > & weights() const
Returns the "raw" feature weights computed during training.
A class to construct a ModelMessage from a PerceptronModel instance.
V GetValue(const K &uid) const
Synonymous with GetWeight.
virtual const_iterator begin()=0