Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
perceptron-model.C
// Copyright 2012, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// -----------------------------------------------------------------------------
//
//

#define DEBUG 1

#include <iostream>
#include <vector>
#include <unordered_set>

#include "candidate-set.H"
#include "training-time.H"

#include "perceptron-model.H"

using std::cerr;
using std::endl;
using std::vector;
using std::unordered_set;

namespace reranker {

REGISTER_MODEL(PerceptronModel)

REGISTER_NAMED_MODEL_UPDATE_PREDICATE(PerceptronModel::DefaultUpdatePredicate,
                                      PerceptronModelDefaultUpdatePredicate)

REGISTER_NAMED_MODEL_UPDATER(PerceptronModel::DefaultUpdater,
                             PerceptronModelDefaultUpdater)

string
PerceptronModel::proto_reader_spec_("PerceptronModelProtoReader()");

string
PerceptronModel::proto_writer_spec_("PerceptronModelProtoWriter()");

void
PerceptronModel::RegisterInitializers(Initializers &initializers) {
  bool required = true;
  initializers.Add("name", &name_, required);
  initializers.Add("score_comparator", &score_comparator_);
  initializers.Add("gold_comparator", &gold_comparator_);
  initializers.Add("candidate_set_scorer", &candidate_set_scorer_);
  initializers.Add("update_predicate", &update_predicate_);
  initializers.Add("updater", &updater_);
  initializers.Add("step_size", &step_size_);
}

void
PerceptronModel::Init(const Environment *env, const string &arg) {
  model_spec_.clear();
  model_spec_.append(arg);
}

void
PerceptronModel::Train(CandidateSetIterator &examples,
                       CandidateSetIterator &development_test) {
  while (NeedToKeepTraining()) {
    NewEpoch();
    TrainOneEpoch(examples);
    Evaluate(development_test);
    // TODO(dbikel,kbhall): Iterative parameter mixing goes here.
    // Keith: Please note that FeatureVector has
    // an AddScaledVector method which is
    // probably useful here.
  }
  if (DEBUG) {
    cerr << "Best model epoch: " << best_model_epoch_ << endl;
    cerr << "Total elapsed time: " << time_.absolute_seconds() << " seconds."
         << endl;
  }
  if (DEBUG >= 2) {
    cerr << "Final raw model: " << models_.GetModel(true) << endl
         << "Final averaged model: " << models_.GetModel(false) << endl;
    cerr << "Final best raw model: "
         << best_models_.GetModel(true) << endl
         << "Final best averaged model: "
         << best_models_.GetModel(false) << endl;
  }
}
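
// Note on the loop above: Train() alternates NewEpoch(), TrainOneEpoch(), and
// Evaluate() until NeedToKeepTraining() returns false.  Evaluate() records the
// held-out loss for each epoch and tracks the best epoch seen so far, which is
// exactly the state NeedToKeepTraining() consults to decide whether the model
// has been declining for too many consecutive epochs.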

bool
PerceptronModel::NeedToKeepTraining() {
  int num_epochs = time().epoch() + 1;

  if (DEBUG) {
    if (max_epochs() > 0) {
      if (num_epochs < max_epochs()) {
        cerr << "Training because we have trained only " << num_epochs
             << " epochs but max epochs is " << max_epochs() << "." << endl;
      } else {
        cerr << "Stopping training because we have trained "
             << num_epochs << " epochs and max epochs is "
             << max_epochs() << "." << endl;
      }
    }
  }

  if (max_epochs() > 0 && num_epochs >= max_epochs()) {
    return false;
  }

  if (DEBUG) {
    if (min_epochs() > 0 && num_epochs < min_epochs()) {
      cerr << "Training because we have trained " << num_epochs
           << " epochs but min epochs is " << min_epochs() << "." << endl;
    }
  }

  if (min_epochs() > 0 && num_epochs < min_epochs()) {
    return true;
  }

  if (DEBUG) {
    if (num_epochs_in_decline_ < max_epochs_in_decline_) {
      cerr << "Training because num epochs in decline is "
           << num_epochs_in_decline_ << " which is less than "
           << max_epochs_in_decline_ << "." << endl;
    } else {
      cerr << "Stopping training because num epochs in decline is "
           << num_epochs_in_decline_ << " which is greater than or equal to "
           << max_epochs_in_decline_ << "." << endl;
    }
  }

  return num_epochs_in_decline_ < max_epochs_in_decline_;
}
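
// Summary of the stopping criteria above: training always stops once
// max_epochs() is reached (when positive), always continues until min_epochs()
// is reached (when positive), and otherwise continues only while the number of
// consecutive epochs of declining held-out performance,
// num_epochs_in_decline_, is still below max_epochs_in_decline_.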

void
PerceptronModel::NewEpoch() {
  if (DEBUG) {
    if (time_.epoch() > 0) {
      cerr << "Epoch " << time_.epoch() << ": "
           << time_.seconds_since_last_epoch() << " seconds." << endl;
    }
  }
  time_.NewEpoch();
  num_training_errors_per_epoch_.push_back(0);
}

void
PerceptronModel::TrainOneEpoch(CandidateSetIterator &examples) {
  examples.Reset();
  while (examples.HasNext()) {
    TrainOnExample(examples.Next());
  }
  EndOfEpoch();
}

void
PerceptronModel::EndOfEpoch() {
  models_.UpdateAllFeatureAverages(time_);
  if (end_of_epoch_hook_ != NULL) {
    end_of_epoch_hook_->Do(this);
  }
  if (DEBUG) {
    int num_training_errors_this_epoch =
        *(num_training_errors_per_epoch_.rbegin());
    double percent_training_errors_this_epoch =
        ((double)num_training_errors_this_epoch / time_.index()) * 100.0;
    cerr << "Epoch " << time_.epoch() << ": number of training errors: "
         << num_training_errors_this_epoch << " ("
         << percent_training_errors_this_epoch << "%)" << endl;
  }
}

void
PerceptronModel::TrainOnExample(CandidateSet &example) {
  time_.Tick();

  if (symbols_ != NULL) {
    example.CompileFeatures(symbols_);
  }

  bool training = true;
  ScoreCandidates(example, training);

  if (NeedToUpdate(example)) {
    if (DEBUG >= 2) {
      cerr << "Time:" << time_.to_string() << ": need to update because "
           << "best scoring index " << example.best_scoring_index()
           << " is not equal to gold index " << example.gold_index() << endl;
    }
    ++(*num_training_errors_per_epoch_.rbegin());
    ++num_training_errors_;
    Update(example);
  }
}
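
// Per-example training flow (see TrainOnExample() above): advance the training
// clock, compile any symbolic features into integer uids, score all candidates
// with the raw (non-averaged) weights, and, if the update predicate fires (by
// default, whenever the best-scoring candidate is not the gold candidate),
// record a training error and update the model.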

bool
PerceptronModel::NeedToUpdate(CandidateSet &example) {
  return update_predicate_->NeedToUpdate(this, example);
}

bool
PerceptronModel::DefaultUpdatePredicate::NeedToUpdate(Model *model,
                                                      CandidateSet &example) {
  return example.best_scoring_index() != example.gold_index();
}

void
PerceptronModel::Update(CandidateSet &example) {
  updater_->Update(this, example);
}

void
PerceptronModel::DefaultUpdater::Update(Model *m, CandidateSet &example) {
  PerceptronModel *model = dynamic_cast<PerceptronModel *>(m);
  ++(model->num_updates_);
  unordered_set<int> gold_features;
  unordered_set<int> best_scoring_features;
  model->ComputeFeaturesToUpdate(example, gold_features, best_scoring_features);

  model->models_.UpdateGoldAndCandidateFeatureAverages(model->time_,
                                                       gold_features,
                                                       best_scoring_features);
  double step_size =
      model->ComputeStepSize(gold_features, best_scoring_features, example);

  // Finally, update perceptrons.

  if (DEBUG >= 2) {
    cerr << "Updating weights for gold features [";
    for (unordered_set<int>::const_iterator it = gold_features.begin();
         it != gold_features.end(); ++it) {
      cerr << " " << *it;
    }
    cerr << "] from\n\t" << example.GetGold() << endl;

    cerr << "Updating weights for best scoring features [";
    for (unordered_set<int>::const_iterator it = best_scoring_features.begin();
         it != best_scoring_features.end(); ++it) {
      cerr << " " << *it;
    }
    cerr << "] from\n\t" << example.GetBestScoring() << endl;
  }

  double positive_step = step_size;
  model->models_.UpdateWeights(model->time_, gold_features,
                               example.GetGold().features(), positive_step);
  double negative_step = -step_size;
  model->models_.UpdateWeights(model->time_, best_scoring_features,
                               example.GetBestScoring().features(),
                               negative_step);

  if (DEBUG >= 2) {
    cerr << "Raw model: " << model->models_.GetModel(true) << endl;
    cerr << "Avg model: " << model->models_.GetModel(false) << endl;
  }
}
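
// The update above is the standard structured-perceptron step, restricted to
// the features whose values differ between the gold and best-scoring
// candidates:
//
//   w  <-  w  +  step_size * f(gold)  -  step_size * f(best_scoring)
//
// UpdateGoldAndCandidateFeatureAverages() keeps the running per-feature
// averages in sync so that the averaged model remains available for
// evaluation.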

double
PerceptronModel::Evaluate(CandidateSetIterator &development_test) {
  double total_weight = 0.0;
  double total_weighted_loss = 0.0;
  double total_oracle_loss = 0.0;
  double total_baseline_loss = 0.0;
  num_testing_errors_per_epoch_.push_back(0);

  bool not_training = false;
  size_t development_test_size = 0;
  development_test.Reset();
  while (development_test.HasNext()) {
    ++development_test_size;
    CandidateSet &candidate_set = development_test.Next();
    if (symbols_ != NULL) {
      candidate_set.CompileFeatures(symbols_);
    }
    ScoreCandidates(candidate_set, not_training);
    double loss_weight =
        use_weighted_loss() ? candidate_set.loss_weight() : 1.0;
    total_weight += loss_weight;
    total_weighted_loss += loss_weight * candidate_set.GetBestScoring().loss();
    total_oracle_loss += loss_weight * candidate_set.GetGold().loss();

    // For now, assume that the candidate sets are sorted by the baseline score.
    total_baseline_loss += loss_weight * candidate_set.Get(0).loss();
    if (candidate_set.best_scoring_index() != candidate_set.gold_index()) {
      ++(*num_testing_errors_per_epoch_.rbegin());
    }
  }

  double loss_this_epoch = total_weighted_loss / total_weight;
  loss_per_epoch_.push_back(loss_this_epoch);

  int num_testing_errors_this_epoch =
      *(num_testing_errors_per_epoch_.rbegin());
  double percent_testing_errors_this_epoch =
      ((double)num_testing_errors_this_epoch / development_test_size) * 100.0;
  double oracle_loss = total_oracle_loss / total_weight;
  double baseline_loss = total_baseline_loss / total_weight;
  cerr << "Epoch " << time_.epoch() << ": oracle loss: " << oracle_loss << endl;
  cerr << "Epoch " << time_.epoch() << ": baseline loss: " << baseline_loss
       << endl;
  cerr << "Epoch " << time_.epoch() << ": average devtest loss: "
       << loss_this_epoch << endl;
  cerr << "Epoch " << time_.epoch() << ": number of testing errors: "
       << num_testing_errors_this_epoch << " ("
       << percent_testing_errors_this_epoch << "%)" << endl;

  if (time_.epoch() == 0 ||
      loss_this_epoch < loss_per_epoch_[best_model_epoch_]) {
    best_model_epoch_ = time_.epoch();
    best_models_ = models_;
  }

  if (time_.epoch() > 0 &&
      best_model_epoch_ != time_.epoch() &&
      loss_this_epoch >= loss_per_epoch_[best_model_epoch_]) {
    ++num_epochs_in_decline_;
  } else {
    // We're in the first epoch, or we've made strictly fewer errors
    // than the previous epoch.
    num_epochs_in_decline_ = 0;
  }
  return loss_this_epoch;
}
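
// The losses reported above are weighted averages over the development-test
// set: average devtest loss = sum_i loss_weight_i * loss(best_scoring_i) /
// sum_i loss_weight_i.  The oracle and baseline losses are computed the same
// way from each set's gold candidate and from candidate 0, respectively
// (candidate sets are assumed to be sorted by baseline score).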

void
PerceptronModel::ScoreCandidates(CandidateSet &candidates, bool training) {
  candidate_set_scorer_->Score(this, candidates, training);
}

double
PerceptronModel::ScoreCandidate(Candidate &candidate, bool training) {
  bool use_raw = training;
  const FeatureVector<int,double> &model = models_.GetModel(use_raw);
  double score = kernel_fn_->Apply(model, candidate.features());
  if (DEBUG >= 2) {
    cerr << "Time:" << time_.to_string() << ": scoring candidate "
         << candidate << " with " << (use_raw ? "raw" : "avg")
         << " model: " << model << endl
         << "\tscore: " << score << endl;
  }
  candidate.set_score(score);
  return score;
}
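
// A candidate's score is kernel_fn_->Apply(model, candidate.features()); for
// the usual dot-product kernel this is simply sum_i w_i * f_i(candidate).
// The raw weights are used while training, and the averaged weights are used
// at evaluation time.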

void
PerceptronModel::CompactifyFeatureUids() {
  // First, produce mapping for uid's of current non-zero features to dense
  // interval [0,n-1] (where there are n non-zero features).
  unordered_set<int> old_uids;
  models_.weights().GetNonZeroFeatures(old_uids);
  models_.average_weights().GetNonZeroFeatures(old_uids);
  unordered_map<int, int> old_to_new_uids;
  int new_uid = 0;
  for (unordered_set<int>::const_iterator it = old_uids.begin();
       it != old_uids.end();
       ++it) {
    old_to_new_uids[*it] = new_uid++;
  }
  models_.RemapFeatureUids(old_to_new_uids);
  best_models_.RemapFeatureUids(old_to_new_uids);

  if (symbols_ != NULL) {
    Symbols *old_symbols = symbols_->Clone();
    symbols_->Clear();
    for (Symbols::const_iterator it = old_symbols->begin();
         it != old_symbols->end();
         ++it) {
      unordered_map<int, int>::const_iterator old_to_new_uid_it =
          old_to_new_uids.find(it->second);
      if (old_to_new_uid_it != old_to_new_uids.end()) {
        int new_uid = old_to_new_uid_it->second;
        const string &symbol = it->first;
        symbols_->SetIndex(symbol, new_uid);
      }
    }
    delete old_symbols;
  }
}
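
// Example of the remapping above: if the non-zero feature uids are
// {17, 342, 90001}, they are renumbered to {0, 1, 2} (in whatever order the
// unordered_set yields them), and the symbol table is rebuilt so that each
// surviving symbol maps to its feature's new uid.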

void
PerceptronModel::ComputeFeaturesToUpdate(
    const CandidateSet &example,
    unordered_set<int> &gold_features_to_update,
    unordered_set<int> &best_scoring_features_to_update) const {
  // Collect gold features that are not in best-scoring candidate.
  const FeatureVector<int,double> &gold_features =
      example.GetGold().features();
  const FeatureVector<int,double> &best_scoring_features =
      example.GetBestScoring().features();

  if (DEBUG >= 2) {
    cerr << "Gold index: " << example.gold_index()
         << "; best scoring index: " << example.best_scoring_index()
         << endl;
    cerr << "Original gold features: " << gold_features << endl
         << "Original best scoring features: " << best_scoring_features
         << endl;
  }

  gold_features.GetNonZeroFeatures(gold_features_to_update);
  best_scoring_features.RemoveEqualFeatures(gold_features,
                                            gold_features_to_update);

  if (DEBUG >= 2) {
    cerr << "Time:" << time_.to_string() << ": new gold features: [";
    for (unordered_set<int>::const_iterator it =
             gold_features_to_update.begin();
         it != gold_features_to_update.end();
         ++it) {
      cerr << " " << *it;
    }
    cerr << "]" << endl;
  }

  best_scoring_features.GetNonZeroFeatures(best_scoring_features_to_update);
  gold_features.RemoveEqualFeatures(best_scoring_features,
                                    best_scoring_features_to_update);
  if (DEBUG >= 2) {
    cerr << "Time:" << time_.to_string() << ": new best scoring features: [";
    for (unordered_set<int>::const_iterator it =
             best_scoring_features_to_update.begin();
         it != best_scoring_features_to_update.end();
         ++it) {
      cerr << " " << *it;
    }
    cerr << "]" << endl;
  }
}
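
// Net effect of the method above: gold_features_to_update ends up holding the
// uids of gold features whose values differ from the best-scoring candidate's
// (RemoveEqualFeatures() drops the uids whose weights are identical in both
// vectors), and best_scoring_features_to_update holds the mirror-image set,
// so only the features on which the two candidates actually disagree have
// their weights moved.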

} // namespace reranker
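
To make the update rule concrete, here is a minimal, self-contained sketch of the step applied by PerceptronModel::DefaultUpdater::Update above. This is not ReFr code: the sparse toy vectors are invented, and it assumes, as the UpdateWeights() signature suggests, that each selected weight is incremented by the step size times the corresponding feature value.

#include <iostream>
#include <unordered_map>

int main() {
  // Hypothetical sparse feature vectors, uid -> value.
  std::unordered_map<int, double> weights;  // the (raw) model
  const std::unordered_map<int, double> gold = {{3, 1.0}, {7, 2.0}, {9, 1.0}};
  const std::unordered_map<int, double> best = {{3, 1.0}, {8, 1.0}, {9, 3.0}};
  const double step_size = 1.0;

  // Positive step on gold features whose values differ from the best-scoring
  // candidate's (mirrors ComputeFeaturesToUpdate plus the positive_step call).
  for (const auto &p : gold) {
    auto it = best.find(p.first);
    if (it == best.end() || it->second != p.second) {
      weights[p.first] += step_size * p.second;
    }
  }
  // Negative step on best-scoring features whose values differ from the gold
  // candidate's (mirrors the negative_step call).
  for (const auto &p : best) {
    auto it = gold.find(p.first);
    if (it == gold.end() || it->second != p.second) {
      weights[p.first] -= step_size * p.second;
    }
  }

  // Feature 3 is identical in both candidates, so it is left untouched;
  // feature 7 gains +2.0, feature 8 gets -1.0, and feature 9 nets -2.0.
  for (const auto &p : weights) {
    std::cout << "feature " << p.first << ": " << p.second << "\n";
  }
  return 0;
}

The averaged model used at evaluation time then reports, for each feature, the average of its weight over all time steps rather than the final raw value, which appears to be what the TrainingVectorSet feature averages track.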