Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
model.H
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
35 
36 #ifndef RERANKER_MODEL_H_
37 #define RERANKER_MODEL_H_
38 
39 #include <stdexcept>
40 #include <memory>
41 #include <vector>
42 
43 #include "candidate-set.H"
44 #include "candidate-set-iterator.H"
45 #include "kernel-function.H"
46 #include "symbol-table.H"
47 #include "training-time.H"
48 #include "factory.H"
49 
52 namespace reranker {
53 
54 using std::vector;
55 using std::shared_ptr;
56 
61  public:
// Compares two candidates by score alone: returns 0 when c1.score() equals
// c2.score(), -1 when c1's score is lower and 1 when it is higher.
// The model argument is not used by this implementation.
66  virtual int Compare(const Model &model,
67  const Candidate &c1, const Candidate &c2) {
68  double score_diff = c1.score() - c2.score();
69  return score_diff == 0.0 ? 0 : (score_diff < 0.0 ? -1 : 1);
70  }
71 };
72 
76  public:
// Compares two candidates first by loss, then by score.
// Returns 1 when c1 has the lower loss and -1 when c1 has the higher loss;
// when the losses are equal the result is the sign of the score difference
// (0 for equal scores).  The model argument is not used by this
// implementation.
85  virtual int Compare(const Model &model,
86  const Candidate &c1, const Candidate &c2) {
87  double loss_diff = c1.loss() - c2.loss();
88  double score_diff = c1.score() - c2.score();
89  // If c1 has "less loss" it is "better", so we return 1.
90  if (loss_diff < 0.0) {
91  return 1;
92  } else if (loss_diff > 0.0) {
93  return -1;
94  } else {
95  // If c1's and c2's losses are equal, we compare
96  // them based on score, where a higher score is "better"
97  // (meaning a positive return value).
98  return score_diff == 0.0 ? 0 : (score_diff < 0.0 ? -1 : 1);
99  }
100  }
101 };
102 
110  public:
// Scores the candidates of the given set on behalf of the model; only
// declared here (the index below places the definition in model.C).
// Per the class summary in the index, the default scorer scores each
// candidate using Model::ScoreCandidate.
111  virtual void Score(Model *model,
112  CandidateSet &candidates, bool training);
113 };
114 
120  public:
// Initializes the random seed using srand and the current time (per the
// index entry for this method); only declared here, defined in model.C.
123  virtual void Init(const Environment *env, const string &arg);
// Picks two candidates at random from the set, scores them and then
// identifies which has the higher score (per the index entry for this
// method); only declared here, defined in model.C.
128  virtual void Score(Model *model,
129  CandidateSet &candidates, bool training);
130  private:
// NOTE(review): undocumented in this listing; presumably returns a random
// index bounded by max -- confirm the exact range against model.C.
133  size_t GetRandomIndex(size_t max);
134 };
135 
141 class Model : public FactoryConstructible {
142  public:
// Constructs a new instance with the empty string for its name and a NULL
// kernel function.  A fresh LocalSymbolTable is allocated for the symbol
// table, min/max epochs start at -1 and no end-of-epoch hook is installed.
// NOTE(review): this listing omits some lines of the initializer list and
// body; unlike the constructors that take a kernel function, no
// num_updates_(0) initializer is visible here -- confirm that num_updates_
// is initialized.
145  Model() : name_(""), time_(), kernel_fn_(NULL),
146  symbols_(new LocalSymbolTable()),
147  loss_per_epoch_(),
151  min_epochs_(-1), max_epochs_(-1),
152  end_of_epoch_hook_(NULL) {
154  }
155 
156 
// Constructs a new instance with the specified name and a NULL kernel
// function.  A fresh LocalSymbolTable is allocated for the symbol table,
// min/max epochs start at -1 and no end-of-epoch hook is installed.
// NOTE(review): this listing omits some lines of the initializer list and
// body; unlike the constructors that take a kernel function, no
// num_updates_(0) initializer is visible here -- confirm that num_updates_
// is initialized.
160  Model(const string &name) :
161  name_(name), time_(), kernel_fn_(NULL),
162  symbols_(new LocalSymbolTable()),
163  loss_per_epoch_(),
167  min_epochs_(-1), max_epochs_(-1),
168  end_of_epoch_hook_(NULL) {
170  }
171 
// Constructs a new instance with the specified name and kernel function.
// The model keeps the kernel_fn pointer and deletes it in its destructor.
// A fresh LocalSymbolTable is allocated for the symbol table, the update
// count starts at 0, min/max epochs start at -1 and no end-of-epoch hook is
// installed.
177  Model(const string &name, KernelFunction *kernel_fn) :
178  name_(name), time_(), kernel_fn_(kernel_fn),
179  symbols_(new LocalSymbolTable()),
180  loss_per_epoch_(),
184  num_updates_(0),
185  min_epochs_(-1), max_epochs_(-1),
186  end_of_epoch_hook_(NULL) {
188  }
189 
// Constructs a new instance with the specified name, kernel function and
// symbol table.  The model keeps both pointers and deletes them in its
// destructor.  The update count starts at 0, min/max epochs start at -1 and
// no end-of-epoch hook is installed.
198  Model(const string &name, KernelFunction *kernel_fn,
199  Symbols *symbols) :
200  name_(name), time_(), kernel_fn_(kernel_fn),
201  symbols_(symbols),
202  loss_per_epoch_(),
206  num_updates_(0),
207  min_epochs_(-1), max_epochs_(-1),
208  end_of_epoch_hook_(NULL) {
210  }
211 
// Destroys this model, deleting the kernel function, the symbol table and
// the end-of-epoch hook it holds.
213  virtual ~Model() {
214  delete kernel_fn_;
215  delete symbols_;
216  delete end_of_epoch_hook_;
217  }
218 
219  // inner interfaces
220 
// An interface for specifying a hook to be run by a Model instance.
223  class Hook {
224  public:
225  virtual ~Hook() { }
// The function to be executed by the Model that wraps this hook.
231  virtual void Do(Model *model) = 0;
232  };
233 
244  public:
// Returns whether the specified model needs to be updated based on the
// specified training example.
252  virtual bool NeedToUpdate(Model *model, CandidateSet &example) = 0;
253  };
254 
// An inner interface specifying an update function for a model.
268  class Updater : public FactoryConstructible {
269  public:
// Updates the given model based on the specified training example.
275  virtual void Update(Model *model, CandidateSet &example) = 0;
276  };
277 
278  // accessors
279 
// Returns the unique name for this model instance.
281  const string &name() const { return name_; }
282 
// Returns the symbol table for this model (the pointer may be NULL).
284  Symbols *symbols() const { return symbols_; }
285 
// Returns the current training time of this model (the Time object holds
// the current epoch, the index within that epoch and the absolute index).
290  const Time &time() const { return time_; }
291 
// NOTE(review): undocumented in this listing; presumably the epoch at which
// the best model was obtained -- confirm against implementing classes.
292  virtual int best_model_epoch() const = 0;
293 
// Returns the number of updates made by this model.
297  int num_updates() const { return num_updates_; }
// Returns the number of training errors made for each epoch.
// NOTE(review): no return statement is visible in this listing (listing
// line 302 is absent); the real header presumably returns
// num_training_errors_per_epoch_ -- confirm against the original file.
301  const vector<int> &num_training_errors_per_epoch() {
303  }
308 
// Returns the minimum number of epochs to train.
310  int min_epochs() const { return min_epochs_; }
311 
// Returns the maximum number of epochs to train.
313  int max_epochs() const { return max_epochs_; }
314 
// Returns the (average) loss per epoch, for each epoch of training that was
// evaluated.
316  const vector<double> &loss_per_epoch() { return loss_per_epoch_; }
317 
// Returns the spec string for constructing a default instance of this
// model, so that it may be de-serialized.
321  virtual const string &model_spec() const = 0;
322 
// Returns the spec string for constructing an instance of a
// ModelProtoReader capable of de-serializing this model.
326  virtual const string &proto_reader_spec() const = 0;
327 
// Returns the spec string for constructing an instance of a
// ModelProtoWriter capable of serializing this model.
331  virtual const string &proto_writer_spec() const = 0;
332 
// Returns a (shared) pointer to the score comparator used by this model.
334  virtual shared_ptr<Candidate::Comparator> score_comparator() {
335  return score_comparator_;
336  }
337 
// Returns a (shared) pointer to the gold comparator used by this model.
339  virtual shared_ptr<Candidate::Comparator> gold_comparator() {
340  return gold_comparator_;
341  }
342 
343  // training methods
344 
// Returns whether more training epochs are required for this model.
352  virtual bool NeedToKeepTraining() = 0;
353 
354  // TODO(dbikel,kbhall): Add Train method that takes MapInput (or
355  // Hadoop equivalent) for streaming input of
356  // CandidateSet instances.
357 
// Trains this model on a collection of training examples, where each
// training example is a set of candidates.
// NOTE(review): development_test is presumably the held-out data used for
// evaluation during training -- confirm against implementing classes.
368  virtual void Train(CandidateSetIterator &examples,
369  CandidateSetIterator &development_test) = 0;
370 
// NOTE(review): undocumented in this listing; presumably invoked at the
// start of each training epoch -- confirm against implementing classes.
371  virtual void NewEpoch() = 0;
372 
// NOTE(review): undocumented in this listing; presumably invoked at the end
// of each training epoch -- confirm against implementing classes.
373  virtual void EndOfEpoch() = 0;
374 
// Trains this model for one epoch, i.e., a single pass through the
// specified set of training examples.
380  virtual void TrainOneEpoch(CandidateSetIterator &examples) = 0;
381 
// Trains this model on the specified training example.
386  virtual void TrainOnExample(CandidateSet &example) = 0;
387 
// Indicates whether the current model needs to be updated.
398  virtual bool NeedToUpdate(CandidateSet &example) = 0;
399 
// Updates the current model based on the specified set of candidates.
408  virtual void Update(CandidateSet &example) = 0;
409 
// Evaluates this model on the specified set of held-out development test
// data.
// NOTE(review): the meaning of the returned double is not visible in this
// listing (presumably a loss) -- confirm against implementing classes.
422  virtual double Evaluate(CandidateSetIterator &development_test)
423  = 0;
424 
// Scores the specified set of candidates according to either the raw or
// averaged version of this model.
// NOTE(review): presumably `training` selects between the two versions --
// confirm which value maps to which.
432  virtual void ScoreCandidates(CandidateSet &candidates, bool training) = 0;
433 
// Scores a single candidate according to either the raw or averaged version
// of this model and returns a double (presumably the score -- confirm).
// NOTE(review): presumably `training` selects between the two versions --
// confirm which value maps to which.
443  virtual double ScoreCandidate(Candidate &candidate, bool training) = 0;
444 
445  // mutators
446 
450 
454 
// Renumbers the potentially sparse feature uids so that they occupy the
// interval [0,n-1] densely, for n non-zero features in use by this model.
460  virtual void CompactifyFeatureUids() = 0;
461 
// Installs the hook to be performed at the end of every epoch, replacing
// any previously installed hook.  The model deletes the hook it held
// before (and deletes the current hook in its destructor), so callers hand
// over the pointer.  Passing NULL removes the hook.
462  virtual void set_end_of_epoch_hook(Hook *end_of_epoch_hook) {
// Release the old hook only when a different one is being installed:
// re-installing the current hook must not delete it, or this model would
// be left holding (and later deleting again) a dangling pointer.  Deleting
// a NULL pointer is a no-op, so no separate NULL check is needed.
463  if (end_of_epoch_hook_ != end_of_epoch_hook) {
464  delete end_of_epoch_hook_;
465  }
466  end_of_epoch_hook_ = end_of_epoch_hook;
467  }
468 
// Returns the use_weighted_loss_ flag, which (per the index below)
// indicates whether this model should weight each candidate's loss.
469  virtual bool use_weighted_loss() { return use_weighted_loss_; }
470 
473  }
474 
// Sets the Symbols instance for this Model to be the specified instance.
// The previously held symbol table is deleted, and the new one is deleted
// by the destructor, so callers hand over the pointer.
478  virtual void set_symbols(Symbols *symbols) {
// Skip the delete when the same table is passed back in; otherwise the
// model would keep (and later delete again) a dangling pointer.
479  if (symbols_ != symbols) delete symbols_;
480  symbols_ = symbols;
481  }
482 
483  protected:
484  // more mutators (protected, because they should only be used by subclasses)
485 
// Sets the name of this Model instance.
487  void set_name(const string &name) { name_ = name; }
488 
// Sets the kernel function for this model.  The previously held kernel
// function is deleted, and the new one is deleted by the destructor, so
// callers hand over the pointer.
491  void set_kernel_fn(KernelFunction *kernel_fn) {
// Skip the delete when the same kernel function is passed back in;
// otherwise the model would keep (and later delete again) a dangling
// pointer.
492  if (kernel_fn_ != kernel_fn) delete kernel_fn_;
493  kernel_fn_ = kernel_fn;
494  }
495 
496  void set_score_comparator(shared_ptr<Candidate::Comparator> score_comparator)
497  {
499  }
500 
501  void set_gold_comparator(shared_ptr<Candidate::Comparator> gold_comparator) {
503  }
504 
508  }
509 
511  score_comparator_ = GetComparator("DefaultScoreComparator()");
512  gold_comparator_ = GetComparator("DefaultGoldComparator()");
513  }
514 
517  GetCandidateSetScorer("DefaultCandidateSetScorer()");
518  }
519 
// Returns a comparator built from a spec string such as
// "DefaultScoreComparator()" (the form used elsewhere in this class); only
// declared here, defined in model.C.
520  shared_ptr<Candidate::Comparator> GetComparator(const string &spec) const;
521 
// Returns a candidate set scorer built from a spec string such as
// "DefaultCandidateSetScorer()" (the form used elsewhere in this class);
// only declared here, defined in model.C.
522  shared_ptr<CandidateSet::Scorer> GetCandidateSetScorer(const string &spec)
523  const;
524 
// Returns an UpdatePredicate for the given spec string; only declared here,
// defined in model.C.
// NOTE(review): the spec presumably has the same "ClassName()" form as the
// comparator specs -- confirm.
525  shared_ptr<UpdatePredicate> GetUpdatePredicate(const string &spec) const;
526 
// Returns an Updater for the given spec string; only declared here, defined
// in model.C.
// NOTE(review): the spec presumably has the same "ClassName()" form as the
// comparator specs -- confirm.
527  shared_ptr<Updater> GetUpdater(const string &spec) const;
528 
// A helper method for implementing the Init method: throws a
// std::runtime_error based on the number of tokens (only declared here,
// defined in model.C).
// NOTE(review): presumably the error is raised when tokens.size() falls
// outside [min_expected_number, max_expected_number], with arg and
// class_name used in the message -- confirm against model.C.
540  virtual void CheckNumberOfTokens(const string &arg,
541  const vector<string> &tokens,
542  size_t min_expected_number,
543  size_t max_expected_number,
544  const string &class_name) const;
545 
// This model's unique name.
547  string name_;
// A comparator to provide an ordering for candidates based on score.
560  shared_ptr<Candidate::Comparator> score_comparator_;
// A comparator to provide an ordering for candidates to find the gold
// candidate in a set.
563  shared_ptr<Candidate::Comparator> gold_comparator_;
// A scorer for CandidateSet instances.
565  shared_ptr<CandidateSet::Scorer> candidate_set_scorer_;
// The update predicate for this model.
567  shared_ptr<UpdatePredicate> update_predicate_;
// The updater for this model.
569  shared_ptr<Updater> updater_;
// The average loss per epoch.
571  vector<double> loss_per_epoch_;
594 };
595 
// Registers TYPE under NAME as a Model by forwarding to REGISTER_NAMED
// (not defined in this file; presumably provided by factory.H).
600 #define REGISTER_NAMED_MODEL(TYPE,NAME) REGISTER_NAMED(TYPE,NAME,Model)
601 
// Registers TYPE as a Model using the type's own name as its registered
// name.
606 #define REGISTER_MODEL(TYPE) REGISTER_NAMED_MODEL(TYPE,TYPE)
607 
// Registers TYPE under NAME as a Model::UpdatePredicate by forwarding to
// REGISTER_NAMED (not defined in this file; presumably provided by
// factory.H).
613 #define REGISTER_NAMED_MODEL_UPDATE_PREDICATE(TYPE,NAME) \
614  REGISTER_NAMED(TYPE,NAME,Model::UpdatePredicate)
615 
// Registers TYPE as a Model::UpdatePredicate using the type's own name as
// its registered name.
621 #define REGISTER_MODEL_UPDATE_PREDICATE(TYPE) \
622  REGISTER_NAMED_MODEL_UPDATE_PREDICATE(TYPE,TYPE)
623 
// Registers TYPE under NAME as a Model::Updater by forwarding to
// REGISTER_NAMED (not defined in this file; presumably provided by
// factory.H).
629 #define REGISTER_NAMED_MODEL_UPDATER(TYPE,NAME) \
630  REGISTER_NAMED(TYPE,NAME,Model::Updater)
631 
// Registers TYPE as a Model::Updater using the type's own name as its
// registered name.
636 #define REGISTER_MODEL_UPDATER(TYPE) \
637  REGISTER_NAMED_MODEL_UPDATER(TYPE,TYPE)
638 
639 } // namespace reranker
640 
641 #endif
Model is an interface for reranking models.
Definition: model.H:141
const string & name() const
Returns the unique name for this model instance.
Definition: model.H:281
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
Model(const string &name)
Constructs a new instance with a NULL kernel function.
Definition: model.H:160
A simple class to hold the three notions of time during training: the current epoch, the current time index within the current epoch, and the absolute time index.
Definition: training-time.H:56
Provides an interface and some implementations for iterating over CandidateSet instances.
The default comparator for comparing two Candidate instances based on their respective scores (i...
Definition: model.H:60
virtual const string & proto_writer_spec() const =0
Returns the spec string for constructing an instance of a ModelProtoWriter capable of serializing this...
double score() const
Returns the reranker’s score for this candidate.
Definition: candidate.H:131
virtual void set_symbols(Symbols *symbols)
Sets the Symbols instance for this Model to be the specified instance.
Definition: model.H:478
Provides the reranker::Symbols interface as well as the reranker::StaticSymbolTable implementation...
Model(const string &name, KernelFunction *kernel_fn, Symbols *symbols)
Constructs a new instance with the specified kernel function and symbol table.
Definition: model.H:198
Symbols * symbols() const
Returns the symbol table for this model.
Definition: model.H:284
virtual void set_use_weighted_loss(bool use_weighted_loss)
Definition: model.H:471
A symbol table that stores the mapping from symbols to int’s and vice versa in local (non-static) dat...
Definition: symbol-table.H:185
vector< double > loss_per_epoch_
The average loss per epoch.
Definition: model.H:571
virtual void Score(Model *model, CandidateSet &candidates, bool training)
Definition: model.C:61
const vector< int > & num_training_errors_per_epoch()
Returns the number of training errors made for each epoch.
Definition: model.H:301
virtual void set_max_epochs(int max_epochs)
Sets the maximum number of epochs to train.
Definition: model.H:453
shared_ptr< CandidateSet::Scorer > candidate_set_scorer_
A scorer for CandidateSet instances.
Definition: model.H:565
virtual void Do(Model *model)=0
The function to be executed by the Model that wraps this hook.
void SetDefaultCandidateSetScorer()
Definition: model.H:515
virtual const string & model_spec() const =0
Returns the spec string for constructing a default instance of this model so it may be properly de-se...
virtual int Compare(const Model &model, const Candidate &c1, const Candidate &c2)
This method first compares c1 to c2 based on their respective losses (i.e., the values returned by in...
Definition: model.H:85
int num_training_errors() const
Returns the number of training errors made by this model.
Definition: model.H:307
An inner interface for a model to score a CandidateSet.
Definition: candidate-set.H:79
virtual void ScoreCandidates(CandidateSet &candidates, bool training)=0
Scores the specified set of candidates according to either the raw or averaged version of this percep...
An inner interface specifying comparison between two Candidate instances.
Definition: candidate.H:108
virtual void Update(Model *model, CandidateSet &example)=0
Updates this model based on the specified training example.
The default comparator for comparing two Candidate instances for being the “gold” candidate...
Definition: model.H:75
Symbols * symbols_
The symbol table for this model (may be NULL).
Definition: model.H:557
virtual void set_min_epochs(int min_epochs)
Sets the minimum number of epochs to train.
Definition: model.H:449
virtual shared_ptr< Candidate::Comparator > score_comparator()
Returns a pointer to the score comparator used by this model.
Definition: model.H:334
virtual void Train(CandidateSetIterator &examples, CandidateSetIterator &development_test)=0
Trains this model on a collection of training examples, where each training example is a set of candi...
virtual bool NeedToKeepTraining()=0
Returns whether more training epochs are required for this model.
virtual void Score(Model *model, CandidateSet &candidates, bool training)
Picks two candidates at random from the set, scores them and then identifies which has the higher sco...
Definition: model.C:116
vector< int > num_testing_errors_per_epoch_
The number of testing errors made on held-out development test data for each epoch.
Definition: model.H:574
virtual bool NeedToUpdate(CandidateSet &example)=0
Indicates whether the current model needs to be updated.
This candidate set scorer picks two candidates at random from the set, scores them and then identifie...
Definition: model.H:119
void set_score_comparator(shared_ptr< Candidate::Comparator > score_comparator)
Definition: model.H:496
void set_name(const string &name)
Sets the name of this Model instance.
Definition: model.H:487
virtual ~Model()
Destroys this model and its associated kernel function.
Definition: model.H:213
virtual ~Hook()
Definition: model.H:225
Time time_
The tiny object that holds the "training time" for this model (epoch, index and absolute time index)...
Definition: model.H:552
void set_kernel_fn(KernelFunction *kernel_fn)
Sets the kernel function for this model.
Definition: model.H:491
vector< int > num_training_errors_per_epoch_
The number of errors made on training examples during each epoch.
Definition: model.H:576
virtual int best_model_epoch() const =0
A class to hold a set of candidates, either for training or test.
Definition: candidate-set.H:62
int max_epochs() const
Returns the maximum number of epochs to train.
Definition: model.H:313
shared_ptr< CandidateSet::Scorer > GetCandidateSetScorer(const string &spec) const
Definition: model.C:163
KernelFunction * kernel_fn_
Yes, this is an interface, but we add the kernel function as a data member.
Definition: model.H:555
An interface specifying a converter from symbols (strings) to int indices.
Definition: symbol-table.H:57
virtual bool use_weighted_loss()
Definition: model.H:469
const vector< double > & loss_per_epoch()
Returns the loss per epoch for each epoch of training that was evaluated.
Definition: model.H:316
int min_epochs_
The minimum number of training epochs to execute.
Definition: model.H:586
const Time & time() const
Returns the current training time of this model: number of epochs, number of time steps in the curren...
Definition: model.H:290
An interface for an environment in which variables of various types are mapped to their values...
Definition: environment.H:125
A class to represent a candidate in a set of candidates that constitutes a training instance for a re...
Definition: candidate.H:60
int num_training_errors_
The number of errors made on training examples.
Definition: model.H:580
virtual shared_ptr< Candidate::Comparator > gold_comparator()
Returns a pointer to the gold comparator used by this model.
Definition: model.H:339
virtual void NewEpoch()=0
int num_updates() const
Returns the number of updates made by this model.
Definition: model.H:297
An inner interface for a predicate that tests whether a Model needs to be updated based on the curren...
Definition: model.H:243
int max_epochs_
The maximum number of training epochs to execute.
Definition: model.H:588
void SetDefaultObjects()
Definition: model.H:505
string name_
This model’s unique name.
Definition: model.H:547
shared_ptr< Updater > GetUpdater(const string &spec) const
Definition: model.C:179
double loss() const
Returns the loss of this candidate.
Definition: candidate.H:129
virtual void TrainOnExample(CandidateSet &example)=0
Trains this model on the specified training example.
The default candidate set scorer scores each candidate using the Model::ScoreCandidate method and the...
Definition: model.H:109
virtual void Init(const Environment *env, const string &arg)
Initializes the random seed using srand and the current time available by calling time(NULL)...
Definition: model.C:89
virtual void set_end_of_epoch_hook(Hook *end_of_epoch_hook)
Definition: model.H:462
virtual void CheckNumberOfTokens(const string &arg, const vector< string > &tokens, size_t min_expected_number, size_t max_expected_number, const string &class_name) const
A helper method for implementing the Init method: throws a std::runtime_error if the number of tokens...
Definition: model.C:137
shared_ptr< Candidate::Comparator > gold_comparator_
A comparator to provide an ordering for candidates to find the gold candidate in a set...
Definition: model.H:563
An inner interface specifying an update function for a model.
Definition: model.H:268
An interface to make it easier to implement Factory-constructible types by implementing both required...
Definition: factory.H:382
shared_ptr< UpdatePredicate > update_predicate_
The update predicate for this model.
Definition: model.H:567
int min_epochs() const
Returns the minimum number of epochs to train.
Definition: model.H:310
Provides the reranker::KernelFunction interface.
shared_ptr< Candidate::Comparator > GetComparator(const string &spec) const
Definition: model.C:155
shared_ptr< UpdatePredicate > GetUpdatePredicate(const string &spec) const
Definition: model.C:171
virtual bool NeedToUpdate(Model *model, CandidateSet &example)=0
Returns whether the specified model needs to be updated based on the specified training example...
void SetDefaultComparators()
Definition: model.H:510
int num_updates_
The number of times an update was performed on this model during training.
Definition: model.H:584
virtual void CompactifyFeatureUids()=0
Renumbers the potentially sparse feature uid’s so that they occupy the interval [0,n-1] densely, for n non-zero features in use by this model.
Model(const string &name, KernelFunction *kernel_fn)
Constructs a new instance with the specified kernel function.
Definition: model.H:177
virtual double Evaluate(CandidateSetIterator &development_test)=0
Evaluates this model on the specified set of held-out development test data.
virtual void TrainOneEpoch(CandidateSetIterator &examples)=0
Trains this model for one epoch, i.e., a single pass through the specified set of training examples...
Provides a generic dynamic object factory.
An interface specifying a kernel function for two FeatureVector instances.
virtual const string & proto_reader_spec() const =0
Returns the spec string for constructing an instance of a ModelProtoReader capable of de-serializing t...
shared_ptr< Candidate::Comparator > score_comparator_
A comparator to provide an ordering for candidates based on score when scoring all candidates in a se...
Definition: model.H:560
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
void set_gold_comparator(shared_ptr< Candidate::Comparator > gold_comparator)
Definition: model.H:501
virtual void Update(CandidateSet &example)=0
Updates the current model based on the specified set of candidates.
An interface for specifying a hook to be run by a Model instance.
Definition: model.H:223
bool use_weighted_loss_
Indicates whether this model should weight each candidate’s loss by the value returned by CandidateSe...
Definition: model.H:593
Model()
Constructs a new instance with the empty string for its name and a NULL kernel function.
Definition: model.H:145
virtual int Compare(const Model &model, const Candidate &c1, const Candidate &c2)
Returns 0 if the two candidates’ scores are equal, less than zero if the score of c1 is less than tha...
Definition: model.H:66
virtual double ScoreCandidate(Candidate &candidate, bool training)=0
Scores a candidate according to either the raw or averaged version of this perceptron model...
shared_ptr< Updater > updater_
The updater for this model.
Definition: model.H:569
Hook * end_of_epoch_hook_
A hook to be performed at the end of every epoch.
Definition: model.H:590
virtual void EndOfEpoch()=0
Provides the reranker::Time class, which holds the three notions of training time: current epoch...