36 #ifndef RERANKER_MODEL_H_
37 #define RERANKER_MODEL_H_
55 using std::shared_ptr;
69 return score_diff == 0.0 ? 0 : (score_diff < 0.0 ? -1 : 1);
87 double loss_diff = c1.
loss() - c2.
loss();
90 if (loss_diff < 0.0) {
92 }
else if (loss_diff > 0.0) {
98 return score_diff == 0.0 ? 0 : (score_diff < 0.0 ? -1 : 1);
133 size_t GetRandomIndex(
size_t max);
231 virtual void Do(
Model *model) = 0;
520 shared_ptr<Candidate::Comparator>
GetComparator(
const string &spec)
const;
527 shared_ptr<Updater>
GetUpdater(
const string &spec)
const;
541 const vector<string> &tokens,
542 size_t min_expected_number,
543 size_t max_expected_number,
544 const string &class_name)
const;
600 #define REGISTER_NAMED_MODEL(TYPE,NAME) REGISTER_NAMED(TYPE,NAME,Model)
606 #define REGISTER_MODEL(TYPE) REGISTER_NAMED_MODEL(TYPE,TYPE)
613 #define REGISTER_NAMED_MODEL_UPDATE_PREDICATE(TYPE,NAME) \
614 REGISTER_NAMED(TYPE,NAME,Model::UpdatePredicate)
621 #define REGISTER_MODEL_UPDATE_PREDICATE(TYPE) \
622 REGISTER_NAMED_MODEL_UPDATE_PREDICATE(TYPE,TYPE)
629 #define REGISTER_NAMED_MODEL_UPDATER(TYPE,NAME) \
630 REGISTER_NAMED(TYPE,NAME,Model::Updater)
636 #define REGISTER_MODEL_UPDATER(TYPE) \
637 REGISTER_NAMED_MODEL_UPDATER(TYPE,TYPE)
Model is an interface for reranking models.
const string & name() const
Returns the unique name for this model instance.
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
Model(const string &name)
Constructs a new instance with a NULL kernel function.
A simple class to hold the three notions of time during training: the current epoch, the current time index within the current epoch, and the absolute time index.
Provides an interface and some implementations for iterating over CandidateSet instances.
The default comparator for comparing two Candidate instances based on their respective scores (i...
virtual const string & proto_writer_spec() const =0
Returns the spec string for constructing an instance of a ModelProtoWriter capable of serializing this...
double score() const
Returns the reranker’s score for this candidate.
virtual void set_symbols(Symbols *symbols)
Sets the Symbols instance for this Model to be the specified instance.
Provides the reranker::Symbols interface as well as the reranker::StaticSymbolTable implementation...
Model(const string &name, KernelFunction *kernel_fn, Symbols *symbols)
Constructs a new instance with the specified kernel function and symbol table.
Symbols * symbols() const
Returns the symbol table for this model.
virtual void set_use_weighted_loss(bool use_weighted_loss)
A symbol table that stores the mapping from symbols to int’s and vice versa in local (non-static) dat...
vector< double > loss_per_epoch_
The average loss per epoch.
virtual void Score(Model *model, CandidateSet &candidates, bool training)
const vector< int > & num_training_errors_per_epoch()
Returns the number of training errors made for each epoch.
virtual void set_max_epochs(int max_epochs)
Sets the maximum number of epochs to train.
shared_ptr< CandidateSet::Scorer > candidate_set_scorer_
A scorer for CandidateSet instances.
virtual void Do(Model *model)=0
The function to be executed by the Model that wraps this hook.
void SetDefaultCandidateSetScorer()
virtual const string & model_spec() const =0
Returns the spec string for constructing a default instance of this model so it may be properly de-se...
virtual int Compare(const Model &model, const Candidate &c1, const Candidate &c2)
This method first compares c1 to c2 based on their respective losses (i.e., the values returned by in...
int num_training_errors() const
Returns the number of training errors made by this model.
An inner interface for a model to score a CandidateSet.
virtual void ScoreCandidates(CandidateSet &candidates, bool training)=0
Scores the specified set of candidates according to either the raw or averaged version of this percep...
An inner interface specifying comparison between two Candidate instances.
virtual void Update(Model *model, CandidateSet &example)=0
Updates this model based on the specified training example.
The default comparator for comparing two Candidate instances for being the “gold” candidate...
Symbols * symbols_
The symbol table for this model (may be NULL).
virtual void set_min_epochs(int min_epochs)
Sets the minimum number of epochs to train.
virtual shared_ptr< Candidate::Comparator > score_comparator()
Returns a pointer to the score comparator used by this model.
virtual void Train(CandidateSetIterator &examples, CandidateSetIterator &development_test)=0
Trains this model on a collection of training examples, where each training example is a set of candi...
virtual bool NeedToKeepTraining()=0
Returns whether more training epochs are required for this model.
virtual void Score(Model *model, CandidateSet &candidates, bool training)
Picks two candidates at random from the set, scores them and then identifies which has the higher sco...
vector< int > num_testing_errors_per_epoch_
The number of testing errors made on held-out development test data for each epoch.
virtual bool NeedToUpdate(CandidateSet &example)=0
Indicates whether the current model needs to be updated.
This candidate set scorer picks two candidates at random from the set, scores them and then identifie...
void set_score_comparator(shared_ptr< Candidate::Comparator > score_comparator)
void set_name(const string &name)
Sets the name of this Model instance.
virtual ~Model()
Destroys this model and its associated kernel function.
Time time_
The tiny object that holds the "training time" for this model (epoch, index and absolute time index)...
void set_kernel_fn(KernelFunction *kernel_fn)
Sets the kernel function for this model.
vector< int > num_training_errors_per_epoch_
The number of errors made on training examples during each epoch.
virtual int best_model_epoch() const =0
A class to hold a set of candidates, either for training or test.
int max_epochs() const
Returns the maximum number of epochs to train.
shared_ptr< CandidateSet::Scorer > GetCandidateSetScorer(const string &spec) const
KernelFunction * kernel_fn_
Yes, this is an interface, but we add the kernel function as a data member.
An interface specifying a converter from symbols (strings) to int indices.
virtual bool use_weighted_loss()
const vector< double > & loss_per_epoch()
Returns the loss per epoch for each epoch of training that was evaluated.
int min_epochs_
The minimum number of training epochs to execute.
const Time & time() const
Returns the current training time of this model: number of epochs, number of time steps in the curren...
An interface for an environment in which variables of various types are mapped to their values...
A class to represent a candidate in a set of candidates that constitutes a training instance for a re...
int num_training_errors_
The number of errors made on training examples.
virtual shared_ptr< Candidate::Comparator > gold_comparator()
Returns a pointer to the gold comparator used by this model.
virtual void NewEpoch()=0
int num_updates() const
Returns the number of updates made by this model.
An inner interface for a predicate that tests whether a Model needs to be updated based on the curren...
int max_epochs_
The maximum number of training epochs to execute.
string name_
This model’s unique name.
shared_ptr< Updater > GetUpdater(const string &spec) const
double loss() const
Returns the loss of this candidate.
virtual void TrainOnExample(CandidateSet &example)=0
Trains this model on the specified training example.
The default candidate set scorer scores each candidate using the Model::ScoreCandidate method and the...
virtual void Init(const Environment *env, const string &arg)
Initializes the random seed using srand and the current time available by calling time(NULL)...
virtual void set_end_of_epoch_hook(Hook *end_of_epoch_hook)
virtual void CheckNumberOfTokens(const string &arg, const vector< string > &tokens, size_t min_expected_number, size_t max_expected_number, const string &class_name) const
A helper method for implementing the Init method: throws a std::runtime_error if the number of tokens...
shared_ptr< Candidate::Comparator > gold_comparator_
A comparator to provide an ordering for candidates to find the gold candidate in a set...
An inner interface specifying an update function for a model.
An interface to make it easier to implement Factory-constructible types by implementing both required...
shared_ptr< UpdatePredicate > update_predicate_
The update predicate for this model.
int min_epochs() const
Returns the minimum number of epochs to train.
Provides the reranker::KernelFunction interface.
shared_ptr< Candidate::Comparator > GetComparator(const string &spec) const
shared_ptr< UpdatePredicate > GetUpdatePredicate(const string &spec) const
virtual bool NeedToUpdate(Model *model, CandidateSet &example)=0
Returns whether the specified model needs to be updated based on the specified training example...
void SetDefaultComparators()
int num_updates_
The number of times an update was performed on this model during training.
virtual void CompactifyFeatureUids()=0
Renumbers the potentially sparse feature uid’s so that they occupy the interval [0,n-1] densely, for n non-zero features in use by this model.
Model(const string &name, KernelFunction *kernel_fn)
Constructs a new instance with the specified kernel function.
virtual double Evaluate(CandidateSetIterator &development_test)=0
Evaluates this model on the specified set of held-out development test data.
virtual void TrainOneEpoch(CandidateSetIterator &examples)=0
Trains this model for one epoch, i.e., a single pass through the specified set of training examples...
Provides a generic dynamic object factory.
An interface specifying a kernel function for two FeatureVector instances.
virtual const string & proto_reader_spec() const =0
Returns the spec string for constructing an instance of a ModelProtoReader capable of de-serializing t...
shared_ptr< Candidate::Comparator > score_comparator_
A comparator to provide an ordering for candidates based on score when scoring all candidates in a se...
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
void set_gold_comparator(shared_ptr< Candidate::Comparator > gold_comparator)
virtual void Update(CandidateSet &example)=0
Updates the current model based on the specified set of candidates.
An interface for specifying a hook to be run by a Model instance.
bool use_weighted_loss_
Indicates whether this model should weight each candidate’s loss by the value returned by CandidateSe...
Model()
Constructs a new instance with the empty string for its name and a NULL kernel function.
virtual int Compare(const Model &model, const Candidate &c1, const Candidate &c2)
Returns 0 if the two candidates’ scores are equal, less than zero if the score of c1 is less than tha...
virtual double ScoreCandidate(Candidate &candidate, bool training)=0
Scores a candidate according to either the raw or averaged version of this perceptron model...
shared_ptr< Updater > updater_
The updater for this model.
Hook * end_of_epoch_hook_
A hook to be performed at the end of every epoch.
virtual void EndOfEpoch()=0
Provides the reranker::Time class, which holds the three notions of training time: current epoch...