36 #ifndef RERANKER_PERCEPTRON_MODEL_H_
37 #define RERANKER_PERCEPTRON_MODEL_H_
40 #include <unordered_set>
49 #define DEFAULT_MAX_EPOCHS_IN_DECLINE 5
53 using std::shared_ptr;
54 using std::unordered_set;
102 Model(name, kernel_fn),
122 Model(name, kernel_fn, symbols),
360 GetUpdater(
"PerceptronModelDefaultUpdater()");
381 gold_features_to_update,
383 best_scoring_features_to_update)
const;
390 const unordered_set<int> &best_scoring_features,
virtual void EndOfEpoch()
virtual const string & model_spec() const
Returns the spec string for constructing a default instance of this model so it may be properly de-se...
virtual double ScoreCandidate(Candidate &candidate, bool training)
Scores a candidate according to either the raw or averaged version of this perceptron model...
Model is an interface for reranking models.
const string & name() const
Returns the unique name for this model instance.
This class defines a dot product kernel function for two vectors.
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
TrainingVectorSet best_models_
The best models seen so far during training, according to evaluation on the held-out development test...
Symbols * symbols() const
Returns the symbol table for this model.
Provides a dot product implementation of the reranker::KernelFunction interface.
TrainingVectorSet models_
The feature vectors representing this model.
PerceptronModel(const string &name, KernelFunction *kernel_fn)
Constructs a new perceptron model with the specified kernel function.
virtual const string & proto_reader_spec() const
Returns the spec string for constructing an instance of a ModelProtoReader capable of de-serializing t...
int num_epochs_in_decline_
The current number of training epochs in which the model has been degrading in development set perfor...
virtual const TrainingVectorSet & models() const
Returns the set of models and statistics used by this PerceptronModel instance.
virtual void RegisterInitializers(Initializers &initializers)
Registers several variables that may be initialized when this object is constructed via Factory::Crea...
virtual void CompactifyFeatureUids()
Renumbers the potentially sparse feature uids so that they occupy the interval [0,n-1] densely, for n non-zero features in use by this model.
The default update function for perceptron models.
static string proto_writer_spec_
A string that specifies to construct a PerceptronModelProtoWriter, which is capable of serializing an...
This class implements a perceptron model reranker.
virtual void TrainOnExample(CandidateSet &example)
Trains this model on the specified training example.
virtual void TrainOneEpoch(CandidateSetIterator &examples)
Trains this model for one epoch, i.e., a single pass through the specified set of training examples...
PerceptronModel(const string &name)
Constructs a new perceptron model with a DotProduct kernel function.
void set_max_epochs_in_decline(int max_epochs_in_decline)
Sets the maximum number of training epochs to keep training after the model starts to degrade (i...
virtual double Evaluate(CandidateSetIterator &development_test)
Evaluates this model on the specified set of held-out development test data.
static string proto_reader_spec_
A string that specifies to construct a PerceptronModelProtoReader, which is capable of de-serializing...
virtual int best_model_epoch() const
Returns the epoch of the best models seen so far during training.
A class to construct a PerceptronModel from a ModelMessage instance.
A class to hold a set of candidates, either for training or test.
An interface specifying a converter from symbols (strings) to int indices.
An interface for an environment in which variables of various types are mapped to their values...
A class to represent a candidate in a set of candidates that constitutes a training instance for a re...
A class to hold the several feature vectors needed during training (especially for the perceptron fam...
An inner interface for a predicate that tests whether a Model needs to be updated based on the curren...
virtual void ScoreCandidates(CandidateSet &candidates, bool training)
Scores the specified set of candidates according to either the raw or averaged version of this percep...
The default update predicate for perceptron and perceptron-style models, which indicates to do a mode...
PerceptronModel()
Constructs a new instance with the empty string for its name and the DotProduct kernel function...
virtual const string & proto_writer_spec() const
Returns the spec string for constructing an instance of a ModelProtoWriter capable of serializing this...
virtual void ComputeFeaturesToUpdate(const CandidateSet &example, unordered_set< int > &gold_features_to_update, unordered_set< int > &best_scoring_features_to_update) const
Computes the features to be updated for the gold candidate and the best-scoring candidate.
shared_ptr< Updater > GetUpdater(const string &spec) const
virtual ~PerceptronModel()
Destroys this perceptron model and all its data members.
virtual void Update(CandidateSet &example)
Updates the current model based on the specified set of candidates.
An inner interface specifying an update function for a model.
A class to construct a ModelMessage from a PerceptronModel instance.
double step_size_
The last value computed by the ComputeStepSize method.
virtual void Init(const Environment *env, const string &arg)
Initializes this instance.
virtual void Train(CandidateSetIterator &examples, CandidateSetIterator &development_test)
Trains this model on a collection of training examples, where each training example is a set of candi...
virtual double ComputeStepSize(const unordered_set< int > &gold_features, const unordered_set< int > &best_scoring_features, const CandidateSet &example)
Computes the step size for the next update, and, as a side effect, caches this value in step_size_...
shared_ptr< UpdatePredicate > update_predicate_
The update predicate for this model.
PerceptronModel(const string &name, KernelFunction *kernel_fn, Symbols *symbols)
Constructs a new perceptron model with the specified kernel function and symbol table.
#define DEFAULT_MAX_EPOCHS_IN_DECLINE
int best_model_epoch_
The epoch of the best models seen so far during training.
shared_ptr< UpdatePredicate > GetUpdatePredicate(const string &spec) const
int max_epochs_in_decline_
The maximum number of training epochs to keep training after the model starts to degrade (i...
An interface specifying a kernel function for two FeatureVector instances.
virtual bool NeedToKeepTraining()
Returns whether more training epochs are required for this model.
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
Reranker model interface.
Provides the reranker::TrainingVectorSet class.
virtual bool NeedToUpdate(CandidateSet &example)
Indicates whether the current model needs to be updated; the implementation here simply returns true ...
shared_ptr< Updater > updater_
The updater for this model.
A container for all the member initializers for a particular Factory-constructible instance...
Provides the reranker::Time class, which holds the three notions of training time: current epoch...