refr/perceptron-model_8_h_source.html

 // Copyright 2012, Google Inc.

 // All rights reserved.

 //

 // Redistribution and use in source and binary forms, with or without

 // modification, are permitted provided that the following conditions are

 // met:

 //

 //   * Redistributions of source code must retain the above copyright

 //     notice, this list of conditions and the following disclaimer.

 //   * Redistributions in binary form must reproduce the above

 //     copyright notice, this list of conditions and the following disclaimer

 //     in the documentation and/or other materials provided with the

 //     distribution.

 //   * Neither the name of Google Inc. nor the names of its

 //     contributors may be used to endorse or promote products derived from

 //     this software without specific prior written permission.

 //

 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 // -----------------------------------------------------------------------------

 //

 //


 #ifndef RERANKER_PERCEPTRON_MODEL_H_

 #define RERANKER_PERCEPTRON_MODEL_H_


 #include <vector>

 #include <unordered_set>

 #include <memory>


 #include "candidate-set.H"

 #include "dot-product.H"

 #include "model.H"

 #include "training-time.H"

 #include "training-vector-set.H"


 /// The default maximum number of epochs in decline: PerceptronModel uses
 /// this as the initial value of its max_epochs_in_decline_ data member.
 #define DEFAULT_MAX_EPOCHS_IN_DECLINE 5


 namespace reranker {


 // Make these standard library names usable without the std:: qualifier.
 // NOTE(review): these using-declarations live in a header, so every file
 // that includes it sees them too; other code may rely on that -- confirm
 // before removing any of them.
 using std::shared_ptr;

 using std::unordered_set;

 using std::vector;


 class PerceptronModel : public Model {

  public:

   friend class PerceptronModelProtoWriter;

   friend class PerceptronModelProtoReader;


   PerceptronModel() :

       Model("", new DotProduct()),

       models_(),

       best_models_(),

       best_model_epoch_(-1),

       max_epochs_in_decline_(DEFAULT_MAX_EPOCHS_IN_DECLINE),

       num_epochs_in_decline_(0),

       step_size_(1.0) {

     SetDefaultObjects();

   }


   PerceptronModel(const string &name) :

       Model(name, new DotProduct()),

       models_(),

       best_models_(),

       best_model_epoch_(-1),

       max_epochs_in_decline_(DEFAULT_MAX_EPOCHS_IN_DECLINE),

       num_epochs_in_decline_(0),

       step_size_(1.0) {

     SetDefaultObjects();

   }


   PerceptronModel(const string &name, KernelFunction *kernel_fn) :

       Model(name, kernel_fn),

       models_(),

       best_models_(),

       best_model_epoch_(-1),

       max_epochs_in_decline_(DEFAULT_MAX_EPOCHS_IN_DECLINE),

       num_epochs_in_decline_(0),

       step_size_(1.0) {

     SetDefaultObjects();

   }


   PerceptronModel(const string &name, KernelFunction *kernel_fn,

                   Symbols *symbols) :

       Model(name, kernel_fn, symbols),

       models_(),

       best_models_(),

       best_model_epoch_(-1),

       max_epochs_in_decline_(DEFAULT_MAX_EPOCHS_IN_DECLINE),

       num_epochs_in_decline_(0),

       step_size_(1.0) {

     SetDefaultObjects();

   }


   virtual ~PerceptronModel() { }


   class DefaultUpdatePredicate : public Model::UpdatePredicate {

     virtual bool NeedToUpdate(Model *model, CandidateSet &example);

   };


   class DefaultUpdater : public Model::Updater {

     virtual void Update(Model *m, CandidateSet &example);

   };


   virtual const string &model_spec() const { return model_spec_; }


   virtual const string &proto_reader_spec() const {

     return proto_reader_spec_;

   }


   virtual const string &proto_writer_spec() const {

     return proto_writer_spec_;

   }


   virtual int best_model_epoch() const { return best_model_epoch_; }


   virtual void RegisterInitializers(Initializers &initializers);


   virtual void Init(const Environment *env, const string &arg);


   // training methods


   virtual bool NeedToKeepTraining();


   virtual void Train(CandidateSetIterator &examples,

                      CandidateSetIterator &development_test);


   virtual void NewEpoch();


   virtual void EndOfEpoch();


   virtual void TrainOneEpoch(CandidateSetIterator &examples);


   virtual void TrainOnExample(CandidateSet &example);


   virtual bool NeedToUpdate(CandidateSet &example);


   virtual void Update(CandidateSet &example);


   virtual double Evaluate(CandidateSetIterator &development_test);


   virtual void ScoreCandidates(CandidateSet &candidates, bool training);


   virtual double ScoreCandidate(Candidate &candidate, bool training);


   // mutators


   virtual void CompactifyFeatureUids();


   void set_max_epochs_in_decline(int max_epochs_in_decline) {

     max_epochs_in_decline_ = max_epochs_in_decline;

   }


   virtual const TrainingVectorSet &models() const { return models_; }


  protected:

   void SetDefaultObjects() {

     update_predicate_ =

         GetUpdatePredicate("PerceptronModelDefaultUpdatePredicate()");

     updater_ =

         GetUpdater("PerceptronModelDefaultUpdater()");

   }


   virtual void ComputeFeaturesToUpdate(const CandidateSet &example,

                                        unordered_set<int> &

                                        gold_features_to_update,

                                        unordered_set<int> &

                                        best_scoring_features_to_update) const;


   virtual double ComputeStepSize(const unordered_set<int> &gold_features,

                                const unordered_set<int> &best_scoring_features,

                                const CandidateSet &example) {

     return step_size_;

     // For MIRA, simply derive class and override to be

     /*

     int feature_count = gold_features.size() + best_scoring_features.size();

     double loss_diff =

       example.GetBestScoring().loss() - example.GetGold().loss();

     double score_diff =

       example.GetBestScoring().score() - example.GetGold().score();

     double raw_step = (loss_diff + score_diff) / feature_count;

     step_size_ = raw_step > mira_clip_ : mira_clip_ : raw_step;

     return step_size_;

     */

   }


   // data members

   TrainingVectorSet models_;

   TrainingVectorSet best_models_;

   int best_model_epoch_;

   int max_epochs_in_decline_;

   int num_epochs_in_decline_;

   double step_size_;

   string model_spec_;


   static string proto_reader_spec_;

   static string proto_writer_spec_;

 };


 }  // namespace reranker


 #endif

reranker::PerceptronModel::EndOfEpoch
virtual void EndOfEpoch()
Definition: perceptron-model.C:185

reranker::PerceptronModel::model_spec
virtual const string & model_spec() const
Returns the spec string for constructing a default instance of this model so it may be properly de-se...
Definition: perceptron-model.H:176

reranker::PerceptronModel::ScoreCandidate
virtual double ScoreCandidate(Candidate &candidate, bool training)
Scores a candidate according to either the raw or averaged version of this perceptron model...
Definition: perceptron-model.C:359

reranker::Model
Model is an interface for reranking models.
Definition: model.H:141

reranker::Model::name
const string & name() const
Returns the unique name for this model instance.
Definition: model.H:281

reranker::DotProduct
This class defines a dot product kernel function for two vectors.
Definition: dot-product.H:47

reranker::PerceptronModel::model_spec_
string model_spec_
Definition: perceptron-model.H:423

reranker::CandidateSetIterator
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
Definition: candidate-set-iterator.H:58

reranker::PerceptronModel::best_models_
TrainingVectorSet best_models_
The best models seen so far during training, according to evaluation on the held-out development test...
Definition: perceptron-model.H:411

reranker::PerceptronModel::SetDefaultObjects
void SetDefaultObjects()
Definition: perceptron-model.H:356

reranker::Model::symbols
Symbols * symbols() const
Returns the symbol table for this model.
Definition: model.H:284

dot-product.H
Provides a dot product implementation of the reranker::KernelFunction interface.

reranker::PerceptronModel::models_
TrainingVectorSet models_
The feature vectors representing this model.
Definition: perceptron-model.H:408

reranker::PerceptronModel::PerceptronModel
PerceptronModel(const string &name, KernelFunction *kernel_fn)
Constructs a new perceptron model with the specified kernel function.
Definition: perceptron-model.H:101

reranker::PerceptronModel::proto_reader_spec
virtual const string & proto_reader_spec() const
Returns the spec string for constructing an instance of a ModelProtoReader capable of de-serializing t...
Definition: perceptron-model.H:179

reranker::PerceptronModel::num_epochs_in_decline_
int num_epochs_in_decline_
The current number of training epochs in which the model has been degrading in development set perfor...
Definition: perceptron-model.H:420

reranker::PerceptronModel::models
virtual const TrainingVectorSet & models() const
Returns the set of models and statistics used by this PerceptronModel instance.
Definition: perceptron-model.H:353

reranker::PerceptronModel::RegisterInitializers
virtual void RegisterInitializers(Initializers &initializers)
Registers several variables that may be initialized when this object is constructed via Factory::Crea...
Definition: perceptron-model.C:70

reranker::PerceptronModel::CompactifyFeatureUids
virtual void CompactifyFeatureUids()
Renumbers the potentially sparse feature uid’s so that they occupy the interval [0,n-1] densely, for n non-zero features in use by this model.
Definition: perceptron-model.C:374

reranker::PerceptronModel::DefaultUpdater
The default update function for perceptron models.
Definition: perceptron-model.H:171

reranker::PerceptronModel::proto_writer_spec_
static string proto_writer_spec_
A string that specifies to construct a PerceptronModelProtoWriter, which is capable of serializing an...
Definition: perceptron-model.H:432

reranker::PerceptronModel
This class implements a perceptron model reranker.
Definition: perceptron-model.H:63

reranker::PerceptronModel::TrainOnExample
virtual void TrainOnExample(CandidateSet &example)
Trains this model on the specified training example.
Definition: perceptron-model.C:203

reranker::PerceptronModel::TrainOneEpoch
virtual void TrainOneEpoch(CandidateSetIterator &examples)
Trains this model for one epoch, i.e., a single pass through the specified set of training examples...
Definition: perceptron-model.C:176

reranker::PerceptronModel::PerceptronModel
PerceptronModel(const string &name)
Constructs a new perceptron model with a DotProduct kernel function.
Definition: perceptron-model.H:85

reranker::PerceptronModel::set_max_epochs_in_decline
void set_max_epochs_in_decline(int max_epochs_in_decline)
Sets the maximum number of training epochs to keep training after the model starts to degrade (i...
Definition: perceptron-model.H:347

reranker::PerceptronModel::Evaluate
virtual double Evaluate(CandidateSetIterator &development_test)
Evaluates this model on the specified set of held-out development test data.
Definition: perceptron-model.C:288

reranker::PerceptronModel::proto_reader_spec_
static string proto_reader_spec_
A string that specifies to construct a PerceptronModelProtoReader, which is capable of de-serializing...
Definition: perceptron-model.H:428

reranker::PerceptronModel::best_model_epoch
virtual int best_model_epoch() const
Returns the epoch of the best models seen so far during training.
Definition: perceptron-model.H:190

reranker::PerceptronModelProtoReader
A class to construct a PerceptronModel from a ModelMessage instance.
Definition: perceptron-model-proto-reader.H:57

reranker::CandidateSet
A class to hold a set of candidates, either for training or test.
Definition: candidate-set.H:62

reranker::Symbols
An interface specifying a converter from symbols (strings) to int indices.
Definition: symbol-table.H:57

reranker::Environment
An interface for an environment in which variables of various types are mapped to their values...
Definition: environment.H:125

reranker::Candidate
A class to represent a candidate in a set of candidates that constitutes a training instance for a re...
Definition: candidate.H:60

reranker::TrainingVectorSet
A class to hold the several feature vectors needed during training (especially for the perceptron fam...
Definition: training-vector-set.H:59

reranker::Model::UpdatePredicate
An inner interface for a predicate that tests whether a Model needs to be updated based on the curren...
Definition: model.H:243

reranker::PerceptronModel::ScoreCandidates
virtual void ScoreCandidates(CandidateSet &candidates, bool training)
Scores the specified set of candidates according to either the raw or averaged version of this percep...
Definition: perceptron-model.C:354

reranker::PerceptronModel::DefaultUpdatePredicate
The default update predicate for perceptron and perceptron-style models, which indicates to do a mode...
Definition: perceptron-model.H:142

reranker::PerceptronModel::NewEpoch
virtual void NewEpoch()
Definition: perceptron-model.C:163

reranker::PerceptronModel::PerceptronModel
PerceptronModel()
Constructs a new instance with the empty string for its name and the DotProduct kernel function...
Definition: perceptron-model.H:70

reranker::PerceptronModel::proto_writer_spec
virtual const string & proto_writer_spec() const
Returns the spec string for constructing an instance of a ModelProtoWriter capable of serializing this...
Definition: perceptron-model.H:184

reranker::PerceptronModel::ComputeFeaturesToUpdate
virtual void ComputeFeaturesToUpdate(const CandidateSet &example, unordered_set< int > &gold_features_to_update, unordered_set< int > &best_scoring_features_to_update) const
Computes the features to be updated for the gold candidate and the best-scoring candidate.
Definition: perceptron-model.C:409

reranker::Model::GetUpdater
shared_ptr< Updater > GetUpdater(const string &spec) const
Definition: model.C:179

reranker::PerceptronModel::~PerceptronModel
virtual ~PerceptronModel()
Destroys this perceptron model and all its data members.
Definition: perceptron-model.H:134

reranker::PerceptronModel::Update
virtual void Update(CandidateSet &example)
Updates the current model based on the specified set of candidates.
Definition: perceptron-model.C:237

reranker::Model::Updater
An inner interface specifying an update function for a model.
Definition: model.H:268

reranker::PerceptronModelProtoWriter
A class to construct a ModelMessage from a PerceptronModel instance.
Definition: perceptron-model-proto-writer.H:58

reranker::PerceptronModel::step_size_
double step_size_
The last value computed by the ComputeStepSize method.
Definition: perceptron-model.H:422

reranker::PerceptronModel::Init
virtual void Init(const Environment *env, const string &arg)
Initializes this instance.
Definition: perceptron-model.C:82

reranker::PerceptronModel::Train
virtual void Train(CandidateSetIterator &examples, CandidateSetIterator &development_test)
Trains this model on a collection of training examples, where each training example is a set of candi...
Definition: perceptron-model.C:88

reranker::PerceptronModel::ComputeStepSize
virtual double ComputeStepSize(const unordered_set< int > &gold_features, const unordered_set< int > &best_scoring_features, const CandidateSet &example)
Computes the step size for the next update, and, as a side effect, caches this value in step_size_...
Definition: perceptron-model.H:389

reranker::Model::update_predicate_
shared_ptr< UpdatePredicate > update_predicate_
The update predicate for this model.
Definition: model.H:567

reranker::PerceptronModel::PerceptronModel
PerceptronModel(const string &name, KernelFunction *kernel_fn, Symbols *symbols)
Constructs a new perceptron model with the specified kernel function and symbol table.
Definition: perceptron-model.H:120

DEFAULT_MAX_EPOCHS_IN_DECLINE
#define DEFAULT_MAX_EPOCHS_IN_DECLINE
Definition: perceptron-model.H:49

reranker::PerceptronModel::best_model_epoch_
int best_model_epoch_
The epoch of the best models seen so far during training.
Definition: perceptron-model.H:413

reranker::Model::GetUpdatePredicate
shared_ptr< UpdatePredicate > GetUpdatePredicate(const string &spec) const
Definition: model.C:171

reranker::PerceptronModel::max_epochs_in_decline_
int max_epochs_in_decline_
The maximum number of training epochs to keep training after the model starts to degrade (i...
Definition: perceptron-model.H:416

reranker::KernelFunction
An interface specifying a kernel function for two FeatureVector instances.
Definition: kernel-function.H:50

reranker::PerceptronModel::NeedToKeepTraining
virtual bool NeedToKeepTraining()
Returns whether more training epochs are required for this model.
Definition: perceptron-model.C:115

candidate-set.H
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.

model.H
Reranker model interface.

training-vector-set.H
Provides the reranker::TrainingVectorSet class.

reranker::PerceptronModel::NeedToUpdate
virtual bool NeedToUpdate(CandidateSet &example)
Indicates whether the current model needs to be updated; the implementation here simply returns true ...
Definition: perceptron-model.C:226

reranker::Model::updater_
shared_ptr< Updater > updater_
The updater for this model.
Definition: model.H:569

reranker::Initializers
A container for all the member initializers for a particular Factory-constructible instance...
Definition: factory.H:203

training-time.H
Provides the reranker::Time class, which holds the three notions of training time: current epoch...