Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
perceptron-model.C
// Copyright 2012, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// -----------------------------------------------------------------------------
//
//

#define DEBUG 1

#include <iostream>
#include <vector>
#include <unordered_set>

#include "candidate-set.H"
#include "training-time.H"

#include "perceptron-model.H"

using std::cerr;
using std::endl;
using std::vector;
using std::unordered_set;

namespace reranker {

REGISTER_MODEL(PerceptronModel)

REGISTER_NAMED_MODEL_UPDATE_PREDICATE(PerceptronModel::DefaultUpdatePredicate,
                                      PerceptronModelDefaultUpdatePredicate)

REGISTER_NAMED_MODEL_UPDATER(PerceptronModel::DefaultUpdater,
                             PerceptronModelDefaultUpdater)

string
PerceptronModel::proto_reader_spec_("PerceptronModelProtoReader()");

string
PerceptronModel::proto_writer_spec_("PerceptronModelProtoWriter()");

void
PerceptronModel::RegisterInitializers(Initializers &initializers) {
  bool required = true;
  initializers.Add("name", &name_, required);
  initializers.Add("score_comparator", &score_comparator_);
  initializers.Add("gold_comparator", &gold_comparator_);
  initializers.Add("candidate_set_scorer", &candidate_set_scorer_);
  initializers.Add("update_predicate", &update_predicate_);
  initializers.Add("updater", &updater_);
  initializers.Add("step_size", &step_size_);
}

void
PerceptronModel::Init(const Environment *env, const string &arg) {
  model_spec_.clear();
  model_spec_.append(arg);
}

void
PerceptronModel::Train(CandidateSetIterator &examples,
                       CandidateSetIterator &development_test) {
  while (NeedToKeepTraining()) {
    NewEpoch();
    TrainOneEpoch(examples);
    Evaluate(development_test);
    // TODO(dbikel,kbhall): Iterative parameter mixing goes here.
    // Keith: Please note that FeatureVector has
    // an AddScaledVector method which is
    // probably useful here.
  }
  if (DEBUG) {
    cerr << "Best model epoch: " << best_model_epoch_ << endl;
    cerr << "Total elapsed time: " << time_.absolute_seconds() << " seconds."
         << endl;
  }
  if (DEBUG >= 2) {
    cerr << "Final raw model: " << models_.GetModel(true) << endl
         << "Final averaged model: " << models_.GetModel(false) << endl;
    cerr << "Final best raw model: "
         << best_models_.GetModel(true) << endl
         << "Final best averaged model: "
         << best_models_.GetModel(false) << endl;
  }
}
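
// Note on the loop above: Train() alternates NewEpoch(), TrainOneEpoch(), and
// Evaluate() until NeedToKeepTraining() returns false.  Evaluate() records the
// held-out loss for each epoch and tracks the best epoch seen so far, which is
// exactly the state NeedToKeepTraining() consults to decide whether the model
// has been declining for too many consecutive epochs.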

bool
PerceptronModel::NeedToKeepTraining() {
  int num_epochs = time().epoch() + 1;

  if (DEBUG) {
    if (max_epochs() > 0) {
      if (num_epochs < max_epochs()) {
        cerr << "Training because we have trained only " << num_epochs
             << " epochs but max epochs is " << max_epochs() << "." << endl;
      } else {
        cerr << "Stopping training because we have trained "
             << num_epochs << " epochs and max epochs is "
             << max_epochs() << "." << endl;
      }
    }
  }

  if (max_epochs() > 0 && num_epochs >= max_epochs()) {
    return false;
  }

  if (DEBUG) {
    if (min_epochs() > 0 && num_epochs < min_epochs()) {
      cerr << "Training because we have trained " << num_epochs
           << " epochs but min epochs is " << min_epochs() << "." << endl;
    }
  }

  if (min_epochs() > 0 && num_epochs < min_epochs()) {
    return true;
  }

  if (DEBUG) {
    if (num_epochs_in_decline_ < max_epochs_in_decline_) {
      cerr << "Training because num epochs in decline is "
           << num_epochs_in_decline_ << " which is less than "
           << max_epochs_in_decline_ << "." << endl;
    } else {
      cerr << "Stopping training because num epochs in decline is "
           << num_epochs_in_decline_ << " which is greater than or equal to "
           << max_epochs_in_decline_ << "." << endl;
    }
  }

  return num_epochs_in_decline_ < max_epochs_in_decline_;
}
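
// Summary of the stopping criteria above: training always stops once
// max_epochs() is reached (when positive), always continues until min_epochs()
// is reached (when positive), and otherwise continues only while the number of
// consecutive epochs of declining held-out performance,
// num_epochs_in_decline_, is still below max_epochs_in_decline_.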

void
PerceptronModel::NewEpoch() {
  if (DEBUG) {
    if (time_.epoch() > 0) {
      cerr << "Epoch " << time_.epoch() << ": "
           << time_.seconds_since_last_epoch() << " seconds." << endl;
    }
  }
  time_.NewEpoch();
  num_training_errors_per_epoch_.push_back(0);
}

void
PerceptronModel::TrainOneEpoch(CandidateSetIterator &examples) {
  examples.Reset();
  while (examples.HasNext()) {
    TrainOnExample(examples.Next());
  }
  EndOfEpoch();
}

void
PerceptronModel::EndOfEpoch() {
  models_.UpdateAllFeatureAverages(time_);
  if (end_of_epoch_hook_ != NULL) {
    end_of_epoch_hook_->Do(this);
  }
  if (DEBUG) {
    int num_training_errors_this_epoch =
        *(num_training_errors_per_epoch_.rbegin());
    double percent_training_errors_this_epoch =
        ((double)num_training_errors_this_epoch / time_.index()) * 100.0;
    cerr << "Epoch " << time_.epoch() << ": number of training errors: "
         << num_training_errors_this_epoch << " ("
         << percent_training_errors_this_epoch << "%)" << endl;
  }
}

void
PerceptronModel::TrainOnExample(CandidateSet &example) {
  time_.Tick();

  if (symbols_ != NULL) {
    example.CompileFeatures(symbols_);
  }

  bool training = true;
  ScoreCandidates(example, training);

  if (NeedToUpdate(example)) {
    if (DEBUG >= 2) {
      cerr << "Time:" << time_.to_string() << ": need to update because "
           << "best scoring index " << example.best_scoring_index()
           << " is not equal to gold index " << example.gold_index() << endl;
    }
    ++(*num_training_errors_per_epoch_.rbegin());
    ++num_training_errors_;
    Update(example);
  }
}
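
// Per-example training flow (see TrainOnExample() above): advance the training
// clock, compile any symbolic features into integer uids, score all candidates
// with the raw (non-averaged) weights, and, if the update predicate fires (by
// default, whenever the best-scoring candidate is not the gold candidate),
// record a training error and update the model.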

bool
PerceptronModel::NeedToUpdate(CandidateSet &example) {
  return update_predicate_->NeedToUpdate(this, example);
}

bool
PerceptronModel::DefaultUpdatePredicate::NeedToUpdate(Model *model,
                                                      CandidateSet &example) {
  return example.best_scoring_index() != example.gold_index();
}

void
PerceptronModel::Update(CandidateSet &example) {
  updater_->Update(this, example);
}

void
PerceptronModel::DefaultUpdater::Update(Model *m, CandidateSet &example) {
  PerceptronModel *model = dynamic_cast<PerceptronModel *>(m);
  ++(model->num_updates_);
  unordered_set<int> gold_features;
  unordered_set<int> best_scoring_features;
  model->ComputeFeaturesToUpdate(example, gold_features, best_scoring_features);

  model->models_.UpdateGoldAndCandidateFeatureAverages(model->time_,
                                                       gold_features,
                                                       best_scoring_features);
  double step_size =
      model->ComputeStepSize(gold_features, best_scoring_features, example);

  // Finally, update perceptrons.

  if (DEBUG >= 2) {
    cerr << "Updating weights for gold features [";
    for (unordered_set<int>::const_iterator it = gold_features.begin();
         it != gold_features.end(); ++it) {
      cerr << " " << *it;
    }
    cerr << "] from\n\t" << example.GetGold() << endl;

    cerr << "Updating weights for best scoring features [";
    for (unordered_set<int>::const_iterator it = best_scoring_features.begin();
         it != best_scoring_features.end(); ++it) {
      cerr << " " << *it;
    }
    cerr << "] from\n\t" << example.GetBestScoring() << endl;
  }

  double positive_step = step_size;
  model->models_.UpdateWeights(model->time_, gold_features,
                               example.GetGold().features(), positive_step);
  double negative_step = -step_size;
  model->models_.UpdateWeights(model->time_, best_scoring_features,
                               example.GetBestScoring().features(),
                               negative_step);

  if (DEBUG >= 2) {
    cerr << "Raw model: " << model->models_.GetModel(true) << endl;
    cerr << "Avg model: " << model->models_.GetModel(false) << endl;
  }
}
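
// The update above is the standard structured-perceptron step, restricted to
// the features whose values differ between the gold and best-scoring
// candidates:
//
//   w  <-  w  +  step_size * f(gold)  -  step_size * f(best_scoring)
//
// UpdateGoldAndCandidateFeatureAverages() keeps the running per-feature
// averages in sync so that the averaged model remains available for
// evaluation.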

double
PerceptronModel::Evaluate(CandidateSetIterator &development_test) {
  double total_weight = 0.0;
  double total_weighted_loss = 0.0;
  double total_oracle_loss = 0.0;
  double total_baseline_loss = 0.0;
  num_testing_errors_per_epoch_.push_back(0);

  bool not_training = false;
  size_t development_test_size = 0;
  development_test.Reset();
  while (development_test.HasNext()) {
    ++development_test_size;
    CandidateSet &candidate_set = development_test.Next();
    if (symbols_ != NULL) {
      candidate_set.CompileFeatures(symbols_);
    }
    ScoreCandidates(candidate_set, not_training);
    double loss_weight =
        use_weighted_loss() ? candidate_set.loss_weight() : 1.0;
    total_weight += loss_weight;
    total_weighted_loss += loss_weight * candidate_set.GetBestScoring().loss();
    total_oracle_loss += loss_weight * candidate_set.GetGold().loss();

    // For now, assume that the candidate sets are sorted by the baseline score.
    total_baseline_loss += loss_weight * candidate_set.Get(0).loss();
    if (candidate_set.best_scoring_index() != candidate_set.gold_index()) {
      ++(*num_testing_errors_per_epoch_.rbegin());
    }
  }

  double loss_this_epoch = total_weighted_loss / total_weight;
  loss_per_epoch_.push_back(loss_this_epoch);

  int num_testing_errors_this_epoch =
      *(num_testing_errors_per_epoch_.rbegin());
  double percent_testing_errors_this_epoch =
      ((double)num_testing_errors_this_epoch / development_test_size) * 100.0;
  double oracle_loss = total_oracle_loss / total_weight;
  double baseline_loss = total_baseline_loss / total_weight;
  cerr << "Epoch " << time_.epoch() << ": oracle loss: " << oracle_loss << endl;
  cerr << "Epoch " << time_.epoch() << ": baseline loss: " << baseline_loss
       << endl;
  cerr << "Epoch " << time_.epoch() << ": average devtest loss: "
       << loss_this_epoch << endl;
  cerr << "Epoch " << time_.epoch() << ": number of testing errors: "
       << num_testing_errors_this_epoch << " ("
       << percent_testing_errors_this_epoch << "%)" << endl;

  if (time_.epoch() == 0 ||
      loss_this_epoch < loss_per_epoch_[best_model_epoch_]) {
    best_model_epoch_ = time_.epoch();
    best_models_ = models_;
  }

  if (time_.epoch() > 0 &&
      best_model_epoch_ != time_.epoch() &&
      loss_this_epoch >= loss_per_epoch_[best_model_epoch_]) {
    ++num_epochs_in_decline_;
  } else {
    // We're in the first epoch, or we've made strictly fewer errors
    // than the previous epoch.
    num_epochs_in_decline_ = 0;
  }
  return loss_this_epoch;
}
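
// The losses reported above are weighted averages over the development-test
// set: average devtest loss = sum_i loss_weight_i * loss(best_scoring_i) /
// sum_i loss_weight_i.  The oracle and baseline losses are computed the same
// way from each set's gold candidate and from candidate 0, respectively
// (candidate sets are assumed to be sorted by baseline score).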

void
PerceptronModel::ScoreCandidates(CandidateSet &candidates, bool training) {
  candidate_set_scorer_->Score(this, candidates, training);
}

double
PerceptronModel::ScoreCandidate(Candidate &candidate, bool training) {
  bool use_raw = training;
  const FeatureVector<int,double> &model = models_.GetModel(use_raw);
  double score = kernel_fn_->Apply(model, candidate.features());
  if (DEBUG >= 2) {
    cerr << "Time:" << time_.to_string() << ": scoring candidate "
         << candidate << " with " << (use_raw ? "raw" : "avg")
         << " model: " << model << endl
         << "\tscore: " << score << endl;
  }
  candidate.set_score(score);
  return score;
}
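
// A candidate's score is kernel_fn_->Apply(model, candidate.features()); for
// the usual dot-product kernel this is simply sum_i w_i * f_i(candidate).
// The raw weights are used while training, and the averaged weights are used
// at evaluation time.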

void
PerceptronModel::CompactifyFeatureUids() {
  // First, produce mapping for uid's of current non-zero features to dense
  // interval [0,n-1] (where there are n non-zero features).
  unordered_set<int> old_uids;
  models_.weights().GetNonZeroFeatures(old_uids);
  models_.average_weights().GetNonZeroFeatures(old_uids);
  unordered_map<int, int> old_to_new_uids;
  int new_uid = 0;
  for (unordered_set<int>::const_iterator it = old_uids.begin();
       it != old_uids.end();
       ++it) {
    old_to_new_uids[*it] = new_uid++;
  }
  models_.RemapFeatureUids(old_to_new_uids);
  best_models_.RemapFeatureUids(old_to_new_uids);

  if (symbols_ != NULL) {
    Symbols *old_symbols = symbols_->Clone();
    symbols_->Clear();
    for (Symbols::const_iterator it = old_symbols->begin();
         it != old_symbols->end();
         ++it) {
      unordered_map<int, int>::const_iterator old_to_new_uid_it =
          old_to_new_uids.find(it->second);
      if (old_to_new_uid_it != old_to_new_uids.end()) {
        int new_uid = old_to_new_uid_it->second;
        const string &symbol = it->first;
        symbols_->SetIndex(symbol, new_uid);
      }
    }
    delete old_symbols;
  }
}
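
// Example of the remapping above: if the non-zero feature uids are
// {17, 342, 90001}, they are renumbered to {0, 1, 2} (in whatever order the
// unordered_set yields them), and the symbol table is rebuilt so that each
// surviving symbol maps to its feature's new uid.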

void
PerceptronModel::ComputeFeaturesToUpdate(
    const CandidateSet &example,
    unordered_set<int> &gold_features_to_update,
    unordered_set<int> &best_scoring_features_to_update) const {
  // Collect gold features that are not in best-scoring candidate.
  const FeatureVector<int,double> &gold_features =
      example.GetGold().features();
  const FeatureVector<int,double> &best_scoring_features =
      example.GetBestScoring().features();

  if (DEBUG >= 2) {
    cerr << "Gold index: " << example.gold_index()
         << "; best scoring index: " << example.best_scoring_index()
         << endl;
    cerr << "Original gold features: " << gold_features << endl
         << "Original best scoring features: " << best_scoring_features
         << endl;
  }

  gold_features.GetNonZeroFeatures(gold_features_to_update);
  best_scoring_features.RemoveEqualFeatures(gold_features,
                                            gold_features_to_update);

  if (DEBUG >= 2) {
    cerr << "Time:" << time_.to_string() << ": new gold features: [";
    for (unordered_set<int>::const_iterator it =
             gold_features_to_update.begin();
         it != gold_features_to_update.end();
         ++it) {
      cerr << " " << *it;
    }
    cerr << "]" << endl;
  }

  best_scoring_features.GetNonZeroFeatures(best_scoring_features_to_update);
  gold_features.RemoveEqualFeatures(best_scoring_features,
                                    best_scoring_features_to_update);
  if (DEBUG >= 2) {
    cerr << "Time:" << time_.to_string() << ": new best scoring features: [";
    for (unordered_set<int>::const_iterator it =
             best_scoring_features_to_update.begin();
         it != best_scoring_features_to_update.end();
         ++it) {
      cerr << " " << *it;
    }
    cerr << "]" << endl;
  }
}
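
// Net effect of the method above: gold_features_to_update ends up holding the
// uids of gold features whose values differ from the best-scoring candidate's
// (RemoveEqualFeatures() drops the uids whose weights are identical in both
// vectors), and best_scoring_features_to_update holds the mirror-image set,
// so only the features on which the two candidates actually disagree have
// their weights moved.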

} // namespace reranker
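
To make the update rule concrete, here is a minimal, self-contained sketch of the step applied by PerceptronModel::DefaultUpdater::Update above. This is not ReFr code: the sparse toy vectors are invented, and it assumes, as the UpdateWeights() signature suggests, that each selected weight is incremented by the step size times the corresponding feature value.

#include <iostream>
#include <unordered_map>

int main() {
  // Hypothetical sparse feature vectors, uid -> value.
  std::unordered_map<int, double> weights;  // the (raw) model
  const std::unordered_map<int, double> gold = {{3, 1.0}, {7, 2.0}, {9, 1.0}};
  const std::unordered_map<int, double> best = {{3, 1.0}, {8, 1.0}, {9, 3.0}};
  const double step_size = 1.0;

  // Positive step on gold features whose values differ from the best-scoring
  // candidate's (mirrors ComputeFeaturesToUpdate plus the positive_step call).
  for (const auto &p : gold) {
    auto it = best.find(p.first);
    if (it == best.end() || it->second != p.second) {
      weights[p.first] += step_size * p.second;
    }
  }
  // Negative step on best-scoring features whose values differ from the gold
  // candidate's (mirrors the negative_step call).
  for (const auto &p : best) {
    auto it = gold.find(p.first);
    if (it == gold.end() || it->second != p.second) {
      weights[p.first] -= step_size * p.second;
    }
  }

  // Feature 3 is identical in both candidates, so it is left untouched;
  // feature 7 gains +2.0, feature 8 gets -1.0, and feature 9 nets -2.0.
  for (const auto &p : weights) {
    std::cout << "feature " << p.first << ": " << p.second << "\n";
  }
  return 0;
}

The averaged model used at evaluation time then reports, for each feature, the average of its weight over all time steps rather than the final raw value, which appears to be what the TrainingVectorSet feature averages track.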