47 #include "../proto/data.pb.h"
48 #include "../proto/dataio.h"
51 #define REPORTING_INTERVAL 100
53 #define MAX_NUM_EXAMPLES 1000
54 #define MAX_NUM_CANDIDATES 1000000
57 using namespace reranker;
// Entry point of the training driver: reads training and devtest candidate
// sets, trains a model on them, and writes the serialized model to a file.
// NOTE(review): this listing omits several original lines (see the gaps in
// the embedded line numbers), so the declarations of i, csr, model and
// model_writer, as well as the closing braces, are not visible here.
60 int main(
int argc,
char **argv) {
// Usage message. The condition guarding it is not shown in this listing
// (presumably an argument-count check -- confirm against the full source).
62 cout <<
"usage: <training data>+ <devtest data> <model output file>"
// Collect training file names: every argv[i] with i < argc - 2.
// The initial value of i is set on a line that is not shown.
66 vector<string> training_files;
68 for ( ; i < argc - 2; ++i) {
69 training_files.push_back(argv[i]);
// The two remaining arguments, in order: devtest data file, model output file.
71 const string devtest_file = argv[i++];
72 const string model_file = argv[i++];
// Destination for all training examples, plus the three flags handed to
// every Read() call below; all three are enabled.
78 vector<shared_ptr<CandidateSet> > training_examples;
79 bool compressed =
true;
80 bool use_base64 =
true;
81 bool reset_counters =
true;
// Read each training file in turn into the single training_examples vector.
82 for (vector<string>::const_iterator it = training_files.begin();
83 it != training_files.end();
85 csr.
Read(*it, compressed, use_base64, reset_counters, training_examples);
// Report how many training examples were read.
88 cout <<
"Read " << training_examples.size() <<
" training examples."
// Read the devtest file with the same reader and the same flags.
92 vector<shared_ptr<CandidateSet> > devtest_examples;
93 csr.
Read(devtest_file, compressed, use_base64, reset_counters,
// Report how many devtest examples were read.
96 cout <<
"Read " << devtest_examples.size() <<
" devtest examples."
// Tail of a declaration whose beginning is not shown (presumably a typedef
// naming the iterator type used on the next two lines -- TODO confirm).
102 CandidateSetVectorIt;
// Wrap both example vectors in iterators and train the model on them.
103 CandidateSetVectorIt training_examples_it(training_examples);
104 CandidateSetVectorIt devtest_examples_it(devtest_examples);
105 model->Train(training_examples_it, devtest_examples_it);
// Called after training and before the model is serialized below.
107 model->CompactifyFeatureUids();
// Have model_writer populate a ModelMessage from the trained model.
114 confusion_learning::ModelMessage model_message;
115 model_writer->Write(model.get(), &model_message);
// Open model_file in WRITE mode, reusing the compressed/use_base64 flags
// from the readers, and write the model message to it.
118 shared_ptr<ConfusionProtoIO> proto_writer(
119 new ConfusionProtoIO(model_file, ConfusionProtoIO::WRITE,
120 compressed, use_base64));
121 proto_writer->Write(model_message);
// Final message printed after the model has been written.
123 cout <<
"Have a nice day!" << endl;
Provides the reranker::PerceptronModel reranker class.
Provides the reranker::Candidate class for representing a candidate hypothesis from an initial model...
Serializer for reranker::PerceptronModel instances to ModelMessage instances.
Provides an interface and some implementations for iterating over CandidateSet instances.
void set_verbosity(int verbosity)
Sets the verbosity of this reader (mostly for debugging purposes).
This class implements a perceptron model reranker.
#define REPORTING_INTERVAL
Class for reading streams of training or test instances, where each training or test instance is a re...
void Read(const string &filename, bool compressed, bool use_base64, bool reset_counters, vector< shared_ptr< CandidateSet > > &examples)
Reads a stream of CandidateSet instances from the specified file or from standard input...
int main(int argc, char **argv)
An implementation of the CandidateSetIterator interface that is backed by an arbitrary C++ collection...
A class to construct a ModelMessage from a PerceptronModel instance.
A class for reading streams of training or test instances, where each training or test instance is a ...
Interface for serializer for reranker::Model instances to ModelMessage instances. ...
#define MAX_NUM_CANDIDATES
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
Reranker model interface.