42 #include "../proto/data.pb.h"
43 #include "../proto/dataio.h"
44 #include "../proto/model.pb.h"
45 #include "../utils/kdebug.h"
54 #define DEFAULT_MODEL_PROTO_READER_SPEC "PerceptronModelProtoReader()"
57 using namespace reranker;
58 using confusion_learning::FeatureMessage;
59 using confusion_learning::ModelMessage;
61 int main(
int argc,
char* argv[]) {
63 bool use_integer_feats =
false;
65 string devtest_filename;
66 int max_examples_to_read = -1;
69 while ((option_char = getopt(argc, argv,
"Io:d:n:")) != EOF) {
70 switch (option_char) {
72 use_integer_feats =
true;
78 devtest_filename = optarg;
81 max_examples_to_read = atoi(optarg);
84 cerr <<
"usage: " << argv[0]
85 <<
" [-E] [-I] [-d <devtest examples>] [-o <output file>]"
87 cerr <<
"-E - normalize with the total number of errors" << endl;
88 cerr <<
"-I - use integer feature id's from proto" << endl;
94 ModelMessage model_with_feats;
98 ConfusionProtoIO reader;
99 ConfusionProtoIO* writer;
100 if (output_name.empty()) {
101 writer =
new ConfusionProtoIO(
"", ConfusionProtoIO::WRITESTD,
false,
true);
103 writer =
new ConfusionProtoIO(output_name, ConfusionProtoIO::WRITE,
true,
109 getline(cin, input_data);
110 if (input_data.empty()) {
113 int delim_pos = input_data.find(
'\t');
114 string feat_id = input_data.substr(0, delim_pos);
115 string value = input_data.substr(delim_pos + 1);
117 if (feat_id.compare(ModelInfoReducer::kModelMessageFeatureName) == 0) {
118 if (model_with_feats.num_iterations() > 0) {
119 cerr <<
"Merging in more than one model message." << endl;
122 ModelMessage new_model;
123 if (!reader.DecodeBase64(value, &new_model)) {
124 cerr <<
"Error decoding message: " << value.c_str() << endl;
127 model_with_feats.MergeFrom(new_model);
128 writer->Write(new_model);
130 FeatureMessage* feat =
131 model_with_feats.mutable_raw_parameters()->add_feature();
132 if (!reader.DecodeBase64(value, feat)) {
133 cerr <<
"Error decoding message: " << value.c_str() << endl;
137 if (model_with_feats.raw_parameters().feature_size() == 0) {
138 cerr <<
"Empty model, nothing to output." << endl;
142 for (
int fix = 0; fix < model_with_feats.raw_parameters().feature_size();
144 FeatureMessage* feat =
145 model_with_feats.mutable_raw_parameters()->mutable_feature(fix);
146 if (!isfinite(feat->value()) || !isfinite(feat->avg_value())) {
147 cerr <<
"WARNING: feature " << feat->name() <<
" (ID:"
148 << feat->id() <<
") has non-finite value." << endl;
150 if (model_with_feats.training_errors() > 0) {
151 feat->set_value(feat->value() / model_with_feats.training_errors());
152 feat->set_avg_value(feat->avg_value() / model_with_feats.training_errors());
153 if (!isfinite(feat->value()) || !isfinite(feat->avg_value())) {
154 cerr <<
"WARNING: after error normalization, feature "
155 << feat->name() <<
" (ID:" << feat->id()
156 <<
") has non-finite value." << endl;
160 writer->Write(*feat);
165 if (! devtest_filename.empty()) {
168 shared_ptr<Model> model = model_reader.
Read(model_with_feats);
170 vector<shared_ptr<CandidateSet> > devtest_examples;
173 csr.
Read(devtest_filename,
true,
true,
true, devtest_examples);
175 CandidateSetVectorIt;
177 new CandidateSetVectorIt(devtest_examples);
179 model->Evaluate(*devtest_it);
180 loss = model->loss_per_epoch().back();
183 cout << loss << endl;
int main(int argc, char *argv[])
Provides the reranker::Candidate class for representing a candidate hypothesis from an initial model...
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
Provides an interface and some implementations for iterating over CandidateSet instances.
Provides the ModelReader class, which can create Model instances from a file.
shared_ptr< Model > Read(const string &filename, bool compressed, bool use_base64)
void set_verbosity(int verbosity)
Sets the verbosity of this reader (mostly for debugging purposes).
Class for reading streams of training or test instances, where each training or test instance is a re...
Reducer classes for trainer.
void Read(const string &filename, bool compressed, bool use_base64, bool reset_counters, vector< shared_ptr< CandidateSet > > &examples)
Reads a stream of CandidateSet instances from the specified file or from standard input...
An implementation of the CandidateSetIterator interface that is backed by an arbitrary C++ collection...
A class for reading streams of training or test instances, where each training or test instance is a ...
Provides a generic dynamic object factory.
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
Knows how to create Model instances that have been serialized to a file.