37 #ifndef RERANKER_CANDIDATE_SET_WRITER_H_
38 #define RERANKER_CANDIDATE_SET_WRITER_H_
46 #include "../proto/dataio.h"
48 #define DEFAULT_WRITER_REPORTING_INTERVAL 1000
56 using std::shared_ptr;
67 max_num_to_write_(-1),
70 reporting_interval_(reporting_interval),
74 void Open(
const string &filename,
77 if (verbosity_ >= 1) {
78 cerr <<
"CandidateSetWriter: writing to file \"" << filename
81 bool writing_to_stdout = filename ==
"-";
82 ConfusionProtoIO::Mode mode =
84 ConfusionProtoIO::WRITESTD : ConfusionProtoIO::WRITE;
85 compressed = writing_to_stdout ?
false : compressed;
86 writer_ =
new ConfusionProtoIO(filename, mode, compressed, use_base64);
98 void Write(vector<shared_ptr<CandidateSet> > &examples,
99 const string &filename,
102 Open(filename, compressed, use_base64);
103 bool writer_valid =
true;
104 for (vector<shared_ptr<CandidateSet> >::const_iterator it =
106 writer_valid && it != examples.end();
116 if (num_written_ == max_num_to_write_) {
120 confusion_learning::CandidateSetMessage tmp_msg;
121 candidate_set_proto_writer_.
Write(candidate_set, &tmp_msg);
124 bool writer_valid = writer_->Write(tmp_msg);
126 if (verbosity_ >= 3) {
127 cerr <<
"CandidateSetWriter: most recent CandidateSetMessage: "
128 << tmp_msg.Utf8DebugString();
131 if (verbosity_ >= 2) {
132 cerr <<
"CandidateSetWriter: candidate set " << candidate_set;
138 if (interval_written_ == reporting_interval_) {
139 if (verbosity_ >= 1) {
140 cerr <<
"CandidateSetWriter: wrote " << num_written_
141 <<
" candidate sets." << endl;
143 interval_written_ = 0;
158 interval_written_ = 0;
175 max_num_to_write_ = max_num_to_write;
180 ConfusionProtoIO *writer_;
182 int max_num_to_write_;
184 long interval_written_;
185 long reporting_interval_;
void Open(const string &filename, bool compressed, bool use_base64)
void Write(const CandidateSet &set, CandidateSetMessage *candidate_set_message) const
Serializes a CandidateSet instance to a CandidateSetMessage.
#define DEFAULT_WRITER_REPORTING_INTERVAL
virtual ~CandidateSetWriter()
A class to construct a CandidateSetMessage from a CandidateSet instance.
void Write(vector< shared_ptr< CandidateSet > > &examples, const string &filename, bool compressed, bool use_base64)
Writes a stream of CandidateSet instances to the specified file or to standard output.
void set_verbosity(int verbosity)
Sets the verbosity of this writer (mostly for debugging purposes).
A class for writing streams of training or test instances, where each training or test instance is a ...
CandidateSetWriter(long reporting_interval=1000)
Constructs a new instance.
Serializer for reranker::CandidateSet instances to CandidateSetMessage instances. ...
void Reset()
Resets this writer so that its internal count of the number of CandidateSet’s written goes back to ze...
A class to hold a set of candidates, either for training or test.
bool WriteNext(const CandidateSet &candidate_set)
void set_max_num_to_write(int max_num_to_write)
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.