37 #ifndef RERANKER_CANDIDATE_SET_WRITER_H_ 
   38 #define RERANKER_CANDIDATE_SET_WRITER_H_ 
   46 #include "../proto/dataio.h" 
   48 #define DEFAULT_WRITER_REPORTING_INTERVAL 1000 
   56 using std::shared_ptr;
 
   67       max_num_to_write_(-1),
 
   70       reporting_interval_(reporting_interval),
 
   74   void Open(
const string &filename,
 
   77     if (verbosity_ >= 1) {
 
   78       cerr << 
"CandidateSetWriter: writing to file \"" << filename
 
   81     bool writing_to_stdout = filename == 
"-";
 
   82     ConfusionProtoIO::Mode mode =
 
   84         ConfusionProtoIO::WRITESTD : ConfusionProtoIO::WRITE;
 
   85     compressed = writing_to_stdout ? 
false : compressed;
 
   86     writer_ = 
new ConfusionProtoIO(filename, mode, compressed, use_base64);
 
   98   void Write(vector<shared_ptr<CandidateSet> > &examples,
 
   99              const string &filename,
 
  102     Open(filename, compressed, use_base64);
 
  103     bool writer_valid = 
true;
 
  104     for (vector<shared_ptr<CandidateSet> >::const_iterator it =
 
  106          writer_valid && it != examples.end();
 
  116     if (num_written_ == max_num_to_write_) {
 
  120     confusion_learning::CandidateSetMessage tmp_msg;
 
  121     candidate_set_proto_writer_.
Write(candidate_set, &tmp_msg);
 
  124     bool writer_valid = writer_->Write(tmp_msg);
 
  126       if (verbosity_ >= 3) {
 
  127         cerr << 
"CandidateSetWriter: most recent CandidateSetMessage: " 
  128              << tmp_msg.Utf8DebugString();
 
  131       if (verbosity_ >= 2) {
 
  132         cerr << 
"CandidateSetWriter: candidate set " << candidate_set;
 
  138       if (interval_written_ == reporting_interval_) {
 
  139         if (verbosity_ >= 1) {
 
  140           cerr << 
"CandidateSetWriter: wrote " << num_written_
 
  141                << 
" candidate sets." << endl;
 
  143         interval_written_ = 0;
 
  158     interval_written_ = 0;
 
  175     max_num_to_write_ = max_num_to_write;
 
  180   ConfusionProtoIO *writer_;
 
  182   int max_num_to_write_;
 
  184   long interval_written_;
 
  185   long reporting_interval_;
 
void Open(const string &filename, bool compressed, bool use_base64)
 
void Write(const CandidateSet &set, CandidateSetMessage *candidate_set_message) const 
Serializes a CandidateSet instance to a CandidateSetMessage. 
 
#define DEFAULT_WRITER_REPORTING_INTERVAL
 
virtual ~CandidateSetWriter()
 
A class to construct a CandidateSetMessage from a CandidateSet instance. 
 
void Write(vector< shared_ptr< CandidateSet > > &examples, const string &filename, bool compressed, bool use_base64)
Writes a stream of CandidateSet instances to the specified file or to standard output. 
 
void set_verbosity(int verbosity)
Sets the verbosity of this writer (mostly for debugging purposes). 
 
A class for writing streams of training or test instances, where each training or test instance is a ...
 
CandidateSetWriter(long reporting_interval=1000)
Constructs a new instance. 
 
Serializer for reranker::CandidateSet instances to CandidateSetMessage instances. ...
 
void Reset()
Resets this writer so that its internal count of the number of CandidateSet’s written goes back to ze...
 
A class to hold a set of candidates, either for training or test. 
 
bool WriteNext(const CandidateSet &candidate_set)
 
void set_max_num_to_write(int max_num_to_write)
 
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.