37 #ifndef RERANKER_CANDIDATE_SET_PROTO_READER_H_
38 #define RERANKER_CANDIDATE_SET_PROTO_READER_H_
40 #include <unordered_map>
47 #include "../proto/data.pb.h"
48 #include "../proto/model.pb.h"
52 using std::unordered_map;
54 using confusion_learning::CandidateSetMessage;
87 void Read(
const CandidateSetMessage &m,
int max_candidates,
94 int CountTokens(
const string &s,
const char *delimiters =
" \t")
const {
98 while (begin_pos != string::npos) {
99 begin_pos = s.find_first_not_of(delimiters, end_pos);
100 end_pos = s.find_first_of(delimiters, begin_pos);
101 if (end_pos == string::npos) {
102 end_pos = s.length();
104 if (begin_pos != string::npos) {
112 double ComputeLoss(CandidateSet &set,
const string &candidate_raw_data);
115 Tokenizer tokenizer_;
void Read(const CandidateSetMessage &m, CandidateSet &set)
Fills in the specified CandidateSet based on the specified CandidateSetMessage, crucially constructing new Candidate...
CandidateSetProtoReader()
Provides the reranker::Candidate class for representing a candidate hypothesis from an initial model...
Provides the Tokenizer class.
static void Clear()
Clears the strings from the internal data structure.
virtual ~CandidateSetProtoReader()
A class to hold a set of candidates, either for training or test.
A class to fill in a CandidateSet based on a CandidateSetMessage, crucially constructing new Candidate...
Provides the reranker::StringCanonicalizer class.
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.