Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
candidate.H
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
36 
37 #ifndef RERANKER_CANDIDATE_H_
38 #define RERANKER_CANDIDATE_H_
39 
40 #include <iostream>
41 #include <string>
42 #include <sstream>
43 
44 #include "../proto/data.pb.h"
45 #include "factory.H"
46 #include "feature-vector.H"
47 #include "symbol-table.H"
48 
49 namespace reranker {
50 
51 using std::ostream;
52 using std::string;
53 
54 class Model;
55 
60 class Candidate {
61  public:
62  friend class FeatureExtractor;
72  double loss,
73  double baseline_score,
74  int num_words,
75  const string &raw_data) :
76  index_(index), num_errors_(0), num_correct_(0),
77  loss_(loss), score_(0.0), baseline_score_(baseline_score),
78  num_words_(num_words), raw_data_(raw_data), compiled_(false) {
79  }
80 
91  double loss,
92  double baseline_score,
93  int num_words,
94  const string &raw_data,
97  index_(index), num_errors_(0), num_correct_(0),
98  loss_(loss), score_(0.0), baseline_score_(baseline_score),
99  num_words_(num_words), features_(features),
100  symbolic_features_(symbolic_features), raw_data_(raw_data),
101  compiled_(false) {
102  }
103 
105  virtual ~Candidate() { }
106 
109  public:
114  virtual int Compare(const Model &model,
115  const Candidate &c1, const Candidate &c2) = 0;
116  };
117 
118  // accessors
120  // in its set.
121  int index() const { return index_; }
124  int num_errors() const { return num_errors_; }
127  int num_correct() const { return num_correct_; }
129  double loss() const { return loss_; }
131  double score() const { return score_; }
133  double baseline_score() const { return baseline_score_; }
135  int num_words() const { return num_words_; }
137  const FeatureVector<int,double> &features() const { return features_; }
140  return symbolic_features_;
141  }
143  const string &raw_data() const { return raw_data_; }
147  bool compiled() const { return compiled_; }
148 
149  // mutators
151  void set_raw_data(const string &raw_data) { raw_data_ = raw_data; }
152 
156  void set_score(double score) { score_ = score; }
157 
162  baseline_score_ = baseline_score;
163  }
164 
168  void set_loss(double loss) { loss_ = loss; }
169 
189  bool Compile(Symbols *symbols,
190  bool clear_features = false,
191  bool clear_symbolic_features = true,
192  bool force = false);
193 
209  void Decompile(Symbols *symbols,
210  bool clear_symbolic_features = false,
211  bool clear_features = true,
212  bool force = false);
213 
220  friend ostream &operator<<(ostream &os, const Candidate &c) {
221  os << "{index:" << c.index() << "; loss:" << c.loss()
222  << "; score:" << c.score() << "; baseline_score:" << c.baseline_score()
223  << "; features:" << c.features()
224  << "; symbolic_features:" << c.symbolic_features()
225  << "; raw_data:\"" << c.raw_data() << "\""
226  << "; compiled: " << (c.compiled() ? "true" : "false")
227  << "}";
228  return os;
229  }
230 
232  string to_string() {
233  std::ostringstream oss;
234  oss << *this;
235  return oss.str();
236  }
237 
238  private:
239  // Methods for friend class FeatureExtractor.
240  FeatureVector<int,double> &mutable_features() { return features_; }
241  FeatureVector<string,double> &mutable_symbolic_features() {
242  return symbolic_features_;
243  }
244 
245  // data members
246  // The index of this candidate in its ordered list of candidates
247  // (its candidate set).
248  // TODO(dbikel,kbhall): Is this really necessary? The CandidateSet
249  // will contain this index. Maybe this will be
250  // necessary if we want to pass a single
251  // Candidate to a feature function that needs
252  // its "rank" in the CandidateSet.
253  int index_;
254  // TODO(dbikel,kbhall): The following three data members seem overly
255  // specific to WER (and therefore to speech
256  // evaluation). The number of word errors in
257  // this candidate.
258  int num_errors_;
259 // The number of correct words in this candidate.
260  int num_correct_;
261  // The loss of this candidate relative to the reference. For speech, this
262  // value should be (num_errors_ / num_correct_).
263  double loss_;
264  // The score of this candidate according to the reranker.
265  double score_;
266  // The baseline model score for this candidate.
267  double baseline_score_;
268  // The number of words in this candidate. (This value might be hard
269  // or impossible to determine simply from features.)
270  int num_words_;
271  // The features for this candidate.
272  FeatureVector<int,double> features_;
273  // The symbolic features for this candidate.
274  FeatureVector<string,double> symbolic_features_;
275  // The raw data (string) corresponding to this candidate.
276  string raw_data_;
277  // Whether this candidate's symbolic features have been "compiled".
278  bool compiled_;
279 };
280 
281 #define REGISTER_NAMED_CANDIDATE_COMPARATOR(TYPE,NAME) \
282  REGISTER_NAMED(TYPE,NAME,Candidate::Comparator)
283 
284 #define REGISTER_CANDIDATE_COMPARATOR(TYPE) \
285  REGISTER_NAMED_CANDIDATE_COMPARATOR(TYPE,TYPE)
286 
287 } // namespace reranker
288 
289 #endif
Model is an interface for reranking models.
Definition: model.H:141
double score() const
Returns the reranker’s score for this candidate.
Definition: candidate.H:131
Provides the reranker::Symbols interface as well as the reranker::StaticSymbolTable implementation...
bool Compile(Symbols *symbols, bool clear_features=false, bool clear_symbolic_features=true, bool force=false)
Compiles any symbolic features in this candidate.
Definition: candidate.C:42
bool compiled() const
Returns whether this candidate’s symbolic features have been compiled.
Definition: candidate.H:147
const string & raw_data() const
Returns the raw data (typically the sentence) for this candidate.
Definition: candidate.H:143
void Decompile(Symbols *symbols, bool clear_symbolic_features=false, bool clear_features=true, bool force=false)
Decompiles any non-symbolic features in this candidate.
Definition: candidate.C:67
const FeatureVector< string, double > & symbolic_features() const
Returns the symbolic feature vector for this candidate.
Definition: candidate.H:139
void set_loss(double loss)
Sets the loss of this candidate.
Definition: candidate.H:168
double baseline_score() const
Returns the baseline model score for this candidate.
Definition: candidate.H:133
void set_baseline_score(double baseline_score)
Sets the baseline score of this candidate.
Definition: candidate.H:161
An inner interface specifying comparison between two Candidate instances.
Definition: candidate.H:108
string to_string()
Returns the string that would be output by operator<<.
Definition: candidate.H:232
This class makes it easy for concrete subclasses to extract features based on input from a file...
int index() const
Returns the index of this candidate relative to the other candidates.
Definition: candidate.H:121
friend ostream & operator<<(ostream &os, const Candidate &c)
Outputs a human-readable string version of this Candidate instance to the specified ostream...
Definition: candidate.H:220
int num_words() const
Returns the number of words in this candidate.
Definition: candidate.H:135
int num_errors() const
Returns the number of word errors of this candidate relative to a reference string.
Definition: candidate.H:124
An interface specifying a converter from symbols (strings) to int indices.
Definition: symbol-table.H:57
Candidate(int index, double loss, double baseline_score, int num_words, const string &raw_data)
Constructor for a candidate without features.
Definition: candidate.H:71
A class to represent a candidate in a set of candidates that constitutes a training instance for a re...
Definition: candidate.H:60
An abstract base class/interface for all feature extractors.
void set_score(double score)
Sets the score of this candidate.
Definition: candidate.H:156
virtual ~Candidate()
Destroys this candidate.
Definition: candidate.H:105
double loss() const
Returns the loss of this candidate.
Definition: candidate.H:129
Defines the reranker::FeatureVector class, which, as it happens, is used to store feature vectors...
void set_raw_data(const string &raw_data)
Sets the raw data (typically the sentence) for this candidate.
Definition: candidate.H:151
An interface to make it easier to implement Factory-constructible types by implementing both required...
Definition: factory.H:382
Candidate(int index, double loss, double baseline_score, int num_words, const string &raw_data, const FeatureVector< int, double > &features, const FeatureVector< string, double > &symbolic_features)
Constructor for a candidate with "compiled" features.
Definition: candidate.H:90
Provides a generic dynamic object factory.
int num_correct() const
Returns the number of correct words of this candidate relative to a reference string.
Definition: candidate.H:127
virtual int Compare(const Model &model, const Candidate &c1, const Candidate &c2)=0
Compares the two specified Candidate instances.
const FeatureVector< int, double > & features() const
Returns the feature vector for this candidate.
Definition: candidate.H:137