Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
candidate-set.H
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
37 
38 #ifndef RERANKER_CANDIDATE_SET_H_
39 #define RERANKER_CANDIDATE_SET_H_
40 
41 #include <iostream>
42 #include <string>
43 #include <memory>
44 #include <vector>
45 
46 #include "candidate.H"
47 #include "factory.H"
48 
49 namespace reranker {
50 
51 using std::shared_ptr;
52 using std::ostream;
53 using std::string;
54 using std::vector;
55 
62 class CandidateSet {
63  public:
65  CandidateSet() : compiled_(false) { }
69  CandidateSet(const string &key) : training_key_(key), compiled_(false) { }
71  virtual ~CandidateSet() { }
72 
73  typedef vector<shared_ptr<Candidate> >::iterator iterator;
74  typedef vector<shared_ptr<Candidate> >::const_iterator const_iterator;
75 
79  class Scorer : public FactoryConstructible {
80  public:
81  virtual void Score(Model *model,
82  CandidateSet &candidates, bool training) = 0;
83  };
84 
85  // accessors
86  const_iterator begin() const { return candidates_.begin(); }
87 
88  const_iterator end() const { return candidates_.end(); }
89 
90  iterator begin() { return candidates_.begin(); }
91 
92  iterator end() { return candidates_.end(); }
93 
94  size_t size() const { return candidates_.size(); }
95 
96  size_t best_scoring_index() const { return best_scoring_index_; }
97  size_t gold_index() const { return gold_index_; }
98 
99  const string &training_key() const { return training_key_; }
100 
101  Candidate &Get(size_t idx) {
102  return *(candidates_[idx]);
103  }
104 
105  const Candidate &GetGold() const {
106  return *(candidates_[gold_index_]);
107  }
108  const Candidate &GetBestScoring() const {
109  return *(candidates_[best_scoring_index_]);
110  }
111 
112  const string &reference_string() const { return reference_string_; }
113 
115  return reference_string_token_count_;
116  }
117 
122  double loss_weight() const {
123  return reference_string_token_count_;
124  }
125 
130  bool compiled() const { return compiled_; }
131 
132  // mutators
133  void AddCandidate(shared_ptr<Candidate> candidate) {
134  candidates_.push_back(candidate);
135  }
136 
163  bool CompileFeatures(Symbols *symbols,
164  bool clear_features = false,
165  bool clear_symbolic_features = true,
166  bool force = false) {
167  if (!compiled_ || force) {
168  for (iterator it = begin(); it != end(); ++it) {
169  compiled_ |= (*it)->Compile(symbols, clear_features,
170  clear_symbolic_features, force);
171  }
172  }
173  return compiled_;
174  }
175 
197  void DecompileFeatures(Symbols *symbols,
198  bool clear_symbolic_features = false,
199  bool clear_features = true,
200  bool force = false) {
201  if (compiled_ || force) {
202  for (iterator it = begin(); it != end(); ++it) {
203  (*it)->Decompile(symbols, clear_symbolic_features,
204  clear_features, force);
205  }
206  }
207  compiled_ = false;
208  }
209 
212  void ClearRawData() {
213  for (iterator it = begin(); it != end(); ++it) {
214  (*it)->set_raw_data(empty_string);
215  }
216  }
217 
218  void set_best_scoring_index(size_t index) {
219  best_scoring_index_ = index;
220  }
221 
222  void set_gold_index(size_t index) {
223  gold_index_ = index;
224  }
225 
226  void set_training_key(const string &training_key) {
227  training_key_ = training_key;
228  }
229 
231  reference_string_ = reference_string;
232  }
233 
235  reference_string_token_count_ = reference_string_token_count;
236  }
237 
238  // I/O methods
239 
240  friend ostream &operator<<(ostream &os, const CandidateSet &set) {
241  os << "Candidate set with key \"" << set.training_key()
242  << "\" and reference string\n\t" << set.reference_string()
243  << "\nwith " << set.size() << " candidates:\n";
244  for (const_iterator it = set.begin(); it != set.end(); ++it) {
245  os << "\t" << *(*it) << "\n";
246  }
247  return os;
248  }
249 
250  private:
251  // data members
253  vector<shared_ptr<Candidate> > candidates_;
255  string training_key_;
257  size_t gold_index_;
259  size_t best_scoring_index_;
261  string reference_string_;
263  int reference_string_token_count_;
265  bool compiled_;
266 
267  static string empty_string;
268 };
269 
// Registers the class TYPE under the name NAME as a CandidateSet::Scorer.
// Expands to REGISTER_NAMED(TYPE, NAME, CandidateSet::Scorer); REGISTER_NAMED
// itself is not defined in this file (presumably it comes from factory.H --
// confirm there for the exact registration semantics).
270 #define REGISTER_NAMED_CANDIDATE_SET_SCORER(TYPE,NAME) \
271  REGISTER_NAMED(TYPE,NAME,CandidateSet::Scorer)
272 
// Convenience form of REGISTER_NAMED_CANDIDATE_SET_SCORER that passes TYPE
// as both the class and the name arguments.
273 #define REGISTER_CANDIDATE_SET_SCORER(TYPE) \
274  REGISTER_NAMED_CANDIDATE_SET_SCORER(TYPE,TYPE)
275 
276 } // namespace reranker
277 
278 #endif
const string & reference_string() const
const Candidate & GetGold() const
Model is an interface for reranking models.
Definition: model.H:141
Provides the reranker::Candidate class for representing a candidate hypothesis from an initial model...
void set_reference_string(const string &reference_string)
friend ostream & operator<<(ostream &os, const CandidateSet &set)
const Candidate & GetBestScoring() const
bool CompileFeatures(Symbols *symbols, bool clear_features=false, bool clear_symbolic_features=true, bool force=false)
Compiles any symbolic features in this candidate set.
void ClearRawData()
Clears the raw data for all candidates in this set by setting each to be the empty string...
virtual void Score(Model *model, CandidateSet &candidates, bool training)=0
vector< shared_ptr< Candidate > >::iterator iterator
Definition: candidate-set.H:73
const string & training_key() const
Definition: candidate-set.H:99
void AddCandidate(shared_ptr< Candidate > candidate)
void DecompileFeatures(Symbols *symbols, bool clear_symbolic_features=false, bool clear_features=true, bool force=false)
Decompiles any non-symbolic features in the candidates in this candidate set.
An inner interface for a model to score a CandidateSet.
Definition: candidate-set.H:79
double loss_weight() const
Returns the weight of the loss for this candidate set’s reference.
void set_training_key(const string &training_key)
size_t size() const
Definition: candidate-set.H:94
void set_gold_index(size_t index)
A class to hold a set of candidates, either for training or test.
Definition: candidate-set.H:62
An interface specifying a converter from symbols (strings) to int indices.
Definition: symbol-table.H:57
A class to represent a candidate in a set of candidates that constitutes a training instance for a re...
Definition: candidate.H:60
CandidateSet()
Constructs a new candidate set with no information set.
Definition: candidate-set.H:65
void set_reference_string_token_count(int reference_string_token_count)
bool compiled() const
Returns whether any symbolic features in any of the candidates in this candidate set were compiled by...
vector< shared_ptr< Candidate > >::const_iterator const_iterator
Definition: candidate-set.H:74
size_t best_scoring_index() const
Definition: candidate-set.H:96
Candidate & Get(size_t idx)
const_iterator begin() const
Definition: candidate-set.H:86
An interface to make it easier to implement Factory-constructible types by implementing both required...
Definition: factory.H:382
int reference_string_token_count() const
void set_best_scoring_index(size_t index)
const_iterator end() const
Definition: candidate-set.H:88
Provides a generic dynamic object factory.
virtual ~CandidateSet()
Destroys this instance.
Definition: candidate-set.H:71
CandidateSet(const string &key)
Constructs a candidate set with the specified key.
Definition: candidate-set.H:69
size_t gold_index() const
Definition: candidate-set.H:97