Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
candidate-set-iterator.H
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
36 
37 #ifndef RERANKER_CANDIDATE_SET_ITERATOR_H_
38 #define RERANKER_CANDIDATE_SET_ITERATOR_H_
39 
40 #include <memory>
41 #include <vector>
42 
43 #include "candidate-set.H"
44 #include "candidate-set-reader.H"
46 
47 namespace reranker {
48 
49 using std::vector;
50 using std::shared_ptr;
51 
59  public:
60  virtual ~CandidateSetIterator() { }
63  virtual bool HasNext() const = 0;
65  virtual CandidateSet &Next() = 0;
67  virtual void Reset() = 0;
68 };
69 
87 template <typename Collection>
89  public:
90  CollectionCandidateSetIterator(Collection &collection) :
91  collection_(collection) {
92  it_ = collection_.begin();
93  }
94 
95  virtual bool HasNext() const { return it_ != collection_.end(); }
96  virtual CandidateSet &Next() { return **it_++; }
97  virtual void Reset() { it_ = collection_.begin(); }
98  private:
100  typename Collection::iterator it_;
102  Collection &collection_;
103 };
104 
105 
113  public:
151  MultiFileCandidateSetIterator(vector<string> files,
152  shared_ptr<const ExecutiveFeatureExtractor> efe,
153  int max_examples,
154  int max_candidates,
155  int reporting_interval,
156  int verbosity,
157  bool compressed,
158  bool use_base64) :
159  files_(files), efe_(efe),
160  compressed_(compressed), use_base64_(use_base64),
161  reader_valid_(false), file_open_(false),
162  csr_(max_examples, max_candidates, reporting_interval),
163  verbosity_(verbosity),
164  next_candidate_set_(),
165  prev_candidate_set_() {
166  file_it_ = files_.begin();
167  csr_.set_verbosity(0);
168  Reset();
169  csr_.set_verbosity(verbosity);
170  }
171 
173  virtual bool HasNext() const {
174  return next_candidate_set_.get() != NULL;
175  }
176 
178  virtual CandidateSet &Next() {
179  prev_candidate_set_ = next_candidate_set_;
180  next_candidate_set_ = shared_ptr<CandidateSet>();
181  ReadNext();
182  return *prev_candidate_set_;
183  }
184 
185  const string curr_file() const {
186  return file_open_ ? *file_it_ : string("");
187  }
188 
193  virtual void Reset() {
194  if (file_open_) {
195  csr_.Close();
196  file_open_ = false;
197  }
198  file_it_ = files_.begin();
199  if (file_it_ != files_.end()) {
200  csr_.Open(*file_it_, compressed_, use_base64_);
201  file_open_ = true;
202  }
203  next_candidate_set_ = shared_ptr<CandidateSet>();
204  if (efe_.get() != NULL) {
205  efe_->Reset();
206  }
207  ReadNext();
208  }
209  private:
210  void ReadNext() {
211  reader_valid_ = false;
212  while (file_it_ != files_.end() && next_candidate_set_.get() == NULL) {
213  next_candidate_set_ = csr_.ReadNext(reader_valid_);
214  if (!reader_valid_ || next_candidate_set_.get() == NULL) {
215  if (verbosity_ >= 1) {
216  if (csr_.num_read() == 0) {
217  cerr << "Warning: could not read any training examples from file \""
218  << *file_it_ << "\"." << endl;
219  }
220  }
221  // Reached eof. Try to open next file.
222  csr_.Close();
223  file_open_ = false;
224  ++file_it_;
225  if (file_it_ != files_.end()) {
226  csr_.Open(*file_it_, compressed_, use_base64_);
227  file_open_ = true;
228  }
229  }
230  }
231  if (efe_.get() != NULL && next_candidate_set_.get() != NULL) {
232  efe_->Extract(*next_candidate_set_);
233  }
234  }
235 
236  // data members
237  vector<string> files_;
238  shared_ptr<const ExecutiveFeatureExtractor> efe_;
239  bool compressed_;
240  bool use_base64_;
241  bool reader_valid_;
242  bool file_open_;
243  CandidateSetReader csr_;
244  int verbosity_;
245  vector<string>::const_iterator file_it_;
246  shared_ptr<CandidateSet> next_candidate_set_;
247  shared_ptr<CandidateSet> prev_candidate_set_;
248 };
249 
250 } // namespace reranker
251 
252 #endif
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
An implementation of the CandidateSetIterator interface that iterates over CandidateSet instances tha...
virtual void Reset()
Resets this multi-file candidate set iterator so that the next CandidateSet retrieved will be the fir...
CollectionCandidateSetIterator(Collection &collection)
virtual CandidateSet & Next()
Returns the next CandidateSet.
void set_verbosity(int verbosity)
Sets the verbosity of this reader (mostly for debugging purposes).
virtual bool HasNext() const =0
Returns whether this iterator contains another CandidateSet.
Provides the reranker::ExecutiveFeatureExtractor class.
MultiFileCandidateSetIterator(vector< string > files, shared_ptr< const ExecutiveFeatureExtractor > efe, int max_examples, int max_candidates, int reporting_interval, int verbosity, bool compressed, bool use_base64)
Constructs a new instance that iterates over the CandidateSet instances serialized in the specified s...
Class for reading streams of training or test instances, where each training or test instance is a re...
virtual bool HasNext() const
Returns whether this iterator contains another CandidateSet.
virtual void Reset()=0
Resets this iterator back to the beginning of its backing collection.
A class to hold a set of candidates, either for training or test.
Definition: candidate-set.H:62
virtual bool HasNext() const
Returns whether this iterator contains another CandidateSet.
virtual CandidateSet & Next()
Returns the next CandidateSet.
An implementation of the CandidateSetIterator interface that is backed by an arbitrary C++ collection...
shared_ptr< CandidateSet > ReadNext(bool &reader_valid)
virtual void Reset()
Resets this iterator back to the beginning of its backing collection.
long num_read()
Returns the number of CandidateSet instances read since the last invocation of the Reset method...
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
void Open(const string &filename, bool compressed, bool use_base64, bool reset_counters=true)
virtual CandidateSet & Next()=0
Returns the next CandidateSet.