37 #ifndef RERANKER_CANDIDATE_SET_ITERATOR_H_
38 #define RERANKER_CANDIDATE_SET_ITERATOR_H_
50 using std::shared_ptr;
/// Returns whether this iterator contains another CandidateSet.
/// Pure virtual: each concrete iterator supplies its own definition.
63 virtual bool HasNext()
const = 0;
/// Resets this iterator back to the beginning of its backing collection.
/// Pure virtual: each concrete iterator supplies its own definition.
67 virtual void Reset() = 0;
87 template <
typename Collection>
91 collection_(collection) {
92 it_ = collection_.begin();
/// Returns whether this iterator contains another CandidateSet, i.e.
/// whether the current position it_ has not yet reached collection_.end().
95 virtual bool HasNext()
const {
return it_ != collection_.end(); }
/// Resets this iterator back to the beginning of its backing collection
/// by re-pointing it_ at collection_.begin().
97 virtual void Reset() { it_ = collection_.begin(); }
100 typename Collection::iterator it_;
102 Collection &collection_;
152 shared_ptr<const ExecutiveFeatureExtractor> efe,
155 int reporting_interval,
159 files_(files), efe_(efe),
160 compressed_(compressed), use_base64_(use_base64),
161 reader_valid_(false), file_open_(false),
162 csr_(max_examples, max_candidates, reporting_interval),
163 verbosity_(verbosity),
164 next_candidate_set_(),
165 prev_candidate_set_() {
166 file_it_ = files_.begin();
174 return next_candidate_set_.get() != NULL;
179 prev_candidate_set_ = next_candidate_set_;
180 next_candidate_set_ = shared_ptr<CandidateSet>();
182 return *prev_candidate_set_;
186 return file_open_ ? *file_it_ : string(
"");
198 file_it_ = files_.begin();
199 if (file_it_ != files_.end()) {
200 csr_.
Open(*file_it_, compressed_, use_base64_);
203 next_candidate_set_ = shared_ptr<CandidateSet>();
204 if (efe_.get() != NULL) {
211 reader_valid_ =
false;
212 while (file_it_ != files_.end() && next_candidate_set_.get() == NULL) {
213 next_candidate_set_ = csr_.
ReadNext(reader_valid_);
214 if (!reader_valid_ || next_candidate_set_.get() == NULL) {
215 if (verbosity_ >= 1) {
217 cerr <<
"Warning: could not read any training examples from file \""
218 << *file_it_ <<
"\"." << endl;
225 if (file_it_ != files_.end()) {
226 csr_.
Open(*file_it_, compressed_, use_base64_);
231 if (efe_.get() != NULL && next_candidate_set_.get() != NULL) {
232 efe_->Extract(*next_candidate_set_);
237 vector<string> files_;
238 shared_ptr<const ExecutiveFeatureExtractor> efe_;
243 CandidateSetReader csr_;
245 vector<string>::const_iterator file_it_;
246 shared_ptr<CandidateSet> next_candidate_set_;
247 shared_ptr<CandidateSet> prev_candidate_set_;
virtual ~CandidateSetIterator()
An interface specifying iteration over CandidateSet instances, using Java-style semantics (sorry...
An implementation of the CandidateSetIterator interface that iterates over CandidateSet instances tha...
virtual void Reset()
Resets this multi-file candidate set iterator so that the next CandidateSet retrieved will be the fir...
CollectionCandidateSetIterator(Collection &collection)
virtual CandidateSet & Next()
Returns the next CandidateSet.
void set_verbosity(int verbosity)
Sets the verbosity of this reader (mostly for debugging purposes).
virtual bool HasNext() const =0
Returns whether this iterator contains another CandidateSet.
const string curr_file() const
MultiFileCandidateSetIterator(vector< string > files, shared_ptr< const ExecutiveFeatureExtractor > efe, int max_examples, int max_candidates, int reporting_interval, int verbosity, bool compressed, bool use_base64)
Constructs a new instance that iterates over the CandidateSet instances serialized in the specified s...
Class for reading streams of training or test instances, where each training or test instance is a re...
virtual bool HasNext() const
Returns whether this iterator contains another CandidateSet.
virtual void Reset()=0
Resets this iterator back to the beginning of its backing collection.
A class to hold a set of candidates, either for training or test.
virtual bool HasNext() const
Returns whether this iterator contains another CandidateSet.
virtual CandidateSet & Next()
Returns the next CandidateSet.
An implementation of the CandidateSetIterator interface that is backed by an arbitrary C++ collection...
shared_ptr< CandidateSet > ReadNext(bool &reader_valid)
virtual void Reset()
Resets this iterator back to the beginning of its backing collection.
long num_read()
Returns the number of CandidateSet instances read since the last invocation of the Reset method...
Class to hold a single training instance for a reranker, which is a set of examples, typically the n-best output of some input process, possibly including a gold-standard feature vector.
void Open(const string &filename, bool compressed, bool use_base64, bool reset_counters=true)
virtual CandidateSet & Next()=0
Returns the next CandidateSet.