Reranker Framework (ReFr)
Reranking framework for structure prediction and discriminative language modeling
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
feature-vector.H
Go to the documentation of this file.
1 // Copyright 2012, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
36 
37 #ifndef RERANKER_FEATURE_VECTOR_H_
38 #define RERANKER_FEATURE_VECTOR_H_
39 
40 #include <iostream>
41 #include <map>
42 #include <string>
43 #include <unordered_map>
44 #include <unordered_set>
45 #include <vector>
46 
47 #include "string-canonicalizer.H"
48 
49 namespace reranker {
50 
51 using std::cerr;
52 using std::endl;
53 using std::map;
54 using std::ostream;
55 using std::string;
56 using std::unordered_map;
57 using std::unordered_set;
58 
59 template <typename T>
60 struct DelKey {
61  static const T Get() { return T(); }
62 };
63 
64 template <>
65 struct DelKey<int> {
66  static const int Get() { return -1; }
67 };
68 
69 template <>
70 struct DelKey<double> {
71  static const double Get() { return -1.0; }
72 };
73 
74 template <>
75 struct DelKey<string> {
76  static const string Get() { return ""; }
77 };
78 
85 template<typename T>
86 struct UidGetter {
88  static const T &Get(const T &uid) { return uid; }
89 };
90 
94 template<>
95 struct UidGetter<string> {
97  static const string &Get(const string &uid) {
98  return StringCanonicalizer::Get(uid);
99  }
100 };
101 
110 template <typename K, typename V,
111  //typename Map = google::sparse_hash_map<K, V> >
112  //typename Map = map<K, V> >
113  typename Map = unordered_map<K, V> >
115  public:
118  //features_.set_deleted_key(DelKey<K>::Get());
119  }
120 
125  FeatureVector(const Map &features) : features_(features) {
126  //features_.set_deleted_key(DelKey<K>::Get());
127  }
128 
136  template <typename MapType>
137  FeatureVector(const MapType &features) {
138  //features_.set_deleted_key(DelKey<K>::Get());
139  for (typename MapType::const_iterator it = features.begin();
140  it != features.end(); ++it) {
141  features_.insert(*it);
142  }
143  }
144 
145  virtual ~FeatureVector() {}
146 
147  // Forward type declarations that mirror what STL maps do.
149  typedef K key_type;
151  typedef V mapped_type;
152 
155  typedef Map FeatureMap;
157  typedef typename FeatureMap::const_iterator const_iterator;
158 
159  // accessors
160 
164  return features_.begin();
165  }
166 
169  const_iterator end() const {
170  return features_.end();
171  }
172 
181  unordered_set<K> &GetNonZeroFeatures(unordered_set<K> &set) const {
182  for (const_iterator it = features_.begin(); it != features_.end(); ++it) {
183  set.insert(it->first);
184  }
185  return set;
186  }
187 
196  unordered_set<K> &RemoveNonZeroFeatures(unordered_set<K> &set) const {
197  for (const_iterator it = features_.begin(); it != features_.end(); ++it) {
198  set.erase(it->first);
199  }
200  return set;
201  }
202 
210  unordered_set<K> &RemoveEqualFeatures(const FeatureVector<K,V> &other,
211  unordered_set<K> &set) const {
212  for (const_iterator it = features_.begin(); it != features_.end(); ++it) {
213  const K &uid = it->first;
214  if (set.find(uid) != set.end() &&
215  GetWeight(uid) == other.GetWeight(uid)) {
216  set.erase(uid);
217  }
218  }
219  return set;
220  }
221 
227  V GetWeight(const K &uid) const {
228  const_iterator feature_it = features_.find(uid);
229  return feature_it == features_.end() ? V() : feature_it->second;
230  }
231 
235  V GetValue(const K &uid) const {
236  return GetWeight(uid);
237  }
238 
240  size_t size() const { return features_.size(); }
241 
242  bool empty() const { return features_.empty(); }
243 
247  V Dot(const FeatureVector<K,V> &other) const {
248  bool this_is_smaller = size() < other.size();
249  const FeatureVector &small = this_is_smaller ? *this : other;
250  const FeatureVector &large = this_is_smaller ? other : *this;
251  V dot_product = V();
252  for (const_iterator it = small.features_.begin();
253  it != small.features_.end();
254  ++it) {
255  dot_product += it->second * large.GetWeight(it->first);
256  }
257  return dot_product;
258  }
259 
260  // mutators
261 
269  V IncrementWeight(const K &uid, V by) {
270  const K &canonical_uid = UidGetter<K>::Get(uid);
271  V default_val = V(); // zero
272  // Short circuit: if we're trying to increase a feature's weight by
273  // zero, bail out early.
274  bool no_increase = by == default_val;
275  iterator feature_it = features_.find(canonical_uid);
276  if (feature_it == features_.end()) {
277  if (no_increase) {
278  return default_val;
279  }
280  features_[canonical_uid] = by;
281  return by;
282  } else {
283  if (no_increase) {
284  return feature_it->second;
285  }
286  V new_weight = feature_it->second + by;
287  if (new_weight == default_val) {
288  features_.erase(canonical_uid);
289  } else {
290  feature_it->second = new_weight;
291  }
292  return new_weight;
293  }
294  }
295 
304  V IncrementValue(const K &uid, V by) {
305  return IncrementWeight(uid, by);
306  }
307 
312  V SetWeight(const K &uid, V new_weight) {
313  V old_weight = GetWeight(uid);
314  if (new_weight == V()) {
315  features_.erase(uid);
316  } else {
317  features_[uid] = new_weight;
318  }
319  return old_weight;
320  }
321 
323  V SetValue(const K &uid, V new_value) {
324  return SetWeight(uid, new_value);
325  }
326 
332  for (iterator it = features_.begin(); it != features_.end(); ++it) {
333  it->second = it->second * scalar;
334  }
335  return *this;
336  }
337 
349  V scalar) {
350  for (const_iterator it = other.begin(); it != other.end(); ++it) {
351  IncrementWeight(it->first, scalar * it->second);
352  }
353  return *this;
354  }
355 
372  template <typename Collection>
373  FeatureVector<K,V> &AddScaledSubvector(const Collection &feature_uids,
374  const FeatureVector<K,V> &
375  feature_vector,
376  V scalar) {
377  for (typename Collection::const_iterator it = feature_uids.begin();
378  it != feature_uids.end();
379  ++it) {
380  IncrementWeight(*it, feature_vector.GetWeight(*it) * scalar);
381  }
382  return *this;
383  }
384 
392  template <typename Collection>
393  FeatureVector<K,V> &IncrementWeights(const Collection &feature_uids,
394  V increment) {
395  for (typename Collection::const_iterator it = feature_uids.begin();
396  it != feature_uids.end();
397  ++it) {
398  IncrementWeight(*it, increment);
399  }
400  return *this;
401  }
402 
405  void clear() {
406  features_.clear();
407  }
408 
409  // I/O methods
410 
411  // TODO(dbikel): Add methods to serialize and de-serialize to/from protobuf's.
412 
413  friend ostream &operator<<(ostream &os, const FeatureVector<K,V> &fv) {
414  os << "[";
415  const_iterator it = fv.features_.begin();
416  if (it != fv.features_.end()) {
417  os << it->first << "=" << it->second;
418  ++it;
419  }
420  for ( ; it != fv.features_.end(); ++it) {
421  os << " " << it->first << "=" << it->second;
422  }
423  os << "]";
424  return os;
425  }
426 
427  private:
428  // a helpful internal typedef for mutating methods
429  typedef typename FeatureMap::iterator iterator;
430 
431  // data members
434  FeatureMap features_;
435 };
436 
437 } // namespace reranker
438 
439 #endif
FeatureVector< K, V > & AddScaledVector(const FeatureVector< K, V > &other, V scalar)
Modifies this vector so that it equals this vector plus the scaled specified vector.
size_t size() const
Returns the number of non-zero feature components of this feature vector.
static const int Get()
A simple class that provides a layer of abstraction when retrieving objects to represent unique ident...
unordered_set< K > & RemoveEqualFeatures(const FeatureVector< K, V > &other, unordered_set< K > &set) const
Removes from the specified set the uid's of feature with weights equal in this vector to their weight...
V GetWeight(const K &uid) const
Returns the weight of the feature with the specified uid, where crucially features not "present" in t...
FeatureVector< K, V > & Scale(V scalar)
Multiplies this vector, in situ, by the specified scalar.
FeatureVector()
Create an empty feature vector.
static const T & Get(const T &uid)
Gets the canonical uid object for the specified uid object.
static const T Get()
const_iterator end() const
Returns a const iterator pointing to the end of the feature-value pairs of this feature vector...
const_iterator begin() const
Returns a const iterator pointing to the first of the feature-value pairs of this feature vector...
static const string & Get(const string &s)
Returns the canonical string for the specified string.
V IncrementValue(const K &uid, V by)
Increments the value of the specified vector component by the specified amount.
V SetValue(const K &uid, V new_value)
Synonym for SetWeight.
V mapped_type
The type of values/feature weights in this vector.
static const string & Get(const string &uid)
Gets the canonical uid string for the specified uid string.
FeatureVector< K, V > & AddScaledSubvector(const Collection &feature_uids, const FeatureVector< K, V > &feature_vector, V scalar)
Modifies this vector so that it equals this vector plus the scaled specified subvector.
FeatureMap::const_iterator const_iterator
The type of const iterator for the feature-weight pairs in this vector.
K key_type
The type of vector component/feature uid's in this vector.
static const string Get()
Map FeatureMap
The underlying type that this class stores the mapping of feature uid's to their weights.
V SetWeight(const K &uid, V new_weight)
Sets the weight of the specified feature to the specified value.
FeatureVector< K, V > & IncrementWeights(const Collection &feature_uids, V increment)
Increments the weights for the specified collection of features.
V IncrementWeight(const K &uid, V by)
Increments the weight of the specified feature by the specified amount.
V GetValue(const K &uid) const
Synonymous with GetWeight.
V Dot(const FeatureVector< K, V > &other) const
Computes the dot product of this feature vector with the specified FeatureVector. ...
unordered_set< K > & GetNonZeroFeatures(unordered_set< K > &set) const
Inserts the uid's of features with non-zero weights into the specified set.
Provides the reranker::StringCanonicalizer class.
static const double Get()
void clear()
Sets all feature weights to zero and, because this is a sparse vector, clears all storage...
unordered_set< K > & RemoveNonZeroFeatures(unordered_set< K > &set) const
Removes the uid's of features with non-zero weights from the specified set.
FeatureVector(const Map &features)
Copy features from the type of map used internally.
FeatureVector(const MapType &features)
Copy features from any map, or any collection of (feature,value) pairs, for that matter.
A class to represent a feature vector, where features are represented by unique identifiers, and feature values are represented by the template type.