40 using std::stringstream;
 
   49   int tokens_len = (int)tokens.size();
 
   50   int last_token_index = tokens_len - 1;
 
   51   for (
int i = 1; i < tokens_len; ++i) {
 
   53     for (
int prev_index = ((i - n + 1) < 0) ? 0 : (i - n + 1);
 
   54          prev_index <= max_prev;
 
   57       if (max_prev == last_token_index && max_prev == prev_index) {
 
   60       stringstream symbol_ss;
 
   62         symbol_ss << n << 
"g_ng{";
 
   64         symbol_ss << prefix << 
"{";
 
   66       for (
int j = prev_index; j <= max_prev; ++j) {
 
   67         symbol_ss << tokens[j] << ((j < max_prev) ? 
"," : 
"}");
 
   69       symbolic_features.IncrementWeight(symbol_ss.str(), 1.0);
 
   78   vector<string> tokens;
 
   79   tokens.push_back(
"<s>");
 
   81   tokens.push_back(
"</s>");
 
   82   ngram_extractor_.
Extract(tokens, n_, prefix_, symbolic_features);
 
const string & raw_data() const 
Returns the raw data (typically the sentence) for this candidate. 
 
void Tokenize(const string &s, vector< string > &toks, const char *delimiters=" \t") const 
Tokenizes the specified string, depositing the results into the specified vector. ...
 
A class to represent a candidate in a set of candidates that constitutes a training instance for a reranker.
 
A class to represent a feature vector, where features are represented by unique identifiers, and feature values are represented by the template type.