36 #ifndef RERANKER_STREAM_TOKENIZER_H_ 
   37 #define RERANKER_STREAM_TOKENIZER_H_ 
   50 using std::istringstream;
 
   51 using std::ostringstream;
 
   63 static const char *default_reserved_words[] = {
 
   80 #define DEFAULT_RESERVED_CHARS "(){},=;/" 
  103     static const char *names[] = {
 
  104       "EOF", 
"RESERVED_CHAR", 
"RESERVED_WORD", 
"STRING", 
"NUMBER", 
"IDENTIFIER" 
  106     return names[token_type];
 
  136       is_(is), num_read_(0), line_number_(0), eof_reached_(false),
 
  138     Init(reserved_chars);
 
  149       sstream_(s), is_(sstream_), num_read_(0), line_number_(0),
 
  150       eof_reached_(false), next_token_idx_(0) {
 
  151     Init(reserved_chars);
 
  157     reserved_words_ = reserved_words;
 
  162     delete[] reserved_chars_;
 
  167   string str() { 
return oss_.str(); }
 
  173     return HasPrev() ? token_[next_token_idx_ - 1].curr_pos : 0;
 
  180     return HasNext() ? token_[next_token_idx_].line_number : line_number_;
 
  184   bool HasNext()
 const { 
return next_token_idx_ < token_.size(); }
 
  186   bool HasPrev()
 const { 
return next_token_idx_ > 0; }
 
  189     return HasPrev() ? token_[next_token_idx_ - 1].tok : 
"";
 
  193     return HasPrev() ? token_[next_token_idx_ - 1].start : 0;
 
  204       throw std::runtime_error(
"invoking StreamTokenizer::Next when HasNext " 
  208     size_t curr_token_idx = next_token_idx_;
 
  212     if (!eof_reached_ && next_token_idx_ + 1 == token_.size()) {
 
  214       if (GetNext(&next)) {
 
  215         token_.push_back(next);
 
  219     if (next_token_idx_ < token_.size()) {
 
  223     return token_[curr_token_idx].tok;
 
  238     if (num_tokens > next_token_idx_) {
 
  239       num_tokens = next_token_idx_;
 
  241     next_token_idx_ -= num_tokens;
 
  252     return HasNext() ? token_[next_token_idx_].start : num_read_;
 
  265     return HasNext() ? token_[next_token_idx_].line_number : line_number_;
 
  271   string Peek()
 const { 
return HasNext() ? token_[next_token_idx_].tok : 
""; }
 
  274   void Init(
const char *reserved_chars) {
 
  275     num_reserved_chars_ = strlen(reserved_chars);
 
  276     reserved_chars_ = 
new char[num_reserved_chars_];
 
  277     strcpy(reserved_chars_, reserved_chars);
 
  278     int num_reserved_words = 
sizeof(default_reserved_words)/
sizeof(
const char*);
 
  279     for (
int i = 0; i < num_reserved_words; ++i) {
 
  280       reserved_words_.insert(
string(default_reserved_words[i]));
 
  283     if (GetNext(&next)) {
 
  284       token_.push_back(next);
 
  288   void ConsumeChar(
char c);
 
  290   bool ReadChar(
char *c);
 
  299   bool GetNext(Token *next);
 
  303   bool ReservedChar(
char c)
 const {
 
  304     for (
size_t i = 0; i < num_reserved_chars_; ++i) {
 
  305       if (c == reserved_chars_[i]) {
 
  317   istringstream sstream_;
 
  322   char *reserved_chars_;
 
  323   size_t num_reserved_chars_;
 
  324   set<string> reserved_words_;
 
  333   vector<Token> token_;
 
  338   size_t next_token_idx_;
 
A simple class for tokenizing a stream of tokens for the formally specified language used to construc...
 
string str()
Returns the entire sequence of characters read so far by this stream tokenizer as a newly constructed...
 
size_t line_number() const 
Returns the number of lines read from the underlying byte stream, where a line is any number of bytes...
 
#define DEFAULT_RESERVED_CHARS
Default set of reserved characters for the StreamTokenizer class. 
 
StreamTokenizer(const string &s, const char *reserved_chars="(){},=;/")
Constructs a new instance around the specified string. 
 
size_t PeekTokenStart() const 
Returns the next token’s start position, or the byte position of the underlying byte stream if there ...
 
size_t start
The starting byte of the token in the underlying stream. 
 
void set_reserved_words(set< string > &reserved_words)
Sets the set of “reserved words” used by this stream tokenizer. 
 
static const char * TypeName(TokenType token_type)
Returns a string type name for the specified TokenType constant. 
 
size_t PeekPrevTokenStart() const 
 
TokenType type
The token’s type. 
 
bool HasNext() const 
Returns whether there is another token in the token stream. 
 
string Next()
Returns the next token in the token stream. 
 
void Rewind()
Rewinds this token stream to the beginning. 
 
size_t tellg() const 
Returns the number of bytes read from the underlying byte stream just after scanning the most recent ...
 
size_t PeekTokenLineNumber() const 
Returns the line number of the first byte of the next token, or the current line number of the underl...
 
Information about a token read from the underlying stream. 
 
size_t line_number
The line number of the first byte of the token in the underlying stream. 
 
TokenType
The set of types of tokens read by this stream tokenizer. 
 
string Peek() const 
Returns the next token that would be returned by the Next method. 
 
TokenType PeekPrevTokenType() const 
 
string tok
The token itself. 
 
virtual ~StreamTokenizer()
Destroys this instance. 
 
size_t curr_pos
The current position in the underlying stream just after reading this token. 
 
StreamTokenizer(istream &is, const char *reserved_chars="(){},=;/")
Constructs a new instance around the specified byte stream. 
 
void Putback()
A synonym for Rewind(1). 
 
void Rewind(size_t num_tokens)
Rewinds this token stream by the specified number of tokens. 
 
TokenType PeekTokenType() const 
Returns the type of the next token, or EOF_TYPE if there is no next token.