36 #ifndef INFACT_STREAM_TOKENIZER_H_
37 #define INFACT_STREAM_TOKENIZER_H_
52 using std::istringstream;
53 using std::ostringstream;
65 static const char *default_reserved_words[] = {
83 #define DEFAULT_RESERVED_CHARS "(){},=;/"
106 static const char *names[] = {
107 "EOF",
"RESERVED_CHAR",
"RESERVED_WORD",
"STRING",
"NUMBER",
"IDENTIFIER"
109 return names[token_type];
142 Init(reserved_chars);
153 sstream_(s), is_(sstream_) {
154 Init(reserved_chars);
160 reserved_words_ = reserved_words;
165 delete[] reserved_chars_;
/// Returns the entire sequence of characters read so far by this stream
/// tokenizer, i.e. the current contents of the oss_ buffer, by value.
170 string str() {
return oss_.str(); }
176 return HasPrev() ? token_[next_token_idx_ - 1].curr_pos : 0;
183 return HasNext() ? token_[next_token_idx_].line_number : line_number_;
196 return HasNext() ? token_[next_token_idx_].line_start_pos : 0;
/// Returns whether there is another token in the token stream, i.e. whether
/// next_token_idx_ still indexes an element of token_.
200 bool HasNext()
const {
return next_token_idx_ < token_.size(); }
/// Returns whether at least one token lies before the current position,
/// i.e. whether next_token_idx_ is greater than zero.
202 bool HasPrev()
const {
return next_token_idx_ > 0; }
205 return HasPrev() ? token_[next_token_idx_ - 1].tok :
"";
212 return HasPrev() ? token_[next_token_idx_ - 1].line_start_pos : 0;
216 return HasPrev() ? token_[next_token_idx_ - 1].start : 0;
226 Error(
"invoking StreamTokenizer::Next when HasNext returns false");
229 size_t curr_token_idx = next_token_idx_;
233 if (!eof_reached_ && next_token_idx_ + 1 == token_.size()) {
235 if (GetNext(&next)) {
236 token_.push_back(next);
240 if (next_token_idx_ < token_.size()) {
244 return token_[curr_token_idx].tok;
259 if (num_tokens > next_token_idx_) {
260 num_tokens = next_token_idx_;
262 next_token_idx_ -= num_tokens;
273 return HasNext() ? token_[next_token_idx_].start : num_read_;
286 return HasNext() ? token_[next_token_idx_].line_number : line_number_;
/// Returns the next token that would be returned by the Next method, without
/// changing the position in the token stream.  Yields the empty string when
/// HasNext() is false.
292 string Peek()
const {
return HasNext() ? token_[next_token_idx_].tok :
""; }
295 void Init(
const char *reserved_chars) {
296 num_reserved_chars_ = strlen(reserved_chars);
297 reserved_chars_ =
new char[num_reserved_chars_ + 1];
298 strcpy(reserved_chars_, reserved_chars);
299 int num_reserved_words =
sizeof(default_reserved_words)/
sizeof(
const char*);
300 for (
int i = 0; i < num_reserved_words; ++i) {
301 reserved_words_.insert(
string(default_reserved_words[i]));
304 if (GetNext(&next)) {
305 token_.push_back(next);
/// NOTE(review): the definition is not visible in this listing.  Presumably
/// performs the bookkeeping for a character c that has just been read
/// (stream position / line tracking) -- confirm against the implementation.
309 void ConsumeChar(
char c);
/// NOTE(review): the definition is not visible in this listing.  Presumably
/// reads a single character from the underlying stream into *c and returns
/// whether a character was obtained -- confirm against the implementation.
311 bool ReadChar(
char *c);
/// Attempts to obtain the next token, delivering it through the next
/// pointer.  The callers visible in this file (Init and Next) append *next to
/// token_ only when this function returns true.
/// NOTE(review): the definition is not visible here; presumably a false
/// return means the underlying stream has no further token -- confirm.
320 bool GetNext(Token *next);
324 bool ReservedChar(
char c)
const {
325 for (
size_t i = 0; i < num_reserved_chars_; ++i) {
326 if (c == reserved_chars_[i]) {
/// String-backed input stream.  The string-taking constructor initializes it
/// from its argument and binds is_ to it.
338 istringstream sstream_;
/// Copy of the reserved characters, allocated with new[] in Init and
/// released with delete[].
343 char *reserved_chars_;
/// Number of characters in reserved_chars_, as computed by strlen in Init.
344 size_t num_reserved_chars_;
/// The set of reserved words.  Init inserts the entries of
/// default_reserved_words; the set can also be replaced by assignment.
345 set<string> reserved_words_;
/// Fallback value for the next token's start position when there is no next
/// token (the byte position of the underlying byte stream).
348 size_t num_read_ = 0;
/// Fallback value for the next token's line number when there is no next
/// token (the current line number of the underlying byte stream).
349 size_t line_number_ = 0;
/// NOTE(review): presumably the stream position at which the current line
/// starts; no use of this member is visible in this listing -- confirm.
350 size_t line_start_pos_ = 0;
/// While this is false, Next may try to read one more token from the
/// underlying stream; where it becomes true is not visible in this listing.
351 bool eof_reached_ =
false;
/// The tokens obtained so far, in the order they were appended.
355 vector<Token> token_;
/// Index into token_ of the token that Next would return; reduced by Rewind.
360 size_t next_token_idx_ = 0;
Information about a token read from the underlying stream.
StreamTokenizer(const string &s, const char *reserved_chars="(){},=;/")
Constructs a new instance around the specified string.
size_t curr_pos
The current position in the underlying stream just after reading this token.
string Next()
Returns the next token in the token stream.
size_t line_start_pos
The stream position of the start of the line of this token.
size_t PeekPrevTokenStart() const
Provides an error handling function that optionally throws an exception.
size_t PeekPrevTokenLineStart() const
Returns the stream position of the most recent line start of the previous token, or 0 if this stream has no previous token.
TokenType PeekTokenType() const
Returns the type of the next token, or EOF_TYPE if there is no next token.
size_t start
The starting byte of the token in the underlying stream.
virtual ~StreamTokenizer()
Destroys this instance.
void Putback()
A synonym for Rewind(1).
size_t line_start()
Returns the stream position of the current line in the underlying byte stream.
string str()
Returns the entire sequence of characters read so far by this stream tokenizer as a newly constructed...
A simple class for tokenizing a stream of tokens for the formally specified language used to construc...
bool HasNext() const
Returns whether there is another token in the token stream.
StreamTokenizer(istream &is, const char *reserved_chars="(){},=;/")
Constructs a new instance around the specified byte stream.
static const char * TypeName(TokenType token_type)
Returns a string type name for the specified TokenType constant.
TokenType PeekPrevTokenType() const
size_t PeekTokenStart() const
Returns the next token’s start position, or the byte position of the underlying byte stream if there is no next token.
void Rewind(size_t num_tokens)
Rewinds this token stream by the specified number of tokens.
size_t PeekTokenLineNumber() const
Returns the line number of the first byte of the next token, or the current line number of the underlying byte stream if there is no next token.
void set_reserved_words(set< string > &reserved_words)
Sets the set of “reserved words” used by this stream tokenizer.
void Error(const std::string &message)
Reports an error encountered during parsing and/or construction of an object.
string Peek() const
Returns the next token that would be returned by the Next method.
#define DEFAULT_RESERVED_CHARS
Default set of reserved characters for the StreamTokenizer class.
TokenType
The set of types of tokens read by this stream tokenizer.
string line()
Returns a string consisting of the characters read so far of the current line containing the most rec...
size_t line_number() const
Returns the number of lines read from the underlying byte stream, where a line is any number of bytes...
string tok
The token itself.
size_t line_number
The line number of the first byte of the token in the underlying stream.
void Rewind()
Rewinds this token stream to the beginning.
size_t tellg() const
Returns the number of bytes read from the underlying byte stream just after scanning the most recent ...
TokenType type
The token’s type.