InFact
Interpreter and factory for easily creating C++ objects at run-time
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
stream-tokenizer.cc
Go to the documentation of this file.
1 // Copyright 2014, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 // -----------------------------------------------------------------------------
30 //
31 //
36 
37 #include <ctype.h>
38 #include <sstream>
39 #include <stdexcept>
40 
41 #include "stream-tokenizer.h"
42 
43 namespace infact {
44 
45 void
46 StreamTokenizer::ConsumeChar(char c) {
47  oss_ << c;
48  ++num_read_;
49  if (c == '\n') {
50  ++line_number_;
51  line_start_pos_ = num_read_;
52  }
53 }
54 
55 bool
56 StreamTokenizer::ReadChar(char *c) {
57  (*c) = is_.get();
58  if (!is_.good()) {
59  eof_reached_ = true;
60  return false;
61  } else {
62  ConsumeChar(*c);
63  return true;
64  }
65 }
66 
67 bool
68 StreamTokenizer::GetNext(Token *next) {
69  if (!is_.good()) {
70  eof_reached_ = true;
71  return false;
72  }
73  // Get first character of next token.
74  char c;
75  bool is_whitespace = true;
76  while (is_whitespace) {
77  if (!ReadChar(&c)) {
78  return false;
79  }
80  is_whitespace = isspace(c);
81 
82  // If we find a comment character, then read to the end of the line.
83  if (!is_whitespace && c == '/' && is_.peek() == '/') {
84  while (c != '\n') {
85  if (!ReadChar(&c)) {
86  return false;
87  }
88  }
89  is_whitespace = true;
90  }
91  }
92 
93  // In case we're successful in reading the next token, we fill in
94  // the two stream state data available now.
95  next->start = num_read_ - 1;
96  next->line_number = line_number_;
97  next->line_start_pos = line_start_pos_;
98 
99  bool next_tok_complete = false;
100  next->tok.clear();
101  if (ReservedChar(c)) {
102  next->tok += c;
103  next_tok_complete = true;
104  next->type = RESERVED_CHAR;
105  } else if (c == '"') {
106  // We've got a string literal, so keep reading characters,
107  // until hitting a non-escaped double quote.
108  streampos string_literal_start_pos = num_read_ - 1;
109  bool found_closing_quote = false;
110  while (is_.good()) {
111  bool success = ReadChar(&c);
112  if (success) {
113  if (c == '"') {
114  found_closing_quote = true;
115  break;
116  } else if (c == '\\') {
117  success = ReadChar(&c);
118  }
119  }
120  if (success) {
121  next->tok += c;
122  }
123  }
124  if (!found_closing_quote) {
125  ostringstream err_ss;
126  err_ss << "StreamTokenizer: error: could not find closing "
127  << "double quote for string literal beginning at stream index "
128  << string_literal_start_pos
129  << "; partial string literal read: \""
130  << next->tok;
131  Error(err_ss.str());
132  }
133  next_tok_complete = true;
134  next->type = STRING;
135  } else {
136  // This is a number, a reserved word or C++ identifier token, so
137  // add first character; the remainder of the token will be handled
138  // in the next block.
139  next->tok += c;
140  next->type = (c == '-' || (c >= '0' && c <= '9')) ? NUMBER : IDENTIFIER;
141  }
142  if (!next_tok_complete) {
143  // The current token is a number, a reserved word or C++
144  // identifier, so we keep reading characters until hitting a
145  // "reserved character", a whitespace character or EOF.
146  bool done = false;
147  while (!done && is_.good()) {
148  // We don't call ReadChar below because the next character might
149  // tell us that the current token has ended (i.e., if it's a
150  // reserved character, a double quote or a whitespace
151  // character).
152  int peek = is_.peek();
153  if (peek != EOF) {
154  char next_char = static_cast<char>(peek);
155  if (ReservedChar(next_char) || next_char == '"' || isspace(next_char)) {
156  // Now that we've finished reading something that is not a
157  // string literal, change its type to be RESERVED_WORD if it
158  // exactly matches something in the set of reserved words.
159  if (reserved_words_.count(next->tok) != 0) {
160  next->type = RESERVED_WORD;
161  }
162  done = true;
163  } else {
164  ReadChar(&c);
165  next->tok += c;
166  }
167  } else {
168  eof_reached_ = true;
169  }
170  }
171  }
172  // We're about to return a successfully read token, so we make sure to record
173  // the stream position at this point in the Token object.
174  next->curr_pos = num_read_;
175 
176  return true;
177 }
178 
namespace {

// Returns the portion of str that starts at index pos and stops just
// before the next newline character, or at the end of str when no newline
// follows.  An index at or beyond the end of str yields an empty string.
std::string getline(const std::string &str, size_t pos) {
  if (pos >= str.length()) {
    return std::string();
  }
  const size_t newline_idx = str.find('\n', pos);
  if (newline_idx == std::string::npos) {
    return str.substr(pos);
  }
  return str.substr(pos, newline_idx - pos);
}

}  // namespace
191 
192 string
194  if (HasPrev()) {
195  return getline(str(), token_[next_token_idx_ - 1].line_start_pos);
196  } else {
197  return "";
198  }
199 }
200 
201 } // namespace infact
string str()
Returns the entire sequence of characters read so far by this stream tokenizer as a newly constructed string object.
Provides the StreamTokenizer class.
void Error(const std::string &message)
Reports an error encountered during parsing and/or construction of an object.
Definition: error.cc:47
string line()
Returns a string consisting of the characters read so far of the current line containing the most recently returned token.