Ion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
stringutils.cc
Go to the documentation of this file.
1 
18 #include "ion/base/stringutils.h"
19 
20 #include <ctype.h>
21 
22 #include <algorithm>
23 #include <cerrno>
24 #include <climits>
25 #include <cstring>
26 #include <iomanip>
27 #include <sstream>
28 
29 #include "ion/base/logging.h"
30 #include "third_party/omaha/omaha/base/security/b64.h"
31 
32 namespace ion {
33 namespace base {
34 
35 namespace {
36 
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') to
// its numeric value in the range [0, 15]. The argument must be a valid hex
// digit; any other input yields a meaningless result.
inline static int HexToChar(int c) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char. Callers pass chars widened to int, which may be negative,
  // so normalize before classifying.
  const int lower = tolower(static_cast<unsigned char>(c));
  if (isdigit(lower))
    return lower - '0';
  // 'a' maps to 10, 'b' to 11, and so on.
  return lower - 'a' + 10;
}
41 
// Returns the upper-case equivalent of |c|. This wrapper gives toupper() a
// char -> char signature so it can be handed to std::transform.
static char ToUpper(char c) {
  // toupper() is undefined for negative arguments other than EOF, so route
  // the value through unsigned char first.
  return static_cast<char>(toupper(static_cast<unsigned char>(c)));
}
46 
// Case-insensitive "less than" predicate for single characters: compares the
// upper-case forms of |c1| and |c2|.
static bool LessI(char c1, char c2) {
  // Convert through unsigned char: toupper() is undefined for negative
  // arguments other than EOF. As a consequence bytes >= 0x80 order after the
  // ASCII range.
  const int upper1 = toupper(static_cast<unsigned char>(c1));
  const int upper2 = toupper(static_cast<unsigned char>(c2));
  return upper1 < upper2;
}
50 
// Case-insensitive equality predicate for single characters: true when the
// upper-case forms of |c1| and |c2| are identical.
static bool EqualI(char c1, char c2) {
  // Convert through unsigned char: toupper() is undefined for negative
  // arguments other than EOF.
  const int upper1 = toupper(static_cast<unsigned char>(c1));
  const int upper2 = toupper(static_cast<unsigned char>(c2));
  return upper1 == upper2;
}
54 
55 } // anonymous namespace
56 
// Returns the MIME base-64 encoding of |str| using the standard alphabet
// ('+' and '/' for values 62 and 63). The output is padded with '=' so that
// its length is always a multiple of four.
std::string ION_API MimeBase64EncodeString(const std::string& str) {
  static const char kAlphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  const size_t length = str.length();
  // Every group of three input bytes, including a trailing partial group,
  // produces exactly four output characters.
  const size_t dest_length = ((length + 2U) / 3U) * 4U;

  std::string dest;
  dest.reserve(dest_length);
  for (size_t pos = 0U; pos < length; pos += 3U) {
    const size_t remaining = length - pos;
    // Octets past the end of the input are treated as zero.
    const unsigned int a = static_cast<unsigned char>(str[pos]);
    const unsigned int b =
        remaining > 1U ? static_cast<unsigned char>(str[pos + 1U]) : 0U;
    const unsigned int c =
        remaining > 2U ? static_cast<unsigned char>(str[pos + 2U]) : 0U;

    // Top six bits of the first octet.
    dest.push_back(kAlphabet[a >> 2]);
    // Low two bits of the first octet and top four bits of the second.
    dest.push_back(kAlphabet[((a & 0x3U) << 4) | (b >> 4)]);
    // The last two characters exist only if the matching input octets do;
    // otherwise they become padding. A short group can only be the final one,
    // so the padding always lands at the end of the output.
    dest.push_back(remaining > 1U ? kAlphabet[((b & 0xfU) << 2) | (c >> 6)]
                                  : '=');
    dest.push_back(remaining > 2U ? kAlphabet[c & 0x3fU] : '=');
  }

  DCHECK_EQ(dest_length, dest.length());

  return dest;
}
104 
// Returns a copy of |str| in which the characters that have a C escape
// sequence (\a \b \f \n \r \t \v, backslash, single quote, double quote and
// question mark) are replaced by that two-character sequence. All other
// characters are copied unchanged.
std::string ION_API EscapeString(const std::string& str) {
  std::string out;
  // Lower bound only: each escaped character grows the output by one.
  out.reserve(str.length());
  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
    const char* escaped = NULL;
    switch (*it) {
      case '\a': escaped = "\\a"; break;
      case '\b': escaped = "\\b"; break;
      case '\f': escaped = "\\f"; break;
      case '\n': escaped = "\\n"; break;
      case '\r': escaped = "\\r"; break;
      case '\t': escaped = "\\t"; break;
      case '\v': escaped = "\\v"; break;
      case '\\': escaped = "\\\\"; break;
      case '\'': escaped = "\\'"; break;
      case '\"': escaped = "\\\""; break;
      case '\?': escaped = "\\\?"; break;
      default: break;
    }
    if (escaped != NULL)
      out.append(escaped);
    else
      out.push_back(*it);
  }
  return out;
}
151 
// Returns a copy of |str| in which every newline character is replaced by
// the two characters '\' and 'n'. Nothing else is altered.
std::string ION_API EscapeNewlines(const std::string& str) {
  const size_t length = str.length();
  std::string out;
  // Lower bound only: each newline grows the output by one character.
  out.reserve(length);
  for (size_t i = 0; i < length; ++i) {
    if (str[i] == '\n')
      out.append("\\n");
    else
      out.push_back(str[i]);
  }
  return out;
}
168 
// Appends to |strings| every maximal run of characters in |str| that contains
// none of the characters in |delimiters|. Consecutive, leading and trailing
// delimiters are skipped, so no empty piece is ever produced. Templated on
// the vector's allocator so that different vector types can share the code.
template <typename Alloc>
void SplitStringHelper(
    const std::string& str, const std::string& delimiters,
    std::vector<std::string, Alloc>* strings) {
  size_t token_end = 0;
  for (;;) {
    // Skip any delimiters in front of the next piece.
    const size_t token_start = str.find_first_not_of(delimiters, token_end);
    if (token_start == std::string::npos)
      return;

    // The piece runs up to the next delimiter, or to the end of |str|, in
    // which case token_end is npos and substr() clamps the length.
    token_end = str.find_first_of(delimiters, token_start);

    strings->push_back(str.substr(token_start, token_end - token_start));
  }
}
186 
187 std::vector<std::string> ION_API SplitString(
188  const std::string& str, const std::string& delimiters) {
189  std::vector<std::string> strings;
190  SplitStringHelper(str, delimiters, &strings);
191  return strings;
192 }
193 
// NOTE(review): the line carrying this definition's return type and name
// (listing line 194) is missing from this extract; it must be restored from
// the original source before this compiles.
//
// Allocator-aware variant of the split: the pieces of |str| found by
// SplitStringHelper() are collected in an AllocVector constructed from
// |alloc|, and that vector is returned.
195  const std::string& str, const std::string& delimiters,
196  const AllocatorPtr& alloc) {
197  AllocVector<std::string> strings(alloc);
198  SplitStringHelper(str, delimiters, &strings);
199  return strings;
200 }
201 
// Splits |str| at every occurrence of a character in |delimiters| without
// collapsing adjacent delimiters: "a,,b" yields "a", "" and "b". A delimiter
// that is the very last character of |str| does not produce a trailing empty
// field, and an empty |str| yields an empty vector.
std::vector<std::string> ION_API SplitStringWithoutSkipping(
    const std::string& str, const std::string& delimiters) {
  std::vector<std::string> fields;

  const size_t length = str.length();
  size_t field_start = 0;
  while (field_start < length) {
    const size_t delim_pos = str.find_first_of(delimiters, field_start);
    if (delim_pos == std::string::npos) {
      // No further delimiter: the rest of the string is the final field.
      fields.push_back(str.substr(field_start));
      break;
    }
    fields.push_back(str.substr(field_start, delim_pos - field_start));
    // Resume immediately after the delimiter that ended this field.
    field_start = delim_pos + 1;
  }
  return fields;
}
221 
222 ION_API std::string UrlDecodeString(const std::string& str) {
223  std::string decoded;
224  const size_t count = str.size();
225  for (size_t i = 0; i < count; ++i) {
226  if (str[i] == '+') {
227  decoded.push_back(' ');
228  } else if (i + 2U < count) {
229  const int c1 = static_cast<int>(str[i + 1]);
230  const int c2 = static_cast<int>(str[i + 2]);
231  if (str[i] == '%' && isxdigit(c1) && isxdigit(c2)) {
232  const char c = static_cast<char>((HexToChar(c1) << 4) | HexToChar(c2));
233  decoded.push_back(c);
234  i += 2U;
235  } else {
236  decoded.push_back(str[i]);
237  }
238  } else {
239  decoded.push_back(str[i]);
240  }
241  }
242 
243  return decoded;
244 }
245 
// Returns a URL-encoded copy of |str|. ASCII letters and digits and the
// characters in "._-$,;~()" are copied as-is; every other byte, including an
// embedded NUL, is written as '%' followed by two lower-case hex digits.
ION_API std::string UrlEncodeString(const std::string& str) {
  // Punctuation that is passed through without escaping.
  static const char kUnescaped[] = "._-$,;~()";
  static const char kHexDigits[] = "0123456789abcdef";

  const size_t length = str.length();
  std::string encoded;
  // Worst case: every byte expands to three characters.
  encoded.reserve(length * 3);
  for (size_t i = 0; i < length; ++i) {
    // Work on the unsigned value: isalnum() is undefined for negative
    // arguments, and the nibble extraction below needs it as well.
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    // strchr() also matches the terminating NUL of kUnescaped, so a zero byte
    // has to be excluded explicitly or it would be emitted unescaped.
    const bool is_literal =
        isalnum(byte) || (byte != 0 && strchr(kUnescaped, byte) != NULL);
    if (is_literal) {
      encoded.push_back(str[i]);
    } else {
      encoded.push_back('%');
      encoded.push_back(kHexDigits[byte >> 4]);
      encoded.push_back(kHexDigits[byte & 0xf]);
    }
  }
  return encoded;
}
266 
268  const std::string& s0, const std::string& s1,
269  size_t* first_different_index, std::string* line0, std::string* line1,
270  std::string* context0, std::string* context1) {
271  if (s0 == s1) {
272  return true;
273  } else {
274  const std::vector<std::string> v0 = SplitString(s0, "\n");
275  const std::vector<std::string> v1 = SplitString(s1, "\n");
276  const size_t num_lines = std::min(v0.size(), v1.size());
277  size_t bad_index = num_lines;
278  for (size_t i = 0; i < num_lines; ++i) {
279  if (v0[i] != v1[i]) {
280  bad_index = i;
281  break;
282  }
283  }
289  if (bad_index >= num_lines && v0.size() == v1.size())
290  return true;
291  if (first_different_index)
292  *first_different_index = bad_index;
293  if (line0)
294  *line0 = bad_index < v0.size() ? v0[bad_index] : std::string("<missing>");
295  if (line1)
296  *line1 = bad_index < v1.size() ? v1[bad_index] : std::string("<missing>");
299  if (context0 || context1) {
300  static const size_t kContextLines = 5;
301  const size_t context_start =
302  bad_index - std::min(kContextLines, bad_index);
303  if (context0) {
304  std::ostringstream str;
305  const size_t context_end =
306  std::min(v0.size(), bad_index + kContextLines + 1U);
307  for (size_t i = context_start; i < context_end; ++i) {
308  str << std::setfill(' ') << std::setw(5) << i << ": ";
309  str << v0[i] << "\n";
310  }
311  *context0 = str.str();
312  }
313  if (context1) {
314  std::ostringstream str;
315  const size_t context_end =
316  std::min(v1.size(), bad_index + kContextLines + 1U);
317  for (size_t i = context_start; i < context_end; ++i) {
318  str << std::setfill(' ') << std::setw(5) << i << ": ";
319  str << v1[i] << "\n";
320  }
321  *context1 = str.str();
322  }
323  }
324  return false;
325  }
326 }
327 
// Extracts a decimal integer from the start of |str| using stream
// extraction: leading whitespace is skipped and parsing stops at the first
// character that cannot be part of the number. Returns 0 when |str| does not
// begin with an integer.
int32 ION_API StringToInt32(const std::string& str) {
  std::istringstream stream(str);
  // Stays 0 if extraction finds nothing to parse.
  int32 value = 0;
  stream >> value;
  return value;
}
334 
336  const std::string& str1, const std::string& str2) {
337  if (std::lexicographical_compare(str1.begin(), str1.end(),
338  str2.begin(), str2.end(), LessI))
339  return -1;
340  else if (str1.size() == str2.size() &&
341  std::equal(str1.begin(), str1.end(), str2.begin(), EqualI))
342  return 0;
343  else
344  return 1;
345 }
346 
347 ION_API bool StartsWithCaseInsensitive(const std::string& target,
348  const std::string& start) {
349  return !start.empty() && start.length() <= target.length() &&
350  std::equal(start.begin(), start.end(), target.begin(), EqualI);
351 }
352 
353 ION_API bool EndsWithCaseInsensitive(const std::string& target,
354  const std::string& end) {
355  return !end.empty() && end.length() <= target.length() &&
356  std::equal(end.rbegin(), end.rend(), target.rbegin(), EqualI);
357 }
358 
360  const std::string& target, const std::string& substr) {
361  if (substr.empty()) {
362  return -1;
363  }
364  std::string target_upper(target.length(), 0);
365  std::string substr_upper(substr.length(), 0);
366  std::transform(target.begin(), target.end(), target_upper.begin(), ToUpper);
367  std::transform(substr.begin(), substr.end(), substr_upper.begin(), ToUpper);
368  size_t result = target_upper.find(substr_upper);
369  if (result == std::string::npos) {
370  return -1;
371  }
372  return static_cast<int>(result);
373 }
374 
375 ION_API std::string WebSafeBase64Decode(const std::string& str) {
381  std::string encoded = str;
382  size_t length = encoded.length();
383  size_t eq = length;
384  while (eq != 0 && encoded[eq - 1] == '=') {
385  encoded[eq - 1] = 0;
386  --eq;
387  }
388  for (size_t i = 0; i < eq; ++i) {
389  if (encoded[i] == '+')
390  encoded[i] = '-';
391  else if (encoded[i] == '/')
392  encoded[i] = '_';
393  }
394 
397  const size_t buffer_length = encoded.length();
398 
399  char* result = reinterpret_cast<char*>(alloca(buffer_length));
400  int decoded_length = B64_decode(encoded.c_str(),
401  reinterpret_cast<uint8*>(result),
402  static_cast<int>(buffer_length));
403  if (decoded_length == -1) {
405  return "";
406  }
407 
408  return std::string(result, decoded_length);
409 }
410 
411 ION_API std::string WebSafeBase64Encode(const std::string& input) {
412  const size_t length = input.size();
415  const size_t buffer_length = ((length + 2U) / 3U) * 4U + 1;
416 
417  char* buffer = reinterpret_cast<char*>(alloca(buffer_length));
418  B64_encode(reinterpret_cast<const uint8*>(input.data()),
419  static_cast<int>(input.size()), buffer,
420  static_cast<int>(buffer_length));
421  return std::string(buffer);
422 }
423 
424 } // namespace base
425 } // namespace ion
std::string buffer
ION_API int CompareCaseInsensitive(const std::string &str1, const std::string &str2)
Case-insensitive comparison of str1 and str2.
Definition: stringutils.cc:335
const std::string & str
void SplitStringHelper(const std::string &str, const std::string &delimiters, std::vector< std::string, Alloc > *strings)
Definition: stringutils.cc:170
double value
ION_API bool EndsWithCaseInsensitive(const std::string &target, const std::string &end)
Returns whether target ends with end (case-insensitive).
Definition: stringutils.cc:353
std::vector< std::string > ION_API SplitString(const std::string &str, const std::string &delimiters)
Splits a string into a vector of substrings, given a set of delimiter characters (expressed as a string).
Definition: stringutils.cc:187
uint32 length
ION_API std::string WebSafeBase64Encode(const std::string &input)
Encodes a byte array using RFC 4648 base64url ('-' and '_' for 62 and 63, respectively, and no padding).
Definition: stringutils.cc:411
std::string ION_API EscapeNewlines(const std::string &str)
Returns a string with all newlines replaced by "\\n".
Definition: stringutils.cc:152
std::string ION_API EscapeString(const std::string &str)
Returns an escaped version of the passed string.
Definition: stringutils.cc:105
int32 ION_API StringToInt32(const std::string &str)
Extracts and returns an integral value from str.
Definition: stringutils.cc:328
ION_API std::string UrlDecodeString(const std::string &str)
Returns a decoded version of a URL-encoded string.
Definition: stringutils.cc:222
Copyright 2016 Google Inc.
#define DCHECK_EQ(val1, val2)
Definition: logging.h:332
std::string ION_API MimeBase64EncodeString(const std::string &str)
Returns a mime base-64 encoded version of the passed string.
Definition: stringutils.cc:57
bool ION_API AreMultiLineStringsEqual(const std::string &s0, const std::string &s1, size_t *first_different_index, std::string *line0, std::string *line1, std::string *context0, std::string *context1)
This function can be useful for comparing multi-line strings in tests.
Definition: stringutils.cc:267
std::vector< std::string > ION_API SplitStringWithoutSkipping(const std::string &str, const std::string &delimiters)
Splits a string into a vector of substrings, given a set of delimiter characters (expressed as a string).
Definition: stringutils.cc:202
ION_API bool StartsWithCaseInsensitive(const std::string &target, const std::string &start)
Returns whether target begins with start (case-insensitive).
Definition: stringutils.cc:347
ION_API std::string UrlEncodeString(const std::string &str)
Returns a URL-encoded version of a string.
Definition: stringutils.cc:246
ION_API int FindCaseInsensitive(const std::string &target, const std::string &substr)
Case-insensitive version of std::string find.
Definition: stringutils.cc:359
ION_API std::string WebSafeBase64Decode(const std::string &str)
Decodes a Base64 encoded string.
Definition: stringutils.cc:375
A SharedPtr is a smart shared pointer to an instance of some class that implements reference counting...
Definition: sharedptr.h:60
This class can be used in place of std::vector to allow an Ion Allocator to be used for memory alloca...
Definition: allocvector.h:50