Google APIs Client Library for C++
stringpiece.h
Go to the documentation of this file.
00001 /*
00002  * \copyright Copyright 2013 Google Inc. All Rights Reserved.
00003  * \license @{
00004  *
00005  * Licensed under the Apache License, Version 2.0 (the "License");
00006  * you may not use this file except in compliance with the License.
00007  * You may obtain a copy of the License at
00008  *
00009  *     http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  *
00017  * @}
00018  */
00019 // Copyright 2001, Google Inc.  All rights reserved.
00020 // Maintainer: mec@google.com (Michael Chastain)
00021 //
00022 // A StringPiece points to part or all of a string, Cord, double-quoted string
00023 // literal, or other string-like object.  A StringPiece does *not* own the
00024 // string to which it points.  A StringPiece is not null-terminated.
00025 //
00026 // You can use StringPiece as a function or method parameter.  A StringPiece
00027 // parameter can receive a double-quoted string literal argument, a "const
00028 // char*" argument, a string argument, or a StringPiece argument with no data
00029 // copying.  Systematic use of StringPiece for arguments reduces data
00030 // copies and strlen() calls.
00031 //
00032 // Prefer passing StringPieces by value:
00033 //   void MyFunction(StringPiece arg);
00034 // If circumstances require, you may also pass by const reference:
00035 //   void MyFunction(const StringPiece& arg);  // not preferred
00036 // Both of these have the same lifetime semantics.  Passing by value
00037 // generates slightly smaller code.  For more discussion, see the thread
00038 // go/stringpiecebyvalue on c-users.
00039 //
00040 // StringPiece is also suitable for local variables if you know that
00041 // the lifetime of the underlying object is longer than the lifetime
00042 // of your StringPiece variable.
00043 //
00044 // Beware of binding a StringPiece to a temporary:
00045 //   StringPiece sp = obj.MethodReturningString();  // BAD: lifetime problem
00046 //
00047 // This code is okay:
00048 //   string str = obj.MethodReturningString();  // str owns its contents
00049 //   StringPiece sp(str);  // GOOD, although you may not need sp at all
00050 //
00051 // StringPiece is sometimes a poor choice for a return value and usually a poor
00052 // choice for a data member.  If you do use a StringPiece this way, it is your
00053 // responsibility to ensure that the object pointed to by the StringPiece
00054 // outlives the StringPiece.
00055 //
00056 // A StringPiece may represent just part of a string; thus the name "Piece".
00057 // For example, when splitting a string, vector<StringPiece> is a natural data
00058 // type for the output.  For another example, a Cord is a non-contiguous,
00059 // potentially very long string-like object.  The Cord class has an interface
00060 // that iteratively provides StringPiece objects that point to the
00061 // successive pieces of a Cord object.
00062 //
00063 // A StringPiece is not null-terminated.  If you write code that scans a
00064 // StringPiece, you must check its length before reading any characters.
00065 // Common idioms that work on null-terminated strings do not work on
00066 // StringPiece objects.
00067 //
00068 // There are several ways to create a null StringPiece:
00069 //   StringPiece()
00070 //   StringPiece(NULL)
00071 //   StringPiece(NULL, 0)
00072 // For all of the above, sp.data() == NULL, sp.length() == 0,
00073 // and sp.empty() == true.  Also, if you create a StringPiece with
00074 // a non-NULL pointer then sp.data() != NULL.  Once created,
00075 // sp.data() will stay either NULL or not-NULL, except if you call
00076 // sp.clear() or sp.set().
00077 //
00078 // Thus, you can use StringPiece(NULL) to signal an out-of-band value
00079 // that is different from other StringPiece values.  This is similar
00080 // to the way that const char* p1 = NULL; is different from
00081 // const char* p2 = "";.
00082 //
00083 // There are many ways to create an empty StringPiece:
00084 //   StringPiece()
00085 //   StringPiece(NULL)
00086 //   StringPiece(NULL, 0)
00087 //   StringPiece("")
00088 //   StringPiece("", 0)
00089 //   StringPiece("abcdef", 0)
00090 //   StringPiece("abcdef"+6, 0)
00091 // For all of the above, sp.length() will be 0 and sp.empty() will be true.
00092 // For some empty StringPiece values, sp.data() will be NULL.
00093 // For some empty StringPiece values, sp.data() will not be NULL.
00094 //
00095 // Be careful not to confuse: null StringPiece and empty StringPiece.
00096 // The set of empty StringPieces properly includes the set of null StringPieces.
00097 // That is, every null StringPiece is an empty StringPiece,
00098 // but some non-null StringPieces are empty StringPieces too.
00099 //
00100 // All empty StringPiece values compare equal to each other.
00101 // Even a null StringPiece compares equal to a non-null empty StringPiece:
00102 //  StringPiece() == StringPiece("", 0)
00103 //  StringPiece(NULL) == StringPiece("abc", 0)
00104 //  StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0)
00105 //
00106 // Look carefully at this example:
00107 //   StringPiece("") == NULL
00108 // True or false?  TRUE, because StringPiece::operator== converts
00109 // the right-hand side from NULL to StringPiece(NULL),
00110 // and then compares two zero-length spans of characters.
00111 // However, we are working to make this example produce a compile error.
00112 //
00113 // Suppose you want to write:
00114 //   bool TestWhat?(StringPiece sp) { return sp == NULL; }  // BAD
00115 // Do not do that.  Write one of these instead:
00116 //   bool TestNull(StringPiece sp) { return sp.data() == NULL; }
00117 //   bool TestEmpty(StringPiece sp) { return sp.empty(); }
00118 // The intent of TestWhat? is unclear.  Did you mean TestNull or TestEmpty?
00119 // Right now, TestWhat? behaves like TestEmpty.
00120 // We are working to make TestWhat? produce a compile error.
00121 // TestNull is good to test for an out-of-band signal.
00122 // TestEmpty is good to test for an empty StringPiece.
00123 //
00124 // Caveats (again):
00125 // (1) The lifetime of the pointed-to string (or piece of a string)
00126 //     must be longer than the lifetime of the StringPiece.
00127 // (2) There may or may not be a '\0' character after the end of
00128 //     StringPiece data.
00129 // (3) A null StringPiece is empty.
00130 //     An empty StringPiece may or may not be a null StringPiece.
00131 
00132 #ifndef STRINGS_STRINGPIECE_H_
00133 #define STRINGS_STRINGPIECE_H_
00134 
00135 #include <assert.h>
00136 #include <stddef.h>
00137 #include <string.h>
00138 #include <iosfwd>
00139 using std::ostream;
00140 #include <limits>
00141 using std::numeric_limits;
00142 #include <string>
00143 using std::string;
00144 
00145 #include "googleapis/base/integral_types.h"
00146 #include "googleapis/base/port.h"
00147 #include "googleapis/base/type_traits.h"
00148 #include "googleapis/strings/fastmem.h"
00149 #include "googleapis/util/hash.h"
00150 namespace googleapis {
00151 
// StringPiece exposes *two* size types:
//
//   StringPiece::size_type
//     - unsigned
//     - 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
//     - no future changes intended
//
//   stringpiece_ssize_type
//     - signed
//     - 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
//
typedef string::difference_type stringpiece_ssize_type;

// STRINGPIECE_CHECK_SIZE guards against 32-bit overflow when a size_t is
// narrowed to stringpiece_ssize_type.  The check is compiled in (value 1) for
// debug builds (NDEBUG not defined) and for fortified builds
// (_FORTIFY_SOURCE > 0); every other build gets 0.
// TODO(user): delete this after stringpiece_ssize_type goes 64 bit.
#if !defined(NDEBUG) || (defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0)
#define STRINGPIECE_CHECK_SIZE 1
#else
#define STRINGPIECE_CHECK_SIZE 0
#endif
00172 
00173 class StringPiece {
00174  private:
00175   const char* ptr_;
00176   stringpiece_ssize_type length_;
00177 
00178   // Prevent overflow in debug mode or fortified mode.
00179   // sizeof(stringpiece_ssize_type) may be smaller than sizeof(size_t).
00180   static stringpiece_ssize_type CheckedSsizeTFromSizeT(size_t size) {
00181 #if STRINGPIECE_CHECK_SIZE > 0
00182     if (size > static_cast<size_t>(
00183         std::numeric_limits<stringpiece_ssize_type>::max())) {
00184       // Some people grep for this message in logs
00185       // so take care if you ever change it.
00186       LogFatalSizeTooBig(size, "size_t to int conversion");
00187     }
00188 #endif
00189     return static_cast<stringpiece_ssize_type>(size);
00190   }
00191 
00192   // Out-of-line error path.
00193   static void LogFatalSizeTooBig(size_t size, const char* details);
00194 
00195  public:
00196   // We provide non-explicit singleton constructors so users can pass
00197   // in a "const char*" or a "string" wherever a "StringPiece" is
00198   // expected.
00199   //
00200   // Style guide exception granted:
00201   // http://goto/style-guide-exception-20978288
00202   StringPiece() : ptr_(NULL), length_(0) {}
00203 
00204   StringPiece(const char* str)  // NOLINT(runtime/explicit)
00205       : ptr_(str), length_(0) {
00206     if (str != NULL) {
00207       length_ = CheckedSsizeTFromSizeT(strlen(str));
00208     }
00209   }
00210 
00211   template <class Allocator>
00212   StringPiece(const std::basic_string<char, std::char_traits<char>,
00213               Allocator> &str)  // NOLINT(runtime/explicit)
00214       : ptr_(str.data()), length_(0) {
00215     length_ = CheckedSsizeTFromSizeT(str.size());
00216   }
00217 #if defined(HAS_GLOBAL_STRING)
00218   template <class Allocator>
00219   StringPiece(const basic_string<char, std::char_traits<char>,
00220               Allocator> &str)  // NOLINT(runtime/explicit)
00221       : ptr_(str.data()), length_(0) {
00222     length_ = CheckedSsizeTFromSizeT(str.size());
00223   }
00224 #endif
00225 
00226   StringPiece(const char* offset, stringpiece_ssize_type len)
00227       : ptr_(offset), length_(len) {
00228     assert(len >= 0);
00229   }
00230 
00231   // Substring of another StringPiece.
00232   // pos must be non-negative and <= x.length().
00233   StringPiece(StringPiece x, stringpiece_ssize_type pos);
00234   // Substring of another StringPiece.
00235   // pos must be non-negative and <= x.length().
00236   // len must be non-negative and will be pinned to at most x.length() - pos.
00237   StringPiece(StringPiece x,
00238               stringpiece_ssize_type pos,
00239               stringpiece_ssize_type len);
00240 
00241   // data() may return a pointer to a buffer with embedded NULs, and the
00242   // returned buffer may or may not be null terminated.  Therefore it is
00243   // typically a mistake to pass data() to a routine that expects a NUL
00244   // terminated string.
00245   const char* data() const { return ptr_; }
00246   stringpiece_ssize_type size() const { return length_; }
00247   stringpiece_ssize_type length() const { return length_; }
00248   bool empty() const { return length_ == 0; }
00249 
00250   void clear() {
00251     ptr_ = NULL;
00252     length_ = 0;
00253   }
00254 
00255   void set(const char* data, stringpiece_ssize_type len) {
00256     assert(len >= 0);
00257     ptr_ = data;
00258     length_ = len;
00259   }
00260 
00261   void set(const char* str) {
00262     ptr_ = str;
00263     if (str != NULL)
00264       length_ = CheckedSsizeTFromSizeT(strlen(str));
00265     else
00266       length_ = 0;
00267   }
00268 
00269   void set(const void* data, stringpiece_ssize_type len) {
00270     ptr_ = reinterpret_cast<const char*>(data);
00271     length_ = len;
00272   }
00273 
00274   char operator[](stringpiece_ssize_type i) const {
00275     assert(0 <= i);
00276     assert(i < length_);
00277     return ptr_[i];
00278   }
00279 
00280   void remove_prefix(stringpiece_ssize_type n) {
00281     assert(length_ >= n);
00282     ptr_ += n;
00283     length_ -= n;
00284   }
00285 
00286   void remove_suffix(stringpiece_ssize_type n) {
00287     assert(length_ >= n);
00288     length_ -= n;
00289   }
00290 
00291   // returns {-1, 0, 1}
00292   int compare(StringPiece x) const {
00293     const stringpiece_ssize_type min_size =
00294         length_ < x.length_ ? length_ : x.length_;
00295     int r = memcmp(ptr_, x.ptr_, min_size);
00296     if (r < 0) return -1;
00297     if (r > 0) return 1;
00298     if (length_ < x.length_) return -1;
00299     if (length_ > x.length_) return 1;
00300     return 0;
00301   }
00302 
00303   string as_string() const {
00304     return ToString();
00305   }
00306   // We also define ToString() here, since many other string-like
00307   // interfaces name the routine that converts to a C++ string
00308   // "ToString", and it's confusing to have the method that does that
00309   // for a StringPiece be called "as_string()".  We also leave the
00310   // "as_string()" method defined here for existing code.
00311   string ToString() const {
00312     if (ptr_ == NULL) return string();
00313     return string(data(), size());
00314   }
00315 
00316   void CopyToString(string* target) const;
00317   void AppendToString(string* target) const;
00318 
00319   bool starts_with(StringPiece x) const {
00320     return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0);
00321   }
00322 
00323   bool ends_with(StringPiece x) const {
00324     return ((length_ >= x.length_) &&
00325             (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
00326   }
00327 
00328   // standard STL container boilerplate
00329   typedef char value_type;
00330   typedef const char* pointer;
00331   typedef const char& reference;
00332   typedef const char& const_reference;
00333   typedef size_t size_type;
00334   typedef ptrdiff_t difference_type;
00335   static const size_type npos;
00336   typedef const char* const_iterator;
00337   typedef const char* iterator;
00338   typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
00339   typedef std::reverse_iterator<iterator> reverse_iterator;
00340   iterator begin() const { return ptr_; }
00341   iterator end() const { return ptr_ + length_; }
00342   const_reverse_iterator rbegin() const {
00343     return const_reverse_iterator(ptr_ + length_);
00344   }
00345   const_reverse_iterator rend() const {
00346     return const_reverse_iterator(ptr_);
00347   }
00348   stringpiece_ssize_type max_size() const { return length_; }
00349   stringpiece_ssize_type capacity() const { return length_; }
00350 
00351   // cpplint.py emits a false positive [build/include_what_you_use]
00352   stringpiece_ssize_type copy(char* buf, size_type n, size_type pos = 0) const;  // NOLINT
00353 
00354   bool contains(StringPiece s) const;
00355 
00356   stringpiece_ssize_type find(StringPiece s, size_type pos = 0) const;
00357   stringpiece_ssize_type find(char c, size_type pos = 0) const;
00358   stringpiece_ssize_type rfind(StringPiece s, size_type pos = npos) const;
00359   stringpiece_ssize_type rfind(char c, size_type pos = npos) const;
00360 
00361   stringpiece_ssize_type find_first_of(StringPiece s, size_type pos = 0) const;
00362   stringpiece_ssize_type find_first_of(char c, size_type pos = 0) const {
00363     return find(c, pos);
00364   }
00365   stringpiece_ssize_type find_first_not_of(StringPiece s,
00366                                            size_type pos = 0) const;
00367   stringpiece_ssize_type find_first_not_of(char c, size_type pos = 0) const;
00368   stringpiece_ssize_type find_last_of(StringPiece s,
00369                                       size_type pos = npos) const;
00370   stringpiece_ssize_type find_last_of(char c, size_type pos = npos) const {
00371     return rfind(c, pos);
00372   }
00373   stringpiece_ssize_type find_last_not_of(StringPiece s,
00374                                           size_type pos = npos) const;
00375   stringpiece_ssize_type find_last_not_of(char c, size_type pos = npos) const;
00376 
00377   StringPiece substr(size_type pos, size_type n = npos) const;
00378 };
00379 
00380 #ifndef SWIG
00381 DECLARE_POD(StringPiece);  // So vector<StringPiece> becomes really fast
00382 #endif
00383 
00384 // This large function is defined inline so that in a fairly common case where
00385 // one of the arguments is a literal, the compiler can elide a lot of the
00386 // following comparisons.
00387 inline bool operator==(StringPiece x, StringPiece y) {
00388   stringpiece_ssize_type len = x.size();
00389   if (len != y.size()) {
00390     return false;
00391   }
00392 
00393   return x.data() == y.data() || len <= 0 ||
00394       strings::memeq(x.data(), y.data(), len);
00395 }
00396 
00397 inline bool operator!=(StringPiece x, StringPiece y) {
00398   return !(x == y);
00399 }
00400 
00401 inline bool operator<(StringPiece x, StringPiece y) {
00402   const stringpiece_ssize_type min_size =
00403       x.size() < y.size() ? x.size() : y.size();
00404   const int r = memcmp(x.data(), y.data(), min_size);
00405   return (r < 0) || (r == 0 && x.size() < y.size());
00406 }
00407 
00408 inline bool operator>(StringPiece x, StringPiece y) {
00409   return y < x;
00410 }
00411 
00412 inline bool operator<=(StringPiece x, StringPiece y) {
00413   return !(x > y);
00414 }
00415 
00416 inline bool operator>=(StringPiece x, StringPiece y) {
00417   return !(x < y);
00418 }
00419 class StringPiece;
00420 template <class X> struct GoodFastHash;
00421 
00422 // ------------------------------------------------------------------
00423 // Functions used to create STL containers that use StringPiece
00424 //  Remember that a StringPiece's lifetime had better be less than
00425 //  that of the underlying string or char*.  If it is not, then you
00426 //  cannot safely store a StringPiece into an STL container
00427 // ------------------------------------------------------------------
00428 
00429 // SWIG doesn't know how to parse this stuff properly. Omit it.
00430 #ifndef SWIG
00431 
00432 // An implementation of GoodFastHash for StringPiece.  See
00433 // GoodFastHash values.
00434 template<> struct GoodFastHash<StringPiece> {
00435   size_t operator()(StringPiece s) const {
00436     return HashStringThoroughly(s.data(), s.size());
00437   }
00438   // Less than operator, for MSVC.
00439   bool operator()(const StringPiece& s1, const StringPiece& s2) const {
00440     return s1 < s2;
00441   }
00442   static const size_t bucket_size = 4;  // These are required by MSVC
00443   static const size_t min_buckets = 8;  // 4 and 8 are defaults.
00444 };
00445 #endif
00446 
00447 // allow StringPiece to be logged
00448 extern std::ostream& operator<<(std::ostream& o, StringPiece piece);
00449 
00450 
00451 } // namespace googleapis
00452 #endif  // STRINGS_STRINGPIECE_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines