util.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. /*
  2. * Copyright 2014 Google Inc. All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef FLATBUFFERS_UTIL_H_
  17. #define FLATBUFFERS_UTIL_H_
  18. #include <errno.h>
  19. #include <ctype.h>
  20. #include "flatbuffers/base.h"
  21. #include "flatbuffers/stl_emulation.h"
  22. // For TFLM we always want to use FLATBUFFERS_PREFER_PRINTF=1. See
  23. // http://b/211811553 for more context.
  24. #ifndef FLATBUFFERS_PREFER_PRINTF
  25. #define FLATBUFFERS_PREFER_PRINTF 1
  26. #endif
  27. #ifndef FLATBUFFERS_PREFER_PRINTF
  28. # include <sstream>
  29. # include <iomanip>
  30. #else // FLATBUFFERS_PREFER_PRINTF
  31. # include <float.h>
  32. # include <stdio.h>
  33. #endif // FLATBUFFERS_PREFER_PRINTF
  34. #include <string>
  35. namespace flatbuffers {
  36. // @locale-independent functions for ASCII characters set.
  37. // Fast checking that character lies in closed range: [a <= x <= b]
  38. // using one compare (conditional branch) operator.
  39. inline bool check_ascii_range(char x, char a, char b) {
  40. FLATBUFFERS_ASSERT(a <= b);
  41. // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
  42. // The x, a, b will be promoted to int and subtracted without overflow.
  43. return static_cast<unsigned int>(x - a) <= static_cast<unsigned int>(b - a);
  44. }
  45. // Case-insensitive isalpha
  46. inline bool is_alpha(char c) {
  47. // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
  48. return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
  49. }
  50. // Check for uppercase alpha
  51. inline bool is_alpha_upper(char c) { return check_ascii_range(c, 'A', 'Z'); }
  52. // Check (case-insensitive) that `c` is equal to alpha.
  53. inline bool is_alpha_char(char c, char alpha) {
  54. FLATBUFFERS_ASSERT(is_alpha(alpha));
  55. // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
  56. return ((c & 0xDF) == (alpha & 0xDF));
  57. }
  58. // https://en.cppreference.com/w/cpp/string/byte/isxdigit
  59. // isdigit and isxdigit are the only standard narrow character classification
  60. // functions that are not affected by the currently installed C locale. although
  61. // some implementations (e.g. Microsoft in 1252 codepage) may classify
  62. // additional single-byte characters as digits.
  63. inline bool is_digit(char c) { return check_ascii_range(c, '0', '9'); }
  64. inline bool is_xdigit(char c) {
  65. // Replace by look-up table.
  66. return is_digit(c) || check_ascii_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
  67. }
  68. // Case-insensitive isalnum
  69. inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
  // Converts an ASCII lower-case letter to upper case; every other character,
  // including bytes >= 0x80, is returned unchanged.
  // This deliberately does not call ::toupper(): that function depends on the
  // currently installed C locale, while this helper belongs to the
  // locale-independent ASCII section.
  inline char CharToUpper(char c) {
    return (c >= 'a' && c <= 'z') ? static_cast<char>(c - ('a' - 'A')) : c;
  }
  // Converts an ASCII upper-case letter to lower case; every other character,
  // including bytes >= 0x80, is returned unchanged.
  // This deliberately does not call ::tolower(): that function depends on the
  // currently installed C locale, while this helper belongs to the
  // locale-independent ASCII section.
  inline char CharToLower(char c) {
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
  }
  76. // @end-locale-independent functions for ASCII character set
  77. #ifdef FLATBUFFERS_PREFER_PRINTF
  // Counts the characters needed to print the part of `t` left of the decimal
  // point: one for a minus sign, one for a lone leading "0" when |t| < 1, and
  // one per integer digit otherwise.
  template<typename T> size_t IntToDigitCount(T t) {
    size_t count = 0;
    // A negative value needs room for its sign.
    if (t < 0) { ++count; }
    // Values strictly between -1 and 1 print a single 0 left of the dot.
    if (-1 < t && t < 1) { ++count; }
    // Divide by ten until the magnitude drops below one, counting each step.
    // epsilon() is 0 for integer types, so the bounds reduce to -1 and 1.
    const T eps = std::numeric_limits<T>::epsilon();
    const auto lower_bound = -1 + eps;
    const auto upper_bound = 1 - eps;
    for (; t <= lower_bound || upper_bound <= t; t /= 10) { ++count; }
    return count;
  }
  // Returns the character count for printing `t`: the integer-part width from
  // IntToDigitCount(), plus `precision` fractional digits and one decimal
  // point when `precision` is non-zero.
  template<typename T> size_t NumToStringWidth(T t, int precision = 0) {
    const size_t integer_width = IntToDigitCount(t);
    if (!precision) { return integer_width; }
    // The extra 1 accounts for the dot of a floating point number.
    return integer_width + (precision + 1);
  }
  // Formats `t` with snprintf() and returns the result as a std::string.
  //   t         - value to print; its type must match the conversion in `fmt`.
  //   fmt       - printf format containing exactly one `.*` precision followed
  //               by the conversion for `t` (e.g. "%.*lld", "%0.*f", "%.*X").
  //   precision - value supplied for the `.*`: fractional digits for floating
  //               point, minimum digit count (zero padded) for integers.
  // The `.*` argument must be an int holding the precision. The output length
  // is measured by snprintf() itself, so the result is never truncated.
  template<typename T>
  std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0) {
    int digits = precision;
    // An integer printed with precision 0 and value 0 yields no characters at
    // all; printf's default integer precision is 1, so never go below that.
    if (std::numeric_limits<T>::is_integer && digits < 1) { digits = 1; }
    // First pass: a null buffer of size 0 only reports the needed length.
    const int length = snprintf(nullptr, 0, fmt, digits, t);
    if (length <= 0) { return std::string(); }
    std::string s(static_cast<size_t>(length), '\0');
    // Second pass: let snprintf write its terminator over the string's own
    // trailing null.
    snprintf(&s[0], s.size() + 1, fmt, digits, t);
    return s;
  }
  106. #endif // FLATBUFFERS_PREFER_PRINTF
  // Converts a numeric value to its textual form.
  // In contrast to std::stringstream, "char" values are meant to come out as a
  // string of digits (see the char specializations of this template), and
  // scientific notation is not used.
  // In the printf build the value is first converted to `long long`.
  template<typename T> std::string NumToString(T t) {
    // clang-format off
    #ifndef FLATBUFFERS_PREFER_PRINTF
      std::stringstream stream;
      stream << t;
      return stream.str();
    #else // FLATBUFFERS_PREFER_PRINTF
      const long long widened = static_cast<long long>(t);
      return NumToStringImplWrapper(widened, "%.*lld");
    #endif // FLATBUFFERS_PREFER_PRINTF
    // clang-format on
  }
  122. // Avoid char types used as character data.
  123. template<> inline std::string NumToString<signed char>(signed char t) {
  124. return NumToString(static_cast<int>(t));
  125. }
  126. template<> inline std::string NumToString<unsigned char>(unsigned char t) {
  127. return NumToString(static_cast<int>(t));
  128. }
  129. template<> inline std::string NumToString<char>(char t) {
  130. return NumToString(static_cast<int>(t));
  131. }
  // Converts a float/double to fixed-point text with at most `precision`
  // fractional digits, never using scientific notation.
  // Trailing fractional zeros are removed; a whole number keeps exactly one
  // zero after the dot ("1.000000" -> "1.0"). When the formatted text has no
  // decimal point (e.g. precision 0) it is returned untouched, so the zeros of
  // an integer such as "10" are never stripped.
  template<typename T> std::string FloatToString(T t, int precision) {
    // clang-format off
    #ifndef FLATBUFFERS_PREFER_PRINTF
      // to_string() prints different numbers of digits for floats depending on
      // platform and isn't available on Android, so we use stringstream
      std::stringstream ss;
      // Use std::fixed to suppress scientific notation.
      ss << std::fixed;
      // Default precision is 6, we want that to be higher for doubles.
      ss << std::setprecision(precision);
      ss << t;
      auto s = ss.str();
    #else // FLATBUFFERS_PREFER_PRINTF
      auto v = static_cast<double>(t);
      auto s = NumToStringImplWrapper(v, "%0.*f", precision);
    #endif // FLATBUFFERS_PREFER_PRINTF
    // clang-format on
    // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
    // Only zeros that follow a decimal point are insignificant.
    if (s.find('.') == std::string::npos) { return s; }
    const auto p = s.find_last_not_of('0');
    if (p != std::string::npos) {
      // Strip trailing zeroes. If it is a whole number, keep one zero.
      s.resize(p + (s[p] == '.' ? 2 : 1));
    }
    return s;
  }
  158. template<> inline std::string NumToString<double>(double t) {
  159. return FloatToString(t, 12);
  160. }
  161. template<> inline std::string NumToString<float>(float t) {
  162. return FloatToString(t, 6);
  163. }
  164. // Convert an integer value to a hexadecimal string.
  165. // The returned string length is always xdigits long, prefixed by 0 digits.
  166. // For example, IntToStringHex(0x23, 8) returns the string "00000023".
  167. inline std::string IntToStringHex(int i, int xdigits) {
  168. FLATBUFFERS_ASSERT(i >= 0);
  169. // clang-format off
  170. #ifndef FLATBUFFERS_PREFER_PRINTF
  171. std::stringstream ss;
  172. ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase
  173. << i;
  174. return ss.str();
  175. #else // FLATBUFFERS_PREFER_PRINTF
  176. return NumToStringImplWrapper(i, "%.*X", xdigits);
  177. #endif // FLATBUFFERS_PREFER_PRINTF
  178. // clang-format on
  179. }
  180. // clang-format off
  181. // Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}.
  182. #if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0)
  // Holder of the locale handle handed to the *_l string-conversion functions.
  // Only the static accessor Get() is public; the constructor, destructor and
  // the single static instance are private and defined outside this header.
  class ClassicLocale {
   private:
    #ifdef _MSC_VER
      typedef _locale_t locale_type;
    #else
      typedef locale_t locale_type; // POSIX.1-2008 locale_t type
    #endif

   public:
    // Returns the locale handle stored in the shared instance.
    static locale_type Get() { return instance_.locale_; }

   private:
    ClassicLocale();
    ~ClassicLocale();
    locale_type locale_;
    static ClassicLocale instance_;
  };
  196. #ifdef _MSC_VER
  197. #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get())
  198. #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get())
  199. #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get())
  200. #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get())
  201. #else
  202. #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get())
  203. #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get())
  204. #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get())
  205. #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get())
  206. #endif
  207. #else
  208. #define __strtod_impl(s, pe) strtod(s, pe)
  209. #define __strtof_impl(s, pe) static_cast<float>(strtod(s, pe))
  210. #ifdef _MSC_VER
  211. #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b)
  212. #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b)
  213. #else
  214. #define __strtoull_impl(s, pe, b) strtoull(s, pe, b)
  215. #define __strtoll_impl(s, pe, b) strtoll(s, pe, b)
  216. #endif
  217. #endif
  218. inline void strtoval_impl(int64_t *val, const char *str, char **endptr,
  219. int base) {
  220. *val = __strtoll_impl(str, endptr, base);
  221. }
  222. inline void strtoval_impl(uint64_t *val, const char *str, char **endptr,
  223. int base) {
  224. *val = __strtoull_impl(str, endptr, base);
  225. }
  226. inline void strtoval_impl(double *val, const char *str, char **endptr) {
  227. *val = __strtod_impl(str, endptr);
  228. }
  229. // UBSAN: double to float is safe if numeric_limits<float>::is_iec559 is true.
  230. __supress_ubsan__("float-cast-overflow")
  231. inline void strtoval_impl(float *val, const char *str, char **endptr) {
  232. *val = __strtof_impl(str, endptr);
  233. }
  234. #undef __strtoull_impl
  235. #undef __strtoll_impl
  236. #undef __strtod_impl
  237. #undef __strtof_impl
  238. // clang-format on
  239. // Adaptor for strtoull()/strtoll().
  240. // Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
  241. // while strtoll with base=0 interprets first leading zero as octal prefix.
  242. // In future, it is possible to add prefixed 0b0101.
  243. // 1) Checks errno code for overflow condition (out of range).
  244. // 2) If base <= 0, function try to detect base of number by prefix.
  245. //
  246. // Return value (like strtoull and strtoll, but reject partial result):
  247. // - If successful, an integer value corresponding to the str is returned.
  248. // - If full string conversion can't be performed, 0 is returned.
  249. // - If the converted value falls out of range of corresponding return type, a
  250. // range error occurs. In this case value MAX(T)/MIN(T) is returned.
  251. template<typename T>
  252. inline bool StringToIntegerImpl(T *val, const char *const str,
  253. const int base = 0,
  254. const bool check_errno = true) {
  255. // T is int64_t or uint64_T
  256. FLATBUFFERS_ASSERT(str);
  257. if (base <= 0) {
  258. auto s = str;
  259. while (*s && !is_digit(*s)) s++;
  260. if (s[0] == '0' && is_alpha_char(s[1], 'X'))
  261. return StringToIntegerImpl(val, str, 16, check_errno);
  262. // if a prefix not match, try base=10
  263. return StringToIntegerImpl(val, str, 10, check_errno);
  264. } else {
  265. if (check_errno) errno = 0; // clear thread-local errno
  266. auto endptr = str;
  267. strtoval_impl(val, str, const_cast<char **>(&endptr), base);
  268. if ((*endptr != '\0') || (endptr == str)) {
  269. *val = 0; // erase partial result
  270. return false; // invalid string
  271. }
  272. // errno is out-of-range, return MAX/MIN
  273. if (check_errno && errno) return false;
  274. return true;
  275. }
  276. }
  // Parses the whole of `str` as a floating point number into `*val`.
  // T must be either float or double. Returns true when at least one character
  // was consumed and the conversion reached the end of the string; otherwise
  // `*val` is reset to 0 and false is returned.
  template<typename T>
  inline bool StringToFloatImpl(T *val, const char *const str) {
    FLATBUFFERS_ASSERT(str && val);
    const char *stop = str;
    strtoval_impl(val, str, const_cast<char **>(&stop));
    if (stop == str || *stop != '\0') {
      *val = 0;  // erase partial result
      return false;
    }
    return true;
  }
  287. // Convert a string to an instance of T.
  288. // Return value (matched with StringToInteger64Impl and strtod):
  289. // - If successful, a numeric value corresponding to the str is returned.
  290. // - If full string conversion can't be performed, 0 is returned.
  291. // - If the converted value falls out of range of corresponding return type, a
  292. // range error occurs. In this case value MAX(T)/MIN(T) is returned.
  293. template<typename T> inline bool StringToNumber(const char *s, T *val) {
  294. // Assert on `unsigned long` and `signed long` on LP64.
  295. // If it is necessary, it could be solved with flatbuffers::enable_if<B,T>.
  296. static_assert(sizeof(T) < sizeof(int64_t), "unexpected type T");
  297. FLATBUFFERS_ASSERT(s && val);
  298. int64_t i64;
  299. // The errno check isn't needed, will return MAX/MIN on overflow.
  300. if (StringToIntegerImpl(&i64, s, 0, false)) {
  301. const int64_t max = (flatbuffers::numeric_limits<T>::max)();
  302. const int64_t min = flatbuffers::numeric_limits<T>::lowest();
  303. if (i64 > max) {
  304. *val = static_cast<T>(max);
  305. return false;
  306. }
  307. if (i64 < min) {
  308. // For unsigned types return max to distinguish from
  309. // "no conversion can be performed" when 0 is returned.
  310. *val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
  311. return false;
  312. }
  313. *val = static_cast<T>(i64);
  314. return true;
  315. }
  316. *val = 0;
  317. return false;
  318. }
  319. template<> inline bool StringToNumber<int64_t>(const char *str, int64_t *val) {
  320. return StringToIntegerImpl(val, str);
  321. }
  322. template<>
  323. inline bool StringToNumber<uint64_t>(const char *str, uint64_t *val) {
  324. if (!StringToIntegerImpl(val, str)) return false;
  325. // The strtoull accepts negative numbers:
  326. // If the minus sign was part of the input sequence, the numeric value
  327. // calculated from the sequence of digits is negated as if by unary minus
  328. // in the result type, which applies unsigned integer wraparound rules.
  329. // Fix this behaviour (except -0).
  330. if (*val) {
  331. auto s = str;
  332. while (*s && !is_digit(*s)) s++;
  333. s = (s > str) ? (s - 1) : s; // step back to one symbol
  334. if (*s == '-') {
  335. // For unsigned types return the max to distinguish from
  336. // "no conversion can be performed".
  337. *val = (flatbuffers::numeric_limits<uint64_t>::max)();
  338. return false;
  339. }
  340. }
  341. return true;
  342. }
  343. template<> inline bool StringToNumber(const char *s, float *val) {
  344. return StringToFloatImpl(val, s);
  345. }
  346. template<> inline bool StringToNumber(const char *s, double *val) {
  347. return StringToFloatImpl(val, s);
  348. }
  349. inline int64_t StringToInt(const char *s, int base = 10) {
  350. int64_t val;
  351. return StringToIntegerImpl(&val, s, base) ? val : 0;
  352. }
  353. inline uint64_t StringToUInt(const char *s, int base = 10) {
  354. uint64_t val;
  355. return StringToIntegerImpl(&val, s, base) ? val : 0;
  356. }
  357. typedef bool (*LoadFileFunction)(const char *filename, bool binary,
  358. std::string *dest);
  359. typedef bool (*FileExistsFunction)(const char *filename);
  360. LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);
  361. FileExistsFunction SetFileExistsFunction(
  362. FileExistsFunction file_exists_function);
  363. // Check if file "name" exists.
  364. bool FileExists(const char *name);
  365. // Check if "name" exists and it is also a directory.
  366. bool DirExists(const char *name);
  367. // Load file "name" into "buf" returning true if successful
  368. // false otherwise. If "binary" is false data is read
  369. // using ifstream's text mode, otherwise data is read with
  370. // no transcoding.
  371. bool LoadFile(const char *name, bool binary, std::string *buf);
  372. // Save data "buf" of length "len" bytes into a file
  373. // "name" returning true if successful, false otherwise.
  374. // If "binary" is false data is written using ofstream's
  375. // text mode, otherwise data is written with no
  376. // transcoding.
  377. bool SaveFile(const char *name, const char *buf, size_t len, bool binary);
  378. // Save data "buf" into file "name" returning true if
  379. // successful, false otherwise. If "binary" is false
  380. // data is written using ifstream's text mode, otherwise
  381. // data is written with no transcoding.
  382. inline bool SaveFile(const char *name, const std::string &buf, bool binary) {
  383. return SaveFile(name, buf.c_str(), buf.size(), binary);
  384. }
  385. // Functionality for minimalistic portable path handling.
  386. // The functions below behave correctly regardless of whether posix ('/') or
  387. // Windows ('/' or '\\') separators are used.
  388. // Any new separators inserted are always posix.
  389. FLATBUFFERS_CONSTEXPR char kPathSeparator = '/';
  390. // Returns the path with the extension, if any, removed.
  391. std::string StripExtension(const std::string &filepath);
  392. // Returns the extension, if any.
  393. std::string GetExtension(const std::string &filepath);
  394. // Return the last component of the path, after the last separator.
  395. std::string StripPath(const std::string &filepath);
  396. // Strip the last component of the path + separator.
  397. std::string StripFileName(const std::string &filepath);
  398. // Concatenates a path with a filename, regardless of whether the path
  399. // ends in a separator or not.
  400. std::string ConCatPathFileName(const std::string &path,
  401. const std::string &filename);
  402. // Replaces any '\\' separators with '/'
  403. std::string PosixPath(const char *path);
  404. std::string PosixPath(const std::string &path);
  405. // This function ensures a directory exists, by recursively
  406. // creating dirs for any parts of the path that don't exist yet.
  407. void EnsureDirExists(const std::string &filepath);
  408. // Obtains the absolute path from any other path.
  409. // Returns the input path if the absolute path couldn't be resolved.
  410. std::string AbsolutePath(const std::string &filepath);
  411. // Returns files relative to the --project_root path, prefixed with `//`.
  412. std::string RelativeToRootPath(const std::string &project,
  413. const std::string &filepath);
  414. // To and from UTF-8 unicode conversion functions
  415. // Convert a unicode code point into a UTF-8 representation by appending it
  416. // to a string. Returns the number of bytes generated.
  417. inline int ToUTF8(uint32_t ucc, std::string *out) {
  418. FLATBUFFERS_ASSERT(!(ucc & 0x80000000)); // Top bit can't be set.
  419. // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
  420. for (int i = 0; i < 6; i++) {
  421. // Max bits this encoding can represent.
  422. uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
  423. if (ucc < (1u << max_bits)) { // does it fit?
  424. // Remaining bits not encoded in the first byte, store 6 bits each
  425. uint32_t remain_bits = i * 6;
  426. // Store first byte:
  427. (*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) |
  428. (ucc >> remain_bits));
  429. // Store remaining bytes:
  430. for (int j = i - 1; j >= 0; j--) {
  431. (*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
  432. }
  433. return i + 1; // Return the number of bytes added.
  434. }
  435. }
  436. FLATBUFFERS_ASSERT(0); // Impossible to arrive here.
  437. return -1;
  438. }
  // Decodes one UTF-8 sequence starting at `*in` and returns its code point.
  // On success `*in` has been advanced past all bytes of the sequence.
  // Returns -1 on corrupt UTF-8 (bad lead byte, bad continuation byte,
  // surrogate code point, over-long encoding or value above U+10FFFF); in that
  // case the value left in `*in` must be ignored.
  inline int FromUTF8(const char **in) {
    const unsigned char lead = static_cast<unsigned char>(**in);
    // Count leading 1 bits of the first byte (at most 6 are examined).
    int len = 0;
    for (int bit = 0x80; bit >= 0x04 && (lead & bit) != 0; bit >>= 1) { ++len; }
    // Bit after leading 1's must be 0.
    if (((lead << len) & 0x80) != 0) { return -1; }
    // No leading 1 bit: a plain single-byte character.
    if (len == 0) { return *(*in)++; }
    // UTF-8 encoded values with a length are between 2 and 4 bytes.
    if (len < 2 || len > 4) { return -1; }
    // Grab initial bits of the code from the lead byte.
    int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
    for (int remaining = len - 1; remaining > 0; --remaining) {
      // Upper bits of every continuation byte must be 1 0.
      if ((**in & 0xC0) != 0x80) { return -1; }
      const int payload = *(*in)++ & 0x3F;  // 6 more bits of the code
      ucc = (ucc << 6) | payload;
    }
    // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
    // UTF-16 surrogate pairs).
    if (ucc >= 0xD800 && ucc <= 0xDFFF) { return -1; }
    // UTF-8 must represent code points in their shortest possible encoding:
    //   2 bytes: U+0080  .. U+07FF
    //   3 bytes: U+0800  .. U+FFFF
    //   4 bytes: U+10000 .. U+10FFFF
    int lowest = 0x10000;
    int highest = 0x10FFFF;
    if (len == 2) {
      lowest = 0x0080;
      highest = 0x07FF;
    } else if (len == 3) {
      lowest = 0x0800;
      highest = 0xFFFF;
    }
    if (ucc < lowest || ucc > highest) { return -1; }
    return ucc;
  }
  486. #ifndef FLATBUFFERS_PREFER_PRINTF
  // Re-flows `in` into lines, breaking between words. All existing whitespace
  // is collapsed to single spaces. A word is appended to the current line
  // while line + space + word + suffix stays shorter than `max_length`;
  // otherwise the line is closed with `wrapped_line_suffix` and a newline, and
  // the next line starts with `wrapped_line_prefix`. The first line gets no
  // prefix and the last line gets no suffix.
  inline std::string WordWrap(const std::string in, size_t max_length,
                              const std::string wrapped_line_prefix,
                              const std::string wrapped_line_suffix) {
    std::istringstream words(in);
    std::string result;
    std::string current_line;
    std::string token;
    // The first word (possibly empty) always opens the first line.
    words >> token;
    current_line = token;
    while (words >> token) {
      const size_t needed = current_line.length() + 1 + token.length() +
                            wrapped_line_suffix.length();
      if (needed >= max_length) {
        // Close the current line and start a new, prefixed one.
        result += current_line + wrapped_line_suffix + "\n";
        current_line = wrapped_line_prefix + token;
      } else {
        current_line += " " + token;
      }
    }
    result += current_line;
    return result;
  }
  510. #endif // !FLATBUFFERS_PREFER_PRINTF
  // Appends a double-quoted, escaped rendering of the `length` bytes at `s` to
  // `*_text`.
  //   s, length      - the bytes to escape.
  //   _text          - output string; text is appended, never cleared.
  //   allow_non_utf8 - when true, a byte that does not start a valid UTF-8
  //                    sequence is emitted as "\xHH"; when false such a byte
  //                    makes the function return false.
  //   natural_utf8   - when true, valid multi-byte UTF-8 sequences are copied
  //                    through unchanged instead of being written as \uXXXX
  //                    escapes.
  // Returns true on success. On a false return `*_text` already holds the
  // opening quote and whatever was escaped before the offending byte.
  // NOTE(review): FromUTF8() is given only a pointer, not the remaining length,
  // so a multi-byte sequence cut off at the very end of a buffer that is not
  // NUL-terminated looks like it could be read past `length` - confirm that
  // callers always pass terminated data.
  511. inline bool EscapeString(const char *s, size_t length, std::string *_text,
  512. bool allow_non_utf8, bool natural_utf8) {
  513. std::string &text = *_text;
  514. text += "\"";
  515. for (uoffset_t i = 0; i < length; i++) {
  516. char c = s[i];
  517. switch (c) {
  // Characters that have a dedicated short escape.
  518. case '\n': text += "\\n"; break;
  519. case '\t': text += "\\t"; break;
  520. case '\r': text += "\\r"; break;
  521. case '\b': text += "\\b"; break;
  522. case '\f': text += "\\f"; break;
  523. case '\"': text += "\\\""; break;
  524. case '\\': text += "\\\\"; break;
  525. default:
  // Printable ASCII (' ' through '~') is copied verbatim.
  526. if (c >= ' ' && c <= '~') {
  527. text += c;
  528. } else {
  529. // Not printable ASCII data. Let's see if it's valid UTF-8 first:
  530. const char *utf8 = s + i;
  531. int ucc = FromUTF8(&utf8);
  532. if (ucc < 0) {
  533. if (allow_non_utf8) {
  // Emit just this one byte as a two-digit hex escape; the loop then
  // continues with the following byte.
  534. text += "\\x";
  535. text += IntToStringHex(static_cast<uint8_t>(c), 2);
  536. } else {
  537. // There are two cases here:
  538. //
  539. // 1) We reached here by parsing an IDL file. In that case,
  540. // we previously checked for non-UTF-8, so we shouldn't reach
  541. // here.
  542. //
  543. // 2) We reached here by someone calling GenerateText()
  544. // on a previously-serialized flatbuffer. The data might have
  545. // non-UTF-8 Strings, or might be corrupt.
  546. //
  547. // In both cases, we have to give up and inform the caller
  548. // they have no JSON.
  549. return false;
  550. }
  551. } else {
  552. if (natural_utf8) {
  553. // utf8 points to past all utf-8 bytes parsed
  554. text.append(s + i, static_cast<size_t>(utf8 - s - i));
  555. } else if (ucc <= 0xFFFF) {
  556. // Parses as Unicode within JSON's \uXXXX range, so use that.
  557. text += "\\u";
  558. text += IntToStringHex(ucc, 4);
  559. } else if (ucc <= 0x10FFFF) {
  560. // Encode Unicode SMP values to a surrogate pair using two \u
  561. // escapes.
  562. uint32_t base = ucc - 0x10000;
  563. auto high_surrogate = (base >> 10) + 0xD800;
  564. auto low_surrogate = (base & 0x03FF) + 0xDC00;
  565. text += "\\u";
  566. text += IntToStringHex(high_surrogate, 4);
  567. text += "\\u";
  568. text += IntToStringHex(low_surrogate, 4);
  569. }
  570. // Skip past characters recognized.
  // `i` is set to the last byte of the sequence; the loop's i++ then moves on
  // to the first byte after it.
  571. i = static_cast<uoffset_t>(utf8 - s - 1);
  572. }
  573. }
  574. break;
  575. }
  576. }
  577. text += "\"";
  578. return true;
  579. }
  580. inline std::string BufferToHexText(const void *buffer, size_t buffer_size,
  581. size_t max_length,
  582. const std::string &wrapped_line_prefix,
  583. const std::string &wrapped_line_suffix) {
  584. std::string text = wrapped_line_prefix;
  585. size_t start_offset = 0;
  586. const char *s = reinterpret_cast<const char *>(buffer);
  587. for (size_t i = 0; s && i < buffer_size; i++) {
  588. // Last iteration or do we have more?
  589. bool have_more = i + 1 < buffer_size;
  590. text += "0x";
  591. text += IntToStringHex(static_cast<uint8_t>(s[i]), 2);
  592. if (have_more) { text += ','; }
  593. // If we have more to process and we reached max_length
  594. if (have_more &&
  595. text.size() + wrapped_line_suffix.size() >= start_offset + max_length) {
  596. text += wrapped_line_suffix;
  597. text += '\n';
  598. start_offset = text.size();
  599. text += wrapped_line_prefix;
  600. }
  601. }
  602. text += wrapped_line_suffix;
  603. return text;
  604. }
  605. // Remove paired quotes in a string: "text"|'text' -> text.
  606. std::string RemoveStringQuotes(const std::string &s);
  607. // Change the global C-locale to the locale with name <locale_name>.
  608. // Returns an actual locale name in <_value>, useful if locale_name is "" or
  609. // null.
  610. bool SetGlobalTestLocale(const char *locale_name,
  611. std::string *_value = nullptr);
  612. // Read (or test) a value of environment variable.
  613. bool ReadEnvironmentVariable(const char *var_name,
  614. std::string *_value = nullptr);
  615. // MSVC specific: Send all assert reports to STDOUT to prevent CI hangs.
  616. void SetupDefaultCRTReportMode();
  617. } // namespace flatbuffers
  618. #endif // FLATBUFFERS_UTIL_H_