encodings.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. // Tencent is pleased to support the open source community by making RapidJSON available.
  2. //
  3. // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4. //
  5. // Licensed under the MIT License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://opensource.org/licenses/MIT
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef RAPIDJSON_ENCODINGS_H_
  15. #define RAPIDJSON_ENCODINGS_H_
  16. #include "rapidjson.h"
  17. #ifdef _MSC_VER
  18. RAPIDJSON_DIAG_PUSH
  19. RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
  20. RAPIDJSON_DIAG_OFF(4702) // unreachable code
  21. #elif defined(__GNUC__)
  22. RAPIDJSON_DIAG_PUSH
  23. RAPIDJSON_DIAG_OFF(effc++)
  24. RAPIDJSON_DIAG_OFF(overflow)
  25. #endif
  26. RAPIDJSON_NAMESPACE_BEGIN
  27. ///////////////////////////////////////////////////////////////////////////////
  28. // Encoding
  29. /*! \class rapidjson::Encoding
  30. \brief Concept for encoding of Unicode characters.
  31. \code
  32. concept Encoding {
  33. typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition.
  34. enum { supportUnicode = 1 }; // or 0 if not supporting unicode
  35. //! \brief Encode a Unicode codepoint to an output stream.
  36. //! \param os Output stream.
  37. //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
  38. template<typename OutputStream>
  39. static void Encode(OutputStream& os, unsigned codepoint);
  40. //! \brief Decode a Unicode codepoint from an input stream.
  41. //! \param is Input stream.
  42. //! \param codepoint Output of the unicode codepoint.
  43. //! \return true if a valid codepoint can be decoded from the stream.
  44. template <typename InputStream>
  45. static bool Decode(InputStream& is, unsigned* codepoint);
  46. //! \brief Validate one Unicode codepoint from an encoded stream.
  47. //! \param is Input stream to obtain codepoint.
  48. //! \param os Output for copying one codepoint.
  49. //! \return true if it is valid.
  50. //! \note This function only validates and copies the codepoint; it does not actually decode it.
  51. template <typename InputStream, typename OutputStream>
  52. static bool Validate(InputStream& is, OutputStream& os);
  53. // The following functions deal with byte streams.
  54. //! Take a character from input byte stream, skipping the BOM if it exists.
  55. template <typename InputByteStream>
  56. static Ch TakeBOM(InputByteStream& is);
  57. //! Take a character from input byte stream.
  58. template <typename InputByteStream>
  59. static Ch Take(InputByteStream& is);
  60. //! Put BOM to output byte stream.
  61. template <typename OutputByteStream>
  62. static void PutBOM(OutputByteStream& os);
  63. //! Put a character to output byte stream.
  64. template <typename OutputByteStream>
  65. static void Put(OutputByteStream& os, Ch c);
  66. };
  67. \endcode
  68. */
  69. ///////////////////////////////////////////////////////////////////////////////
  70. // UTF8
  71. //! UTF-8 encoding.
  72. /*! http://en.wikipedia.org/wiki/UTF-8
  73. http://tools.ietf.org/html/rfc3629
  74. \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
  75. \note implements Encoding concept
  76. */
  template<typename CharType = char>
  struct UTF8 {
      typedef CharType Ch;

      enum { supportUnicode = 1 };

      //! Encode a Unicode codepoint as one to four UTF-8 code units.
      /*! \param os Output stream; every code unit is written with os.Put().
          \param codepoint Codepoint to encode. Values above 0x10FFFF trigger RAPIDJSON_ASSERT.
      */
      template<typename OutputStream>
      static void Encode(OutputStream& os, unsigned codepoint) {
          if (codepoint <= 0x7F)
              os.Put(static_cast<Ch>(codepoint & 0xFF));                   // 0xxxxxxx
          else if (codepoint <= 0x7FF) {
              os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));   // 110xxxxx
              os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));          // 10xxxxxx
          }
          else if (codepoint <= 0xFFFF) {
              os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));  // 1110xxxx
              os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
              os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
          }
          else {
              RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
              os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));  // 11110xxx
              os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
              os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
              os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
          }
      }

      //! Decode one codepoint from a UTF-8 stream.
      /*! \param is Input stream; code units are read with is.Take().
          \param codepoint Receives the decoded value (only meaningful when true is returned).
          \return true if the sequence is well-formed.
          \note When the lead byte announces a multi-byte sequence, all of its
              continuation bytes are consumed even if one of them is invalid. When the
              lead byte itself is invalid, only that byte is consumed.
      */
      template <typename InputStream>
      static bool Decode(InputStream& is, unsigned* codepoint) {
  // The helper macros carry the RAPIDJSON_ prefix so that defining and undefining them
  // here cannot clobber a macro of the same name belonging to the including code.
  #define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
  #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
  #define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
          Ch c = is.Take();
          if (!(c & 0x80)) {
              // Single-byte (ASCII) fast path.
              *codepoint = static_cast<unsigned char>(c);
              return true;
          }

          const unsigned char type = GetRange(static_cast<unsigned char>(c));
          // Strip the length marker bits from the lead byte; for classes 10 and 11
          // (0xE0 and 0xF0) the shift yields 0, and those bytes carry no payload bits.
          *codepoint = (0xFF >> type) & static_cast<unsigned char>(c);
          bool result = true;
          switch (type) {
          case 2:  RAPIDJSON_TAIL(); return result;                                                            // C2..DF
          case 3:  RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;                                          // E1..EC, EE, EF
          case 4:  RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;                   // ED: next must be 80..9F
          case 5:  RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; // F4: next must be 80..8F
          case 6:  RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;                        // F1..F3
          case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;                   // E0: next must be A0..BF
          case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result; // F0: next must be 90..BF
          default: return false;                                                                               // not a valid lead byte
          }
  #undef RAPIDJSON_COPY
  #undef RAPIDJSON_TRANS
  #undef RAPIDJSON_TAIL
      }

      //! Copy one encoded codepoint from \c is to \c os while checking that it is well-formed.
      /*! \return true if the copied sequence is well-formed UTF-8.
          \note Every byte that is read is also written to \c os, including invalid ones.
      */
      template <typename InputStream, typename OutputStream>
      static bool Validate(InputStream& is, OutputStream& os) {
  // See Decode() for why these helper macros are RAPIDJSON_-prefixed.
  #define RAPIDJSON_COPY() os.Put(c = is.Take())
  #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
  #define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
          Ch c;
          RAPIDJSON_COPY();
          if (!(c & 0x80))
              return true;

          bool result = true;
          switch (GetRange(static_cast<unsigned char>(c))) {
          case 2:  RAPIDJSON_TAIL(); return result;
          case 3:  RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
          case 4:  RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
          case 5:  RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
          case 6:  RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
          case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
          case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
          default: return false;
          }
  #undef RAPIDJSON_COPY
  #undef RAPIDJSON_TRANS
  #undef RAPIDJSON_TAIL
      }

      //! Classify a byte for the decoding state machine used by Decode() and Validate().
      /*! Continuation bytes map to single-bit classes (0x10 for 80..8F, 0x40 for 90..9F,
          0x20 for A0..BF) so that one AND with a mask tests membership in several ranges.
          Lead bytes map to the small integers switched on by Decode() and Validate();
          class 8 marks bytes that can never start a sequence.
      */
      static unsigned char GetRange(unsigned char c) {
          // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
          // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
          static const unsigned char type[] = {
              0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
              0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
              0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
              0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
              0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
              8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
              10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
          };
          return type[c];
      }

      //! Take the first character of a byte stream, skipping a leading UTF-8 BOM (EF BB BF).
      /*! \return The first character after the BOM, or the first character if there is no BOM.
          \note Bytes cannot be pushed back: if the stream starts with 0xEF but the
              following bytes do not complete a BOM, the bytes that did match are dropped
              and the first non-matching byte is returned.
      */
      template <typename InputByteStream>
      static CharType TakeBOM(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          Ch c = Take(is);
          if (static_cast<unsigned char>(c) != 0xEFu) return c;
          c = is.Take();
          if (static_cast<unsigned char>(c) != 0xBBu) return c;
          c = is.Take();
          if (static_cast<unsigned char>(c) != 0xBFu) return c;
          c = is.Take();
          return c;
      }

      //! Take one code unit (one byte) from a byte stream.
      template <typename InputByteStream>
      static Ch Take(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          return is.Take();
      }

      //! Write the UTF-8 BOM (EF BB BF) to a byte stream.
      template <typename OutputByteStream>
      static void PutBOM(OutputByteStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          // Convert to the stream's own character type explicitly, as Put() below does.
          os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
      }

      //! Write one code unit (one byte) to a byte stream.
      template <typename OutputByteStream>
      static void Put(OutputByteStream& os, Ch c) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(c));
      }
  };
  198. ///////////////////////////////////////////////////////////////////////////////
  199. // UTF16
  200. //! UTF-16 encoding.
  201. /*! http://en.wikipedia.org/wiki/UTF-16
  202. http://tools.ietf.org/html/rfc2781
  203. \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
  204. \note implements Encoding concept
  205. \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
  206. For streaming, use UTF16LE and UTF16BE, which handle endianness.
  207. */
  template<typename CharType = wchar_t>
  struct UTF16 {
      typedef CharType Ch;
      RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);

      enum { supportUnicode = 1 };

      //! Encode a Unicode codepoint as one code unit or as a surrogate pair.
      /*! \param os Output stream whose character type is at least 16 bits wide.
          \param codepoint Codepoint to encode. Surrogate values (0xD800..0xDFFF) and
              values above 0x10FFFF trigger RAPIDJSON_ASSERT.
      */
      template<typename OutputStream>
      static void Encode(OutputStream& os, unsigned codepoint) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
          if (codepoint <= 0xFFFF) {
              RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
              os.Put(static_cast<typename OutputStream::Ch>(codepoint));
          }
          else {
              RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
              unsigned v = codepoint - 0x10000;
              // Both halves are converted to the stream's character type explicitly, so
              // the call resolves the same way for the high and the low surrogate.
              os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
              os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
          }
      }

      //! Decode one codepoint from a UTF-16 stream.
      /*! \param is Input stream whose character type is at least 16 bits wide.
          \param codepoint Receives the decoded value (only meaningful when true is returned).
          \return false for a lone low surrogate, or for a high surrogate that is not
              followed by a low surrogate (the following unit is consumed in that case).
      */
      template <typename InputStream>
      static bool Decode(InputStream& is, unsigned* codepoint) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
          Ch c = is.Take();
          if (c < 0xD800 || c > 0xDFFF) {
              *codepoint = c;
              return true;
          }
          else if (c <= 0xDBFF) {
              // High surrogate: combine its 10 payload bits with those of the next unit.
              *codepoint = (c & 0x3FF) << 10;
              c = is.Take();
              *codepoint |= (c & 0x3FF);
              *codepoint += 0x10000;
              return c >= 0xDC00 && c <= 0xDFFF;
          }
          return false;
      }

      //! Copy one encoded codepoint from \c is to \c os while checking that it is well-formed.
      /*! \return true if the copied unit(s) form a valid codepoint.
          \note Every unit that is read is also written to \c os.
      */
      template <typename InputStream, typename OutputStream>
      static bool Validate(InputStream& is, OutputStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
          Ch c;
          os.Put(c = is.Take());
          if (c < 0xD800 || c > 0xDFFF)
              return true;
          else if (c <= 0xDBFF) {
              os.Put(c = is.Take());
              return c >= 0xDC00 && c <= 0xDFFF;
          }
          return false;
      }
  };

  //! UTF-16 little endian encoding.
  template<typename CharType = wchar_t>
  struct UTF16LE : UTF16<CharType> {
      //! Take the first code unit of a byte stream, skipping a leading BOM (bytes FF FE).
      template <typename InputByteStream>
      static CharType TakeBOM(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          CharType c = Take(is);
          return static_cast<unsigned short>(c) == 0xFEFFu ? Take(is) : c;
      }

      //! Assemble one 16-bit code unit from two bytes, least significant byte first.
      template <typename InputByteStream>
      static CharType Take(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          unsigned c = static_cast<unsigned char>(is.Take());
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
          return static_cast<CharType>(c);
      }

      //! Write the little-endian BOM (bytes FF FE) to a byte stream.
      template <typename OutputByteStream>
      static void PutBOM(OutputByteStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
      }

      //! Write one 16-bit code unit as two bytes, least significant byte first.
      template <typename OutputByteStream>
      static void Put(OutputByteStream& os, CharType c) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
      }
  };

  //! UTF-16 big endian encoding.
  template<typename CharType = wchar_t>
  struct UTF16BE : UTF16<CharType> {
      //! Take the first code unit of a byte stream, skipping a leading BOM (bytes FE FF).
      template <typename InputByteStream>
      static CharType TakeBOM(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          CharType c = Take(is);
          return static_cast<unsigned short>(c) == 0xFEFFu ? Take(is) : c;
      }

      //! Assemble one 16-bit code unit from two bytes, most significant byte first.
      template <typename InputByteStream>
      static CharType Take(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          unsigned c = static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
          c |= static_cast<unsigned char>(is.Take());
          return static_cast<CharType>(c);
      }

      //! Write the big-endian BOM (bytes FE FF) to a byte stream.
      template <typename OutputByteStream>
      static void PutBOM(OutputByteStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
      }

      //! Write one 16-bit code unit as two bytes, most significant byte first.
      template <typename OutputByteStream>
      static void Put(OutputByteStream& os, CharType c) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
      }
  };
  315. ///////////////////////////////////////////////////////////////////////////////
  316. // UTF32
  317. //! UTF-32 encoding.
  318. /*! http://en.wikipedia.org/wiki/UTF-32
  319. \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
  320. \note implements Encoding concept
  321. \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
  322. For streaming, use UTF32LE and UTF32BE, which handle endianness.
  323. */
  template<typename CharType = unsigned>
  struct UTF32 {
      typedef CharType Ch;
      RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);

      enum { supportUnicode = 1 };

      //! Write a codepoint to \c os as a single code unit.
      /*! \param codepoint Codepoint to encode. Values above 0x10FFFF trigger RAPIDJSON_ASSERT. */
      template<typename OutputStream>
      static void Encode(OutputStream& os, unsigned codepoint) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
          RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
          os.Put(codepoint);
      }

      //! Read one code unit from \c is and store it in \c codepoint.
      /*! \return true if the unit lies in 0..0x10FFFF.
          \note The range check is done on the unsigned value and additionally requires
              that the unit survives the round trip through \c unsigned, so negative
              units (signed Ch) and units wider than 32 bits are rejected.
      */
      template <typename InputStream>
      static bool Decode(InputStream& is, unsigned* codepoint) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
          Ch c = is.Take();
          const unsigned value = static_cast<unsigned>(c);
          *codepoint = value;
          return value <= 0x10FFFF && static_cast<Ch>(value) == c;
      }

      //! Copy one code unit from \c is to \c os and report whether it is a valid codepoint.
      /*! \return true if the unit lies in 0..0x10FFFF (same check as Decode()). */
      template <typename InputStream, typename OutputStream>
      static bool Validate(InputStream& is, OutputStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
          Ch c;
          os.Put(c = is.Take());
          const unsigned value = static_cast<unsigned>(c);
          return value <= 0x10FFFF && static_cast<Ch>(value) == c;
      }
  };

  //! UTF-32 little endian encoding.
  template<typename CharType = unsigned>
  struct UTF32LE : UTF32<CharType> {
      //! Take the first code unit of a byte stream, skipping a leading BOM (bytes FF FE 00 00).
      template <typename InputByteStream>
      static CharType TakeBOM(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          CharType c = Take(is);
          return static_cast<unsigned>(c) == 0x0000FEFFu ? Take(is) : c;
      }

      //! Assemble one 32-bit code unit from four bytes, least significant byte first.
      template <typename InputByteStream>
      static CharType Take(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          // Each byte is widened to unsigned before shifting: shifting the promoted int
          // by 24 would overflow into the sign bit and sign-extend into a wider CharType.
          unsigned c = static_cast<unsigned char>(is.Take());
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
          return static_cast<CharType>(c);
      }

      //! Write the little-endian BOM (bytes FF FE 00 00) to a byte stream.
      template <typename OutputByteStream>
      static void PutBOM(OutputByteStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
          os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
      }

      //! Write one 32-bit code unit as four bytes, least significant byte first.
      template <typename OutputByteStream>
      static void Put(OutputByteStream& os, CharType c) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          const unsigned v = static_cast<unsigned>(c);
          os.Put(static_cast<typename OutputByteStream::Ch>(v & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>((v >> 8) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>((v >> 16) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>((v >> 24) & 0xFFu));
      }
  };

  //! UTF-32 big endian encoding.
  template<typename CharType = unsigned>
  struct UTF32BE : UTF32<CharType> {
      //! Take the first code unit of a byte stream, skipping a leading BOM (bytes 00 00 FE FF).
      template <typename InputByteStream>
      static CharType TakeBOM(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          CharType c = Take(is);
          return static_cast<unsigned>(c) == 0x0000FEFFu ? Take(is) : c;
      }

      //! Assemble one 32-bit code unit from four bytes, most significant byte first.
      template <typename InputByteStream>
      static CharType Take(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          // Each byte is widened to unsigned before shifting: shifting the promoted int
          // by 24 would overflow into the sign bit and sign-extend into a wider CharType.
          unsigned c = static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
          c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take()));
          return static_cast<CharType>(c);
      }

      //! Write the big-endian BOM (bytes 00 00 FE FF) to a byte stream.
      template <typename OutputByteStream>
      static void PutBOM(OutputByteStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
          os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
          os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
      }

      //! Write one 32-bit code unit as four bytes, most significant byte first.
      template <typename OutputByteStream>
      static void Put(OutputByteStream& os, CharType c) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          const unsigned v = static_cast<unsigned>(c);
          os.Put(static_cast<typename OutputByteStream::Ch>((v >> 24) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>((v >> 16) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>((v >> 8) & 0xFFu));
          os.Put(static_cast<typename OutputByteStream::Ch>(v & 0xFFu));
      }
  };
  414. ///////////////////////////////////////////////////////////////////////////////
  415. // ASCII
  416. //! ASCII encoding.
  417. /*! http://en.wikipedia.org/wiki/ASCII
  418. \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
  419. \note implements Encoding concept
  420. */
  template<typename CharType = char>
  struct ASCII {
      typedef CharType Ch;

      enum { supportUnicode = 0 };

      //! Write a 7-bit codepoint to \c os; anything above 0x7F trips RAPIDJSON_ASSERT.
      template<typename OutputStream>
      static void Encode(OutputStream& os, unsigned codepoint) {
          RAPIDJSON_ASSERT(codepoint <= 0x7F);
          os.Put(static_cast<Ch>(codepoint & 0xFF));
      }

      //! Read one character from \c is into \c codepoint.
      /*! \return true when the character (taken as an unsigned byte) is at most 0x7F. */
      template <typename InputStream>
      static bool Decode(InputStream& is, unsigned* codepoint) {
          const unsigned char byte = static_cast<unsigned char>(is.Take());
          *codepoint = byte;
          return !(byte > 0x7F);
      }

      //! Copy one character from \c is to \c os.
      /*! \return true when the copied character (taken as an unsigned byte) is at most 0x7F. */
      template <typename InputStream, typename OutputStream>
      static bool Validate(InputStream& is, OutputStream& os) {
          unsigned char byte = is.Take();
          os.Put(byte);
          return !(byte > 0x7F);
      }

      //! ASCII has no BOM, so this simply takes the first character.
      template <typename InputByteStream>
      static CharType TakeBOM(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          return Take(is);
      }

      //! Take one character from a byte stream.
      template <typename InputByteStream>
      static Ch Take(InputByteStream& is) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
          return is.Take();
      }

      //! ASCII has no BOM; nothing is written.
      template <typename OutputByteStream>
      static void PutBOM(OutputByteStream& os) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          (void)os;
      }

      //! Write one character to a byte stream.
      template <typename OutputByteStream>
      static void Put(OutputByteStream& os, Ch c) {
          RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
          os.Put(static_cast<typename OutputByteStream::Ch>(c));
      }
  };
  464. ///////////////////////////////////////////////////////////////////////////////
  465. // AutoUTF
  466. //! Runtime-specified UTF encoding type of a stream.
  enum UTFType {
      //! UTF-8.
      kUTF8 = 0,
      //! UTF-16 little endian.
      kUTF16LE = 1,
      //! UTF-16 big endian.
      kUTF16BE = 2,
      //! UTF-32 little endian.
      kUTF32LE = 3,
      //! UTF-32 big endian.
      kUTF32BE = 4
  };
  474. //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
  475. /*! \note This class can be used with AutoUTFInputStream and AutoUTFOutputStream, which provide GetType().
  476. */
  477. template<typename CharType>
  478. struct AutoUTF {
  479. typedef CharType Ch;
  480. enum { supportUnicode = 1 };
  481. #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
  482. template<typename OutputStream>
  483. RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
  484. typedef void (*EncodeFunc)(OutputStream&, unsigned);
  485. static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
  486. (*f[os.GetType()])(os, codepoint);
  487. }
  488. template <typename InputStream>
  489. RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
  490. typedef bool (*DecodeFunc)(InputStream&, unsigned*);
  491. static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
  492. return (*f[is.GetType()])(is, codepoint);
  493. }
  494. template <typename InputStream, typename OutputStream>
  495. RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
  496. typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
  497. static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
  498. return (*f[is.GetType()])(is, os);
  499. }
  500. #undef RAPIDJSON_ENCODINGS_FUNC
  501. };
  502. ///////////////////////////////////////////////////////////////////////////////
  503. // Transcoder
  504. //! Encoding conversion.
  template<typename SourceEncoding, typename TargetEncoding>
  struct Transcoder {
      //! Decode one codepoint from \c is using SourceEncoding and re-encode it to \c os using TargetEncoding.
      /*! \return false, without writing anything, when SourceEncoding::Decode() fails. */
      template<typename InputStream, typename OutputStream>
      RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
          unsigned codepoint;
          const bool decoded = SourceEncoding::Decode(is, &codepoint);
          if (decoded)
              TargetEncoding::Encode(os, codepoint);
          return decoded;
      }

      //! Validate one codepoint from an encoded stream.
      /*! The encodings differ, so validation is done by transcoding. */
      template<typename InputStream, typename OutputStream>
      RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
          return Transcode(is, os);
      }
  };

  //! Specialization of Transcoder used when source and target encoding are the same type.
  template<typename Encoding>
  struct Transcoder<Encoding, Encoding> {
      //! Copy a single code unit from \c is to \c os and return true.
      /*! \note Unlike the primary template, this copies one code unit and performs no decoding. */
      template<typename InputStream, typename OutputStream>
      RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
          os.Put(is.Take());
          return true;
      }

      //! Validate one codepoint by delegating to Encoding::Validate().
      template<typename InputStream, typename OutputStream>
      RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
          return Encoding::Validate(is, os);
      }
  };
  535. RAPIDJSON_NAMESPACE_END
  536. #if defined(__GNUC__) || defined(_MSV_VER)
  537. RAPIDJSON_DIAG_POP
  538. #endif
  539. #endif // RAPIDJSON_ENCODINGS_H_