encodingstest.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. // Tencent is pleased to support the open source community by making RapidJSON available.
  2. //
  3. // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4. //
  5. // Licensed under the MIT License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://opensource.org/licenses/MIT
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "unittest.h"
  15. #include "rapidjson/filereadstream.h"
  16. #include "rapidjson/filewritestream.h"
  17. #include "rapidjson/encodedstream.h"
  18. #include "rapidjson/stringbuffer.h"
  19. using namespace rapidjson;
  20. // Verification of encoders/decoders with Hoehrmann's UTF8 decoder
  21. // http://www.unicode.org/Public/UNIDATA/Blocks.txt
  22. static const unsigned kCodepointRanges[] = {
  23. 0x0000, 0x007F, // Basic Latin
  24. 0x0080, 0x00FF, // Latin-1 Supplement
  25. 0x0100, 0x017F, // Latin Extended-A
  26. 0x0180, 0x024F, // Latin Extended-B
  27. 0x0250, 0x02AF, // IPA Extensions
  28. 0x02B0, 0x02FF, // Spacing Modifier Letters
  29. 0x0300, 0x036F, // Combining Diacritical Marks
  30. 0x0370, 0x03FF, // Greek and Coptic
  31. 0x0400, 0x04FF, // Cyrillic
  32. 0x0500, 0x052F, // Cyrillic Supplement
  33. 0x0530, 0x058F, // Armenian
  34. 0x0590, 0x05FF, // Hebrew
  35. 0x0600, 0x06FF, // Arabic
  36. 0x0700, 0x074F, // Syriac
  37. 0x0750, 0x077F, // Arabic Supplement
  38. 0x0780, 0x07BF, // Thaana
  39. 0x07C0, 0x07FF, // NKo
  40. 0x0800, 0x083F, // Samaritan
  41. 0x0840, 0x085F, // Mandaic
  42. 0x0900, 0x097F, // Devanagari
  43. 0x0980, 0x09FF, // Bengali
  44. 0x0A00, 0x0A7F, // Gurmukhi
  45. 0x0A80, 0x0AFF, // Gujarati
  46. 0x0B00, 0x0B7F, // Oriya
  47. 0x0B80, 0x0BFF, // Tamil
  48. 0x0C00, 0x0C7F, // Telugu
  49. 0x0C80, 0x0CFF, // Kannada
  50. 0x0D00, 0x0D7F, // Malayalam
  51. 0x0D80, 0x0DFF, // Sinhala
  52. 0x0E00, 0x0E7F, // Thai
  53. 0x0E80, 0x0EFF, // Lao
  54. 0x0F00, 0x0FFF, // Tibetan
  55. 0x1000, 0x109F, // Myanmar
  56. 0x10A0, 0x10FF, // Georgian
  57. 0x1100, 0x11FF, // Hangul Jamo
  58. 0x1200, 0x137F, // Ethiopic
  59. 0x1380, 0x139F, // Ethiopic Supplement
  60. 0x13A0, 0x13FF, // Cherokee
  61. 0x1400, 0x167F, // Unified Canadian Aboriginal Syllabics
  62. 0x1680, 0x169F, // Ogham
  63. 0x16A0, 0x16FF, // Runic
  64. 0x1700, 0x171F, // Tagalog
  65. 0x1720, 0x173F, // Hanunoo
  66. 0x1740, 0x175F, // Buhid
  67. 0x1760, 0x177F, // Tagbanwa
  68. 0x1780, 0x17FF, // Khmer
  69. 0x1800, 0x18AF, // Mongolian
  70. 0x18B0, 0x18FF, // Unified Canadian Aboriginal Syllabics Extended
  71. 0x1900, 0x194F, // Limbu
  72. 0x1950, 0x197F, // Tai Le
  73. 0x1980, 0x19DF, // New Tai Lue
  74. 0x19E0, 0x19FF, // Khmer Symbols
  75. 0x1A00, 0x1A1F, // Buginese
  76. 0x1A20, 0x1AAF, // Tai Tham
  77. 0x1B00, 0x1B7F, // Balinese
  78. 0x1B80, 0x1BBF, // Sundanese
  79. 0x1BC0, 0x1BFF, // Batak
  80. 0x1C00, 0x1C4F, // Lepcha
  81. 0x1C50, 0x1C7F, // Ol Chiki
  82. 0x1CD0, 0x1CFF, // Vedic Extensions
  83. 0x1D00, 0x1D7F, // Phonetic Extensions
  84. 0x1D80, 0x1DBF, // Phonetic Extensions Supplement
  85. 0x1DC0, 0x1DFF, // Combining Diacritical Marks Supplement
  86. 0x1E00, 0x1EFF, // Latin Extended Additional
  87. 0x1F00, 0x1FFF, // Greek Extended
  88. 0x2000, 0x206F, // General Punctuation
  89. 0x2070, 0x209F, // Superscripts and Subscripts
  90. 0x20A0, 0x20CF, // Currency Symbols
  91. 0x20D0, 0x20FF, // Combining Diacritical Marks for Symbols
  92. 0x2100, 0x214F, // Letterlike Symbols
  93. 0x2150, 0x218F, // Number Forms
  94. 0x2190, 0x21FF, // Arrows
  95. 0x2200, 0x22FF, // Mathematical Operators
  96. 0x2300, 0x23FF, // Miscellaneous Technical
  97. 0x2400, 0x243F, // Control Pictures
  98. 0x2440, 0x245F, // Optical Character Recognition
  99. 0x2460, 0x24FF, // Enclosed Alphanumerics
  100. 0x2500, 0x257F, // Box Drawing
  101. 0x2580, 0x259F, // Block Elements
  102. 0x25A0, 0x25FF, // Geometric Shapes
  103. 0x2600, 0x26FF, // Miscellaneous Symbols
  104. 0x2700, 0x27BF, // Dingbats
  105. 0x27C0, 0x27EF, // Miscellaneous Mathematical Symbols-A
  106. 0x27F0, 0x27FF, // Supplemental Arrows-A
  107. 0x2800, 0x28FF, // Braille Patterns
  108. 0x2900, 0x297F, // Supplemental Arrows-B
  109. 0x2980, 0x29FF, // Miscellaneous Mathematical Symbols-B
  110. 0x2A00, 0x2AFF, // Supplemental Mathematical Operators
  111. 0x2B00, 0x2BFF, // Miscellaneous Symbols and Arrows
  112. 0x2C00, 0x2C5F, // Glagolitic
  113. 0x2C60, 0x2C7F, // Latin Extended-C
  114. 0x2C80, 0x2CFF, // Coptic
  115. 0x2D00, 0x2D2F, // Georgian Supplement
  116. 0x2D30, 0x2D7F, // Tifinagh
  117. 0x2D80, 0x2DDF, // Ethiopic Extended
  118. 0x2DE0, 0x2DFF, // Cyrillic Extended-A
  119. 0x2E00, 0x2E7F, // Supplemental Punctuation
  120. 0x2E80, 0x2EFF, // CJK Radicals Supplement
  121. 0x2F00, 0x2FDF, // Kangxi Radicals
  122. 0x2FF0, 0x2FFF, // Ideographic Description Characters
  123. 0x3000, 0x303F, // CJK Symbols and Punctuation
  124. 0x3040, 0x309F, // Hiragana
  125. 0x30A0, 0x30FF, // Katakana
  126. 0x3100, 0x312F, // Bopomofo
  127. 0x3130, 0x318F, // Hangul Compatibility Jamo
  128. 0x3190, 0x319F, // Kanbun
  129. 0x31A0, 0x31BF, // Bopomofo Extended
  130. 0x31C0, 0x31EF, // CJK Strokes
  131. 0x31F0, 0x31FF, // Katakana Phonetic Extensions
  132. 0x3200, 0x32FF, // Enclosed CJK Letters and Months
  133. 0x3300, 0x33FF, // CJK Compatibility
  134. 0x3400, 0x4DBF, // CJK Unified Ideographs Extension A
  135. 0x4DC0, 0x4DFF, // Yijing Hexagram Symbols
  136. 0x4E00, 0x9FFF, // CJK Unified Ideographs
  137. 0xA000, 0xA48F, // Yi Syllables
  138. 0xA490, 0xA4CF, // Yi Radicals
  139. 0xA4D0, 0xA4FF, // Lisu
  140. 0xA500, 0xA63F, // Vai
  141. 0xA640, 0xA69F, // Cyrillic Extended-B
  142. 0xA6A0, 0xA6FF, // Bamum
  143. 0xA700, 0xA71F, // Modifier Tone Letters
  144. 0xA720, 0xA7FF, // Latin Extended-D
  145. 0xA800, 0xA82F, // Syloti Nagri
  146. 0xA830, 0xA83F, // Common Indic Number Forms
  147. 0xA840, 0xA87F, // Phags-pa
  148. 0xA880, 0xA8DF, // Saurashtra
  149. 0xA8E0, 0xA8FF, // Devanagari Extended
  150. 0xA900, 0xA92F, // Kayah Li
  151. 0xA930, 0xA95F, // Rejang
  152. 0xA960, 0xA97F, // Hangul Jamo Extended-A
  153. 0xA980, 0xA9DF, // Javanese
  154. 0xAA00, 0xAA5F, // Cham
  155. 0xAA60, 0xAA7F, // Myanmar Extended-A
  156. 0xAA80, 0xAADF, // Tai Viet
  157. 0xAB00, 0xAB2F, // Ethiopic Extended-A
  158. 0xABC0, 0xABFF, // Meetei Mayek
  159. 0xAC00, 0xD7AF, // Hangul Syllables
  160. 0xD7B0, 0xD7FF, // Hangul Jamo Extended-B
  161. //0xD800, 0xDB7F, // High Surrogates
  162. //0xDB80, 0xDBFF, // High Private Use Surrogates
  163. //0xDC00, 0xDFFF, // Low Surrogates
  164. 0xE000, 0xF8FF, // Private Use Area
  165. 0xF900, 0xFAFF, // CJK Compatibility Ideographs
  166. 0xFB00, 0xFB4F, // Alphabetic Presentation Forms
  167. 0xFB50, 0xFDFF, // Arabic Presentation Forms-A
  168. 0xFE00, 0xFE0F, // Variation Selectors
  169. 0xFE10, 0xFE1F, // Vertical Forms
  170. 0xFE20, 0xFE2F, // Combining Half Marks
  171. 0xFE30, 0xFE4F, // CJK Compatibility Forms
  172. 0xFE50, 0xFE6F, // Small Form Variants
  173. 0xFE70, 0xFEFF, // Arabic Presentation Forms-B
  174. 0xFF00, 0xFFEF, // Halfwidth and Fullwidth Forms
  175. 0xFFF0, 0xFFFF, // Specials
  176. 0x10000, 0x1007F, // Linear B Syllabary
  177. 0x10080, 0x100FF, // Linear B Ideograms
  178. 0x10100, 0x1013F, // Aegean Numbers
  179. 0x10140, 0x1018F, // Ancient Greek Numbers
  180. 0x10190, 0x101CF, // Ancient Symbols
  181. 0x101D0, 0x101FF, // Phaistos Disc
  182. 0x10280, 0x1029F, // Lycian
  183. 0x102A0, 0x102DF, // Carian
  184. 0x10300, 0x1032F, // Old Italic
  185. 0x10330, 0x1034F, // Gothic
  186. 0x10380, 0x1039F, // Ugaritic
  187. 0x103A0, 0x103DF, // Old Persian
  188. 0x10400, 0x1044F, // Deseret
  189. 0x10450, 0x1047F, // Shavian
  190. 0x10480, 0x104AF, // Osmanya
  191. 0x10800, 0x1083F, // Cypriot Syllabary
  192. 0x10840, 0x1085F, // Imperial Aramaic
  193. 0x10900, 0x1091F, // Phoenician
  194. 0x10920, 0x1093F, // Lydian
  195. 0x10A00, 0x10A5F, // Kharoshthi
  196. 0x10A60, 0x10A7F, // Old South Arabian
  197. 0x10B00, 0x10B3F, // Avestan
  198. 0x10B40, 0x10B5F, // Inscriptional Parthian
  199. 0x10B60, 0x10B7F, // Inscriptional Pahlavi
  200. 0x10C00, 0x10C4F, // Old Turkic
  201. 0x10E60, 0x10E7F, // Rumi Numeral Symbols
  202. 0x11000, 0x1107F, // Brahmi
  203. 0x11080, 0x110CF, // Kaithi
  204. 0x12000, 0x123FF, // Cuneiform
  205. 0x12400, 0x1247F, // Cuneiform Numbers and Punctuation
  206. 0x13000, 0x1342F, // Egyptian Hieroglyphs
  207. 0x16800, 0x16A3F, // Bamum Supplement
  208. 0x1B000, 0x1B0FF, // Kana Supplement
  209. 0x1D000, 0x1D0FF, // Byzantine Musical Symbols
  210. 0x1D100, 0x1D1FF, // Musical Symbols
  211. 0x1D200, 0x1D24F, // Ancient Greek Musical Notation
  212. 0x1D300, 0x1D35F, // Tai Xuan Jing Symbols
  213. 0x1D360, 0x1D37F, // Counting Rod Numerals
  214. 0x1D400, 0x1D7FF, // Mathematical Alphanumeric Symbols
  215. 0x1F000, 0x1F02F, // Mahjong Tiles
  216. 0x1F030, 0x1F09F, // Domino Tiles
  217. 0x1F0A0, 0x1F0FF, // Playing Cards
  218. 0x1F100, 0x1F1FF, // Enclosed Alphanumeric Supplement
  219. 0x1F200, 0x1F2FF, // Enclosed Ideographic Supplement
  220. 0x1F300, 0x1F5FF, // Miscellaneous Symbols And Pictographs
  221. 0x1F600, 0x1F64F, // Emoticons
  222. 0x1F680, 0x1F6FF, // Transport And Map Symbols
  223. 0x1F700, 0x1F77F, // Alchemical Symbols
  224. 0x20000, 0x2A6DF, // CJK Unified Ideographs Extension B
  225. 0x2A700, 0x2B73F, // CJK Unified Ideographs Extension C
  226. 0x2B740, 0x2B81F, // CJK Unified Ideographs Extension D
  227. 0x2F800, 0x2FA1F, // CJK Compatibility Ideographs Supplement
  228. 0xE0000, 0xE007F, // Tags
  229. 0xE0100, 0xE01EF, // Variation Selectors Supplement
  230. 0xF0000, 0xFFFFF, // Supplementary Private Use Area-A
  231. 0x100000, 0x10FFFF, // Supplementary Private Use Area-B
  232. 0xFFFFFFFF
  233. };
  234. // Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
  235. // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
  236. #define UTF8_ACCEPT 0u
  237. static const unsigned char utf8d[] = {
  238. // The first part of the table maps bytes to character classes that
  239. // to reduce the size of the transition table and create bitmasks.
  240. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  241. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  242. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  243. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  244. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
  245. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  246. 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  247. 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
  248. // The second part is a transition table that maps a combination
  249. // of a state of the automaton and a character class to a state.
  250. 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  251. 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  252. 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  253. 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  254. 12,36,12,12,12,12,12,12,12,12,12,12,
  255. };
  256. static unsigned inline decode(unsigned* state, unsigned* codep, unsigned byte) {
  257. unsigned type = utf8d[byte];
  258. *codep = (*state != UTF8_ACCEPT) ?
  259. (byte & 0x3fu) | (*codep << 6) :
  260. (0xffu >> type) & (byte);
  261. *state = utf8d[256 + *state + type];
  262. return *state;
  263. }
  264. //static bool IsUTF8(unsigned char* s) {
  265. // unsigned codepoint, state = 0;
  266. //
  267. // while (*s)
  268. // decode(&state, &codepoint, *s++);
  269. //
  270. // return state == UTF8_ACCEPT;
  271. //}
  272. TEST(EncodingsTest, UTF8) {
  273. StringBuffer os, os2;
  274. for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
  275. for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
  276. os.Clear();
  277. UTF8<>::Encode(os, codepoint);
  278. const char* encodedStr = os.GetString();
  279. // Decode with Hoehrmann
  280. {
  281. unsigned decodedCodepoint = 0;
  282. unsigned state = 0;
  283. unsigned decodedCount = 0;
  284. for (const char* s = encodedStr; *s; ++s)
  285. if (!decode(&state, &decodedCodepoint, static_cast<unsigned char>(*s))) {
  286. EXPECT_EQ(codepoint, decodedCodepoint);
  287. decodedCount++;
  288. }
  289. if (*encodedStr) { // This decoder cannot handle U+0000
  290. EXPECT_EQ(1u, decodedCount); // Should only contain one code point
  291. }
  292. EXPECT_EQ(UTF8_ACCEPT, state);
  293. if (UTF8_ACCEPT != state)
  294. std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
  295. }
  296. // Decode
  297. {
  298. StringStream is(encodedStr);
  299. unsigned decodedCodepoint;
  300. bool result = UTF8<>::Decode(is, &decodedCodepoint);
  301. EXPECT_TRUE(result);
  302. EXPECT_EQ(codepoint, decodedCodepoint);
  303. if (!result || codepoint != decodedCodepoint)
  304. std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
  305. }
  306. // Validate
  307. {
  308. StringStream is(encodedStr);
  309. os2.Clear();
  310. bool result = UTF8<>::Validate(is, os2);
  311. EXPECT_TRUE(result);
  312. EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
  313. }
  314. }
  315. }
  316. }
  317. TEST(EncodingsTest, UTF16) {
  318. GenericStringBuffer<UTF16<> > os, os2;
  319. GenericStringBuffer<UTF8<> > utf8os;
  320. for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
  321. for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
  322. os.Clear();
  323. UTF16<>::Encode(os, codepoint);
  324. const UTF16<>::Ch* encodedStr = os.GetString();
  325. // Encode with Hoehrmann's code
  326. if (codepoint != 0) // cannot handle U+0000
  327. {
  328. // encode with UTF8<> first
  329. utf8os.Clear();
  330. UTF8<>::Encode(utf8os, codepoint);
  331. // transcode from UTF8 to UTF16 with Hoehrmann's code
  332. unsigned decodedCodepoint = 0;
  333. unsigned state = 0;
  334. UTF16<>::Ch buffer[3], *p = &buffer[0];
  335. for (const char* s = utf8os.GetString(); *s; ++s) {
  336. if (!decode(&state, &decodedCodepoint, static_cast<unsigned char>(*s)))
  337. break;
  338. }
  339. if (codepoint <= 0xFFFF)
  340. *p++ = static_cast<UTF16<>::Ch>(decodedCodepoint);
  341. else {
  342. // Encode code points above U+FFFF as surrogate pair.
  343. *p++ = static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10));
  344. *p++ = static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF));
  345. }
  346. *p++ = '\0';
  347. EXPECT_EQ(0, StrCmp(buffer, encodedStr));
  348. }
  349. // Decode
  350. {
  351. GenericStringStream<UTF16<> > is(encodedStr);
  352. unsigned decodedCodepoint;
  353. bool result = UTF16<>::Decode(is, &decodedCodepoint);
  354. EXPECT_TRUE(result);
  355. EXPECT_EQ(codepoint, decodedCodepoint);
  356. if (!result || codepoint != decodedCodepoint)
  357. std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
  358. }
  359. // Validate
  360. {
  361. GenericStringStream<UTF16<> > is(encodedStr);
  362. os2.Clear();
  363. bool result = UTF16<>::Validate(is, os2);
  364. EXPECT_TRUE(result);
  365. EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
  366. }
  367. }
  368. }
  369. }
  370. TEST(EncodingsTest, UTF32) {
  371. GenericStringBuffer<UTF32<> > os, os2;
  372. for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
  373. for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
  374. os.Clear();
  375. UTF32<>::Encode(os, codepoint);
  376. const UTF32<>::Ch* encodedStr = os.GetString();
  377. // Decode
  378. {
  379. GenericStringStream<UTF32<> > is(encodedStr);
  380. unsigned decodedCodepoint;
  381. bool result = UTF32<>::Decode(is, &decodedCodepoint);
  382. EXPECT_TRUE(result);
  383. EXPECT_EQ(codepoint, decodedCodepoint);
  384. if (!result || codepoint != decodedCodepoint)
  385. std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
  386. }
  387. // Validate
  388. {
  389. GenericStringStream<UTF32<> > is(encodedStr);
  390. os2.Clear();
  391. bool result = UTF32<>::Validate(is, os2);
  392. EXPECT_TRUE(result);
  393. EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
  394. }
  395. }
  396. }
  397. }
  398. TEST(EncodingsTest, ASCII) {
  399. StringBuffer os, os2;
  400. for (unsigned codepoint = 0; codepoint < 128; codepoint++) {
  401. os.Clear();
  402. ASCII<>::Encode(os, codepoint);
  403. const ASCII<>::Ch* encodedStr = os.GetString();
  404. {
  405. StringStream is(encodedStr);
  406. unsigned decodedCodepoint;
  407. bool result = ASCII<>::Decode(is, &decodedCodepoint);
  408. if (!result || codepoint != decodedCodepoint)
  409. std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
  410. }
  411. // Validate
  412. {
  413. StringStream is(encodedStr);
  414. os2.Clear();
  415. bool result = ASCII<>::Validate(is, os2);
  416. EXPECT_TRUE(result);
  417. EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
  418. }
  419. }
  420. }