//
// UTFString.h
//
// Library: Foundation
// Package: Text
// Module:  UTFString
//
// Definitions of strings for UTF encodings.
//
// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier:	BSL-1.0
//
  15. #ifndef Foundation_UTFString_INCLUDED
  16. #define Foundation_UTFString_INCLUDED
  17. #include "Poco/Foundation.h"
  18. #include "Poco/Types.h"
  19. #include <string>
  20. namespace Poco {
  21. struct UTF16CharTraits
  22. {
  23. typedef std::fpos<std::mbstate_t> u16streampos;
  24. typedef UInt16 char_type;
  25. typedef int int_type;
  26. typedef std::streamoff off_type;
  27. typedef u16streampos pos_type;
  28. typedef std::mbstate_t state_type;
  29. static void assign(char_type& c1, const char_type& c2)
  30. {
  31. c1 = c2;
  32. }
  33. static bool eq(char_type c1, char_type c2)
  34. {
  35. return c1 == c2;
  36. }
  37. static bool lt(char_type c1, char_type c2)
  38. {
  39. return c1 < c2;
  40. }
  41. static int compare(const char_type* s1, const char_type* s2, std::size_t n)
  42. {
  43. for (; n; --n, ++s1, ++s2)
  44. {
  45. if (lt(*s1, *s2))
  46. return -1;
  47. if (lt(*s2, *s1))
  48. return 1;
  49. }
  50. return 0;
  51. }
  52. static std::size_t length(const char_type* s)
  53. {
  54. std::size_t len = 0;
  55. for (; !eq(*s, char_type(0)); ++s)
  56. ++len;
  57. return len;
  58. }
  59. static const char_type* find(const char_type* s, std::size_t n, const char_type& a)
  60. {
  61. for (; n; --n)
  62. {
  63. if (eq(*s, a))
  64. return s;
  65. ++s;
  66. }
  67. return 0;
  68. }
  69. static char_type* move(char_type* s1, const char_type* s2, std::size_t n)
  70. {
  71. char_type* r = s1;
  72. if (s1 < s2)
  73. {
  74. for (; n; --n, ++s1, ++s2)
  75. assign(*s1, *s2);
  76. }
  77. else if (s2 < s1)
  78. {
  79. s1 += n;
  80. s2 += n;
  81. for (; n; --n)
  82. assign(*--s1, *--s2);
  83. }
  84. return r;
  85. }
  86. static char_type* copy(char_type* s1, const char_type* s2, std::size_t n)
  87. {
  88. poco_assert(s2 < s1 || s2 >= s1 + n);
  89. char_type* r = s1;
  90. for (; n; --n, ++s1, ++s2)
  91. assign(*s1, *s2);
  92. return r;
  93. }
  94. static char_type* assign(char_type* s, std::size_t n, char_type a)
  95. {
  96. char_type* r = s;
  97. for (; n; --n, ++s)
  98. assign(*s, a);
  99. return r;
  100. }
  101. static int_type not_eof(int_type c)
  102. {
  103. return eq_int_type(c, eof()) ? ~eof() : c;
  104. }
  105. static char_type to_char_type(int_type c)
  106. {
  107. return char_type(c);
  108. }
  109. static int_type to_int_type(char_type c)
  110. {
  111. return int_type(c);
  112. }
  113. static bool eq_int_type(int_type c1, int_type c2)
  114. {
  115. return c1 == c2;
  116. }
  117. static int_type eof()
  118. {
  119. return int_type(0xDFFF);
  120. }
  121. };
  122. struct UTF32CharTraits
  123. {
  124. typedef std::fpos<std::mbstate_t> u32streampos;
  125. typedef UInt32 char_type;
  126. typedef int int_type;
  127. typedef std::streamoff off_type;
  128. typedef u32streampos pos_type;
  129. typedef std::mbstate_t state_type;
  130. static void assign(char_type& c1, const char_type& c2)
  131. {
  132. c1 = c2;
  133. }
  134. static bool eq(char_type c1, char_type c2)
  135. {
  136. return c1 == c2;
  137. }
  138. static bool lt(char_type c1, char_type c2)
  139. {
  140. return c1 < c2;
  141. }
  142. static int compare(const char_type* s1, const char_type* s2, std::size_t n)
  143. {
  144. for (; n; --n, ++s1, ++s2)
  145. {
  146. if (lt(*s1, *s2))
  147. return -1;
  148. if (lt(*s2, *s1))
  149. return 1;
  150. }
  151. return 0;
  152. }
  153. static std::size_t length(const char_type* s)
  154. {
  155. std::size_t len = 0;
  156. for (; !eq(*s, char_type(0)); ++s)
  157. ++len;
  158. return len;
  159. }
  160. static const char_type* find(const char_type* s, std::size_t n, const char_type& a)
  161. {
  162. for (; n; --n)
  163. {
  164. if (eq(*s, a))
  165. return s;
  166. ++s;
  167. }
  168. return 0;
  169. }
  170. static char_type* move(char_type* s1, const char_type* s2, std::size_t n)
  171. {
  172. char_type* r = s1;
  173. if (s1 < s2)
  174. {
  175. for (; n; --n, ++s1, ++s2)
  176. assign(*s1, *s2);
  177. }
  178. else if (s2 < s1)
  179. {
  180. s1 += n;
  181. s2 += n;
  182. for (; n; --n)
  183. assign(*--s1, *--s2);
  184. }
  185. return r;
  186. }
  187. static char_type* copy(char_type* s1, const char_type* s2, std::size_t n)
  188. {
  189. poco_assert(s2 < s1 || s2 >= s1 + n);
  190. char_type* r = s1;
  191. for (; n; --n, ++s1, ++s2)
  192. assign(*s1, *s2);
  193. return r;
  194. }
  195. static char_type* assign(char_type* s, std::size_t n, char_type a)
  196. {
  197. char_type* r = s;
  198. for (; n; --n, ++s)
  199. assign(*s, a);
  200. return r;
  201. }
  202. static int_type not_eof(int_type c)
  203. {
  204. return eq_int_type(c, eof()) ? ~eof() : c;
  205. }
  206. static char_type to_char_type(int_type c)
  207. {
  208. return char_type(c);
  209. }
  210. static int_type to_int_type(char_type c)
  211. {
  212. return int_type(c);
  213. }
  214. static bool eq_int_type(int_type c1, int_type c2)
  215. {
  216. return c1 == c2;
  217. }
  218. static int_type eof()
  219. {
  220. return int_type(0xDFFF);
  221. }
  222. };
  223. //#if defined(POCO_ENABLE_CPP11) //TODO
  224. // typedef char16_t UTF16Char;
  225. // typedef std::u16string UTF16String;
  226. // typedef char32_t UTF32Char;
  227. // typedef std::u32string UTF32String;
  228. //#else
  229. #ifdef POCO_NO_WSTRING
  230. typedef Poco::UInt16 UTF16Char;
  231. typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String;
  232. typedef UInt32 UTF32Char;
  233. typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String;
  234. #else // POCO_NO_WSTRING
  235. #if defined(POCO_OS_FAMILY_WINDOWS)
  236. typedef wchar_t UTF16Char;
  237. typedef std::wstring UTF16String;
  238. typedef UInt32 UTF32Char;
  239. typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String;
  240. #elif defined(__SIZEOF_WCHAR_T__) //gcc
  241. #if (__SIZEOF_WCHAR_T__ == 2)
  242. typedef wchar_t UTF16Char;
  243. typedef std::wstring UTF16String;
  244. typedef UInt32 UTF32Char;
  245. typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String;
  246. #elif (__SIZEOF_WCHAR_T__ == 4)
  247. typedef Poco::UInt16 UTF16Char;
  248. typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String;
  249. typedef wchar_t UTF32Char;
  250. typedef std::wstring UTF32String;
  251. #endif
  252. #else // default to 32-bit wchar_t
  253. typedef Poco::UInt16 UTF16Char;
  254. typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String;
  255. typedef wchar_t UTF32Char;
  256. typedef std::wstring UTF32String;
  257. #endif //POCO_OS_FAMILY_WINDOWS
  258. #endif //POCO_NO_WSTRING
  259. //#endif // POCO_ENABLE_CPP11
  260. } // namespace Poco
  261. #endif // Foundation_UTFString_INCLUDED