DecodedBitStreamParser.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. // -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
  2. /*
  3. * DecodedBitStreamParser.cpp
  4. * zxing
  5. *
  6. * Created by Christian Brunschen on 20/05/2008.
  7. * Copyright 2008 ZXing authors All rights reserved.
  8. *
  9. * Licensed under the Apache License, Version 2.0 (the "License");
  10. * you may not use this file except in compliance with the License.
  11. * You may obtain a copy of the License at
  12. *
  13. * http://www.apache.org/licenses/LICENSE-2.0
  14. *
  15. * Unless required by applicable law or agreed to in writing, software
  16. * distributed under the License is distributed on an "AS IS" BASIS,
  17. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18. * See the License for the specific language governing permissions and
  19. * limitations under the License.
  20. */
  21. #include <zxing/qrcode/decoder/DecodedBitStreamParser.h>
  22. #include <zxing/common/CharacterSetECI.h>
  23. #include <zxing/FormatException.h>
  24. #include <zxing/common/StringUtils.h>
  25. #include <iostream>
  26. #ifndef NO_ICONV
  27. #include <iconv.h>
  28. #endif
  29. // Required for compatibility. TODO: test on Symbian
  30. #ifdef ZXING_ICONV_CONST
  31. #undef ICONV_CONST
  32. #define ICONV_CONST const
  33. #endif
  34. #ifndef ICONV_CONST
  35. #define ICONV_CONST /**/
  36. #endif
  37. using namespace std;
  38. using namespace zxing;
  39. using namespace zxing::qrcode;
  40. using namespace zxing::common;
  41. const char DecodedBitStreamParser::ALPHANUMERIC_CHARS[] =
  42. { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
  43. 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  44. 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  45. 'Y', 'Z', ' ', '$', '%', '*', '+', '-', '.', '/', ':'
  46. };
  47. namespace {int GB2312_SUBSET = 1;}
  48. void DecodedBitStreamParser::append(std::string &result,
  49. string const& in,
  50. const char *src) {
  51. append(result, (char const*)in.c_str(), in.length(), src);
  52. }
  53. void DecodedBitStreamParser::append(std::string &result,
  54. const char *bufIn,
  55. size_t nIn,
  56. const char *src) {
  57. #ifndef NO_ICONV
  58. if (nIn == 0) {
  59. return;
  60. }
  61. iconv_t cd = iconv_open(StringUtils::UTF8, src);
  62. if (cd == (iconv_t)-1) {
  63. result.append((const char *)bufIn, nIn);
  64. return;
  65. }
  66. const int maxOut = 4 * nIn + 1;
  67. char* bufOut = new char[maxOut];
  68. ICONV_CONST char *fromPtr = (ICONV_CONST char *)bufIn;
  69. size_t nFrom = nIn;
  70. char *toPtr = (char *)bufOut;
  71. size_t nTo = maxOut;
  72. while (nFrom > 0) {
  73. size_t oneway = iconv(cd, &fromPtr, &nFrom, &toPtr, &nTo);
  74. if (oneway == (size_t)(-1)) {
  75. iconv_close(cd);
  76. delete[] bufOut;
  77. throw ReaderException("error converting characters");
  78. }
  79. }
  80. iconv_close(cd);
  81. int nResult = maxOut - nTo;
  82. bufOut[nResult] = '\0';
  83. result.append((const char *)bufOut);
  84. delete[] bufOut;
  85. #else
  86. result.append((const char *)bufIn, nIn);
  87. #endif
  88. }
  89. void DecodedBitStreamParser::decodeHanziSegment(Ref<BitSource> bits_,
  90. string& result,
  91. int count) {
  92. BitSource& bits (*bits_);
  93. // Don't crash trying to read more bits than we have available.
  94. if (count * 13 > bits.available()) {
  95. throw FormatException();
  96. }
  97. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  98. // and decode as GB2312 afterwards
  99. size_t nBytes = 2 * count;
  100. char* buffer = new char[nBytes];
  101. int offset = 0;
  102. while (count > 0) {
  103. // Each 13 bits encodes a 2-byte character
  104. int twoBytes = bits.readBits(13);
  105. int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060);
  106. if (assembledTwoBytes < 0x003BF) {
  107. // In the 0xA1A1 to 0xAAFE range
  108. assembledTwoBytes += 0x0A1A1;
  109. } else {
  110. // In the 0xB0A1 to 0xFAFE range
  111. assembledTwoBytes += 0x0A6A1;
  112. }
  113. buffer[offset] = (char) ((assembledTwoBytes >> 8) & 0xFF);
  114. buffer[offset + 1] = (char) (assembledTwoBytes & 0xFF);
  115. offset += 2;
  116. count--;
  117. }
  118. try {
  119. append(result, buffer, nBytes, StringUtils::GB2312);
  120. } catch (ReaderException const& ignored) {
  121. (void)ignored;
  122. delete [] buffer;
  123. throw FormatException();
  124. }
  125. delete [] buffer;
  126. }
  127. void DecodedBitStreamParser::decodeKanjiSegment(Ref<BitSource> bits, std::string &result, int count) {
  128. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  129. // and decode as Shift_JIS afterwards
  130. size_t nBytes = 2 * count;
  131. char* buffer = new char[nBytes];
  132. int offset = 0;
  133. while (count > 0) {
  134. // Each 13 bits encodes a 2-byte character
  135. int twoBytes = bits->readBits(13);
  136. int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
  137. if (assembledTwoBytes < 0x01F00) {
  138. // In the 0x8140 to 0x9FFC range
  139. assembledTwoBytes += 0x08140;
  140. } else {
  141. // In the 0xE040 to 0xEBBF range
  142. assembledTwoBytes += 0x0C140;
  143. }
  144. buffer[offset] = (char)(assembledTwoBytes >> 8);
  145. buffer[offset + 1] = (char)assembledTwoBytes;
  146. offset += 2;
  147. count--;
  148. }
  149. try {
  150. append(result, buffer, nBytes, StringUtils::SHIFT_JIS);
  151. } catch (ReaderException const& ignored) {
  152. (void)ignored;
  153. delete [] buffer;
  154. throw FormatException();
  155. }
  156. delete[] buffer;
  157. }
  158. void DecodedBitStreamParser::decodeByteSegment(Ref<BitSource> bits_,
  159. string& result,
  160. int count,
  161. CharacterSetECI* currentCharacterSetECI,
  162. ArrayRef< ArrayRef<char> >& byteSegments,
  163. Hashtable const& hints) {
  164. int nBytes = count;
  165. BitSource& bits (*bits_);
  166. // Don't crash trying to read more bits than we have available.
  167. if (count << 3 > bits.available()) {
  168. throw FormatException();
  169. }
  170. ArrayRef<char> bytes_ (count);
  171. char* readBytes = &(*bytes_)[0];
  172. for (int i = 0; i < count; i++) {
  173. readBytes[i] = (char) bits.readBits(8);
  174. }
  175. string encoding;
  176. if (currentCharacterSetECI == 0) {
  177. // The spec isn't clear on this mode; see
  178. // section 6.4.5: t does not say which encoding to assuming
  179. // upon decoding. I have seen ISO-8859-1 used as well as
  180. // Shift_JIS -- without anything like an ECI designator to
  181. // give a hint.
  182. encoding = StringUtils::guessEncoding(readBytes, count, hints);
  183. } else {
  184. encoding = currentCharacterSetECI->name();
  185. }
  186. try {
  187. append(result, readBytes, nBytes, encoding.c_str());
  188. } catch (ReaderException const& ignored) {
  189. (void)ignored;
  190. throw FormatException();
  191. }
  192. byteSegments->values().push_back(bytes_);
  193. }
  194. void DecodedBitStreamParser::decodeNumericSegment(Ref<BitSource> bits, std::string &result, int count) {
  195. int nBytes = count;
  196. char* bytes = new char[nBytes];
  197. int i = 0;
  198. // Read three digits at a time
  199. while (count >= 3) {
  200. // Each 10 bits encodes three digits
  201. if (bits->available() < 10) {
  202. throw ReaderException("format exception");
  203. }
  204. int threeDigitsBits = bits->readBits(10);
  205. if (threeDigitsBits >= 1000) {
  206. ostringstream s;
  207. s << "Illegal value for 3-digit unit: " << threeDigitsBits;
  208. delete[] bytes;
  209. throw ReaderException(s.str().c_str());
  210. }
  211. bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits / 100];
  212. bytes[i++] = ALPHANUMERIC_CHARS[(threeDigitsBits / 10) % 10];
  213. bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits % 10];
  214. count -= 3;
  215. }
  216. if (count == 2) {
  217. if (bits->available() < 7) {
  218. throw ReaderException("format exception");
  219. }
  220. // Two digits left over to read, encoded in 7 bits
  221. int twoDigitsBits = bits->readBits(7);
  222. if (twoDigitsBits >= 100) {
  223. ostringstream s;
  224. s << "Illegal value for 2-digit unit: " << twoDigitsBits;
  225. delete[] bytes;
  226. throw ReaderException(s.str().c_str());
  227. }
  228. bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits / 10];
  229. bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits % 10];
  230. } else if (count == 1) {
  231. if (bits->available() < 4) {
  232. throw ReaderException("format exception");
  233. }
  234. // One digit left over to read
  235. int digitBits = bits->readBits(4);
  236. if (digitBits >= 10) {
  237. ostringstream s;
  238. s << "Illegal value for digit unit: " << digitBits;
  239. delete[] bytes;
  240. throw ReaderException(s.str().c_str());
  241. }
  242. bytes[i++] = ALPHANUMERIC_CHARS[digitBits];
  243. }
  244. append(result, bytes, nBytes, StringUtils::ASCII);
  245. delete[] bytes;
  246. }
  247. char DecodedBitStreamParser::toAlphaNumericChar(size_t value) {
  248. if (value >= sizeof(DecodedBitStreamParser::ALPHANUMERIC_CHARS)) {
  249. throw FormatException();
  250. }
  251. return ALPHANUMERIC_CHARS[value];
  252. }
  253. void DecodedBitStreamParser::decodeAlphanumericSegment(Ref<BitSource> bits_,
  254. string& result,
  255. int count,
  256. bool fc1InEffect) {
  257. BitSource& bits (*bits_);
  258. ostringstream bytes;
  259. // Read two characters at a time
  260. while (count > 1) {
  261. if (bits.available() < 11) {
  262. throw FormatException();
  263. }
  264. int nextTwoCharsBits = bits.readBits(11);
  265. bytes << toAlphaNumericChar(nextTwoCharsBits / 45);
  266. bytes << toAlphaNumericChar(nextTwoCharsBits % 45);
  267. count -= 2;
  268. }
  269. if (count == 1) {
  270. // special case: one character left
  271. if (bits.available() < 6) {
  272. throw FormatException();
  273. }
  274. bytes << toAlphaNumericChar(bits.readBits(6));
  275. }
  276. // See section 6.4.8.1, 6.4.8.2
  277. string s = bytes.str();
  278. if (fc1InEffect) {
  279. // We need to massage the result a bit if in an FNC1 mode:
  280. ostringstream r;
  281. for (size_t i = 0; i < s.length(); i++) {
  282. if (s[i] != '%') {
  283. r << s[i];
  284. } else {
  285. if (i < s.length() - 1 && s[i + 1] == '%') {
  286. // %% is rendered as %
  287. r << s[i++];
  288. } else {
  289. // In alpha mode, % should be converted to FNC1 separator 0x1D
  290. r << (char)0x1D;
  291. }
  292. }
  293. }
  294. s = r.str();
  295. }
  296. append(result, s, StringUtils::ASCII);
  297. }
  298. namespace {
  299. int parseECIValue(BitSource& bits) {
  300. int firstByte = bits.readBits(8);
  301. if ((firstByte & 0x80) == 0) {
  302. // just one byte
  303. return firstByte & 0x7F;
  304. }
  305. if ((firstByte & 0xC0) == 0x80) {
  306. // two bytes
  307. int secondByte = bits.readBits(8);
  308. return ((firstByte & 0x3F) << 8) | secondByte;
  309. }
  310. if ((firstByte & 0xE0) == 0xC0) {
  311. // three bytes
  312. int secondThirdBytes = bits.readBits(16);
  313. return ((firstByte & 0x1F) << 16) | secondThirdBytes;
  314. }
  315. throw FormatException();
  316. }
  317. }
  318. Ref<DecoderResult>
  319. DecodedBitStreamParser::decode(ArrayRef<char> bytes,
  320. Version* version,
  321. ErrorCorrectionLevel const& ecLevel,
  322. Hashtable const& hints) {
  323. Ref<BitSource> bits_ (new BitSource(bytes));
  324. BitSource& bits (*bits_);
  325. string result;
  326. result.reserve(50);
  327. ArrayRef< ArrayRef<char> > byteSegments (0);
  328. try {
  329. CharacterSetECI* currentCharacterSetECI = 0;
  330. bool fc1InEffect = false;
  331. Mode* mode = 0;
  332. do {
  333. // While still another segment to read...
  334. if (bits.available() < 4) {
  335. // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
  336. mode = &Mode::TERMINATOR;
  337. } else {
  338. try {
  339. mode = &Mode::forBits(bits.readBits(4)); // mode is encoded by 4 bits
  340. } catch (IllegalArgumentException const& iae) {
  341. throw iae;
  342. // throw FormatException.getFormatInstance();
  343. }
  344. }
  345. if (mode != &Mode::TERMINATOR) {
  346. if ((mode == &Mode::FNC1_FIRST_POSITION) || (mode == &Mode::FNC1_SECOND_POSITION)) {
  347. // We do little with FNC1 except alter the parsed result a bit according to the spec
  348. fc1InEffect = true;
  349. } else if (mode == &Mode::STRUCTURED_APPEND) {
  350. if (bits.available() < 16) {
  351. throw FormatException();
  352. }
  353. // not really supported; all we do is ignore it
  354. // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
  355. bits.readBits(16);
  356. } else if (mode == &Mode::ECI) {
  357. // Count doesn't apply to ECI
  358. int value = parseECIValue(bits);
  359. currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue(value);
  360. if (currentCharacterSetECI == 0) {
  361. throw FormatException();
  362. }
  363. } else {
  364. // First handle Hanzi mode which does not start with character count
  365. if (mode == &Mode::HANZI) {
  366. //chinese mode contains a sub set indicator right after mode indicator
  367. int subset = bits.readBits(4);
  368. int countHanzi = bits.readBits(mode->getCharacterCountBits(version));
  369. if (subset == GB2312_SUBSET) {
  370. decodeHanziSegment(bits_, result, countHanzi);
  371. }
  372. } else {
  373. // "Normal" QR code modes:
  374. // How many characters will follow, encoded in this mode?
  375. int count = bits.readBits(mode->getCharacterCountBits(version));
  376. if (mode == &Mode::NUMERIC) {
  377. decodeNumericSegment(bits_, result, count);
  378. } else if (mode == &Mode::ALPHANUMERIC) {
  379. decodeAlphanumericSegment(bits_, result, count, fc1InEffect);
  380. } else if (mode == &Mode::BYTE) {
  381. decodeByteSegment(bits_, result, count, currentCharacterSetECI, byteSegments, hints);
  382. } else if (mode == &Mode::KANJI) {
  383. decodeKanjiSegment(bits_, result, count);
  384. } else {
  385. throw FormatException();
  386. }
  387. }
  388. }
  389. }
  390. } while (mode != &Mode::TERMINATOR);
  391. } catch (IllegalArgumentException const& iae) {
  392. (void)iae;
  393. // from readBits() calls
  394. throw FormatException();
  395. }
  396. return Ref<DecoderResult>(new DecoderResult(bytes, Ref<String>(new String(result)), byteSegments, (string)ecLevel));
  397. }