123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425 |
- // -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*-
- /*
- * DecodedBitStreamParser.cpp
- * zxing
- *
- * Created by Christian Brunschen on 20/05/2008.
- * Copyright 2008 ZXing authors All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include <zxing/qrcode/decoder/DecodedBitStreamParser.h>
- #include <zxing/common/CharacterSetECI.h>
- #include <zxing/FormatException.h>
- #include <zxing/common/StringUtils.h>
- #include <iostream>
- #ifndef NO_ICONV
- #include <iconv.h>
- #endif
- // Required for compatibility. TODO: test on Symbian
- #ifdef ZXING_ICONV_CONST
- #undef ICONV_CONST
- #define ICONV_CONST const
- #endif
- #ifndef ICONV_CONST
- #define ICONV_CONST /**/
- #endif
- using namespace std;
- using namespace zxing;
- using namespace zxing::qrcode;
- using namespace zxing::common;
- const char DecodedBitStreamParser::ALPHANUMERIC_CHARS[] =
- { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
- 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
- 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
- 'Y', 'Z', ' ', '$', '%', '*', '+', '-', '.', '/', ':'
- };
namespace {
// Subset indicator value that decode() requires after a Hanzi mode indicator
// before it hands the segment to decodeHanziSegment(). Declared const so it
// cannot be modified at run time.
const int GB2312_SUBSET = 1;
}
- void DecodedBitStreamParser::append(std::string &result,
- string const& in,
- const char *src) {
- append(result, (char const*)in.c_str(), in.length(), src);
- }
- void DecodedBitStreamParser::append(std::string &result,
- const char *bufIn,
- size_t nIn,
- const char *src) {
- #ifndef NO_ICONV
- if (nIn == 0) {
- return;
- }
- iconv_t cd = iconv_open(StringUtils::UTF8, src);
- if (cd == (iconv_t)-1) {
- result.append((const char *)bufIn, nIn);
- return;
- }
- const int maxOut = 4 * nIn + 1;
- char* bufOut = new char[maxOut];
- ICONV_CONST char *fromPtr = (ICONV_CONST char *)bufIn;
- size_t nFrom = nIn;
- char *toPtr = (char *)bufOut;
- size_t nTo = maxOut;
- while (nFrom > 0) {
- size_t oneway = iconv(cd, &fromPtr, &nFrom, &toPtr, &nTo);
- if (oneway == (size_t)(-1)) {
- iconv_close(cd);
- delete[] bufOut;
- throw ReaderException("error converting characters");
- }
- }
- iconv_close(cd);
- int nResult = maxOut - nTo;
- bufOut[nResult] = '\0';
- result.append((const char *)bufOut);
- delete[] bufOut;
- #else
- result.append((const char *)bufIn, nIn);
- #endif
- }
- void DecodedBitStreamParser::decodeHanziSegment(Ref<BitSource> bits_,
- string& result,
- int count) {
- BitSource& bits (*bits_);
- // Don't crash trying to read more bits than we have available.
- if (count * 13 > bits.available()) {
- throw FormatException();
- }
- // Each character will require 2 bytes. Read the characters as 2-byte pairs
- // and decode as GB2312 afterwards
- size_t nBytes = 2 * count;
- char* buffer = new char[nBytes];
- int offset = 0;
- while (count > 0) {
- // Each 13 bits encodes a 2-byte character
- int twoBytes = bits.readBits(13);
- int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060);
- if (assembledTwoBytes < 0x003BF) {
- // In the 0xA1A1 to 0xAAFE range
- assembledTwoBytes += 0x0A1A1;
- } else {
- // In the 0xB0A1 to 0xFAFE range
- assembledTwoBytes += 0x0A6A1;
- }
- buffer[offset] = (char) ((assembledTwoBytes >> 8) & 0xFF);
- buffer[offset + 1] = (char) (assembledTwoBytes & 0xFF);
- offset += 2;
- count--;
- }
- try {
- append(result, buffer, nBytes, StringUtils::GB2312);
- } catch (ReaderException const& ignored) {
- (void)ignored;
- delete [] buffer;
- throw FormatException();
- }
- delete [] buffer;
- }
- void DecodedBitStreamParser::decodeKanjiSegment(Ref<BitSource> bits, std::string &result, int count) {
- // Each character will require 2 bytes. Read the characters as 2-byte pairs
- // and decode as Shift_JIS afterwards
- size_t nBytes = 2 * count;
- char* buffer = new char[nBytes];
- int offset = 0;
- while (count > 0) {
- // Each 13 bits encodes a 2-byte character
- int twoBytes = bits->readBits(13);
- int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
- if (assembledTwoBytes < 0x01F00) {
- // In the 0x8140 to 0x9FFC range
- assembledTwoBytes += 0x08140;
- } else {
- // In the 0xE040 to 0xEBBF range
- assembledTwoBytes += 0x0C140;
- }
- buffer[offset] = (char)(assembledTwoBytes >> 8);
- buffer[offset + 1] = (char)assembledTwoBytes;
- offset += 2;
- count--;
- }
- try {
- append(result, buffer, nBytes, StringUtils::SHIFT_JIS);
- } catch (ReaderException const& ignored) {
- (void)ignored;
- delete [] buffer;
- throw FormatException();
- }
- delete[] buffer;
- }
- void DecodedBitStreamParser::decodeByteSegment(Ref<BitSource> bits_,
- string& result,
- int count,
- CharacterSetECI* currentCharacterSetECI,
- ArrayRef< ArrayRef<char> >& byteSegments,
- Hashtable const& hints) {
- int nBytes = count;
- BitSource& bits (*bits_);
- // Don't crash trying to read more bits than we have available.
- if (count << 3 > bits.available()) {
- throw FormatException();
- }
- ArrayRef<char> bytes_ (count);
- char* readBytes = &(*bytes_)[0];
- for (int i = 0; i < count; i++) {
- readBytes[i] = (char) bits.readBits(8);
- }
- string encoding;
- if (currentCharacterSetECI == 0) {
- // The spec isn't clear on this mode; see
- // section 6.4.5: t does not say which encoding to assuming
- // upon decoding. I have seen ISO-8859-1 used as well as
- // Shift_JIS -- without anything like an ECI designator to
- // give a hint.
- encoding = StringUtils::guessEncoding(readBytes, count, hints);
- } else {
- encoding = currentCharacterSetECI->name();
- }
- try {
- append(result, readBytes, nBytes, encoding.c_str());
- } catch (ReaderException const& ignored) {
- (void)ignored;
- throw FormatException();
- }
- byteSegments->values().push_back(bytes_);
- }
- void DecodedBitStreamParser::decodeNumericSegment(Ref<BitSource> bits, std::string &result, int count) {
- int nBytes = count;
- char* bytes = new char[nBytes];
- int i = 0;
- // Read three digits at a time
- while (count >= 3) {
- // Each 10 bits encodes three digits
- if (bits->available() < 10) {
- throw ReaderException("format exception");
- }
- int threeDigitsBits = bits->readBits(10);
- if (threeDigitsBits >= 1000) {
- ostringstream s;
- s << "Illegal value for 3-digit unit: " << threeDigitsBits;
- delete[] bytes;
- throw ReaderException(s.str().c_str());
- }
- bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits / 100];
- bytes[i++] = ALPHANUMERIC_CHARS[(threeDigitsBits / 10) % 10];
- bytes[i++] = ALPHANUMERIC_CHARS[threeDigitsBits % 10];
- count -= 3;
- }
- if (count == 2) {
- if (bits->available() < 7) {
- throw ReaderException("format exception");
- }
- // Two digits left over to read, encoded in 7 bits
- int twoDigitsBits = bits->readBits(7);
- if (twoDigitsBits >= 100) {
- ostringstream s;
- s << "Illegal value for 2-digit unit: " << twoDigitsBits;
- delete[] bytes;
- throw ReaderException(s.str().c_str());
- }
- bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits / 10];
- bytes[i++] = ALPHANUMERIC_CHARS[twoDigitsBits % 10];
- } else if (count == 1) {
- if (bits->available() < 4) {
- throw ReaderException("format exception");
- }
- // One digit left over to read
- int digitBits = bits->readBits(4);
- if (digitBits >= 10) {
- ostringstream s;
- s << "Illegal value for digit unit: " << digitBits;
- delete[] bytes;
- throw ReaderException(s.str().c_str());
- }
- bytes[i++] = ALPHANUMERIC_CHARS[digitBits];
- }
- append(result, bytes, nBytes, StringUtils::ASCII);
- delete[] bytes;
- }
- char DecodedBitStreamParser::toAlphaNumericChar(size_t value) {
- if (value >= sizeof(DecodedBitStreamParser::ALPHANUMERIC_CHARS)) {
- throw FormatException();
- }
- return ALPHANUMERIC_CHARS[value];
- }
- void DecodedBitStreamParser::decodeAlphanumericSegment(Ref<BitSource> bits_,
- string& result,
- int count,
- bool fc1InEffect) {
- BitSource& bits (*bits_);
- ostringstream bytes;
- // Read two characters at a time
- while (count > 1) {
- if (bits.available() < 11) {
- throw FormatException();
- }
- int nextTwoCharsBits = bits.readBits(11);
- bytes << toAlphaNumericChar(nextTwoCharsBits / 45);
- bytes << toAlphaNumericChar(nextTwoCharsBits % 45);
- count -= 2;
- }
- if (count == 1) {
- // special case: one character left
- if (bits.available() < 6) {
- throw FormatException();
- }
- bytes << toAlphaNumericChar(bits.readBits(6));
- }
- // See section 6.4.8.1, 6.4.8.2
- string s = bytes.str();
- if (fc1InEffect) {
- // We need to massage the result a bit if in an FNC1 mode:
- ostringstream r;
- for (size_t i = 0; i < s.length(); i++) {
- if (s[i] != '%') {
- r << s[i];
- } else {
- if (i < s.length() - 1 && s[i + 1] == '%') {
- // %% is rendered as %
- r << s[i++];
- } else {
- // In alpha mode, % should be converted to FNC1 separator 0x1D
- r << (char)0x1D;
- }
- }
- }
- s = r.str();
- }
- append(result, s, StringUtils::ASCII);
- }
- namespace {
- int parseECIValue(BitSource& bits) {
- int firstByte = bits.readBits(8);
- if ((firstByte & 0x80) == 0) {
- // just one byte
- return firstByte & 0x7F;
- }
- if ((firstByte & 0xC0) == 0x80) {
- // two bytes
- int secondByte = bits.readBits(8);
- return ((firstByte & 0x3F) << 8) | secondByte;
- }
- if ((firstByte & 0xE0) == 0xC0) {
- // three bytes
- int secondThirdBytes = bits.readBits(16);
- return ((firstByte & 0x1F) << 16) | secondThirdBytes;
- }
- throw FormatException();
- }
- }
- Ref<DecoderResult>
- DecodedBitStreamParser::decode(ArrayRef<char> bytes,
- Version* version,
- ErrorCorrectionLevel const& ecLevel,
- Hashtable const& hints) {
- Ref<BitSource> bits_ (new BitSource(bytes));
- BitSource& bits (*bits_);
- string result;
- result.reserve(50);
- ArrayRef< ArrayRef<char> > byteSegments (0);
- try {
- CharacterSetECI* currentCharacterSetECI = 0;
- bool fc1InEffect = false;
- Mode* mode = 0;
- do {
- // While still another segment to read...
- if (bits.available() < 4) {
- // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
- mode = &Mode::TERMINATOR;
- } else {
- try {
- mode = &Mode::forBits(bits.readBits(4)); // mode is encoded by 4 bits
- } catch (IllegalArgumentException const& iae) {
- throw iae;
- // throw FormatException.getFormatInstance();
- }
- }
- if (mode != &Mode::TERMINATOR) {
- if ((mode == &Mode::FNC1_FIRST_POSITION) || (mode == &Mode::FNC1_SECOND_POSITION)) {
- // We do little with FNC1 except alter the parsed result a bit according to the spec
- fc1InEffect = true;
- } else if (mode == &Mode::STRUCTURED_APPEND) {
- if (bits.available() < 16) {
- throw FormatException();
- }
- // not really supported; all we do is ignore it
- // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
- bits.readBits(16);
- } else if (mode == &Mode::ECI) {
- // Count doesn't apply to ECI
- int value = parseECIValue(bits);
- currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue(value);
- if (currentCharacterSetECI == 0) {
- throw FormatException();
- }
- } else {
- // First handle Hanzi mode which does not start with character count
- if (mode == &Mode::HANZI) {
- //chinese mode contains a sub set indicator right after mode indicator
- int subset = bits.readBits(4);
- int countHanzi = bits.readBits(mode->getCharacterCountBits(version));
- if (subset == GB2312_SUBSET) {
- decodeHanziSegment(bits_, result, countHanzi);
- }
- } else {
- // "Normal" QR code modes:
- // How many characters will follow, encoded in this mode?
- int count = bits.readBits(mode->getCharacterCountBits(version));
- if (mode == &Mode::NUMERIC) {
- decodeNumericSegment(bits_, result, count);
- } else if (mode == &Mode::ALPHANUMERIC) {
- decodeAlphanumericSegment(bits_, result, count, fc1InEffect);
- } else if (mode == &Mode::BYTE) {
- decodeByteSegment(bits_, result, count, currentCharacterSetECI, byteSegments, hints);
- } else if (mode == &Mode::KANJI) {
- decodeKanjiSegment(bits_, result, count);
- } else {
- throw FormatException();
- }
- }
- }
- }
- } while (mode != &Mode::TERMINATOR);
- } catch (IllegalArgumentException const& iae) {
- (void)iae;
- // from readBits() calls
- throw FormatException();
- }
-
- return Ref<DecoderResult>(new DecoderResult(bytes, Ref<String>(new String(result)), byteSegments, (string)ecLevel));
- }
|