UTF8Encoding.h 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. //
  2. // UTF8Encoding.h
  3. //
  4. // Library: Foundation
  5. // Package: Text
  6. // Module: UTF8Encoding
  7. //
  8. // Definition of the UTF8Encoding class.
  9. //
  10. // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
  11. // and Contributors.
  12. //
  13. // SPDX-License-Identifier: BSL-1.0
  14. //
  15. #ifndef Foundation_UTF8Encoding_INCLUDED
  16. #define Foundation_UTF8Encoding_INCLUDED
  17. #include "Poco/Foundation.h"
  18. #include "Poco/TextEncoding.h"
  19. namespace Poco {
  20. class Foundation_API UTF8Encoding: public TextEncoding
  21. /// UTF-8 text encoding, as defined in RFC 2279.
  22. {
  23. public:
  24. UTF8Encoding();
  25. ~UTF8Encoding();
  26. const char* canonicalName() const;
  27. bool isA(const std::string& encodingName) const;
  28. const CharacterMap& characterMap() const;
  29. int convert(const unsigned char* bytes) const;
  30. int convert(int ch, unsigned char* bytes, int length) const;
  31. int queryConvert(const unsigned char* bytes, int length) const;
  32. int sequenceLength(const unsigned char* bytes, int length) const;
  33. static bool isLegal(const unsigned char *bytes, int length);
  34. /// Utility routine to tell whether a sequence of bytes is legal UTF-8.
  35. /// This must be called with the length pre-determined by the first byte.
  36. /// The sequence is illegal right away if there aren't enough bytes
  37. /// available. If presented with a length > 4, this function returns false.
  38. /// The Unicode definition of UTF-8 goes up to 4-byte sequences.
  39. ///
  40. /// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
  41. /// Copyright 2001-2004 Unicode, Inc.
  42. private:
  43. static const char* _names[];
  44. static const CharacterMap _charMap;
  45. };
  46. } // namespace Poco
  47. #endif // Foundation_UTF8Encoding_INCLUDED