1 | // Copyright (c) Microsoft Corporation. All rights reserved.
|
2 | // Licensed under the MIT License.
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 | namespace Microsoft::Common::Unicode {
|
13 |
|
14 | // All functions in this header offer the strong exception safety guarantee and
|
15 | // may throw the following exceptions:
|
16 | // - std::bad_alloc,
|
17 | // - std::overflow_error, and
|
18 | // - UnicodeConversionException (defined below).
|
19 | //
|
// Exception type that the conversion functions in this header may throw.
//
// It derives from std::runtime_error, so what() yields the message passed to
// the constructor. In addition it stores a numeric error code (the value
// returned by GetLastError()) that handlers can read back via ErrorCode().
class UnicodeConversionException : public std::runtime_error {
 public:
  // message:   human-readable description, forwarded to std::runtime_error.
  // errorCode: the error code to associate with this failure (the value
  //            returned by GetLastError()).
  UnicodeConversionException(const char* const message, uint32_t errorCode)
      : std::runtime_error(message), m_errorCode(errorCode) {}

  // Returns the error code supplied at construction.
  //
  // Marked noexcept: it only reads an integer member, and exception accessors
  // are routinely called from catch handlers where throwing again is not
  // acceptable. (Member functions defined in-class are implicitly inline, so
  // no explicit `inline` is needed.)
  uint32_t ErrorCode() const noexcept {
    return m_errorCode;
  }

 private:
  // The error code returned by GetLastError().
  uint32_t m_errorCode;
};
|
33 |
|
// The following functions convert UTF-8 strings to UTF-16BE strings.
//
// If the input UTF-8 string begins with the UTF-8 Byte Order Mark (BOM) (0xef
// 0xbb 0xbf), then the output UTF-16BE string will begin with the UTF-16BE BOM
// (0xfeff). For example, "<UTF-8 BOM>abc" (0xef 0xbb 0xbf 0x61 0x62 0x63) is
// converted to "<UTF-16BE BOM>abc" (0xfeff 0x0061 0x0062 0x0063).
//
// If the input UTF-8 string omits the UTF-8 BOM, then the output UTF-16BE
// string will also omit the UTF-16BE BOM. For example, "abc" (0x61 0x62 0x63)
// is converted to "abc" (0x0061 0x0062 0x0063).
//
// For (1), utf8 does not have to be null terminated, and utf8Len must reflect
// the length of utf8, without the null terminator if it has one. The behavior
// is undefined otherwise.
//
// For (2), utf8 must be null terminated. The behavior is undefined otherwise.
//
// For (3) and (4), the input is passed as a std::string or std::string_view
// object and no separate length argument is taken.
// NOTE(review): presumably the object's own size() is used as the input
// length -- confirm against the implementation.
//
/* (1) */ std::wstring Utf8ToUtf16(const char* utf8, size_t utf8Len);
/* (2) */ std::wstring Utf8ToUtf16(const char* utf8);
/* (3) */ std::wstring Utf8ToUtf16(const std::string& utf8);

/* (4) */ std::wstring Utf8ToUtf16(const std::string_view& utf8);
|
56 |
|
57 |
|
// The following functions convert UTF-16BE strings to UTF-8 strings. Their
// behaviors mirror those of the above Utf8ToUtf16 functions.
//
// Every input form is offered twice: the odd-numbered overloads take
// wchar_t-based input and the even-numbered overloads take char16_t-based
// input.
//
// For (1) and (2), utf16 does not have to be null terminated, and utf16Len must
// reflect the length of utf16, without the null terminator if it has one. The
// behavior is undefined otherwise.
//
// For (3) and (4), utf16 must be null terminated. The behavior is undefined
// otherwise.
//
// For (5) through (8), the input is passed as a string object ((5), (6)) or a
// string view object ((7), (8)) and no separate length argument is taken.
// NOTE(review): presumably the object's own size() is used as the input
// length -- confirm against the implementation.
//
/* (1) */ std::string Utf16ToUtf8(const wchar_t* utf16, size_t utf16Len);
/* (2) */ std::string Utf16ToUtf8(const char16_t* utf16, size_t utf16Len);
/* (3) */ std::string Utf16ToUtf8(const wchar_t* utf16);
/* (4) */ std::string Utf16ToUtf8(const char16_t* utf16);
/* (5) */ std::string Utf16ToUtf8(const std::wstring& utf16);
/* (6) */ std::string Utf16ToUtf8(const std::u16string& utf16);

/* (7) */ std::string Utf16ToUtf8(const std::wstring_view& utf16);
/* (8) */ std::string Utf16ToUtf8(const std::u16string_view& utf16);
|
77 |
|
78 |
|
79 | } // namespace Microsoft::Common::Unicode
|