UNPKG

3.19 kBtext/x-cView Raw
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the MIT License.
3
4#pragma once
#include <cstdint>
#include <exception>
#include <stdexcept>
#include <string>
#if _HAS_CXX17
#include <string_view>
#endif
11
12namespace Microsoft::Common::Unicode {
13
14 // All functions in this header offer the strong exception safety guarantee and
15 // may throw the following exceptions:
16 // - std::bad_alloc,
17 // - std::overflow_error, and
18 // - UnicodeConversionException (defined below).
19 //
// Exception type that the conversion functions in this header may throw.
//
// It is a std::runtime_error, so what() returns the message passed to the
// constructor; in addition it carries a numeric error code that callers can
// query through ErrorCode().
class UnicodeConversionException : public std::runtime_error {
 public:
  // Creates the exception.
  //
  // message:   null-terminated description, forwarded to std::runtime_error
  //            (which stores its own copy, so the caller's buffer need not
  //            outlive the exception).
  // errorCode: the error code returned by GetLastError().
  UnicodeConversionException(const char* const message, std::uint32_t errorCode)
      : std::runtime_error(message), m_errorCode(errorCode) {}

  // Returns the error code supplied at construction. Never throws.
  std::uint32_t ErrorCode() const noexcept {
    return m_errorCode;
  }

 private:
  // The error code returned by GetLastError().
  std::uint32_t m_errorCode;
};
33
34 // The following functions convert UTF-8 strings to UTF-16BE strings.
35 //
36 // If the input UTF-8 string begins with the UTF-8 Byte Order Mark (BOM) (0xef
37 // 0xbb 0xbf), then the output UTF-16BE string will begin with the UTF-16BE BOM
38 // (0xfeff). For example, "<UTF-8 BOM>abc" (0xef 0xbb 0xbf 0x61 0x62 0x63) is
39 // converted to "<UTF-16BE BOM>abc" (0xfeff 0x0061 0x0062 0x0063).
40 //
// If the input UTF-8 string omits the UTF-8 BOM, then the output UTF-16BE
// string will also omit the UTF-16BE BOM. For example, "abc" (0x61 0x62 0x63)
// is converted to "abc" (0x0061 0x0062 0x0063).
44 //
45 // For (1), utf8 does not have to be null terminated, and utf8Len must reflect
46 // the length of utf8, without the null terminator if it has one. The behavior
47 // is undefined otherwise.
48 //
49 // For (2), utf8 must be null terminated. The behavior is undefined otherwise.
50 //
// (1) Pointer plus explicit length: utf8 need not be null terminated, and
//     utf8Len is the length of utf8 excluding any null terminator.
std::wstring Utf8ToUtf16(const char* utf8, size_t utf8Len);

// (2) Null-terminated C string.
std::wstring Utf8ToUtf16(const char* utf8);

// (3) Input supplied as a std::string.
std::wstring Utf8ToUtf16(const std::string& utf8);

#if _HAS_CXX17
// (4) Input supplied as a std::string_view. Declared only when _HAS_CXX17 is
//     nonzero.
std::wstring Utf8ToUtf16(const std::string_view& utf8);
#endif
57
58 // The following functions convert UTF-16BE strings to UTF-8 strings. Their
59 // behaviors mirror those of the above Utf8ToUtf16 functions.
60 //
61 // For (1) and (2), utf16 does not have to be null terminated, and utf16Len must
62 // reflect the length of utf16, without the null terminator if it has one. The
63 // behavior is undefined otherwise.
64 //
65 // For (3) and (4), utf16 must be null terminated. The behavior is undefined
66 // otherwise.
67 //
// (1), (2) Pointer plus explicit length: utf16 need not be null terminated,
//          and utf16Len is the length of utf16 excluding any null terminator.
std::string Utf16ToUtf8(const wchar_t* utf16, size_t utf16Len);   // (1)
std::string Utf16ToUtf8(const char16_t* utf16, size_t utf16Len);  // (2)

// (3), (4) Null-terminated input.
std::string Utf16ToUtf8(const wchar_t* utf16);   // (3)
std::string Utf16ToUtf8(const char16_t* utf16);  // (4)

// (5), (6) Input supplied as a standard string object.
std::string Utf16ToUtf8(const std::wstring& utf16);    // (5)
std::string Utf16ToUtf8(const std::u16string& utf16);  // (6)

#if _HAS_CXX17
// (7), (8) Input supplied as a string view. Declared only when _HAS_CXX17 is
//          nonzero.
std::string Utf16ToUtf8(const std::wstring_view& utf16);    // (7)
std::string Utf16ToUtf8(const std::u16string_view& utf16);  // (8)
#endif
78
79} // namespace Microsoft::Common::Unicode