UNPKG

@react-native-community/clipboard/windows/Clipboard/Unicode.cpp

Version:

7.7 kB · text/x-c · View Raw

1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the MIT License.
3#include "pch.h"
4#include "Unicode.h"
5#include "Utilities.h"
6
7#include "windows.h"
8
9#include "stringapiset.h"
10
11#include <cassert>
12#include <cstring>
13#include <exception>
14#include <string>
15
16namespace Microsoft::Common::Unicode {
17
// The implementations of the following functions heavily reference the MSDN
// article at https://msdn.microsoft.com/en-us/magazine/mt763237.aspx.
20
std::wstring Utf8ToUtf16(const char* utf8, size_t utf8Len) {
  std::wstring utf16{};
23
  // A small optimization.
  if (utf8Len == 0) {
    return utf16;
  }
28
  // Extra parentheses needed here to prevent expanding max as a
  // Windows-specific preprocessor macro.
  if (utf8Len > static_cast<size_t>((std::numeric_limits<int>::max)())) {
    throw std::overflow_error("Length of input string to Utf8ToUtf16() must fit into an int.");
  }
34
  const int utf8Length = static_cast<int>(utf8Len);
36
  // We do not specify MB_ERR_INVALID_CHARS here, which means that invalid UTF-8
  // characters are replaced with U+FFFD.
  constexpr DWORD flags = 0;
40
  const int utf16Length = ::MultiByteToWideChar(
    CP_UTF8, // Source string is in UTF-8.
    flags, // Conversion flags.
    utf8, // Source UTF-8 string pointer.
    utf8Length, // Length of the source UTF-8 string, in chars.
    nullptr, // Do not convert during this step, instead, request the size
    0 //   of the destination buffer, in wchar_ts, excluding the
    //   null termination character.
  );
50
  if (utf16Length == 0) {
    throw UnicodeConversionException(
      "Cannot get result string length when converting from UTF-8 to UTF-16 "
      "(MultiByteToWideChar failed).",
      GetLastError());
  }
57
  // Note that because the length of the input UTF-8 string was explicitly
  // passed to MultiByteToWideChar (instead of just passing -1 and asking
  // MultiByteToWideChar to scan the whole input string until a null terminator
  // is found), MultiByteToWideChar won't add an additional null terminator to
  // the result string. Therefore, there's no need to invoke
  // std::wstring::resize with a "utf16Length + 1" value.
  utf16.resize(utf16Length);
65
  // Convert from UTF-8 to UTF-16
  // Note that MultiByteToWideChar converts the UTF-8 BOM into the UTF-16BE BOM.
  // So we do not have to do anything extra here to ensure correct BOM behavior.
  int result = ::MultiByteToWideChar(
    CP_UTF8, // Source string is in UTF-8.
    flags, // Conversion flags.
    utf8, // Source UTF-8 string pointer.
    utf8Length, // Length of source UTF-8 string, in chars.
    &utf16[0], // Pointer to destination buffer. This is fine because the
    //   the C++11 standard specifies that the elements of a
    //   std::basic_string are stored continuously.
    utf16Length // Size of destination buffer, in wchar_ts.
  );
79
  if (result == 0) {
    throw UnicodeConversionException(
      "Cannot convert from UTF-8 to UTF-16 (MultiByteToWideChar failed).", GetLastError());
  }
84
  return utf16;
}
87
// Converts a null-terminated UTF-8 C string to UTF-16. The byte count is
// measured with strlen and forwarded to the (pointer, length) overload.
std::wstring Utf8ToUtf16(const char* utf8) {
  const size_t byteCount = strlen(utf8);
  return Utf8ToUtf16(utf8, byteCount);
}
91
// Converts the full contents of a UTF-8 std::string to UTF-16 by forwarding
// its buffer and size to the (pointer, length) overload.
std::wstring Utf8ToUtf16(const std::string& utf8) {
  const char* const bytes = utf8.data();
  const size_t byteCount = utf8.size();
  return Utf8ToUtf16(bytes, byteCount);
}
95
96#if _HAS_CXX17
// Converts the chars referenced by a UTF-8 std::string_view to UTF-16. The
// view's explicit size is forwarded, so no null terminator is needed.
std::wstring Utf8ToUtf16(const std::string_view& utf8) {
  const char* const bytes = utf8.data();
  const size_t byteCount = utf8.size();
  return Utf8ToUtf16(bytes, byteCount);
}
100#endif
101
std::string Utf16ToUtf8(const wchar_t* utf16, size_t utf16Len) {
  std::string utf8{};
104
  // A small optimization.
  if (utf16Len == 0) {
    return utf8;
  }
109
  // Extra parentheses needed here to prevent expanding max as a
  // Windows-specific preprocessor macro.
  if (utf16Len > static_cast<size_t>((std::numeric_limits<int>::max)())) {
    throw std::overflow_error("Length of input string to Utf16ToUtf8() must fit into an int.");
  }
115
  const int utf16Length = static_cast<int>(utf16Len);
117
  // We do not specify WC_ERR_INVALID_CHARS here, which means that invalid
  // UTF-16 characters are replaced with U+FFFD.
  constexpr DWORD flags = 0;
121
  const int utf8Length = ::WideCharToMultiByte(
    CP_UTF8, // Destination string is in UTF-8.
    flags, // Conversion flags.
    utf16, // Source UTF-16 string pointer.
    utf16Length, // Length of the source UTF-16 string, in wchar_ts.
    nullptr, // Do not convert during this step, instead, request the size
    0, //   of the destination buffer, in chars, excluding the
    //   null termination character.
    nullptr, // WideCharToMultiByte requires the last two parameters to be
    nullptr //   nullptrs when converting to UTF-8.
  );
133
  if (utf8Length == 0) {
    throw UnicodeConversionException(
      "Cannot get result string length when converting from UTF-16 to UTF-8 "
      "(WideCharToMultiByte failed).",
      GetLastError());
  }
140
  // Note that because the length of the input UTF-16 string was explicitly
  // passed to WideCharToMultiByte (instead of just passing -1 and asking
  // WideCharToMultiByte to scan the whole input string until a null terminator
  // is found), WideCharToMultiByte won't add an additional null terminator to
  // the result string. Therefore, there's no need to invoke
  // std::string::resize with a "utf8Length + 1" value.
  utf8.resize(utf8Length);
148
  // Convert from UTF-8 to UTF-16
  // Note that MultiByteToWideChar converts the UTF-8 BOM into the UTF-16BE BOM.
  // So we do not have to do anything extra here to ensure correct BOM behavior.
  int result = ::WideCharToMultiByte(
    CP_UTF8, // Destination string is in UTF-8.
    flags, // Conversion flags.
    utf16, // Source UTF-16 string pointer.
    utf16Length, // Length of the source UTF-16 string, in wchar_ts.
    &utf8[0], // Pointer to destination buffer. This is fine because the
    //   the C++11 standard specifies that the elements of a
    //   std::basic_string are stored continuously.
    utf8Length, // Size of destination buffer, in chars.
    nullptr, // WideCharToMultiByte requires the last two parameters to be
    nullptr //   nullptrs when converting to UTF-8.
  );
164
  if (result == 0) {
    throw UnicodeConversionException(
      "Cannot convert from UTF-16 to UTF-8 (WideCharToMultiByte failed).", GetLastError());
  }
169
  return utf8;
}
172
// Converts utf16Len char16_t code units to UTF-8 by handing the buffer to the
// wchar_t overload. The pointer is converted with
// Utilities::CheckedReinterpretCast (presumably a checked cast between
// same-sized character types -- confirm in Utilities.h).
std::string Utf16ToUtf8(const char16_t* utf16, size_t utf16Len) {
  const auto wide = Utilities::CheckedReinterpretCast<const wchar_t*>(utf16);
  return Utf16ToUtf8(wide, utf16Len);
}
176
// Converts a null-terminated UTF-16 wide string to UTF-8. The length is
// measured with wcslen and forwarded to the (pointer, length) overload.
std::string Utf16ToUtf8(const wchar_t* utf16) {
  const size_t unitCount = wcslen(utf16);
  return Utf16ToUtf8(utf16, unitCount);
}
180
// Converts a null-terminated char16_t string to UTF-8. The length is measured
// with std::char_traits<char16_t>::length and forwarded together with the
// pointer to the (char16_t pointer, length) overload.
std::string Utf16ToUtf8(const char16_t* utf16) {
  const size_t unitCount = std::char_traits<char16_t>::length(utf16);
  return Utf16ToUtf8(utf16, unitCount);
}
184
// Converts the full contents of a UTF-16 std::wstring to UTF-8 by forwarding
// its buffer and size to the (pointer, length) overload.
std::string Utf16ToUtf8(const std::wstring& utf16) {
  const wchar_t* const units = utf16.data();
  const size_t unitCount = utf16.size();
  return Utf16ToUtf8(units, unitCount);
}
188
// Converts the full contents of a std::u16string to UTF-8. The char16_t buffer
// is converted to a wchar_t pointer with Utilities::CheckedReinterpretCast and
// passed, with the string's size, to the wchar_t (pointer, length) overload.
std::string Utf16ToUtf8(const std::u16string& utf16) {
  const auto wide = Utilities::CheckedReinterpretCast<const wchar_t*>(utf16.data());
  const size_t unitCount = utf16.size();
  return Utf16ToUtf8(wide, unitCount);
}
192
193#if _HAS_CXX17
// Converts the wchar_ts referenced by a std::wstring_view to UTF-8. The view's
// explicit size is forwarded, so no null terminator is needed.
std::string Utf16ToUtf8(const std::wstring_view& utf16) {
  const wchar_t* const units = utf16.data();
  const size_t unitCount = utf16.size();
  return Utf16ToUtf8(units, unitCount);
}
197
// Converts the char16_t code units referenced by a std::u16string_view to
// UTF-8. The buffer is converted to a wchar_t pointer with
// Utilities::CheckedReinterpretCast and passed, with the view's size, to the
// wchar_t (pointer, length) overload.
std::string Utf16ToUtf8(const std::u16string_view& utf16) {
  const auto wide = Utilities::CheckedReinterpretCast<const wchar_t*>(utf16.data());
  const size_t unitCount = utf16.size();
  return Utf16ToUtf8(wide, unitCount);
}
201#endif
202
203} // namespace Microsoft::Common::Unicode

1	`// Copyright (c) Microsoft Corporation. All rights reserved.`
2	`// Licensed under the MIT License.`
3	`#include "pch.h"`
4	`#include "Unicode.h"`
5	`#include "Utilities.h"`
6
7	`#include "windows.h"`
8
9	`#include "stringapiset.h"`
10
11	`#include <cassert>`
12	`#include <cstring>`
13	`#include <exception>`
14	`#include <string>`
15
16	`namespace Microsoft::Common::Unicode {`
17
18	`// The implementations of the following functions heavily reference the MSDN`
19	`// article at https://msdn.microsoft.com/en-us/magazine/mt763237.aspx.`
20
21	`std::wstring Utf8ToUtf16(const char* utf8, size_t utf8Len) {`
22	`std::wstring utf16{};`
23
24	`// A small optimization.`
25	`if (utf8Len == 0) {`
26	`return utf16;`
27	`}`
28
29	`// Extra parentheses needed here to prevent expanding max as a`
30	`// Windows-specific preprocessor macro.`
31	`if (utf8Len > static_cast<size_t>((std::numeric_limits<int>::max)())) {`
32	`throw std::overflow_error("Length of input string to Utf8ToUtf16() must fit into an int.");`
33	`}`
34
35	`const int utf8Length = static_cast<int>(utf8Len);`
36
37	`// We do not specify MB_ERR_INVALID_CHARS here, which means that invalid UTF-8`
38	`// characters are replaced with U+FFFD.`
39	`constexpr DWORD flags = 0;`
40
41	`const int utf16Length = ::MultiByteToWideChar(`
42	`CP_UTF8, // Source string is in UTF-8.`
43	`flags, // Conversion flags.`
44	`utf8, // Source UTF-8 string pointer.`
45	`utf8Length, // Length of the source UTF-8 string, in chars.`
46	`nullptr, // Do not convert during this step, instead, request the size`
47	`0 // of the destination buffer, in wchar_ts, excluding the`
48	`// null termination character.`
49	`);`
50
51	`if (utf16Length == 0) {`
52	`throw UnicodeConversionException(`
53	`"Cannot get result string length when converting from UTF-8 to UTF-16 "`
54	`"(MultiByteToWideChar failed).",`
55	`GetLastError());`
56	`}`
57
58	`// Note that because the length of the input UTF-8 string was explicitly`
59	`// passed to MultiByteToWideChar (instead of just passing -1 and asking`
60	`// MultiByteToWideChar to scan the whole input string until a null terminator`
61	`// is found), MultiByteToWideChar won't add an additional null terminator to`
62	`// the result string. Therefore, there's no need to invoke`
63	`// std::wstring::resize with a "utf16Length + 1" value.`
64	`utf16.resize(utf16Length);`
65
66	`// Convert from UTF-8 to UTF-16`
67	`// Note that MultiByteToWideChar converts the UTF-8 BOM into the UTF-16BE BOM.`
68	`// So we do not have to do anything extra here to ensure correct BOM behavior.`
69	`int result = ::MultiByteToWideChar(`
70	`CP_UTF8, // Source string is in UTF-8.`
71	`flags, // Conversion flags.`
72	`utf8, // Source UTF-8 string pointer.`
73	`utf8Length, // Length of source UTF-8 string, in chars.`
74	`&utf16[0], // Pointer to destination buffer. This is fine because the`
75	`// the C++11 standard specifies that the elements of a`
76	`// std::basic_string are stored continuously.`
77	`utf16Length // Size of destination buffer, in wchar_ts.`
78	`);`
79
80	`if (result == 0) {`
81	`throw UnicodeConversionException(`
82	`"Cannot convert from UTF-8 to UTF-16 (MultiByteToWideChar failed).", GetLastError());`
83	`}`
84
85	`return utf16;`
86	`}`
87
88	`std::wstring Utf8ToUtf16(const char* utf8) {`
89	`return Utf8ToUtf16(utf8, strlen(utf8));`
90	`}`
91
92	`std::wstring Utf8ToUtf16(const std::string& utf8) {`
93	`return Utf8ToUtf16(utf8.c_str(), utf8.length());`
94	`}`
95
96	`#if _HAS_CXX17`
97	`std::wstring Utf8ToUtf16(const std::string_view& utf8) {`
98	`return Utf8ToUtf16(utf8.data(), utf8.length());`
99	`}`
100	`#endif`
101
102	`std::string Utf16ToUtf8(const wchar_t* utf16, size_t utf16Len) {`
103	`std::string utf8{};`
104
105	`// A small optimization.`
106	`if (utf16Len == 0) {`
107	`return utf8;`
108	`}`
109
110	`// Extra parentheses needed here to prevent expanding max as a`
111	`// Windows-specific preprocessor macro.`
112	`if (utf16Len > static_cast<size_t>((std::numeric_limits<int>::max)())) {`
113	`throw std::overflow_error("Length of input string to Utf16ToUtf8() must fit into an int.");`
114	`}`
115
116	`const int utf16Length = static_cast<int>(utf16Len);`
117
118	`// We do not specify WC_ERR_INVALID_CHARS here, which means that invalid`
119	`// UTF-16 characters are replaced with U+FFFD.`
120	`constexpr DWORD flags = 0;`
121
122	`const int utf8Length = ::WideCharToMultiByte(`
123	`CP_UTF8, // Destination string is in UTF-8.`
124	`flags, // Conversion flags.`
125	`utf16, // Source UTF-16 string pointer.`
126	`utf16Length, // Length of the source UTF-16 string, in wchar_ts.`
127	`nullptr, // Do not convert during this step, instead, request the size`
128	`0, // of the destination buffer, in chars, excluding the`
129	`// null termination character.`
130	`nullptr, // WideCharToMultiByte requires the last two parameters to be`
131	`nullptr // nullptrs when converting to UTF-8.`
132	`);`
133
134	`if (utf8Length == 0) {`
135	`throw UnicodeConversionException(`
136	`"Cannot get result string length when converting from UTF-16 to UTF-8 "`
137	`"(WideCharToMultiByte failed).",`
138	`GetLastError());`
139	`}`
140
141	`// Note that because the length of the input UTF-16 string was explicitly`
142	`// passed to WideCharToMultiByte (instead of just passing -1 and asking`
143	`// WideCharToMultiByte to scan the whole input string until a null terminator`
144	`// is found), WideCharToMultiByte won't add an additional null terminator to`
145	`// the result string. Therefore, there's no need to invoke`
146	`// std::string::resize with a "utf8Length + 1" value.`
147	`utf8.resize(utf8Length);`
148
149	`// Convert from UTF-8 to UTF-16`
150	`// Note that MultiByteToWideChar converts the UTF-8 BOM into the UTF-16BE BOM.`
151	`// So we do not have to do anything extra here to ensure correct BOM behavior.`
152	`int result = ::WideCharToMultiByte(`
153	`CP_UTF8, // Destination string is in UTF-8.`
154	`flags, // Conversion flags.`
155	`utf16, // Source UTF-16 string pointer.`
156	`utf16Length, // Length of the source UTF-16 string, in wchar_ts.`
157	`&utf8[0], // Pointer to destination buffer. This is fine because the`
158	`// the C++11 standard specifies that the elements of a`
159	`// std::basic_string are stored continuously.`
160	`utf8Length, // Size of destination buffer, in chars.`
161	`nullptr, // WideCharToMultiByte requires the last two parameters to be`
162	`nullptr // nullptrs when converting to UTF-8.`
163	`);`
164
165	`if (result == 0) {`
166	`throw UnicodeConversionException(`
167	`"Cannot convert from UTF-16 to UTF-8 (WideCharToMultiByte failed).", GetLastError());`
168	`}`
169
170	`return utf8;`
171	`}`
172
173	`std::string Utf16ToUtf8(const char16_t* utf16, size_t utf16Len) {`
174	`return Utf16ToUtf8(Utilities::CheckedReinterpretCast<const wchar_t*>(utf16), utf16Len);`
175	`}`
176
177	`std::string Utf16ToUtf8(const wchar_t* utf16) {`
178	`return Utf16ToUtf8(utf16, wcslen(utf16));`
179	`}`
180
181	`std::string Utf16ToUtf8(const char16_t* utf16) {`
182	`return Utf16ToUtf8(utf16, std::char_traits<char16_t>::length(utf16));`
183	`}`
184
185	`std::string Utf16ToUtf8(const std::wstring& utf16) {`
186	`return Utf16ToUtf8(utf16.c_str(), utf16.length());`
187	`}`
188
189	`std::string Utf16ToUtf8(const std::u16string& utf16) {`
190	`return Utf16ToUtf8(Utilities::CheckedReinterpretCast<const wchar_t*>(utf16.c_str()), utf16.length());`
191	`}`
192
193	`#if _HAS_CXX17`
194	`std::string Utf16ToUtf8(const std::wstring_view& utf16) {`
195	`return Utf16ToUtf8(utf16.data(), utf16.length());`
196	`}`
197
198	`std::string Utf16ToUtf8(const std::u16string_view& utf16) {`
199	`return Utf16ToUtf8(Utilities::CheckedReinterpretCast<const wchar_t*>(utf16.data()), utf16.length());`
200	`}`
201	`#endif`
202
203	`} // namespace Microsoft::Common::Unicode`