NexusForce 1.0.0
A Modern C++ Library with extended functionality, web components, and utility libraries
载入中...
搜索中...
未找到
codepoint.hpp
浏览该文件的文档.
1#ifndef NEFORCE_CORE_STRING_CODEPOINT_HPP__
2#define NEFORCE_CORE_STRING_CODEPOINT_HPP__
3
11
14NEFORCE_BEGIN_NAMESPACE__
15
117
126class NEFORCE_API codepoint {
127public:
/// Value of the Unicode replacement character, U+FFFD.
129 static constexpr uint32_t REPLACEMENT_VALUE = 0xFFFD;
/// Largest legal Unicode code point, U+10FFFF.
131 static constexpr uint32_t MAX_VALUE = 0x10FFFF;
132
140 NEFORCE_CONST_FUNCTION static constexpr bool is_high_surrogate(const char16_t c) noexcept {
141 return c >= 0xD800 && c <= 0xDBFF;
142 }
143
151 NEFORCE_CONST_FUNCTION static constexpr bool is_low_surrogate(const char16_t c) noexcept {
152 return c >= 0xDC00 && c <= 0xDFFF;
153 }
154
163 NEFORCE_CONST_FUNCTION static constexpr codepoint combine_surrogates(const char16_t high,
164 const char16_t low) noexcept {
165 return codepoint{0x10000 + ((static_cast<uint32_t>(high) - 0xD800) << 10) +
166 (static_cast<uint32_t>(low) - 0xDC00)};
167 }
168
179 static constexpr bool is_valid_codepoint(const uint32_t v) noexcept {
180 return v <= MAX_VALUE && !is_high_surrogate(v) && !is_low_surrogate(v);
181 }
182
189 static constexpr codepoint replacement() noexcept {
190 codepoint cp;
191 cp.value_ = REPLACEMENT_VALUE;
192 return cp;
193 }
194
199 static constexpr codepoint null() noexcept { return codepoint(0u); }
200
/**
 * @brief Decodes one code point from a UTF-8 byte stream and advances the index.
 * @param data pointer to the bytes to decode
 * @param i index into data, taken by reference so the call can advance it
 * @param len presumably the number of bytes available in data -- confirm against the definition
 * @return the decoded code point
 * @note NOTE(review): the definition is not part of this header; the handling of
 *       malformed input should be confirmed there.
 */
211 static codepoint decode_utf8(const byte_t* data, size_t& i, size_t len) noexcept;
212
225 template <typename T>
226 static codepoint decode_utf16(const T* data, size_t& index, const size_t len, const bool need_swap) noexcept {
227 uint32_t cp;
228 size_t consumed;
229 const bool ok = codepoint::utf16_codepoint(data, index, len, cp, consumed, need_swap);
230 if (consumed == 0) {
231 return replacement();
232 }
233 index += consumed;
234 return ok ? codepoint(cp) : replacement();
235 }
236
244 static constexpr codepoint from_utf32(char32_t value) noexcept { return codepoint(static_cast<uint32_t>(value)); }
245
246private:
/// Stored code point value; returned unchanged by value().
247 uint32_t value_;
248
249private:
250 template <typename T>
251 static bool utf16_codepoint(const T* data, size_t index, const size_t len, uint32_t& cp, size_t& consumed,
252 const bool need_swap) {
253 if (index >= len) {
254 cp = 0xFFFD;
255 consumed = 0;
256 return false;
257 }
258
259 const auto raw1 = static_cast<uint16_t>(data[index]);
260 const auto c1 = static_cast<uint32_t>(need_swap ? endian::byteswap16(raw1) : raw1);
261 consumed = 1;
262
263 if (is_high_surrogate(c1)) {
264 if (index + 1 < len) {
265 const auto raw2 = static_cast<uint16_t>(data[index + 1]);
266 const auto c2 = static_cast<uint32_t>(need_swap ? endian::byteswap16(raw2) : raw2);
267
268 if (is_low_surrogate(c2)) {
269 cp = combine_surrogates(c1, c2).value();
270 consumed = 2;
271 return true;
272 }
273 }
274
275 cp = 0xFFFD;
276 return false;
277 }
278
279 if (is_low_surrogate(c1)) {
280 cp = 0xFFFD;
281 return false;
282 }
283
284 cp = c1;
285 return true;
286 }
287
288public:
293 constexpr codepoint() noexcept :
294 value_(0) {}
295
302 constexpr explicit codepoint(uint32_t value) noexcept :
304
309 constexpr explicit codepoint(const char32_t value) noexcept :
310 codepoint(static_cast<uint32_t>(value)) {}
311
// Copy and move construction/assignment are all compiler-generated (defaulted).
312 constexpr codepoint(const codepoint&) noexcept = default;
313 constexpr codepoint& operator=(const codepoint&) noexcept = default;
314 constexpr codepoint(codepoint&&) noexcept = default;
315 constexpr codepoint& operator=(codepoint&&) noexcept = default;
316
321 constexpr uint32_t value() const noexcept { return value_; }
322
327 constexpr char32_t to_char32() const noexcept { return static_cast<char32_t>(value_); }
328
333 constexpr bool is_replacement() const noexcept { return value_ == REPLACEMENT_VALUE; }
334
339 constexpr bool is_ascii() const noexcept { return value_ <= 0x7F; }
340
345 constexpr bool is_bmp() const noexcept { return value_ <= 0xFFFF; }
346
351 constexpr bool is_supplementary() const noexcept { return value_ > 0xFFFF && value_ <= MAX_VALUE; }
352
357 constexpr bool needs_surrogate_pair() const noexcept { return is_supplementary(); }
358
369 constexpr size_t utf8_length() const noexcept {
370 if (value_ <= 0x7F) {
371 return 1;
372 }
373 if (value_ <= 0x7FF) {
374 return 2;
375 }
376 if (value_ <= 0xFFFF) {
377 return 3;
378 }
379 return 4;
380 }
381
388 constexpr size_t utf16_length() const noexcept { return is_supplementary() ? 2u : 1u; }
389
/// Appends the UTF-8 encoding of this code point to a string (defined out of line).
396 void append_to(string& result) const;
397
398#ifdef NEFORCE_STANDARD_20
/// Appends the UTF-8 encoding of this code point to a u8string; only declared
/// when NEFORCE_STANDARD_20 is defined.
405 void append_to(u8string& result) const;
406#endif
407
/// Appends the UTF-16 encoding of this code point to a u16string (defined out of line).
415 void append_to(u16string& result) const;
416
423 void append_to(u32string& result) const { result.push_back(static_cast<char32_t>(value_)); }
424
/// Appends the encoded code point to a wstring (defined out of line).
/// NOTE(review): the encoding used for wchar_t is not visible from this header -- confirm in the definition.
429 void append_to(wstring& result) const;
430
431 constexpr bool operator==(const codepoint& other) const noexcept { return value_ == other.value_; }
432 constexpr bool operator!=(const codepoint& other) const noexcept { return value_ != other.value_; }
433 constexpr bool operator<(const codepoint& other) const noexcept { return value_ < other.value_; }
434 constexpr bool operator<=(const codepoint& other) const noexcept { return value_ <= other.value_; }
435 constexpr bool operator>(const codepoint& other) const noexcept { return value_ > other.value_; }
436 constexpr bool operator>=(const codepoint& other) const noexcept { return value_ >= other.value_; }
437
438 constexpr bool operator==(uint32_t v) const noexcept { return value_ == v; }
439 constexpr bool operator!=(uint32_t v) const noexcept { return value_ != v; }
440};
441 // CodePoint
443
444NEFORCE_END_NAMESPACE__
445#endif // NEFORCE_CORE_STRING_CODEPOINT_HPP__
NEFORCE_CONSTEXPR20 void push_back(value_type value)
在末尾插入字符
Unicode码点包装类
static NEFORCE_CONST_FUNCTION constexpr bool is_low_surrogate(const char16_t c) noexcept
检查字符是否为低代理项
constexpr bool is_replacement() const noexcept
是否为替换符U+FFFD
static codepoint decode_utf16(const T *data, size_t &index, const size_t len, const bool need_swap) noexcept
从UTF-16序列解码一个码点,并推进索引
static constexpr codepoint null() noexcept
构造空字符U+0000
constexpr char32_t to_char32() const noexcept
获取码点的char32_t值
static constexpr uint32_t MAX_VALUE
Unicode 最大合法码点
static constexpr uint32_t REPLACEMENT_VALUE
Unicode 替换符 U+FFFD
constexpr bool needs_surrogate_pair() const noexcept
是否需要UTF-16代理对表示
static NEFORCE_CONST_FUNCTION constexpr bool is_high_surrogate(const char16_t c) noexcept
检查字符是否为高代理项
static constexpr codepoint from_utf32(char32_t value) noexcept
从UTF-32值直接构造码点
static constexpr codepoint replacement() noexcept
构造替换符U+FFFD
static NEFORCE_CONST_FUNCTION constexpr codepoint combine_surrogates(const char16_t high, const char16_t low) noexcept
组合高代理项和低代理项为完整的Unicode码点
void append_to(u8string &result) const
追加UTF-8编码到u8string
constexpr bool is_ascii() const noexcept
是否为ASCII字符(U+0000 ~ U+007F)
void append_to(wstring &result) const
追加编码到wstring
constexpr size_t utf16_length() const noexcept
UTF-16编码后的码元数
constexpr codepoint(uint32_t value) noexcept
从uint32_t构造码点
constexpr bool is_supplementary() const noexcept
是否为辅助平面字符(需要UTF-16代理对)
constexpr codepoint() noexcept
默认构造函数
constexpr size_t utf8_length() const noexcept
UTF-8编码后的字节数
constexpr codepoint(const char32_t value) noexcept
从char32_t构造码点
constexpr bool is_bmp() const noexcept
是否位于基本多文种平面(BMP, U+0000 ~ U+FFFF)
static constexpr bool is_valid_codepoint(const uint32_t v) noexcept
检查码点值是否合法
void append_to(string &result) const
追加UTF-8编码到string
static codepoint decode_utf8(const byte_t *data, size_t &i, size_t len) noexcept
从UTF-8字节流解码一个码点,并推进索引
void append_to(u32string &result) const
追加UTF-32编码到u32string
void append_to(u16string &result) const
追加UTF-16编码到u16string
constexpr uint32_t value() const noexcept
获取码点的uint32_t值
端序转换工具
unsigned char byte_t
字节类型,定义为无符号字符
unsigned int uint32_t
32位无符号整数类型
unsigned short uint16_t
16位无符号整数类型
bool operator!=(const function< Res(Args...)> &f, nullptr_t np) noexcept
不等于空指针比较
bool operator==(const function< Res(Args...)> &f, nullptr_t np) noexcept
等于空指针比较
NEFORCE_NODISCARD constexpr bool operator<=(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
小于等于比较运算符
NEFORCE_NODISCARD constexpr bool operator<(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
小于比较运算符
NEFORCE_NODISCARD constexpr bool operator>(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
大于比较运算符
NEFORCE_NODISCARD constexpr bool operator>=(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
大于等于比较运算符
basic_string< char16_t > u16string
UTF-16字符串
basic_string< char8_t > u8string
UTF-8字符串
basic_string< wchar_t > wstring
宽字符字符串
basic_string< char32_t > u32string
UTF-32字符串
NEFORCE_NODISCARD NEFORCE_ALWAYS_INLINE constexpr decltype(auto) data(Container &cont) noexcept(noexcept(cont.data()))
获取容器的底层数据指针
字符串类型别名和实用函数
static constexpr uint16_t byteswap16(uint16_t value) noexcept
16位整数字节序反转