1#ifndef NEFORCE_CORE_STRING_CODEPOINT_HPP__
2#define NEFORCE_CORE_STRING_CODEPOINT_HPP__
14NEFORCE_BEGIN_NAMESPACE__
141 return c >= 0xD800 && c <= 0xDBFF;
152 return c >= 0xDC00 && c <= 0xDFFF;
164 const char16_t low)
noexcept {
166 (
static_cast<uint32_t>(low) - 0xDC00)};
225 template <
typename T>
229 const bool ok = codepoint::utf16_codepoint(
data, index, len, cp, consumed, need_swap);
250 template <
typename T>
251 static bool utf16_codepoint(
const T*
data,
size_t index,
const size_t len,
uint32_t& cp,
size_t& consumed,
252 const bool need_swap) {
263 if (is_high_surrogate(c1)) {
264 if (index + 1 < len) {
265 const auto raw2 =
static_cast<uint16_t>(
data[index + 1]);
268 if (is_low_surrogate(c2)) {
269 cp = combine_surrogates(c1, c2).value();
279 if (is_low_surrogate(c1)) {
327 constexpr char32_t to_char32() const noexcept {
return static_cast<char32_t>(value_); }
339 constexpr bool is_ascii() const noexcept {
return value_ <= 0x7F; }
345 constexpr bool is_bmp() const noexcept {
return value_ <= 0xFFFF; }
370 if (value_ <= 0x7F) {
373 if (value_ <= 0x7FF) {
376 if (value_ <= 0xFFFF) {
398#ifdef NEFORCE_STANDARD_20
431 constexpr bool operator==(
const codepoint& other)
const noexcept {
return value_ == other.value_; }
432 constexpr bool operator!=(
const codepoint& other)
const noexcept {
return value_ != other.value_; }
433 constexpr bool operator<(
const codepoint& other)
const noexcept {
return value_ < other.value_; }
434 constexpr bool operator<=(
const codepoint& other)
const noexcept {
return value_ <= other.value_; }
435 constexpr bool operator>(
const codepoint& other)
const noexcept {
return value_ > other.value_; }
436 constexpr bool operator>=(
const codepoint& other)
const noexcept {
return value_ >= other.value_; }
444NEFORCE_END_NAMESPACE__
NEFORCE_CONSTEXPR20 void push_back(value_type value)
在末尾插入字符
static NEFORCE_CONST_FUNCTION constexpr bool is_low_surrogate(const char16_t c) noexcept
检查字符是否为低代理项
constexpr bool is_replacement() const noexcept
是否为替换符U+FFFD
static codepoint decode_utf16(const T *data, size_t &index, const size_t len, const bool need_swap) noexcept
从UTF-16序列解码一个码点,并推进索引
static constexpr codepoint null() noexcept
构造空字符U+0000
constexpr char32_t to_char32() const noexcept
获取码点的char32_t值
static constexpr uint32_t MAX_VALUE
Unicode 最大合法码点
static constexpr uint32_t REPLACEMENT_VALUE
Unicode 替换符 U+FFFD
constexpr bool needs_surrogate_pair() const noexcept
是否需要UTF-16代理对表示
static NEFORCE_CONST_FUNCTION constexpr bool is_high_surrogate(const char16_t c) noexcept
检查字符是否为高代理项
static constexpr codepoint from_utf32(char32_t value) noexcept
从UTF-32值直接构造码点
static constexpr codepoint replacement() noexcept
构造替换符U+FFFD
static NEFORCE_CONST_FUNCTION constexpr codepoint combine_surrogates(const char16_t high, const char16_t low) noexcept
组合高代理项和低代理项为完整的Unicode码点
void append_to(u8string &result) const
追加UTF-8编码到u8string
constexpr bool is_ascii() const noexcept
是否为ASCII字符(U+0000 ~ U+007F)
void append_to(wstring &result) const
追加编码到wstring
constexpr size_t utf16_length() const noexcept
UTF-16编码后的码元数
constexpr codepoint(uint32_t value) noexcept
从uint32_t构造码点
constexpr bool is_supplementary() const noexcept
是否为辅助平面字符(需要UTF-16代理对)
constexpr codepoint() noexcept
默认构造函数
constexpr size_t utf8_length() const noexcept
UTF-8编码后的字节数
constexpr codepoint(const char32_t value) noexcept
从char32_t构造码点
constexpr bool is_bmp() const noexcept
是否位于基本多文种平面(BMP, U+0000 ~ U+FFFF)
static constexpr bool is_valid_codepoint(const uint32_t v) noexcept
检查码点值是否合法
void append_to(string &result) const
追加UTF-8编码到string
static codepoint decode_utf8(const byte_t *data, size_t &i, size_t len) noexcept
从UTF-8字节流解码一个码点,并推进索引
void append_to(u32string &result) const
追加UTF-32编码到u32string
void append_to(u16string &result) const
追加UTF-16编码到u16string
constexpr uint32_t value() const noexcept
获取码点的uint32_t值
unsigned char byte_t
字节类型,定义为无符号字符
unsigned int uint32_t
32位无符号整数类型
unsigned short uint16_t
16位无符号整数类型
bool operator!=(const function< Res(Args...)> &f, nullptr_t np) noexcept
不等于空指针比较
bool operator==(const function< Res(Args...)> &f, nullptr_t np) noexcept
等于空指针比较
NEFORCE_NODISCARD constexpr bool operator<=(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
小于等于比较运算符
NEFORCE_NODISCARD constexpr bool operator<(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
小于比较运算符
NEFORCE_NODISCARD constexpr bool operator>(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
大于比较运算符
NEFORCE_NODISCARD constexpr bool operator>=(const normal_iterator< LeftIter > &lhs, const normal_iterator< RightIter > &rhs) noexcept
大于等于比较运算符
basic_string< char16_t > u16string
UTF-16字符串
basic_string< char8_t > u8string
UTF-8字符串
basic_string< wchar_t > wstring
宽字符字符串
basic_string< char32_t > u32string
UTF-32字符串
NEFORCE_NODISCARD NEFORCE_ALWAYS_INLINE constexpr decltype(auto) data(Container &cont) noexcept(noexcept(cont.data()))
获取容器的底层数据指针
static constexpr uint16_t byteswap16(uint16_t value) noexcept
16位整数字节序反转