Unicode码点包装类。 更多...

#include <codepoint.hpp>

Public 成员函数
constexpr	codepoint () noexcept
	默认构造函数
constexpr	codepoint (uint32_t value) noexcept
	从uint32_t构造码点
constexpr	codepoint (const char32_t value) noexcept
	从char32_t构造码点
constexpr uint32_t	value () const noexcept
	获取码点的uint32_t值
constexpr char32_t	to_char32 () const noexcept
	获取码点的char32_t值
constexpr bool	is_replacement () const noexcept
	是否为替换符U+FFFD
constexpr bool	is_ascii () const noexcept
	是否为ASCII字符（U+0000 ~ U+007F）
constexpr bool	is_bmp () const noexcept
	是否位于基本多文种平面（BMP, U+0000 ~ U+FFFF）
constexpr bool	is_supplementary () const noexcept
	是否为辅助平面字符（需要UTF-16代理对）
constexpr bool	needs_surrogate_pair () const noexcept
	是否需要UTF-16代理对表示
constexpr size_t	utf8_length () const noexcept
	UTF-8编码后的字节数
constexpr size_t	utf16_length () const noexcept
	UTF-16编码后的码元数
void	append_to (string &result) const
	追加UTF-8编码到string
void	append_to (u8string &result) const
	追加UTF-8编码到u8string
void	append_to (u16string &result) const
	追加UTF-16编码到u16string
void	append_to (u32string &result) const
	追加UTF-32编码到u32string
void	append_to (wstring &result) const
	追加编码到wstring

静态 Public 成员函数
static NEFORCE_CONST_FUNCTION constexpr bool	is_high_surrogate (const char16_t c) noexcept
	检查字符是否为高代理项
static NEFORCE_CONST_FUNCTION constexpr bool	is_low_surrogate (const char16_t c) noexcept
	检查字符是否为低代理项
static NEFORCE_CONST_FUNCTION constexpr codepoint	combine_surrogates (const char16_t high, const char16_t low) noexcept
	组合高代理项和低代理项为完整的Unicode码点
static constexpr bool	is_valid_codepoint (const uint32_t v) noexcept
	检查码点值是否合法
static constexpr codepoint	replacement () noexcept
	构造替换符U+FFFD
static constexpr codepoint	null () noexcept
	构造空字符U+0000
static codepoint	decode_utf8 (const byte_t *data, size_t &i, size_t len) noexcept
	从UTF-8字节流解码一个码点，并推进索引
template<typename T>
static codepoint	decode_utf16 (const T *data, size_t &index, const size_t len, const bool need_swap) noexcept
	从UTF-16序列解码一个码点，并推进索引
static constexpr codepoint	from_utf32 (char32_t value) noexcept
	从UTF-32值直接构造码点

静态 Public 属性
static constexpr uint32_t	REPLACEMENT_VALUE = 0xFFFD
	Unicode 替换符 U+FFFD
static constexpr uint32_t	MAX_VALUE = 0x10FFFF
	Unicode 最大合法码点

详细描述

Unicode码点包装类

封装一个经过验证的Unicode码点（U+0000 ~ U+10FFFF，排除代理项）。非法值在构造时自动替换为U+FFFD（替换字符）。提供UTF-8、UTF-16、UTF-32之间的转换功能。

在文件 codepoint.hpp 第 126 行定义.

构造及析构函数说明

◆ codepoint() [1/3]

codepoint::codepoint ( )

inline constexpr noexcept

默认构造函数

构造空字符U+0000

在文件 codepoint.hpp 第 293 行定义.

被这些函数引用 append_to(), codepoint(), combine_surrogates(), decode_utf16(), decode_utf8(), from_utf32(), null() 以及 replacement().

◆ codepoint() [2/3]

codepoint::codepoint ( uint32_t value )

inline explicit constexpr noexcept

从uint32_t构造码点

参数

value 原始码点值

如果值非法，自动替换为U+FFFD。

在文件 codepoint.hpp 第 302 行定义.

引用了 is_valid_codepoint(), REPLACEMENT_VALUE 以及 value().

◆ codepoint() [3/3]

codepoint::codepoint ( const char32_t value )

inline explicit constexpr noexcept

从char32_t构造码点

参数

value UTF-32字符

在文件 codepoint.hpp 第 309 行定义.

引用了 codepoint() 以及 value().

成员函数说明

◆ append_to() [1/5]

void codepoint::append_to ( string & result ) const

追加UTF-8编码到string

参数

result 目标字符串

将码点以UTF-8编码追加到string中。

被这些函数引用 u16character::to_string(), u32character::to_string(), u8character::to_string(), wcharacter::to_string(), character::to_u16string(), u16character::to_u16string(), u32character::to_u16string(), u8character::to_u16string(), wcharacter::to_u16string(), character::to_u32string(), u16character::to_u32string(), u8character::to_u32string(), wcharacter::to_u32string(), character::to_u8string(), u16character::to_u8string(), u32character::to_u8string(), wcharacter::to_u8string(), character::to_wstring(), u16character::to_wstring(), u32character::to_wstring() 以及 u8character::to_wstring().

◆ append_to() [2/5]

void codepoint::append_to ( u16string & result ) const

追加UTF-16编码到u16string

参数

result 目标UTF-16字符串

将码点以UTF-16编码追加到u16string中。 BMP字符追加单个码元，辅助平面字符追加代理对。

◆ append_to() [3/5]

void codepoint::append_to ( u32string & result ) const

inline

追加UTF-32编码到u32string

参数

result 目标UTF-32字符串

将码点直接追加到u32string中（一个码元）。

在文件 codepoint.hpp 第 423 行定义.

引用了 basic_string< CharT, Traits, Alloc >::push_back().

◆ append_to() [4/5]

void codepoint::append_to ( u8string & result ) const

追加UTF-8编码到u8string

参数

result 目标UTF-8字符串

将码点以UTF-8编码追加到u8string中。

◆ append_to() [5/5]

void codepoint::append_to ( wstring & result ) const

追加编码到wstring

参数

result 目标宽字符串

引用了 codepoint().

◆ combine_surrogates()

NEFORCE_CONST_FUNCTION constexpr codepoint codepoint::combine_surrogates ( const char16_t high, const char16_t low )

inline static constexpr noexcept

组合高代理项和低代理项为完整的Unicode码点

参数

high	高代理项
low	低代理项

返回: 组合后的Unicode码点

根据UTF-16编码规则将两个代理项组合为完整的码点。

在文件 codepoint.hpp 第 163 行定义.

引用了 codepoint().

◆ decode_utf16()

template<typename T>

codepoint codepoint::decode_utf16 ( const T * data, size_t & index, const size_t len, const bool need_swap )

inline static noexcept

从UTF-16序列解码一个码点，并推进索引

模板参数

T	char16_t或wchar_t类型

参数

data	UTF-16数据指针
index	当前位置（解码后自动推进）
len	数据总长度
need_swap	是否需要字节序反转

返回: 解码结果，失败时返回替换符

支持单码元和代理对解码，自动处理字节序转换。无效序列返回替换符。

在文件 codepoint.hpp 第 226 行定义.

引用了 codepoint(), data() 以及 replacement().

被这些函数引用 u16character::to_string(), wcharacter::to_string(), u16character::to_u16string(), u16character::to_u32string(), wcharacter::to_u32string(), u16character::to_u8string(), wcharacter::to_u8string() 以及 u16character::to_wstring().

◆ decode_utf8()

codepoint codepoint::decode_utf8 ( const byte_t * data, size_t & i, size_t len )

static noexcept

从UTF-8字节流解码一个码点，并推进索引

参数

data	UTF-8字节数据
i	当前位置（解码后自动推进）
len	数据总长度

返回: 解码结果，失败时返回替换符

支持1-4字节的UTF-8序列，自动验证序列的有效性。无效序列或截断序列返回替换符。

引用了 codepoint() 以及 data().

被这些函数引用 u8character::to_string(), character::to_u16string(), u8character::to_u16string(), character::to_u32string(), u8character::to_u32string(), character::to_u8string(), character::to_wstring() 以及 u8character::to_wstring().

◆ from_utf32()

constexpr codepoint codepoint::from_utf32 ( char32_t value )

inline static constexpr noexcept

从UTF-32值直接构造码点

参数

value UTF-32字符

返回: 码点对象

UTF-32编码直接对应Unicode码点值。

在文件 codepoint.hpp 第 244 行定义.

引用了 codepoint() 以及 value().

被这些函数引用 u32character::to_string(), u32character::to_u16string(), u32character::to_u8string() 以及 u32character::to_wstring().

◆ is_ascii()

bool codepoint::is_ascii ( ) const

inline constexpr noexcept

是否为ASCII字符（U+0000 ~ U+007F）

返回: 是ASCII字符返回true

在文件 codepoint.hpp 第 339 行定义.

◆ is_bmp()

bool codepoint::is_bmp ( ) const

inline constexpr noexcept

是否位于基本多文种平面（BMP, U+0000 ~ U+FFFF）

返回: 在BMP内返回true

在文件 codepoint.hpp 第 345 行定义.

◆ is_high_surrogate()

NEFORCE_CONST_FUNCTION constexpr bool codepoint::is_high_surrogate ( const char16_t c )

inline static constexpr noexcept

检查字符是否为高代理项

参数

c UTF-16字符

返回: 如果字符是高代理项则返回true，否则返回false

高代理项的范围是0xD800-0xDBFF。

在文件 codepoint.hpp 第 140 行定义.

被这些函数引用 is_valid_codepoint().

◆ is_low_surrogate()

NEFORCE_CONST_FUNCTION constexpr bool codepoint::is_low_surrogate ( const char16_t c )

inline static constexpr noexcept

检查字符是否为低代理项

参数

c UTF-16字符

返回: 如果字符是低代理项则返回true，否则返回false

低代理项的范围是0xDC00-0xDFFF。

在文件 codepoint.hpp 第 151 行定义.

被这些函数引用 is_valid_codepoint().

◆ is_replacement()

bool codepoint::is_replacement ( ) const

inline constexpr noexcept

是否为替换符U+FFFD

返回: 是替换符返回true

在文件 codepoint.hpp 第 333 行定义.

引用了 REPLACEMENT_VALUE.

◆ is_supplementary()

bool codepoint::is_supplementary ( ) const

inline constexpr noexcept

是否为辅助平面字符（需要UTF-16代理对）

返回: 是辅助平面字符返回true

在文件 codepoint.hpp 第 351 行定义.

引用了 MAX_VALUE.

被这些函数引用 needs_surrogate_pair() 以及 utf16_length().

◆ is_valid_codepoint()

constexpr bool codepoint::is_valid_codepoint ( const uint32_t v )

inline static constexpr noexcept

检查码点值是否合法

参数

v 要检查的码点值

返回: 合法返回true，否则返回false

合法条件：

不超过0x10FFFF
不是高代理项（0xD800-0xDBFF）
不是低代理项（0xDC00-0xDFFF）

在文件 codepoint.hpp 第 179 行定义.

引用了 is_high_surrogate(), is_low_surrogate() 以及 MAX_VALUE.

被这些函数引用 codepoint().

◆ needs_surrogate_pair()

bool codepoint::needs_surrogate_pair ( ) const

inline constexpr noexcept

是否需要UTF-16代理对表示

返回: 需要代理对返回true

在文件 codepoint.hpp 第 357 行定义.

引用了 is_supplementary().

◆ null()

constexpr codepoint codepoint::null ( )

inline static constexpr noexcept

构造空字符U+0000

返回: 空字符码点对象

在文件 codepoint.hpp 第 199 行定义.

引用了 codepoint().

◆ replacement()

constexpr codepoint codepoint::replacement ( )

inline static constexpr noexcept

构造替换符U+FFFD

返回: 替换符码点对象

用于表示无效或无法表示的Unicode字符。

在文件 codepoint.hpp 第 189 行定义.

引用了 codepoint() 以及 REPLACEMENT_VALUE.

被这些函数引用 decode_utf16().

◆ to_char32()

char32_t codepoint::to_char32 ( ) const

inline constexpr noexcept

获取码点的char32_t值

返回: UTF-32字符

在文件 codepoint.hpp 第 327 行定义.

◆ utf16_length()

size_t codepoint::utf16_length ( ) const

inline constexpr noexcept

UTF-16编码后的码元数

返回: 码元数（1~2）

BMP字符需要1个码元，辅助平面字符需要2个码元（代理对）。

在文件 codepoint.hpp 第 388 行定义.

引用了 is_supplementary().

◆ utf8_length()

size_t codepoint::utf8_length ( ) const

inline constexpr noexcept

UTF-8编码后的字节数

返回: 字节数（1~4）

根据码点值计算所需UTF-8编码长度：

U+0000 ~ U+007F: 1字节
U+0080 ~ U+07FF: 2字节
U+0800 ~ U+FFFF: 3字节
U+10000 ~ U+10FFFF: 4字节

在文件 codepoint.hpp 第 369 行定义.

◆ value()

uint32_t codepoint::value ( ) const

inline constexpr noexcept

获取码点的uint32_t值

返回: 码点数值

在文件 codepoint.hpp 第 321 行定义.

引用了 value().

被这些函数引用 codepoint(), codepoint(), from_utf32() 以及 value().

该类的文档由以下文件生成:

codepoint.hpp

Public 成员函数

静态 Public 成员函数

静态 Public 属性

详细描述

构造及析构函数说明

◆ codepoint() [1/3]

◆ codepoint() [2/3]

◆ codepoint() [3/3]

成员函数说明

◆ append_to() [1/5]

◆ append_to() [2/5]

◆ append_to() [3/5]

◆ append_to() [4/5]

◆ append_to() [5/5]

◆ combine_surrogates()

◆ decode_utf16()

◆ decode_utf8()

◆ from_utf32()

◆ is_ascii()

◆ is_bmp()

◆ is_high_surrogate()

◆ is_low_surrogate()

◆ is_replacement()

◆ is_supplementary()

◆ is_valid_codepoint()

◆ needs_surrogate_pair()

◆ null()

◆ replacement()

◆ to_char32()

◆ utf16_length()

◆ utf8_length()

◆ value()