NexusForce 1.0.0
A Modern C++ Library with extended functionality, web components, and utility libraries
载入中...
搜索中...
未找到
utf.hpp
浏览该文件的文档.
1#ifndef NEFORCE_CORE_STRING_UTF_HPP__
2#define NEFORCE_CORE_STRING_UTF_HPP__
3
10
13NEFORCE_BEGIN_NAMESPACE__
14
20
126
134struct character : icharacter<character, char> {
135 using value_type = char;
136 using base = icharacter<character, char>;
137
138 constexpr character() noexcept = default;
139 NEFORCE_CONSTEXPR20 ~character() = default;
140
141 constexpr character(const character&) noexcept = default;
142 constexpr character(character&&) noexcept = default;
143
144 constexpr character& operator=(const character& other) noexcept = default;
145 constexpr character& operator=(character&& other) noexcept = default;
146
147 explicit constexpr character(const value_type value) noexcept :
148 base(value) {}
149
150 constexpr character& operator=(const value_type value) noexcept {
151 value_ = value;
152 return *this;
153 }
154
160 static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view<value_type>& obj) { return string{obj}; }
161
167 static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view<value_type>& obj) {
168 if (obj.empty()) {
169 return {};
170 }
171 wstring result;
172 result.reserve(obj.size());
173
174 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
175 size_t i = 0;
176 const size_t len = obj.size();
177
178 while (i < len) {
179 codepoint::decode_utf8(data, i, len).append_to(result);
180 }
181 return result;
182 }
183
184#if defined(NEFORCE_STANDARD_20) || defined(NEXUSFORCE_ENABLE_DOXYGEN)
190 static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view<value_type>& obj) {
191 if (obj.empty()) {
192 return {};
193 }
194 u8string result;
195 result.reserve(obj.size());
196
197 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
198 size_t i = 0;
199 const size_t len = obj.size();
200
201 while (i < len) {
202 codepoint::decode_utf8(data, i, len).append_to(result);
203 }
204 return result;
205 }
206#endif
207
213 static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view<value_type>& obj) {
214 if (obj.empty()) {
215 return {};
216 }
217 u16string result;
218 result.reserve(obj.size() * 2);
219
220 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
221 size_t i = 0;
222 const size_t len = obj.size();
223
224 while (i < len) {
225 codepoint::decode_utf8(data, i, len).append_to(result);
226 }
227 return result;
228 }
229
235 static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view<value_type>& obj) {
236 if (obj.empty()) {
237 return {};
238 }
239 u32string result;
240 result.reserve(obj.size());
241
242 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
243 size_t i = 0;
244 const size_t len = obj.size();
245
246 while (i < len) {
247 codepoint::decode_utf8(data, i, len).append_to(result);
248 }
249 return result;
250 }
251};
252
253template <>
254struct package<char> {
255 using type = character;
256};
257
258template <>
259struct unpackage<character> {
260 using type = char;
261};
262
270struct wcharacter : icharacter<wcharacter, wchar_t> {
271 using value_type = wchar_t;
272 using base = icharacter<wcharacter, wchar_t>;
273
274 constexpr wcharacter() noexcept = default;
275 NEFORCE_CONSTEXPR20 ~wcharacter() = default;
276
277 constexpr wcharacter(const wcharacter&) noexcept = default;
278 constexpr wcharacter(wcharacter&&) noexcept = default;
279
280 constexpr wcharacter& operator=(const wcharacter& other) noexcept = default;
281 constexpr wcharacter& operator=(wcharacter&& other) noexcept = default;
282
283 explicit constexpr wcharacter(const value_type value) noexcept :
284 base(value) {}
285
286 constexpr wcharacter& operator=(const value_type value) noexcept {
287 value_ = value;
288 return *this;
289 }
290
296 static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view<value_type>& obj) {
297 if (obj.empty()) {
298 return {};
299 }
300 string result;
301
302#ifdef NEFORCE_PLATFORM_WINDOWS
303 size_t i = 0;
304 while (i < obj.size()) {
305 codepoint::decode_utf16(obj.data(), i, obj.size(), false).append_to(result);
306 }
307#else
308 for (const value_type c: obj) {
309 codepoint(static_cast<uint32_t>(c)).append_to(result);
310 }
311#endif
312 return result;
313 }
314
320 static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view<value_type>& obj) { return wstring{obj}; }
321
322#if defined(NEFORCE_STANDARD_20) || defined(NEXUSFORCE_ENABLE_DOXYGEN)
328 static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view<value_type>& obj) {
329 if (obj.empty()) {
330 return {};
331 }
332 u8string result;
333
334# ifdef NEFORCE_PLATFORM_WINDOWS
335 size_t i = 0;
336 while (i < obj.size()) {
337 codepoint::decode_utf16(obj.data(), i, obj.size(), false).append_to(result);
338 }
339# else
340 for (const value_type c: obj) {
341 codepoint(static_cast<uint32_t>(c)).append_to(result);
342 }
343# endif
344 return result;
345 }
346#endif
347
353 static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view<value_type>& obj) {
354 if (obj.empty()) {
355 return {};
356 }
357 u16string result;
358
359#ifdef NEFORCE_PLATFORM_WINDOWS
360 result.reserve(obj.size());
361 for (size_t i = 0; i < obj.size(); ++i) {
362 result.push_back(static_cast<char16_t>(static_cast<uint16_t>(obj[i])));
363 }
364#else
365 result.reserve(obj.size() * 2);
366 for (const value_type c: obj) {
367 codepoint(static_cast<uint32_t>(c)).append_to(result);
368 }
369#endif
370 return result;
371 }
372
378 static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view<value_type>& obj) {
379 if (obj.empty()) {
380 return {};
381 }
382 u32string result;
383 result.reserve(obj.size());
384
385#ifdef NEFORCE_PLATFORM_WINDOWS
386 size_t i = 0;
387 while (i < obj.size()) {
388 codepoint::decode_utf16(obj.data(), i, obj.size(), false).append_to(result);
389 }
390#else
391 for (const value_type c: obj) {
392 codepoint(static_cast<uint32_t>(c)).append_to(result);
393 }
394#endif
395 return result;
396 }
397};
398
399template <>
400struct package<wchar_t> {
401 using type = wcharacter;
402};
403
404template <>
405struct unpackage<wcharacter> {
406 using type = wchar_t;
407};
408
409
410#if defined(NEFORCE_STANDARD_20) || defined(NEXUSFORCE_ENABLE_DOXYGEN)
411
419struct u8character : icharacter<u8character, char8_t> {
420 using value_type = char8_t;
421 using base = icharacter<u8character, char8_t>;
422
423 constexpr u8character() noexcept = default;
424 NEFORCE_CONSTEXPR20 ~u8character() = default;
425
426 constexpr u8character(const u8character&) noexcept = default;
427 constexpr u8character(u8character&&) noexcept = default;
428
429 constexpr u8character& operator=(const u8character& other) noexcept = default;
430 constexpr u8character& operator=(u8character&& other) noexcept = default;
431
432 explicit constexpr u8character(const value_type value) noexcept :
433 base(value) {}
434
435 constexpr u8character& operator=(const value_type value) noexcept {
436 value_ = value;
437 return *this;
438 }
439
445 static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view<value_type>& obj) {
446 if (obj.empty()) {
447 return {};
448 }
449 string result;
450 result.reserve(obj.size());
451
452 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
453 size_t i = 0;
454 const size_t len = obj.size();
455
456 while (i < len) {
457 codepoint::decode_utf8(data, i, len).append_to(result);
458 }
459 return result;
460 }
461
467 static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view<value_type>& obj) {
468 if (obj.empty()) {
469 return {};
470 }
471 wstring result;
472 result.reserve(obj.size());
473
474 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
475 size_t i = 0;
476 const size_t len = obj.size();
477
478 while (i < len) {
479 codepoint::decode_utf8(data, i, len).append_to(result);
480 }
481 return result;
482 }
483
489 static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view<value_type>& obj) { return u8string{obj}; }
490
496 static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view<value_type>& obj) {
497 if (obj.empty()) {
498 return {};
499 }
500 u16string result;
501 result.reserve(obj.size());
502
503 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
504 size_t i = 0;
505 const size_t len = obj.size();
506
507 while (i < len) {
508 codepoint::decode_utf8(data, i, len).append_to(result);
509 }
510 return result;
511 }
512
518 static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view<value_type>& obj) {
519 if (obj.empty()) {
520 return {};
521 }
522 u32string result;
523 result.reserve(obj.size());
524
525 const auto* data = reinterpret_cast<const byte_t*>(obj.data());
526 size_t i = 0;
527 const size_t len = obj.size();
528
529 while (i < len) {
530 codepoint::decode_utf8(data, i, len).append_to(result);
531 }
532 return result;
533 }
534};
535
536template <>
537struct package<char8_t> {
538 using type = u8character;
539};
540
541template <>
542struct unpackage<u8character> {
543 using type = char8_t;
544};
545
546#endif
547
555struct u16character : icharacter<u16character, char16_t> {
556public:
557 using value_type = char16_t;
558 using base = icharacter<u16character, char16_t>;
559
560private:
561 template <typename T>
562 static void parse_utf16_bom(const basic_string_view<T>& obj, size_t& start_pos, bool& need_swap) noexcept {
563 start_pos = 0;
564 need_swap = false;
565
566 if (obj.empty()) {
567 return;
568 }
569
570 if (static_cast<uint16_t>(obj[0]) == 0xFEFF) {
571 start_pos = 1;
572 need_swap = endian::is_big_endian;
573 } else if (static_cast<uint16_t>(obj[0]) == 0xFFFE) {
574 start_pos = 1;
575 need_swap = endian::is_little_endian;
576 }
577 }
578
579public:
580 constexpr u16character() noexcept = default;
581 NEFORCE_CONSTEXPR20 ~u16character() = default;
582
583 constexpr u16character(const u16character&) noexcept = default;
584 constexpr u16character(u16character&&) noexcept = default;
585
586 constexpr u16character& operator=(const u16character& other) noexcept = default;
587 constexpr u16character& operator=(u16character&& other) noexcept = default;
588
589 explicit constexpr u16character(const value_type value) noexcept :
590 base(value) {}
591
592 constexpr u16character& operator=(const value_type value) noexcept {
593 value_ = value;
594 return *this;
595 }
596
602 static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view<value_type>& obj) {
603 if (obj.empty()) {
604 return {};
605 }
606 string result;
607
608 size_t start_pos;
609 bool need_swap;
610 parse_utf16_bom(obj, start_pos, need_swap);
611
612 size_t i = start_pos;
613 while (i < obj.size()) {
614 codepoint::decode_utf16(obj.data(), i, obj.size(), need_swap).append_to(result);
615 }
616 return result;
617 }
618
624 static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view<value_type>& obj) {
625 if (obj.empty()) {
626 return {};
627 }
628 wstring result;
629 result.reserve(obj.size());
630
631 size_t start_pos;
632 bool need_swap;
633 parse_utf16_bom(obj, start_pos, need_swap);
634
635 size_t i = start_pos;
636 while (i < obj.size()) {
637 codepoint::decode_utf16(obj.data(), i, obj.size(), need_swap).append_to(result);
638 }
639 return result;
640 }
641
642#if defined(NEFORCE_STANDARD_20) || defined(NEXUSFORCE_ENABLE_DOXYGEN)
648 static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view<value_type>& obj) {
649 if (obj.empty()) {
650 return {};
651 }
652 u8string result;
653 result.reserve(obj.size() * 3);
654
655 size_t start_pos;
656 bool need_swap;
657 parse_utf16_bom(obj, start_pos, need_swap);
658
659 size_t i = start_pos;
660 while (i < obj.size()) {
661 codepoint::decode_utf16(obj.data(), i, obj.size(), need_swap).append_to(result);
662 }
663 return result;
664 }
665#endif
666
672 static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view<value_type>& obj) {
673 if (obj.empty()) {
674 return {};
675 }
676
677 size_t start_pos;
678 bool need_swap;
679 parse_utf16_bom(obj, start_pos, need_swap);
680
681 if (start_pos == 0 && !need_swap) {
682 return u16string{obj};
683 }
684
685 u16string result;
686 result.reserve(obj.size());
687
688 size_t i = start_pos;
689 while (i < obj.size()) {
690 codepoint::decode_utf16(obj.data(), i, obj.size(), need_swap).append_to(result);
691 }
692 return result;
693 }
694
700 static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view<value_type>& obj) {
701 if (obj.empty()) {
702 return {};
703 }
704 u32string result;
705 result.reserve(obj.size());
706
707 size_t start_pos;
708 bool need_swap;
709 parse_utf16_bom(obj, start_pos, need_swap);
710
711 size_t i = start_pos;
712 while (i < obj.size()) {
713 codepoint::decode_utf16(obj.data(), i, obj.size(), need_swap).append_to(result);
714 }
715 return result;
716 }
717};
718
719template <>
720struct package<char16_t> {
721 using type = u16character;
722};
723
724template <>
725struct unpackage<u16character> {
726 using type = char16_t;
727};
728
736struct u32character : icharacter<u32character, char32_t> {
737 using value_type = char32_t;
738 using base = icharacter<u32character, char32_t>;
739
740 constexpr u32character() noexcept = default;
741 NEFORCE_CONSTEXPR20 ~u32character() = default;
742
743 constexpr u32character(const u32character&) noexcept = default;
744 constexpr u32character(u32character&&) noexcept = default;
745
746 constexpr u32character& operator=(const u32character& other) noexcept = default;
747 constexpr u32character& operator=(u32character&& other) noexcept = default;
748
749 explicit constexpr u32character(const value_type value) noexcept :
750 base(value) {}
751
752 constexpr u32character& operator=(const value_type value) noexcept {
753 value_ = value;
754 return *this;
755 }
756
762 static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view<value_type>& obj) {
763 if (obj.empty()) {
764 return {};
765 }
766 string result;
767 for (const value_type c: obj) {
769 }
770 return result;
771 }
772
778 static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view<value_type>& obj) {
779 if (obj.empty()) {
780 return {};
781 }
782 wstring result;
783 result.reserve(obj.size());
784 for (const value_type c: obj) {
786 }
787 return result;
788 }
789
790#if defined(NEFORCE_STANDARD_20) || defined(NEXUSFORCE_ENABLE_DOXYGEN)
796 static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view<value_type>& obj) {
797 if (obj.empty()) {
798 return {};
799 }
800 u8string result;
801 result.reserve(obj.size() * 4);
802 for (const value_type c: obj) {
804 }
805 return result;
806 }
807#endif
808
814 static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view<value_type>& obj) {
815 if (obj.empty()) {
816 return {};
817 }
818 u16string result;
819 result.reserve(obj.size() * 2);
820 for (const value_type c: obj) {
822 }
823 return result;
824 }
825
831 static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view<value_type>& obj) {
832 return u32string{obj};
833 }
834};
835
836template <>
837struct package<char32_t> {
838 using type = u32character;
839};
840
841template <>
842struct unpackage<u32character> {
843 using type = char32_t;
844};
845 // UTF
847 // Packages
849
850NEFORCE_END_NAMESPACE__
851#endif // NEFORCE_CORE_STRING_UTF_HPP__
基本字符串视图模板
NEFORCE_NODISCARD constexpr size_type size() const noexcept
获取字符串长度
NEFORCE_NODISCARD constexpr const_pointer data() const noexcept
获取底层数据指针
NEFORCE_NODISCARD constexpr bool empty() const noexcept
检查是否为空
NEFORCE_CONSTEXPR20 void reserve(const size_type n)
预留容量
NEFORCE_CONSTEXPR20 void push_back(value_type value)
在末尾插入字符
Unicode码点包装类
static codepoint decode_utf16(const T *data, size_t &index, const size_t len, const bool need_swap) noexcept
从UTF-16序列解码一个码点,并推进索引
static constexpr codepoint from_utf32(char32_t value) noexcept
从UTF-32值直接构造码点
void append_to(string &result) const
追加UTF-8编码到string
static codepoint decode_utf8(const byte_t *data, size_t &i, size_t len) noexcept
从UTF-8字节流解码一个码点,并推进索引
Unicode码点处理类
unsigned char byte_t
字节类型,定义为无符号字符
unsigned int uint32_t
32位无符号整数类型
unsigned short uint16_t
16位无符号整数类型
basic_string< char16_t > u16string
UTF-16字符串
basic_string< char8_t > u8string
UTF-8字符串
basic_string< wchar_t > wstring
宽字符字符串
basic_string< char32_t > u32string
UTF-32字符串
NEFORCE_NODISCARD NEFORCE_ALWAYS_INLINE constexpr decltype(auto) data(Container &cont) noexcept(noexcept(cont.data()))
获取容器的底层数据指针
字符类型接口
char类型包装类
icharacter< character, char > base
基类类型
static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view< value_type > &obj)
转换为普通字符串
static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view< value_type > &obj)
转换为UTF-32字符串
char value_type
值类型
static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view< value_type > &obj)
转换为UTF-8字符串
static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view< value_type > &obj)
转换为宽字符串
static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view< value_type > &obj)
转换为UTF-16字符串
static constexpr bool is_big_endian
编译时检测是否为大端序
static constexpr bool is_little_endian
编译时检测是否为小端序
package_type value_
存储的数值
NEFORCE_NODISCARD constexpr package_type value() const noexcept
获取数值
类型包装器模板
char16_t类型包装类
static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view< value_type > &obj)
转换为宽字符串
static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view< value_type > &obj)
转换为UTF-16字符串
static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view< value_type > &obj)
转换为UTF-32字符串
static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view< value_type > &obj)
转换为UTF-8字符串
icharacter< u16character, char16_t > base
基类类型
char16_t value_type
值类型
static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view< value_type > &obj)
转换为普通字符串
char32_t类型包装类
static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view< value_type > &obj)
转换为UTF-16字符串
static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view< value_type > &obj)
转换为UTF-32字符串
static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view< value_type > &obj)
转换为UTF-8字符串
char32_t value_type
值类型
static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view< value_type > &obj)
转换为宽字符串
icharacter< u32character, char32_t > base
基类类型
static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view< value_type > &obj)
转换为普通字符串
char8_t类型包装类
static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view< value_type > &obj)
转换为宽字符串
static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view< value_type > &obj)
转换为UTF-8字符串
icharacter< u8character, char8_t > base
基类类型
char8_t value_type
值类型
static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view< value_type > &obj)
转换为UTF-32字符串
static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view< value_type > &obj)
转换为普通字符串
static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view< value_type > &obj)
转换为UTF-16字符串
类型解包器模板
wchar_t类型包装类
static NEFORCE_CONSTEXPR20 wstring to_wstring(const basic_string_view< value_type > &obj)
转换为宽字符串
static NEFORCE_CONSTEXPR20 u32string to_u32string(const basic_string_view< value_type > &obj)
转换为UTF-32字符串
static NEFORCE_CONSTEXPR20 u16string to_u16string(const basic_string_view< value_type > &obj)
转换为UTF-16字符串
wchar_t value_type
值类型
static NEFORCE_CONSTEXPR20 u8string to_u8string(const basic_string_view< value_type > &obj)
转换为UTF-8字符串
static NEFORCE_CONSTEXPR20 string to_string(const basic_string_view< value_type > &obj)
转换为普通字符串
icharacter< wcharacter, wchar_t > base
基类类型