NexusForce 1.0.0
A Modern C++ Library with extended functionality, web components, and utility libraries
载入中...
搜索中...
未找到
msvc_intrinsic.hpp
浏览该文件的文档.
1#ifndef NEFORCE_CORE_CONFIG_MSVC_INTRINSIC_HPP__
2#define NEFORCE_CORE_CONFIG_MSVC_INTRINSIC_HPP__
3
16
18NEFORCE_BEGIN_NAMESPACE__
19
25
36NEFORCE_CONSTEXPR14 uint8_t _addcarry_u64(const uint8_t carry_in, const uint64_t a, const uint64_t b,
37 uint64_t* out) noexcept {
38 const auto a_lo = static_cast<uint32_t>(a);
39 const auto a_hi = static_cast<uint32_t>(a >> 32);
40 const auto b_lo = static_cast<uint32_t>(b);
41 const auto b_hi = static_cast<uint32_t>(b >> 32);
42
43 const uint64_t sum_lo = static_cast<uint64_t>(a_lo) + static_cast<uint64_t>(b_lo) + carry_in;
44 const uint64_t sum_hi = static_cast<uint64_t>(a_hi) + static_cast<uint64_t>(b_hi) + (sum_lo >> 32);
45
46 *out = (sum_hi << 32) | (sum_lo & 0xFFFFFFFFULL);
47 return static_cast<uint8_t>(sum_hi >> 32);
48}
49
60NEFORCE_CONSTEXPR14 uint8_t _subborrow_u64(const uint8_t borrow_in, const uint64_t a, const uint64_t b,
61 uint64_t* out) noexcept {
62 const auto a_lo = static_cast<uint32_t>(a);
63 const auto a_hi = static_cast<uint32_t>(a >> 32);
64 const auto b_lo = static_cast<uint32_t>(b);
65 const auto b_hi = static_cast<uint32_t>(b >> 32);
66
67 const uint64_t diff_lo = static_cast<uint64_t>(a_lo) - static_cast<uint64_t>(b_lo) - borrow_in;
68 const uint64_t borrow_lo = (diff_lo >> 63);
69
70 const uint64_t diff_hi = static_cast<uint64_t>(a_hi) - static_cast<uint64_t>(b_hi) - borrow_lo;
71 const uint64_t borrow_hi = (diff_hi >> 63);
72
73 *out = ((diff_hi & 0xFFFFFFFFULL) << 32) | (diff_lo & 0xFFFFFFFFULL);
74 return static_cast<uint8_t>(borrow_hi);
75}
76
86NEFORCE_CONSTEXPR14 uint64_t _umul128(const uint64_t a, const uint64_t b, uint64_t* hi_out) noexcept {
87 const auto a_lo = static_cast<uint32_t>(a);
88 const auto a_hi = static_cast<uint32_t>(a >> 32);
89 const auto b_lo = static_cast<uint32_t>(b);
90 const auto b_hi = static_cast<uint32_t>(b >> 32);
91
92 const uint64_t p_ll = static_cast<uint64_t>(a_lo) * b_lo; // [63:0]
93 const uint64_t p_lh = static_cast<uint64_t>(a_lo) * b_hi; // [95:32]
94 const uint64_t p_hl = static_cast<uint64_t>(a_hi) * b_lo; // [95:32]
95 const uint64_t p_hh = static_cast<uint64_t>(a_hi) * b_hi; // [127:64]
96
97 const uint64_t mid = (p_ll >> 32) + (p_lh & 0xFFFFFFFFULL) + (p_hl & 0xFFFFFFFFULL);
98 const uint64_t lo = (p_ll & 0xFFFFFFFFULL) | (mid << 32);
99 const uint64_t hi = p_hh + (p_lh >> 32) + (p_hl >> 32) + (mid >> 32);
100
101 *hi_out = hi;
102 return lo;
103}
104
113NEFORCE_CONSTEXPR14 uint64_t _udiv128(const uint64_t dividend_hi, const uint64_t dividend_lo, const uint64_t divisor,
114 uint64_t* remainder) noexcept {
115 if (dividend_hi == 0) {
116 if (remainder != nullptr) {
117 *remainder = dividend_lo % divisor;
118 }
119 return dividend_lo / divisor;
120 }
121
122 const int s = _NEFORCE clz64(divisor);
123 const uint64_t d = divisor << s;
124
125 uint64_t u2 = 0, u1 = 0, u0 = 0;
126 if (s == 0) {
127 u1 = dividend_hi;
128 u0 = dividend_lo;
129 } else {
130 u2 = dividend_hi >> (64 - s);
131 u1 = (dividend_hi << s) | (dividend_lo >> (64 - s));
132 u0 = dividend_lo << s;
133 }
134
135 const auto d_hi = static_cast<uint32_t>(d >> 32);
136 const auto d_lo = static_cast<uint32_t>(d);
137
138 uint64_t u_hi = (static_cast<uint64_t>(u2) << 32) | (u1 >> 32);
139 uint64_t q1 = u_hi / d_hi;
140 uint64_t r1 = u_hi % d_hi;
141
142 while (q1 >= 0x100000000ULL || q1 * d_lo > ((r1 << 32) | (u1 & 0xFFFFFFFF))) {
143 --q1;
144 r1 += d_hi;
145 if (r1 >= 0x100000000ULL) {
146 break;
147 }
148 }
149
150 uint64_t prod_hi = 0;
151 uint64_t prod_lo = _NEFORCE _umul128(q1, d, &prod_hi);
152 uint64_t rem_hi = u2 - prod_hi;
153 uint64_t rem_lo = u1 - prod_lo;
154 if (u1 < prod_lo) {
155 --rem_hi;
156 }
157
158 if ((rem_hi & (1ULL << 63)) != 0U) {
159 --q1;
160 rem_lo += d;
161 if (rem_lo < d) {
162 ++rem_hi;
163 }
164 }
165
166 u_hi = rem_lo >> 32;
167 uint64_t q0 = u_hi / d_hi;
168 uint64_t r0 = u_hi % d_hi;
169
170 while (q0 >= 0x100000000ULL || q0 * d_lo > ((r0 << 32) | (rem_lo & 0xFFFFFFFF))) {
171 --q0;
172 r0 += d_hi;
173 if (r0 >= 0x100000000ULL) {
174 break;
175 }
176 }
177
178 prod_lo = _NEFORCE _umul128(q0, d, &prod_hi);
179 uint64_t rem_mid_hi = 0 - prod_hi;
180 uint64_t rem_mid_lo = rem_lo - prod_lo;
181 if (rem_lo < prod_lo) {
182 --rem_mid_hi;
183 }
184
185 if ((rem_mid_hi & (1ULL << 63)) != 0U) {
186 --q0;
187 rem_mid_lo += d;
188 }
189
190 const uint64_t quotient = (q1 << 32) | q0;
191
192 if (remainder != nullptr) {
193 if (s > 0) {
194 *remainder = (rem_mid_lo << (64 - s)) | (u0 >> s);
195 } else {
196 *remainder = u0;
197 }
198 }
199
200 return quotient;
201}
202 // MSVCCompilerIntrinsics
204
205NEFORCE_END_NAMESPACE__
206#endif // NEFORCE_CORE_CONFIG_MSVC_INTRINSIC_HPP__
位操作函数
constexpr int clz64(uint64_t x) noexcept
计算64位整数前导零的个数
定义 bit.hpp:74
unsigned char uint8_t
8位无符号整数类型
unsigned int uint32_t
32位无符号整数类型
unsigned long long uint64_t
64位无符号整数类型
constexpr uint64_t _umul128(const uint64_t a, const uint64_t b, uint64_t *hi_out) noexcept
64位无符号乘法
constexpr uint8_t _subborrow_u64(const uint8_t borrow_in, const uint64_t a, const uint64_t b, uint64_t *out) noexcept
带借位的64位无符号减法
constexpr uint64_t _udiv128(const uint64_t dividend_hi, const uint64_t dividend_lo, const uint64_t divisor, uint64_t *remainder) noexcept
128位无符号除法(基于Knuth-D)
constexpr uint8_t _addcarry_u64(const uint8_t carry_in, const uint64_t a, const uint64_t b, uint64_t *out) noexcept
带进位的64位无符号加法
constexpr decimal_t remainder(const decimal_t x, const decimal_t y) noexcept
计算余数