1#ifndef NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
2#define NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
17NEFORCE_BEGIN_NAMESPACE__
39template <
typename T,
typename Hash = hash<T>>
55 static size_t compute_m(
const size_t n,
const double p)
noexcept {
57 const double m = -
static_cast<double>(n) *
logarithm_e(p) / (ln2 * ln2);
58 return static_cast<size_t>(
ceil(m));
69 static size_t compute_k(
const size_t n,
const size_t m)
noexcept {
70 const double k = (
static_cast<double>(m) / n) *
logarithm_e(2.);
82 size_t h1 = hasher_(key);
99 size_t nth_hash(
const size_t i,
const size_t h1,
const size_t h2)
const noexcept {
return (h1 + i * h2) % m_; }
102 bloom_filter(
const bloom_filter&)
noexcept =
default;
103 bloom_filter(bloom_filter&&)
noexcept =
default;
104 bloom_filter& operator=(
const bloom_filter&) =
default;
105 bloom_filter& operator=(bloom_filter&&) =
default;
120 bloom_filter(
const size_t expected_insertions,
const double false_positive_prob) :
121 m_(compute_m(expected_insertions, false_positive_prob)),
122 k_(compute_k(expected_insertions, m_)),
125 if (expected_insertions == 0 || false_positive_prob <= 0.0 || false_positive_prob >= 1.0) {
126 NEFORCE_THROW_EXCEPTION(
127 value_exception(
"expected_insertions must be positive and false_positive_prob in (0,1)"));
144 if (m == 0 || k == 0) {
155 NEFORCE_NODISCARD
size_t capacity() const noexcept {
156 return static_cast<size_t>(
static_cast<double>(m_) *
logarithm_e(2.) / k_);
163 NEFORCE_NODISCARD
bool empty() const noexcept {
164 for (
size_t i = 0; i < m_; ++i) {
176 NEFORCE_NODISCARD
size_t bit_size() const noexcept {
return m_; }
182 NEFORCE_NODISCARD
size_t hash_count() const noexcept {
return k_; }
192 for (
size_t i = 0; i < m_; ++i) {
197 const double x =
static_cast<double>(bits_set) / m_;
198 return static_cast<size_t>(-(
static_cast<double>(m_) / k_) *
logarithm_e(1 - x));
209 for (
size_t i = 0; i < m_; ++i) {
214 const double x =
static_cast<double>(bits_set) / m_;
226 NEFORCE_NODISCARD
bool contains(
const T& key)
const noexcept {
227 auto h = this->hash_values(key);
228 for (
size_t i = 0; i < k_; ++i) {
229 const size_t index = this->nth_hash(i, h.first, h.second);
244 auto h = this->hash_values(key);
245 for (
size_t i = 0; i < k_; ++i) {
246 const size_t index = this->nth_hash(i, h.first, h.second);
256 void clear() noexcept {
fill(bits_.begin(), bits_.end(),
false); }
267 bloom_filter&
merge(
const bloom_filter& other) {
268 if (m_ != other.m_ || k_ != other.k_) {
269 NEFORCE_THROW_EXCEPTION(
value_exception(
"Filters must have same parameters to merge"));
272 for (
size_t i = 0; i < m_; ++i) {
273 if (other.bits_[i]) {
289 bloom_filter
intersect(
const bloom_filter& other)
const {
290 if (m_ != other.m_ || k_ != other.k_) {
291 NEFORCE_THROW_EXCEPTION(
value_exception(
"Filters must have same parameters for intersection"));
294 bloom_filter result(m_, k_);
295 for (
size_t i = 0; i < m_; ++i) {
296 result.bits_[i] = bits_[i] && other.bits_[i];
309 bloom_filter
unite(
const bloom_filter& other)
const {
310 bloom_filter result(*
this);
311 return result.
merge(other);
322 for (
size_t i = 0; i < m_; ++i) {
324 bytes[i / 8] |= (1 << (i % 8));
338 if (bytes.
size() * 8 < m_) {
341 for (
size_t i = 0; i < m_; ++i) {
342 bits_[i] = (bytes[i / 8] >> (i % 8)) & 1;
349NEFORCE_END_NAMESPACE__
byte_vector to_bytes() const
转换为字节数组
~bloom_filter()=default
析构函数
NEFORCE_NODISCARD size_t hash_count() const noexcept
获取哈希函数数量
NEFORCE_NODISCARD size_t approximate_count() const noexcept
估计当前元素数量
bloom_filter intersect(const bloom_filter &other) const
交集操作
bloom_filter(const size_t expected_insertions, const double false_positive_prob)
基于预期插入数和误报率构造
bloom_filter & merge(const bloom_filter &other)
合并另一个过滤器
NEFORCE_NODISCARD size_t capacity() const noexcept
获取理论容量
NEFORCE_NODISCARD double false_positive_rate() const noexcept
估计当前误报率
NEFORCE_NODISCARD bool contains(const T &key) const noexcept
检查元素是否可能存在
bloom_filter(const size_t m, const size_t k)
基于直接参数构造
void clear() noexcept
清空过滤器
void from_bytes(const byte_vector &bytes)
从字节数组恢复
bloom_filter unite(const bloom_filter &other) const
并集操作
NEFORCE_NODISCARD size_t bit_size() const noexcept
获取位数组大小
NEFORCE_NODISCARD bool empty() const noexcept
检查过滤器是否为空
void insert(const T &key) noexcept
插入元素
NEFORCE_NODISCARD NEFORCE_CONSTEXPR20 size_type size() const noexcept
获取当前元素数量
NEFORCE_CONSTEXPR14 int rotate_l(const uintptr_t x, const int s) noexcept
整数循环左移
constexpr const T & max(const T &a, const T &b, Compare comp) noexcept(noexcept(comp(a, b)))
返回两个值中的较大者
vector< byte_t > byte_vector
字节向量类型别名
long double decimal_t
扩展精度浮点数类型
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 decimal_t logarithm_e(const decimal_t x) noexcept
计算自然对数
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 T power(const T &x, uint32_t n) noexcept
幂运算
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 decimal_t ceil(const decimal_t x) noexcept
向上取整
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 decimal_t round(const decimal_t x) noexcept
四舍五入
constexpr void fill(Iterator first, Iterator last, const T &value) noexcept(is_nothrow_assignable_v< iter_value_t< Iterator >, T >)
填充范围元素