1#ifndef NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
2#define NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
17NEFORCE_BEGIN_NAMESPACE__
163template <
typename T,
typename Hash = hash<T>>
179 static size_t compute_m(
const size_t n,
const double p)
noexcept {
180 if (p <= 0.0 || p >= 1.0) {
185 return static_cast<size_t>(
ceil(m));
196 static size_t compute_k(
const size_t n,
const size_t m)
noexcept {
197 if (n == 0 || m == 0) {
213 size_t h1 = hasher_(key);
230 NEFORCE_NODISCARD
size_t nth_hash(
const size_t i,
const size_t h1,
const size_t h2)
const noexcept {
231 return (h1 + i * h2) % m_;
235 bloom_filter(
const bloom_filter&)
noexcept =
default;
236 bloom_filter(bloom_filter&&)
noexcept =
default;
237 bloom_filter& operator=(
const bloom_filter&) =
default;
238 bloom_filter& operator=(bloom_filter&&) =
default;
253 bloom_filter(
const size_t expected_insertions,
const double false_positive_prob) :
254 m_(compute_m(expected_insertions, false_positive_prob)),
255 k_(compute_k(expected_insertions, m_)),
258 if (expected_insertions == 0 || false_positive_prob <= 0.0 || false_positive_prob >= 1.0) {
259 NEFORCE_THROW_EXCEPTION(
260 value_exception(
"expected_insertions must be positive and false_positive_prob in (0,1)"));
277 if (m == 0 || k == 0) {
288 NEFORCE_NODISCARD
size_t capacity() const noexcept {
289 return static_cast<size_t>(
static_cast<double>(m_) *
logarithm_e(2.) / k_);
296 NEFORCE_NODISCARD
bool empty() const noexcept {
297 for (
size_t i = 0; i < m_; ++i) {
309 NEFORCE_NODISCARD
size_t bit_size() const noexcept {
return m_; }
315 NEFORCE_NODISCARD
size_t hash_count() const noexcept {
return k_; }
325 for (
size_t i = 0; i < m_; ++i) {
330 const double x =
static_cast<double>(bits_set) / m_;
331 return static_cast<size_t>(-(
static_cast<double>(m_) / k_) *
logarithm_e(1 - x));
342 for (
size_t i = 0; i < m_; ++i) {
347 const double x =
static_cast<double>(bits_set) / m_;
359 NEFORCE_NODISCARD
bool contains(
const T& key)
const noexcept {
360 auto h = this->hash_values(key);
361 for (
size_t i = 0; i < k_; ++i) {
362 const size_t index = this->nth_hash(i, h.first, h.second);
377 auto h = this->hash_values(key);
378 for (
size_t i = 0; i < k_; ++i) {
379 const size_t index = this->nth_hash(i, h.first, h.second);
389 void clear() noexcept {
fill(bits_.begin(), bits_.end(),
false); }
400 bloom_filter&
merge(
const bloom_filter& other) {
401 if (m_ != other.m_ || k_ != other.k_) {
402 NEFORCE_THROW_EXCEPTION(
value_exception(
"Filters must have same parameters to merge"));
405 for (
size_t i = 0; i < m_; ++i) {
406 if (other.bits_[i]) {
422 bloom_filter
intersect(
const bloom_filter& other)
const {
423 if (m_ != other.m_ || k_ != other.k_) {
424 NEFORCE_THROW_EXCEPTION(
value_exception(
"Filters must have same parameters for intersection"));
427 bloom_filter result(m_, k_);
428 for (
size_t i = 0; i < m_; ++i) {
429 result.bits_[i] = bits_[i] && other.bits_[i];
442 bloom_filter
unite(
const bloom_filter& other)
const {
443 bloom_filter result(*
this);
444 return result.
merge(other);
455 for (
size_t i = 0; i < m_; ++i) {
457 bytes[i / 8] |= (1 << (i % 8));
471 if (bytes.
size() * 8 < m_) {
474 for (
size_t i = 0; i < m_; ++i) {
475 bits_[i] = ((bytes[i / 8] >> (i % 8)) & 1) != 0;
482NEFORCE_END_NAMESPACE__
byte_vector to_bytes() const
转换为字节数组
~bloom_filter()=default
析构函数
bloom_filter intersect(const bloom_filter &other) const
交集操作
size_t bit_size() const noexcept
获取位数组大小
bloom_filter(const size_t expected_insertions, const double false_positive_prob)
基于预期插入数和误报率构造
size_t approximate_count() const noexcept
估计当前元素数量
size_t capacity() const noexcept
获取理论容量
bloom_filter & merge(const bloom_filter &other)
合并另一个过滤器
bool contains(const T &key) const noexcept
检查元素是否可能存在
size_t hash_count() const noexcept
获取哈希函数数量
bloom_filter(const size_t m, const size_t k)
基于直接参数构造
void clear() noexcept
清空过滤器
void from_bytes(const byte_vector &bytes)
从字节数组恢复
bool empty() const noexcept
检查过滤器是否为空
bloom_filter unite(const bloom_filter &other) const
并集操作
double false_positive_rate() const noexcept
估计当前误报率
void insert(const T &key) noexcept
插入元素
constexpr size_type size() const noexcept
获取当前元素数量
constexpr uintptr_t rotate_l(const uintptr_t x, const int s) noexcept
整数循环左移
constexpr const T & max(const T &a, const T &b, Compare comp) noexcept(noexcept(comp(a, b)))
返回两个值中的较大者
vector< byte_t > byte_vector
字节向量类型别名
long double decimal_t
扩展精度浮点数类型
constexpr decimal_t ceil(const decimal_t x) noexcept
向上取整
constexpr decimal_t logarithm_e(const decimal_t x) noexcept
计算自然对数
constexpr T power(const T &x, uint32_t n) noexcept
幂运算
constexpr decimal_t round(const decimal_t x) noexcept
四舍五入
constexpr void fill(Iterator first, Iterator last, const T &value) noexcept(is_nothrow_assignable_v< iter_value_t< Iterator >, T >)
填充范围元素