Longfellow ZK 0290cb32
Loading...
Searching...
No Matches
pdqhash.h
1// Copyright 2025 Google LLC.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef PRIVACY_PROOFS_ZK_LIB_CIRCUITS_COMPILER_PDQHASH_H_
16#define PRIVACY_PROOFS_ZK_LIB_CIRCUITS_COMPILER_PDQHASH_H_
17
18#include <stddef.h>
19#include <stdint.h>
20
21#include <functional>
22#include <vector>
23
24#define DEFINE_STRONG_INT_TYPE(a, b) using a = b
25
26namespace proofs {
27
28// Old-school, quick and dirty hash table specialized for
29// uint64_t key, value_t value. Support multiple keys
30// ("multimap").
31//
32// This is supposed to solve the same problem as the C++
33// unordered_multimap, except that the unordered_multimap
34// stores key/value as a linked list and leaves the malloc()
35// arena so fragmented that malloc_coalesce() takes several
36// hundred ms to reconstruct the heap.
37class PdqHash {
38 public:
39 // value of NIL denotes empty slot
40 using value_t = uint32_t;
41 static const constexpr value_t kNil = ~static_cast<value_t>(0);
42
43 // Store the key as uint32_t to save space. This is ok because
44 // "key" is not really a key. Instead, find() invokes pred() which
45 // compares against the full key (stored outside this class).
46 DEFINE_STRONG_INT_TYPE(stored_key_t, uint32_t);
47
48 static stored_key_t narrow(uint64_t k) { return stored_key_t(k + (k >> 32)); }
49
50 struct kv {
51 stored_key_t k;
52 value_t v;
53
54 kv() : k(stored_key_t(0)), v(kNil) {}
55 };
56
57 PdqHash() : bits_(10), sz_(0), table_(capacity()) {}
58
59 void insert(uint64_t k64, value_t v) {
60 if (2 * sz_ > capacity()) {
61 rehash();
62 }
63 insert0(narrow(k64), v);
64 }
65
66 size_t find(uint64_t k64, const std::function<bool(value_t)> &pred) {
67 stored_key_t k = narrow(k64);
68 size_t mask = (size_t(1) << bits_) - 1;
69 size_t dh = dhash(k);
70 for (size_t h = hash(k);; h += dh) {
71 const kv *p = &table_[h & mask];
72 if (p->v == kNil) {
73 // not found
74 return kNil;
75 }
76 if (p->k == k && pred(p->v)) {
77 // found
78 return p->v;
79 }
80 }
81 }
82
83 private:
84 void insert0(stored_key_t k, value_t v) {
85 size_t mask = (size_t(1) << bits_) - 1;
86 size_t dh = dhash(k);
87 for (size_t h = hash(k);; h += dh) {
88 kv *p = &table_[h & mask];
89 if (p->v == kNil) {
90 p->k = k;
91 p->v = v;
92 ++sz_;
93 return;
94 }
95 }
96 }
97
98 // Adhoc hash function suffices for this application.
99 uint64_t hash(uint64_t k) {
100 return k + 3 * (k >> bits_) + 7 * (k >> (2 * bits_));
101 }
102 uint64_t hash(stored_key_t nk) { return hash(static_cast<uint64_t>(nk)); }
103 uint64_t dhash(stored_key_t nk) {
104 // If gcd(dhash, capacity()) == 1, the insert loop does not have a short
105 // cycle.
106 return 2 * hash(hash(nk)) + 1;
107 }
108 void rehash() {
109 ++bits_;
110 std::vector<kv> table1(capacity());
111 table_.swap(table1);
112 for (const auto &p : table1) {
113 if (p.v != kNil) {
114 insert0(p.k, p.v);
115 }
116 }
117 }
118
119 size_t capacity() { return size_t(1) << bits_; }
120
121 size_t bits_;
122 size_t sz_;
123 std::vector<kv> table_;
124};
125} // namespace proofs
126
127#endif // PRIVACY_PROOFS_ZK_LIB_CIRCUITS_COMPILER_PDQHASH_H_
Definition pdqhash.h:50