Longfellow ZK 0290cb32
sysdep.h
// Copyright 2025 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PRIVACY_PROOFS_ZK_LIB_ALGEBRA_SYSDEP_H_
#define PRIVACY_PROOFS_ZK_LIB_ALGEBRA_SYSDEP_H_

#include <stddef.h>

#include <cstdint>

#include "util/panic.h"  // IWYU pragma: keep

#if defined(__x86_64__) || defined(__i386__)
// system-dependent basic arithmetic functions: add with carry
// and 64x64->128 bit multiplication
#include <x86intrin.h>  // IWYU pragma: keep
#endif

namespace proofs {

#if defined(__x86_64__)
static inline uint64_t adc(uint64_t* a, uint64_t b, uint64_t c) {
  // unsigned long long (not uint64_t) is *required* by the
  // _addcarry_u64() prototype. uint64_t is unsigned long on
  // linux, and pointers to the two types are incompatible even
  // though the conversion is a no-op.
  unsigned long long out;
  c = _addcarry_u64(c, *a, b, &out);
  *a = out;
  return c;
}
static inline uint32_t adc(uint32_t* a, uint32_t b, uint32_t c) {
  return _addcarry_u32(c, *a, b, a);
}
static inline uint64_t sbb(uint64_t* a, uint64_t b, uint64_t c) {
  unsigned long long out;
  c = _subborrow_u64(c, *a, b, &out);
  *a = out;
  return c;
}
static inline uint32_t sbb(uint32_t* a, uint32_t b, uint32_t c) {
  return _subborrow_u32(c, *a, b, a);
}
static inline void mulq(uint64_t* l, uint64_t* h, uint64_t a, uint64_t b) {
  asm("mulx %2, %0, %1" : "=r"(*l), "=r"(*h) : "r"(b), "d"(a));
}
#elif defined(__i386__)
static inline uint32_t adc(uint32_t* a, uint32_t b, uint32_t c) {
  return _addcarry_u32(c, *a, b, a);
}
static inline uint32_t sbb(uint32_t* a, uint32_t b, uint32_t c) {
  return _subborrow_u32(c, *a, b, a);
}

// These two overloads need to exist for the code to compile,
// but they are never called on this platform.
static inline unsigned long long adc(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  check(false, "adcll() not defined");
  return 0;
}
static inline unsigned long long sbb(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  check(false, "sbbll() not defined");
  return 0;
}

#define SYSDEP_MULQ64_NOT_DEFINED
#elif defined(__clang__)
// The clang intrinsics use the built-in types int, long, etc.
// Thus we define adc() and sbb() in terms of those types.
static inline unsigned long long adc(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  *a = __builtin_addcll(*a, b, c, &c);
  return c;
}
static inline unsigned long adc(unsigned long* a, unsigned long b,
                                unsigned long c) {
  *a = __builtin_addcl(*a, b, c, &c);
  return c;
}
static inline unsigned int adc(unsigned int* a, unsigned int b,
                               unsigned int c) {
  *a = __builtin_addc(*a, b, c, &c);
  return c;
}

static inline unsigned long long sbb(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  *a = __builtin_subcll(*a, b, c, &c);
  return c;
}
static inline unsigned long sbb(unsigned long* a, unsigned long b,
                                unsigned long c) {
  *a = __builtin_subcl(*a, b, c, &c);
  return c;
}
static inline unsigned int sbb(unsigned int* a, unsigned int b,
                               unsigned int c) {
  *a = __builtin_subc(*a, b, c, &c);
  return c;
}

#if defined(__SIZEOF_INT128__)
// It seems that __SIZEOF_INT128__ is defined if __uint128_t is.
static inline void mulq(uint64_t* l, uint64_t* h, uint64_t a, uint64_t b) {
  __uint128_t p = (__uint128_t)b * (__uint128_t)a;
  *l = p;
  *h = p >> 64;
}
#else  // defined(__SIZEOF_INT128__)
#define SYSDEP_MULQ64_NOT_DEFINED
#endif  // defined(__SIZEOF_INT128__)
#endif

static inline void mulq(uint32_t* l, uint32_t* h, uint32_t a, uint32_t b) {
  uint64_t p = (uint64_t)b * (uint64_t)a;
  *l = p;
  *h = p >> 32;
}
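
// Example (illustrative only; mulq32_example is a hypothetical helper, not
// used elsewhere): mulq() splits the full product of two limbs into a low
// half l and a high half h, so that h:l reassembles to a * b.
static inline uint64_t mulq32_example(uint32_t a, uint32_t b) {
  uint32_t l, h;
  mulq(&l, &h, a, b);
  return ((uint64_t)h << 32) | l;  // equals (uint64_t)a * b
}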

// Identity function whose only purpose is to confuse the compiler.
// We have no coherent theory of when and why this is useful, but
// here are a couple of cases where this hack makes a difference:
//
// * Passing the cmov() values through identity_limb() seems
//   to favor the generation of a conditional move instruction
//   as opposed to a conditional branch.
// * Clang and gcc match a+b+carry to generate the adcq instruction,
//   but a+0+carry becomes a+carry and the match fails. So
//   we pretend that the zero is not a zero.
// * A similar issue arises in subtract with carry.
//
// This function is obviously a hack. Works for me today but YMMV.
//
template <class limb_t>
static inline limb_t identity_limb(limb_t v) {
  asm("" : "+r"(v)::);
  return v;
}

template <class limb_t>
static inline limb_t zero_limb() {
  return identity_limb<limb_t>(0);
}
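
// Example (illustrative only; select_limb_example is a hypothetical helper,
// not used elsewhere): a two-way select. Routing both candidates through
// identity_limb() nudges the compiler toward a conditional move rather than
// a conditional branch, per the comment above.
template <class limb_t>
static inline limb_t select_limb_example(bool take_b, limb_t a, limb_t b) {
  a = identity_limb(a);
  b = identity_limb(b);
  return take_b ? b : a;
}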

// a += b
template <class limb_t>
static inline void accum(size_t Wa, limb_t a[/*Wa*/], size_t Wb,
                         const limb_t b[/*Wb*/]) {
  limb_t c = 0;
  for (size_t i = 0; i < Wb; ++i) {
    c = adc(&a[i], b[i], c);
  }
  for (size_t i = Wb; i < Wa; ++i) {
    c = adc(&a[i], 0, c);
  }
}

// a -= b
template <class limb_t>
static inline void negaccum(size_t Wa, limb_t a[/*Wa*/], size_t Wb,
                            const limb_t b[/*Wb*/]) {
  limb_t c = 0;
  for (size_t i = 0; i < Wb; ++i) {
    c = sbb(&a[i], b[i], c);
  }
  for (size_t i = Wb; i < Wa; ++i) {
    c = sbb(&a[i], 0, c);
  }
}

// h::a += b
template <class limb_t>
static inline limb_t add_limb(size_t W, limb_t a[/*W*/],
                              const limb_t b[/*W*/]) {
  limb_t c = 0;
  for (size_t i = 0; i < W; ++i) {
    c = adc(&a[i], b[i], c);
  }
  limb_t h = zero_limb<limb_t>();
  c = adc(&h, 0, c);
  return h;
}

// h::a += b * 2^(bits per limb)
template <class limb_t>
static inline limb_t addh(size_t W, limb_t a[/*W*/], const limb_t b[/*W*/]) {
  limb_t c = 0;
  for (size_t i = 1; i < W; ++i) {
    c = adc(&a[i], b[i - 1], c);
  }
  limb_t h = zero_limb<limb_t>();
  c = adc(&h, b[W - 1], c);
  return h;
}

// h::a -= b
template <class limb_t>
static inline limb_t sub_limb(size_t W, limb_t a[/*W*/],
                              const limb_t b[/*W*/]) {
  limb_t c = 0;
  for (size_t i = 0; i < W; ++i) {
    c = sbb(&a[i], b[i], c);
  }
  limb_t h = zero_limb<limb_t>();
  c = sbb(&h, 0, c);
  return h;
}

// h[i]::l[i] = a * b[i] for each limb of b
template <class limb_t>
static inline void mulhl(size_t W, limb_t l[/*W*/], limb_t h[/*W*/], limb_t a,
                         const limb_t b[/*W*/]) {
  for (size_t i = 0; i < W; ++i) {
    mulq(&l[i], &h[i], a, b[i]);
  }
}
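
// Example (illustrative only; mul_1xW_example is a hypothetical helper, not
// used elsewhere): the full (W+1)-limb product of one limb a and a W-limb
// operand b. mulhl() produces the per-limb halves and addh() folds the high
// halves in, shifted up by one limb.
template <size_t W, class limb_t>
static inline void mul_1xW_example(limb_t p[/*W+1*/], limb_t a,
                                   const limb_t b[/*W*/]) {
  limb_t h[W];
  mulhl(W, p, h, a, b);  // h[i]::p[i] = a * b[i]
  p[W] = addh(W, p, h);  // p += h << (bits per limb); top limb lands in p[W]
}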

// a = b
template <class limb_t>
static inline void mov(size_t W, limb_t a[/*W*/], const limb_t b[/*W*/]) {
  for (size_t i = 0; i < W; ++i) {
    a[i] = b[i];
  }
}

// It seems that using assembly code is the only way to
// force gcc and clang to use conditional moves.
#if defined(__x86_64__)
static inline void cmovnz(size_t W, uint64_t a[/*W*/], uint64_t nz,
                          const uint64_t b[/*W*/]) {
  if (W == 1) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        : [a0] "+r"(a[0])
        : [nz] "r"(nz), [b0] "r"(b[0]));
  } else if (W == 2) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1])
        : [nz] "r"(nz), [b0] "r"(b[0]), [b1] "r"(b[1]));
  } else if (W == 3) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2])
        : [nz] "r"(nz), [b0] "r"(b[0]), [b1] "r"(b[1]), [b2] "r"(b[2]));
  } else if (W == 4) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        "cmovneq %[b3], %[a3]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2]), [a3] "+r"(a[3])
        : [nz] "r"(nz), [b0] "r"(b[0]), [b1] "r"(b[1]), [b2] "r"(b[2]),
          [b3] "r"(b[3]));
  } else {
    for (size_t i = 0; i < W; ++i) {
      a[i] = (nz != 0) ? b[i] : a[i];
    }
  }
}

static inline void cmovne(size_t W, uint64_t a[/*W*/], uint64_t x, uint64_t y,
                          const uint64_t b[/*W*/]) {
  if (W == 1) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        : [a0] "+r"(a[0])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0])
        : "cc");
  } else if (W == 2) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]), [b1] "r"(b[1])
        : "cc");
  } else if (W == 3) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]), [b1] "r"(b[1]), [b2] "r"(b[2])
        : "cc");
  } else if (W == 4) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        "cmovneq %[b3], %[a3]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2]), [a3] "+r"(a[3])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]), [b1] "r"(b[1]),
          [b2] "r"(b[2]), [b3] "r"(b[3])
        : "cc");
  } else {
    for (size_t i = 0; i < W; ++i) {
      a[i] = (x != y) ? b[i] : a[i];
    }
  }
}

static inline uint64_t addcmovc(uint64_t a, uint64_t b, uint64_t c) {
  asm("add %[b], %[a]\n\t"
      "cmovaeq %[c], %[a]\n\t"
      : [a] "+r"(a)
      : [b] "r"(b), [c] "r"(c)
      : "cc");
  return a;
}

static inline uint64_t sub_sysdep(uint64_t a, uint64_t y, uint64_t m) {
  uint64_t z = 0;
  asm("subq %[y], %[a]\n\t"
      "cmovbq %[m], %[z]\n\t"
      : [a] "+r"(a), [z] "+r"(z)
      : [y] "r"(y), [m] "r"(m)
      : "cc");
  return a + z;
}

#elif defined(__aarch64__)

static inline void cmovne(size_t W, uint64_t a[/*W*/], uint64_t x, uint64_t y,
                          const uint64_t b[/*W*/]) {
  if (W == 1) {
    asm("cmp %[x], %[y]\n\t"                //
        "csel %[a0], %[a0], %[b0], eq\n\t"  //
        : [a0] "+r"(a[0])                   //
        : [x] "r"(x), [y] "ri"(y),          //
          [b0] "r"(b[0])                    //
        : "cc");
  } else if (W == 2) {
    asm("cmp %[x], %[y]\n\t"                //
        "csel %[a0], %[a0], %[b0], eq\n\t"  //
        "csel %[a1], %[a1], %[b1], eq\n\t"  //
        : [a0] "+r"(a[0]),                  //
          [a1] "+r"(a[1])                   //
        : [x] "r"(x), [y] "ri"(y),          //
          [b0] "r"(b[0]),                   //
          [b1] "r"(b[1])                    //
        : "cc");
  } else if (W == 3) {
    asm("cmp %[x], %[y]\n\t"                //
        "csel %[a0], %[a0], %[b0], eq\n\t"  //
        "csel %[a1], %[a1], %[b1], eq\n\t"  //
        "csel %[a2], %[a2], %[b2], eq\n\t"  //
        : [a0] "+r"(a[0]),                  //
          [a1] "+r"(a[1]),                  //
          [a2] "+r"(a[2])                   //
        : [x] "r"(x), [y] "ri"(y),          //
          [b0] "r"(b[0]),                   //
          [b1] "r"(b[1]),                   //
          [b2] "r"(b[2])                    //
        : "cc");
  } else if (W == 4) {
    asm("cmp %[x], %[y]\n\t"                //
        "csel %[a0], %[a0], %[b0], eq\n\t"  //
        "csel %[a1], %[a1], %[b1], eq\n\t"  //
        "csel %[a2], %[a2], %[b2], eq\n\t"  //
        "csel %[a3], %[a3], %[b3], eq\n\t"  //
        : [a0] "+r"(a[0]),                  //
          [a1] "+r"(a[1]),                  //
          [a2] "+r"(a[2]),                  //
          [a3] "+r"(a[3])                   //
        : [x] "r"(x), [y] "ri"(y),          //
          [b0] "r"(b[0]),                   //
          [b1] "r"(b[1]),                   //
          [b2] "r"(b[2]),                   //
          [b3] "r"(b[3])                    //
        : "cc");
  } else {
    for (size_t i = 0; i < W; ++i) {
      a[i] = (x != y) ? b[i] : a[i];
    }
  }
}

// a = (nz != 0) ? b : a
static inline void cmovnz(size_t W, uint64_t a[/*W*/], uint64_t nz,
                          const uint64_t b[/*W*/]) {
  constexpr uint64_t z = 0;
  cmovne(W, a, nz, z, b);
}

static inline uint64_t addcmovc(uint64_t a, uint64_t b, uint64_t c) {
  asm("adds %[a], %[a], %[b]\n\t"
      "csel %[a], %[a], %[c], hs\n\t"
      : [a] "+r"(a)
      : [b] "r"(b), [c] "r"(c)
      : "cc");
  return a;
}

static inline uint64_t sub_sysdep(uint64_t a, uint64_t y, uint64_t m) {
  asm("subs %[a], %[a], %[y]\n\t"
      "csel %[m], %[m], xzr, lo"
      : [a] "+r"(a), [m] "+r"(m)
      : [y] "r"(y)
      : "cc");
  return a + m;
}

#else  // generic portable code

// a = (x != y) ? b : a
template <class limb_t>
static inline void cmovne(size_t W, limb_t a[/*W*/], limb_t x, limb_t y,
                          const limb_t b[/*W*/]) {
  for (size_t i = 0; i < W; ++i) {
    a[i] = (x != y) ? b[i] : a[i];
  }
}

// a = (nz != 0) ? b : a
template <class limb_t>
static inline void cmovnz(size_t W, limb_t a[/*W*/], limb_t nz,
                          const limb_t b[/*W*/]) {
  constexpr limb_t z = 0;
  cmovne(W, a, nz, z, b);
}

template <class limb_t>
static inline limb_t addcmovc(limb_t a, limb_t b, limb_t c) {
  limb_t t = a + b;
  return (a > t) ? t : c;
}

template <class limb_t>
static inline limb_t sub_sysdep(limb_t a, limb_t y, limb_t m) {
  limb_t t0 = a - y;
  return (y > a) ? (t0 + m) : t0;
}

#endif
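
// Example (illustrative only; reduce_once_example is a hypothetical helper,
// not used elsewhere): conditionally subtract a W-limb modulus m from a,
// i.e. a -= m when a >= m. sub_limb() returns an all-ones limb exactly when
// the subtraction borrowed (a < m), so cmovnz() commits the difference only
// when no borrow occurred.
template <size_t W>
static inline void reduce_once_example(uint64_t a[/*W*/],
                                       const uint64_t m[/*W*/]) {
  uint64_t t[W];
  mov(W, t, a);
  uint64_t borrow = sub_limb(W, t, m);  // all-ones if a < m, zero otherwise
  cmovnz(W, a, ~borrow, t);             // if no borrow, a = a - m
}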

}  // namespace proofs

#endif  // PRIVACY_PROOFS_ZK_LIB_ALGEBRA_SYSDEP_H_