#ifndef PRIVACY_PROOFS_ZK_LIB_ALGEBRA_SYSDEP_H_
#define PRIVACY_PROOFS_ZK_LIB_ALGEBRA_SYSDEP_H_

#include <stddef.h>
#include <stdint.h>

#include "util/panic.h"
#if defined(__x86_64__) || defined(__i386__)
#include <x86intrin.h>  // for the _addcarry_* / _subborrow_* intrinsics

#if defined(__x86_64__)
// Add with carry: *a += b + c (c in {0,1}); returns the carry out.
static inline uint64_t adc(uint64_t* a, uint64_t b, uint64_t c) {
  // The intrinsic wants an unsigned long long*, which is
  // layout-compatible with uint64_t on the platforms we support.
  unsigned long long out;
  c = _addcarry_u64(c, *a, b, &out);
  *a = out;
  return c;
}
static inline uint32_t adc(uint32_t* a, uint32_t b, uint32_t c) {
  return _addcarry_u32(c, *a, b, a);
}
// Subtract with borrow: *a -= b + c (c in {0,1}); returns the borrow out.
static inline uint64_t sbb(uint64_t* a, uint64_t b, uint64_t c) {
  unsigned long long out;
  c = _subborrow_u64(c, *a, b, &out);
  *a = out;
  return c;
}
static inline uint32_t sbb(uint32_t* a, uint32_t b, uint32_t c) {
  return _subborrow_u32(c, *a, b, a);
}
// Full 64x64 -> 128-bit multiply: (*h, *l) = a * b.
static inline void mulq(uint64_t* l, uint64_t* h, uint64_t a, uint64_t b) {
  asm("mulx %2, %0, %1" : "=r"(*l), "=r"(*h) : "r"(b), "d"(a));
}
#elif defined(__i386__)
static inline uint32_t adc(uint32_t* a, uint32_t b, uint32_t c) {
  return _addcarry_u32(c, *a, b, a);
}

static inline uint32_t sbb(uint32_t* a, uint32_t b, uint32_t c) {
  return _subborrow_u32(c, *a, b, a);
}
// 64-bit carry chains are not implemented on i386; panic if reached.
static inline unsigned long long adc(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  check(false, "adcll() not defined");
  return 0;
}
static inline unsigned long long sbb(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  check(false, "sbbll() not defined");
  return 0;
}

#define SYSDEP_MULQ64_NOT_DEFINED
#endif  // __i386__
#elif defined(__clang__)
static inline unsigned long long adc(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  *a = __builtin_addcll(*a, b, c, &c);
  return c;
}
static inline unsigned long adc(unsigned long* a, unsigned long b,
                                unsigned long c) {
  *a = __builtin_addcl(*a, b, c, &c);
  return c;
}
static inline unsigned int adc(unsigned int* a, unsigned int b,
                               unsigned int c) {
  *a = __builtin_addc(*a, b, c, &c);
  return c;
}
static inline unsigned long long sbb(unsigned long long* a,
                                     unsigned long long b,
                                     unsigned long long c) {
  *a = __builtin_subcll(*a, b, c, &c);
  return c;
}
static inline unsigned long sbb(unsigned long* a, unsigned long b,
                                unsigned long c) {
  *a = __builtin_subcl(*a, b, c, &c);
  return c;
}
static inline unsigned int sbb(unsigned int* a, unsigned int b,
                               unsigned int c) {
  *a = __builtin_subc(*a, b, c, &c);
  return c;
}

#if defined(__SIZEOF_INT128__)
// Full 64x64 -> 128-bit multiply via the compiler's 128-bit type.
static inline void mulq(uint64_t* l, uint64_t* h, uint64_t a, uint64_t b) {
  __uint128_t p = (__uint128_t)b * (__uint128_t)a;
  *l = (uint64_t)p;
  *h = (uint64_t)(p >> 64);
}
#else
#define SYSDEP_MULQ64_NOT_DEFINED
#endif  // __SIZEOF_INT128__
#endif  // architecture/compiler selection
// Generic 32x32 -> 64-bit multiply: (*h, *l) = a * b.
static inline void mulq(uint32_t* l, uint32_t* h, uint32_t a, uint32_t b) {
  uint64_t p = (uint64_t)b * (uint64_t)a;
  *l = (uint32_t)p;
  *h = (uint32_t)(p >> 32);
}
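
// Example (sketch): reconstructing the full product from mulq.  For
// 32-bit limbs, ((uint64_t)h << 32) | l equals (uint64_t)a * b:
//
//   uint32_t l, h;
//   mulq(&l, &h, a, b);
//   uint64_t p = ((uint64_t)h << 32) | l;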
// Identity function that the optimizer cannot see through, used to
// launder constants so constant-time code is not folded into
// data-dependent branches.
template <class limb_t>
static inline limb_t identity_limb(limb_t v) {
  asm("" : "+r"(v));
  return v;
}
template <class limb_t>
static inline limb_t zero_limb() {
  return identity_limb<limb_t>(0);
}
// a[0..Wa-1] += b[0..Wb-1], with Wb <= Wa; the carry propagates
// through the remaining limbs of a and is then dropped.
template <class limb_t>
static inline void accum(size_t Wa, limb_t a[], size_t Wb,
                         const limb_t b[]) {
  limb_t c = zero_limb<limb_t>();
  for (size_t i = 0; i < Wb; ++i) {
    c = adc(&a[i], b[i], c);
  }
  for (size_t i = Wb; i < Wa; ++i) {
    c = adc(&a[i], 0, c);
  }
}
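
// Example (sketch): accumulating a 2-limb value into a 4-limb sum;
// the carry out of the low limbs ripples through acc[2..3]:
//
//   uint64_t acc[4] = {~0ull, ~0ull, 0, 0};
//   uint64_t x[2] = {1, 0};
//   accum(4, acc, 2, x);   // acc is now {0, 0, 1, 0}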
// a[0..Wa-1] -= b[0..Wb-1], with Wb <= Wa; the borrow propagates
// through the remaining limbs of a and is then dropped.
template <class limb_t>
static inline void negaccum(size_t Wa, limb_t a[], size_t Wb,
                            const limb_t b[]) {
  limb_t c = zero_limb<limb_t>();
  for (size_t i = 0; i < Wb; ++i) {
    c = sbb(&a[i], b[i], c);
  }
  for (size_t i = Wb; i < Wa; ++i) {
    c = sbb(&a[i], 0, c);
  }
}
// a[0..W-1] += b[0..W-1]; returns the carry-out limb.
template <class limb_t>
static inline limb_t add_limb(size_t W, limb_t a[],
                              const limb_t b[]) {
  limb_t c = zero_limb<limb_t>();
  for (size_t i = 0; i < W; ++i) {
    c = adc(&a[i], b[i], c);
  }
  limb_t h = zero_limb<limb_t>();
  c = adc(&h, 0, c);
  return h;
}
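
// Example (sketch): a full-width add that keeps the carry-out:
//
//   uint64_t a[4], b[4];                     // little-endian limbs
//   uint64_t overflow = add_limb(4, a, b);   // a += b; overflow is 0 or 1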
// a[1..W-1] += b[0..W-2], i.e. a += b shifted left by one limb;
// returns the high limb (b[W-1] plus the final carry) that falls out.
template <class limb_t>
static inline limb_t addh(size_t W, limb_t a[],
                          const limb_t b[]) {
  limb_t c = zero_limb<limb_t>();
  for (size_t i = 1; i < W; ++i) {
    c = adc(&a[i], b[i - 1], c);
  }
  limb_t h = zero_limb<limb_t>();
  c = adc(&h, b[W - 1], c);
  return h;
}
// a[0..W-1] -= b[0..W-1]; returns the borrow-out limb
// (all-ones on borrow, zero otherwise).
template <class limb_t>
static inline limb_t sub_limb(size_t W, limb_t a[],
                              const limb_t b[]) {
  limb_t c = zero_limb<limb_t>();
  for (size_t i = 0; i < W; ++i) {
    c = sbb(&a[i], b[i], c);
  }
  limb_t h = zero_limb<limb_t>();
  c = sbb(&h, 0, c);
  return h;
}
// Multiply the scalar a by each limb of b, writing the low halves of
// the products to l[] and the high halves to h[].
template <class limb_t>
static inline void mulhl(size_t W, limb_t l[], limb_t h[], limb_t a,
                         const limb_t b[]) {
  for (size_t i = 0; i < W; ++i) {
    mulq(&l[i], &h[i], a, b[i]);
  }
}
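
// Example (sketch): one row of schoolbook multiplication, p += a * s.
// The buffers lo, hi and the (W+1)-limb accumulator p are illustrative,
// and p[W] is assumed not to overflow.  Low halves land at offset 0,
// high halves at offset 1:
//
//   limb_t lo[W], hi[W];
//   mulhl(W, lo, hi, s, a);
//   p[W] += add_limb(W, p, lo);   // p[0..W-1] += lo, carry into p[W]
//   p[W] += addh(W, p, hi);       // p[1..W-1] += hi[0..W-2], rest into p[W]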
// a[0..W-1] = b[0..W-1].
template <class limb_t>
static inline void mov(size_t W, limb_t a[],
                       const limb_t b[]) {
  for (size_t i = 0; i < W; ++i) {
    a[i] = b[i];
  }
}
#if defined(__x86_64__)
// Conditional move, a = (nz != 0) ? b : a, unrolled in branch-free
// inline asm for the common widths.
static inline void cmovnz(size_t W, uint64_t a[], uint64_t nz,
                          const uint64_t b[]) {
  if (W == 1) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        : [a0] "+r"(a[0])
        : [nz] "r"(nz), [b0] "r"(b[0]));
  } else if (W == 2) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1])
        : [nz] "r"(nz), [b0] "r"(b[0]), [b1] "r"(b[1]));
  } else if (W == 3) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2])
        : [nz] "r"(nz), [b0] "r"(b[0]), [b1] "r"(b[1]), [b2] "r"(b[2]));
  } else if (W == 4) {
    asm("testq %[nz], %[nz]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        "cmovneq %[b3], %[a3]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2]), [a3] "+r"(a[3])
        : [nz] "r"(nz), [b0] "r"(b[0]), [b1] "r"(b[1]), [b2] "r"(b[2]),
          [b3] "r"(b[3]));
  } else {
    for (size_t i = 0; i < W; ++i) {
      a[i] = (nz != 0) ? b[i] : a[i];
    }
  }
}
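
// Note: the unrolled asm above selects with cmov, so no data-dependent
// branch is taken for W <= 4; the fallback loop's ternary may compile
// to a branch for larger W.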
// Conditional move, a = (x != y) ? b : a.
static inline void cmovne(size_t W, uint64_t a[], uint64_t x, uint64_t y,
                          const uint64_t b[]) {
  if (W == 1) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        : [a0] "+r"(a[0])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]));
  } else if (W == 2) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]), [b1] "r"(b[1]));
  } else if (W == 3) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]), [b1] "r"(b[1]),
          [b2] "r"(b[2]));
  } else if (W == 4) {
    asm("cmpq %[x], %[y]\n\t"
        "cmovneq %[b0], %[a0]\n\t"
        "cmovneq %[b1], %[a1]\n\t"
        "cmovneq %[b2], %[a2]\n\t"
        "cmovneq %[b3], %[a3]\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2]), [a3] "+r"(a[3])
        : [x] "r"(x), [y] "r"(y), [b0] "r"(b[0]), [b1] "r"(b[1]),
          [b2] "r"(b[2]), [b3] "r"(b[3]));
  } else {
    for (size_t i = 0; i < W; ++i) {
      a[i] = (x != y) ? b[i] : a[i];
    }
  }
}
// Return a + b if the addition carries, c otherwise.
static inline uint64_t addcmovc(uint64_t a, uint64_t b, uint64_t c) {
  asm("add %[b], %[a]\n\t"
      "cmovaeq %[c], %[a]\n\t"
      : [a] "+r"(a)
      : [b] "r"(b), [c] "r"(c));
  return a;
}
// Return a - y, adding m back if the subtraction borrows.
static inline uint64_t sub_sysdep(uint64_t a, uint64_t y, uint64_t m) {
  uint64_t z = 0;
  asm("subq %[y], %[a]\n\t"
      "cmovbq %[m], %[z]\n\t"
      : [a] "+r"(a), [z] "+r"(z)
      : [y] "r"(y), [m] "r"(m));
  return a + z;
}
#elif defined(__aarch64__)

// Conditional move, a = (x != y) ? b : a.
static inline void cmovne(size_t W, uint64_t a[], uint64_t x, uint64_t y,
                          const uint64_t b[]) {
  if (W == 1) {
    asm("cmp %[x], %[y]\n\t"
        "csel %[a0], %[a0], %[b0], eq\n\t"
        : [a0] "+r"(a[0])
        : [x] "r"(x), [y] "ri"(y), [b0] "r"(b[0])
        : "cc");
  } else if (W == 2) {
    asm("cmp %[x], %[y]\n\t"
        "csel %[a0], %[a0], %[b0], eq\n\t"
        "csel %[a1], %[a1], %[b1], eq\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1])
        : [x] "r"(x), [y] "ri"(y), [b0] "r"(b[0]), [b1] "r"(b[1])
        : "cc");
  } else if (W == 3) {
    asm("cmp %[x], %[y]\n\t"
        "csel %[a0], %[a0], %[b0], eq\n\t"
        "csel %[a1], %[a1], %[b1], eq\n\t"
        "csel %[a2], %[a2], %[b2], eq\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2])
        : [x] "r"(x), [y] "ri"(y), [b0] "r"(b[0]), [b1] "r"(b[1]),
          [b2] "r"(b[2])
        : "cc");
  } else if (W == 4) {
    asm("cmp %[x], %[y]\n\t"
        "csel %[a0], %[a0], %[b0], eq\n\t"
        "csel %[a1], %[a1], %[b1], eq\n\t"
        "csel %[a2], %[a2], %[b2], eq\n\t"
        "csel %[a3], %[a3], %[b3], eq\n\t"
        : [a0] "+r"(a[0]), [a1] "+r"(a[1]), [a2] "+r"(a[2]), [a3] "+r"(a[3])
        : [x] "r"(x), [y] "ri"(y), [b0] "r"(b[0]), [b1] "r"(b[1]),
          [b2] "r"(b[2]), [b3] "r"(b[3])
        : "cc");
  } else {
    for (size_t i = 0; i < W; ++i) {
      a[i] = (x != y) ? b[i] : a[i];
    }
  }
}
static inline void cmovnz(size_t W, uint64_t a[], uint64_t nz,
                          const uint64_t b[]) {
  constexpr uint64_t z = 0;
  cmovne(W, a, nz, z, b);
}
// Return a + b if the addition carries, c otherwise.
static inline uint64_t addcmovc(uint64_t a, uint64_t b, uint64_t c) {
  asm("adds %[a], %[a], %[b]\n\t"
      "csel %[a], %[a], %[c], hs\n\t"
      : [a] "+r"(a)
      : [b] "r"(b), [c] "r"(c)
      : "cc");
  return a;
}
// Return a - y, adding m back if the subtraction borrows.
static inline uint64_t sub_sysdep(uint64_t a, uint64_t y, uint64_t m) {
  asm("subs %[a], %[a], %[y]\n\t"
      "csel %[m], %[m], xzr, lo"
      : [a] "+r"(a), [m] "+r"(m)
      : [y] "r"(y)
      : "cc");
  return a + m;
}
#else

// Generic fallbacks.  The ternaries may compile to branches, so these
// are not guaranteed to be constant-time.
template <class limb_t>
static inline void cmovne(size_t W, limb_t a[], limb_t x, limb_t y,
                          const limb_t b[]) {
  for (size_t i = 0; i < W; ++i) {
    a[i] = (x != y) ? b[i] : a[i];
  }
}
template <class limb_t>
static inline void cmovnz(size_t W, limb_t a[], limb_t nz,
                          const limb_t b[]) {
  constexpr limb_t z = 0;
  cmovne(W, a, nz, z, b);
}
// Return a + b if the addition carries, c otherwise.
template <class limb_t>
static inline limb_t addcmovc(limb_t a, limb_t b, limb_t c) {
  limb_t t = a + b;
  return (a > t) ? t : c;
}
// Return a - y, adding m back if the subtraction borrows.
template <class limb_t>
static inline limb_t sub_sysdep(limb_t a, limb_t y, limb_t m) {
  limb_t t0 = a - y;
  return (y > a) ? (t0 + m) : t0;
}
#endif

#endif  // PRIVACY_PROOFS_ZK_LIB_ALGEBRA_SYSDEP_H_