34 static void a0a1(
const Elt* A, Elt* B,
size_t s,
const Elt& winv,
37 Elt x1 = F.mulf(A[s], winv);
38 B[0] = F.addf(x0, x1);
39 B[s] = F.subf(x0, x1);
42 static void a0a1(
const Elt* A, Elt* B,
size_t s,
const Field& F) {
45 B[0] = F.addf(x0, x1);
46 B[s] = F.subf(x0, x1);
50 static void b0b1(Elt* A,
const Elt* B,
size_t s,
const Elt& w,
52 Elt x0 = F.mulf(F.half(), F.addf(B[0], B[s]));
53 Elt x1 = F.mulf(F.half(), F.subf(B[0], B[s]));
58 static void b0b1_unscaled(Elt* A,
const Elt* B,
size_t s,
const Elt& w,
60 Elt x0 = F.addf(B[0], B[s]);
61 Elt x1 = F.subf(B[0], B[s]);
65 static void b0b1_unscaled(Elt* A,
const Elt* B,
size_t s,
const Field& F) {
66 Elt x0 = F.addf(B[0], B[s]);
67 Elt x1 = F.subf(B[0], B[s]);
77 static void a0b0(Elt* A, Elt* B,
size_t s,
const Elt& w,
const Field& F) {
79 Elt x1 = F.subf(B[0], x0);
81 B[s] = F.subf(x0, x1);
89 static void a0b1(Elt* A, Elt* B,
size_t s,
const Elt& w,
const Field& F) {
91 Elt x1 = F.subf(x0, B[s]);
93 B[0] = F.addf(x0, x1);
97 static void fftb(Elt A[],
const Elt B[],
size_t n,
99 for (
size_t j = 0; j < n; ++j) {
105 for (
size_t m = n; m > 2;) {
107 size_t ws = roots.order_ / (2 * m);
108 for (
size_t k = 0; k < n; k += 2 * m) {
109 b0b1_unscaled(&A[k], &A[k], m, F);
110 for (
size_t j = 1; j < m; ++j) {
111 b0b1_unscaled(&A[k + j], &A[k + j], m, roots.w_[j * ws], F);
114 F.mul(scale, F.half());
118 for (
size_t k = 0; k < n; k += 2) {
119 b0b1_unscaled(&A[k], &A[k], 1, F);
121 F.mul(scale, F.half());
124 Blas<Field>::scale(n, A, 1, scale, F);
128 static void fftf(
const Elt A[], Elt B[],
size_t n,
130 for (
size_t j = 0; j < n; ++j) {
136 for (
size_t k = 0; k < n; k += 2) {
137 a0a1(&B[k], &B[k], 1, F);
142 for (
size_t m = 2; m < n; m = 2 * m) {
143 size_t ws = rootsinv.order_ / (2 * m);
144 for (
size_t k = 0; k < n; k += 2 * m) {
145 a0a1(&B[k], &B[k], m, F);
146 for (
size_t j = 1; j < m; ++j) {
147 a0a1(&B[k + j], &B[k + j], m, rootsinv.w_[j * ws], F);
153 static bool in_range(
size_t j,
size_t b0,
size_t n,
size_t k) {
154 size_t b1 = b0 + (n - k);
155 return (b0 <= j && j < b1) || (b0 <= j + n && j + n < b1);
177 static void bidir(
size_t n, Elt A[], Elt B[],
size_t k,
size_t b0,
179 Elt workspace[],
const Field& F) {
180 check(k <= n,
"k <= n");
181 check(b0 < n,
"b0 < n");
184 fftb(A, B, n, roots, F);
186 fftf(A, B, n, rootsinv, F);
188 size_t ws = roots.order_ / n;
197 fftf(A, &T[0], n2, rootsinv, F);
200 for (
size_t j = 0; j < n2; ++j) {
201 if (in_range(j, b0, n, k)) {
202 if (in_range(j + n2, b0, n, k)) {
204 check(
false,
"can't happen");
206 a0b0(&T[j], &B[j], n2, roots.w_[j * ws], F);
209 if (in_range(j + n2, b0, n, k)) {
210 a0b1(&T[j], &B[j], n2, roots.w_[j * ws], F);
218 size_t bb0 = (b0 >= n2) ? (b0 - n2) : b0;
219 bidir(n2, &A[n2], &T[n2], k - n2, bb0, roots, rootsinv, workspace, F);
222 for (
size_t j = 0; j < n2; ++j) {
223 if (in_range(j, b0, n, k)) {
224 if (in_range(j + n2, b0, n, k)) {
226 check(
false,
"can't happen");
231 if (in_range(j + n2, b0, n, k)) {
234 a0a1(&T[j], &B[j], n2, rootsinv.w_[j * ws], F);
240 for (
size_t j = 0; j < n2; ++j) {
241 if (in_range(j, b0, n, k)) {
242 if (in_range(j + n2, b0, n, k)) {
243 b0b1(&T[j], &B[j], n2, roots.w_[j * ws], F);
253 size_t bb0 = (b0 >= n2) ? (b0 - n2) : b0;
254 bidir(n2, &A[0], &T[0], k, bb0, roots, rootsinv, workspace, F);
257 for (
size_t j = 0; j < n2; ++j) {
258 if (in_range(j, b0, n, k)) {
259 if (in_range(j + n2, b0, n, k)) {
262 a0b0(&T[j], &B[j], n2, roots.w_[j * ws], F);
265 if (in_range(j + n2, b0, n, k)) {
266 a0b1(&T[j], &B[j], n2, roots.w_[j * ws], F);
270 check(
false,
"can't happen");
276 fftb(&A[n2], &T[n2], n2, roots, F);
282 static void interpolate(
size_t n, Elt A[], Elt B[],
size_t k,
283 size_t b0,
const Elt& omega_m, uint64_t m,
286 Elt omega_n = Twiddle<Field>::reroot(omega_m, m, n, F);
289 std::vector<Elt> workspace(2 * n);
290 bidir(n, A, B, k, b0, roots, rootsinv, &workspace[0], F);