67 void FSTSemiFly(
double* rdata,
double* idata,
double* rcoeffs,
double* icoeffs,
const int bw,
double* workspace,
68 DataFormat data_format,
const int cutoff, fftw_plan* DCT_plan, fftw_plan* FFT_plan,
double* weights) {
72 double* rres = workspace;
73 double* ires = rres + (size * size);
74 double* fltres = ires + (size * size);
75 double* sin_values = fltres + bw;
76 double* eval_pts = sin_values + (size);
77 double* pmls = eval_pts + (size);
78 double* scratchpad = pmls + (2 * bw * bw);
81 fftw_execute_split_dft(*FFT_plan, rdata, idata, rres, ires);
84 double normed_coeff = sqrt(2. * M_PI) / size;
85 for (
int i = 0; i < size * size; ++i) {
86 rres[i] *= normed_coeff;
87 ires[i] *= normed_coeff;
91 double* rdataptr = rcoeffs;
92 double* idataptr = icoeffs;
94 for (
int m = 0; m < cutoff; ++m) {
99 DLTSemi(rres + (m * size), bw, m, fltres, scratchpad, pmls, weights, DCT_plan);
101 memcpy(rdataptr, fltres,
sizeof(
double) * (bw - m));
105 DLTSemi(ires + (m * size), bw, m, fltres, scratchpad, pmls, weights, DCT_plan);
107 memcpy(idataptr, fltres,
sizeof(
double) * (bw - m));
111 for (
int m = cutoff; m < bw; ++m) {
116 DLTNaive(rres + (m * size), bw, m, weights, fltres, pmls, scratchpad);
117 memcpy(rdataptr, fltres,
sizeof(
double) * (bw - m));
121 DLTNaive(ires + (m * size), bw, m, weights, fltres, pmls, scratchpad);
122 memcpy(idataptr, fltres,
sizeof(
double) * (bw - m));
135 if (data_format ==
REAL) {
137 for (
int i = 1; i < bw; ++i) {
139 for (
int j = i; j < bw; ++j) {
143 rcoeffs[index1] = coeff * rcoeffs[index0];
144 icoeffs[index1] = -coeff * icoeffs[index0];
158 for (
int m = bw + 1; m <= size - cutoff; ++m) {
163 DLTNaive(rres + (m * size), bw, size - m, weights, fltres, pmls, scratchpad);
166 for (
int i = 0; i < m - bw; ++i)
167 rdataptr[i] = -fltres[i];
169 memcpy(rdataptr, fltres,
sizeof(
double) * (m - bw));
174 DLTNaive(ires + (m * size), bw, size - m, weights, fltres, pmls, scratchpad);
177 for (
int i = 0; i < m - bw; ++i)
178 idataptr[i] = -fltres[i];
180 memcpy(idataptr, fltres,
sizeof(
double) * (m - bw));
185 for (
int m = size - cutoff + 1; m < size; ++m) {
190 DLTSemi(rres + (m * size), bw, size - m, fltres, scratchpad, pmls, weights, DCT_plan);
193 for (
int i = 0; i < m - bw; ++i)
194 rdataptr[i] = -fltres[i];
196 memcpy(rdataptr, fltres,
sizeof(
double) * (m - bw));
201 DLTSemi(ires + (m * size), bw, size - m, fltres, scratchpad, pmls, weights, DCT_plan);
204 for (
int i = 0; i < m - bw; ++i)
205 idataptr[i] = -fltres[i];
207 memcpy(idataptr, fltres,
sizeof(
double) * (m - bw));
235 void InvFSTSemiFly(
double* rcoeffs,
double* icoeffs,
double* rdata,
double* idata,
const int bw,
double* workspace,
236 DataFormat data_format,
const int cutoff, fftw_plan* inv_DCT_plan, fftw_plan* inv_FFT_plan) {
239 double* rfourdata = workspace;
240 double* ifourdata = rfourdata + (size * size);
241 double* rinvfltres = ifourdata + (size * size);
242 double* iminvfltres = rinvfltres + (size);
243 double* sin_values = iminvfltres + (size);
244 double* eval_pts = sin_values + (size);
245 double* pmls = eval_pts + (size);
246 double* scratchpad = pmls + (2 * bw * bw);
250 for (
int i = 0; i < size; ++i)
251 sin_values[i] = sin(eval_pts[i]);
254 double* rdataptr = rcoeffs;
255 double* idataptr = icoeffs;
257 for (
int m = 0; m < cutoff; ++m) {
264 InvDLTSemi(rdataptr, bw, m, rinvfltres, pmls +
TableSize(m, bw), sin_values, scratchpad, inv_DCT_plan);
266 InvDLTSemi(idataptr, bw, m, iminvfltres, pmls +
TableSize(m, bw), sin_values, scratchpad, inv_DCT_plan);
269 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
270 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
276 for (
int m = cutoff; m < bw; ++m) {
285 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
286 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
293 memset(rfourdata + (bw * size), 0,
sizeof(
double) * size);
294 memset(ifourdata + (bw * size), 0,
sizeof(
double) * size);
306 for (
int m = bw + 1; m <= size - cutoff; ++m) {
310 InvDLTNaive(rdataptr, bw, size - m, rinvfltres, pmls);
311 InvDLTNaive(idataptr, bw, size - m, iminvfltres, pmls);
315 for (
int i = 0; i < size; ++i) {
316 rinvfltres[i] *= -1.0;
317 iminvfltres[i] *= -1.0;
320 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
321 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
323 rdataptr += bw - (size - m);
324 idataptr += bw - (size - m);
327 for (
int m = size - cutoff + 1; m < size; ++m) {
334 scratchpad, inv_DCT_plan);
335 InvDLTSemi(idataptr, bw, size - m, iminvfltres, pmls +
TableSize(size - m, bw), sin_values,
336 scratchpad, inv_DCT_plan);
340 for (
int i = 0; i < size; ++i) {
341 rinvfltres[i] *= -1.0;
342 iminvfltres[i] *= -1.0;
345 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
346 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
348 rdataptr += bw - (size - m);
349 idataptr += bw - (size - m);
352 for (
int m = bw + 1; m < size; ++m) {
353 memcpy(rfourdata + (m * size), rfourdata + ((size - m) * size),
sizeof(
double) * size);
354 memcpy(ifourdata + (m * size), ifourdata + ((size - m) * size),
sizeof(
double) * size);
356 for (
int i = 0; i < size; ++i)
357 ifourdata[(m * size) + i] *= -1.0;
362 double normed_coeff = 1. / (sqrt(2. * M_PI));
363 for (
int i = 0; i < 4 * bw * bw; ++i) {
364 rfourdata[i] *= normed_coeff;
365 ifourdata[i] *= normed_coeff;
368 fftw_execute_split_dft(*inv_FFT_plan, ifourdata, rfourdata, idata, rdata);
390 void FZTSemiFly(
double* rdata,
double* idata,
double* rres,
double* ires,
const int bw,
double* workspace,
391 DataFormat data_format, fftw_plan* DCT_plan,
double* weights) {
394 double* r0 = workspace;
395 double* i0 = r0 + size;
396 double* pmls = i0 + size;
397 double* scratchpad = pmls + (2 * bw * bw);
399 double dsize = sqrt(2. * M_PI) / size;
401 for (
int i = 0; i < size; ++i) {
402 double tmpreal = 0.0;
403 double tmpimag = 0.0;
405 for (
int j = 0; j < size; ++j) {
406 tmpreal += rdata[(i * size) + j];
407 tmpimag += idata[(i * size) + j];
411 r0[i] = tmpreal * dsize;
412 i0[i] = tmpimag * dsize;
419 DLTSemi(r0, bw, 0, rres, scratchpad, pmls, weights, DCT_plan);
422 DLTSemi(i0, bw, 0, ires, scratchpad, pmls, weights, DCT_plan);
424 memset(ires, 0,
sizeof(
double) * size);
454 void ConvOn2SphereSemiFly(
double* rdata,
double* idata,
double* rfilter,
double* ifilter,
double* rres,
double* ires,
455 const int bw,
double* workspace) {
458 double* frres = workspace;
459 double* fires = frres + (bw * bw);
460 double* trres = fires + (bw * bw);
461 double* tires = trres + (bw * bw);
462 double* filtrres = tires + (bw * bw);
463 double* filtires = filtrres + bw;
464 double* scratchpad = filtires + bw;
466 double* weights = (
double*)malloc(
sizeof(
double) * 4 * bw);
470 fftw_plan DCT_plan = fftw_plan_r2r_1d(size, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE);
471 fftw_plan inv_DCT_plan = fftw_plan_r2r_1d(size, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE);
476 fftw_iodim dims[rank];
481 int howmany_rank = 1;
482 fftw_iodim howmany_dims[howmany_rank];
483 howmany_dims[0].n = size;
484 howmany_dims[0].is = size;
485 howmany_dims[0].os = 1;
487 fftw_plan FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
488 workspace + (4 * bw * bw), FFTW_ESTIMATE);
499 howmany_dims[0].n = size;
500 howmany_dims[0].is = 1;
501 howmany_dims[0].os = size;
503 fftw_plan inv_FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
504 workspace + (4 * bw * bw), FFTW_ESTIMATE);
506 FSTSemiFly(rdata, idata, frres, fires, bw, scratchpad, 1, bw, &DCT_plan, &FFT_plan, weights);
507 FZTSemiFly(rfilter, ifilter, filtrres, filtires, bw, scratchpad, 1, &DCT_plan, weights);
509 TransMult(frres, fires, filtrres, filtires, trres, tires, bw);
511 InvFSTSemiFly(trres, tires, rres, ires, bw, scratchpad, 1, bw, &inv_DCT_plan, &inv_FFT_plan);
515 fftw_destroy_plan(inv_FFT_plan);
516 fftw_destroy_plan(FFT_plan);
517 fftw_destroy_plan(inv_DCT_plan);
518 fftw_destroy_plan(DCT_plan);