68 void FSTSemiMemo(
double* rdata,
double* idata,
double* rcoeffs,
double* icoeffs,
const int bw,
69 double** seminaive_naive_table,
double* workspace,
DataFormat data_format,
70 const int cutoff, fftw_plan* DCT_plan, fftw_plan* FFT_plan,
double* weights) {
74 double* rres = workspace;
75 double* ires = rres + (size * size);
76 double* fltres = ires + (size * size);
77 double* eval_pts = fltres + bw;
78 double* scratchpad = eval_pts + (size);
81 fftw_execute_split_dft(*FFT_plan, rdata, idata, rres, ires);
86 double normed_coeff = sqrt(2. * M_PI) / size;
87 for (
int i = 0; i < size * size; ++i) {
88 rres[i] *= normed_coeff;
89 ires[i] *= normed_coeff;
93 double* rdataptr = rcoeffs;
94 double* idataptr = icoeffs;
96 for (
int m = 0; m < cutoff; ++m) {
98 DLTSemi(rres + (m * size), bw, m, fltres, scratchpad, seminaive_naive_table[m], weights, DCT_plan);
100 memcpy(rdataptr, fltres,
sizeof(
double) * (bw - m));
104 DLTSemi(ires + (m * size), bw, m, fltres, scratchpad, seminaive_naive_table[m], weights, DCT_plan);
106 memcpy(idataptr, fltres,
sizeof(
double) * (bw - m));
110 for (
int m = cutoff; m < bw; ++m) {
112 DLTNaive(rres + (m * size), bw, m, weights, fltres, seminaive_naive_table[m], scratchpad);
113 memcpy(rdataptr, fltres,
sizeof(
double) * (bw - m));
117 DLTNaive(ires + (m * size), bw, m, weights, fltres, seminaive_naive_table[m], scratchpad);
118 memcpy(idataptr, fltres,
sizeof(
double) * (bw - m));
131 if (data_format ==
REAL) {
133 for (
int i = 1; i < bw; ++i) {
135 for (
int j = i; j < bw; ++j) {
139 rcoeffs[index1] = coeff * rcoeffs[index0];
140 icoeffs[index1] = -coeff * icoeffs[index0];
153 for (
int m = bw + 1; m <= size - cutoff; ++m) {
155 DLTNaive(rres + (m * size), bw, size - m, weights, fltres, seminaive_naive_table[size - m], scratchpad);
158 for (
int i = 0; i < m - bw; ++i)
159 rdataptr[i] = -fltres[i];
161 memcpy(rdataptr, fltres,
sizeof(
double) * (m - bw));
166 DLTNaive(ires + (m * size), bw, size - m, weights, fltres, seminaive_naive_table[size - m], scratchpad);
169 for (
int i = 0; i < m - bw; ++i)
170 idataptr[i] = -fltres[i];
172 memcpy(idataptr, fltres,
sizeof(
double) * (m - bw));
177 for (
int m = size - cutoff + 1; m < size; ++m) {
179 DLTSemi(rres + (m * size), bw, size - m, fltres, scratchpad, seminaive_naive_table[size - m],
183 for (
int i = 0; i < m - bw; ++i)
184 rdataptr[i] = -fltres[i];
186 memcpy(rdataptr, fltres,
sizeof(
double) * (m - bw));
191 DLTSemi(ires + (m * size), bw, size - m, fltres, scratchpad, seminaive_naive_table[size - m],
195 for (
int i = 0; i < m - bw; ++i)
196 idataptr[i] = -fltres[i];
198 memcpy(idataptr, fltres,
sizeof(
double) * (m - bw));
228 void InvFSTSemiMemo(
double* rcoeffs,
double* icoeffs,
double* rdata,
double* idata,
const int bw,
229 double** transpose_seminaive_naive_table,
double* workspace,
DataFormat data_format,
230 const int cutoff, fftw_plan* inv_DCT_plan, fftw_plan* inv_FFT_plan) {
235 double* rfourdata = workspace;
236 double* ifourdata = rfourdata + (size * size);
237 double* rinvfltres = ifourdata + (size * size);
238 double* iminvfltres = rinvfltres + (size);
239 double* sin_values = iminvfltres + (size);
240 double* eval_pts = sin_values + (size);
241 double* scratchpad = eval_pts + (size);
245 for (
int i = 0; i < size; ++i)
246 sin_values[i] = sin(eval_pts[i]);
249 double* rdataptr = rcoeffs;
250 double* idataptr = icoeffs;
252 for (
int m = 0; m < cutoff; ++m) {
254 InvDLTSemi(rdataptr, bw, m, rinvfltres, transpose_seminaive_naive_table[m], sin_values, scratchpad,
257 InvDLTSemi(idataptr, bw, m, iminvfltres, transpose_seminaive_naive_table[m], sin_values, scratchpad,
261 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
262 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
268 for (
int m = cutoff; m < bw; ++m) {
270 InvDLTNaive(rdataptr, bw, m, rinvfltres, transpose_seminaive_naive_table[m]);
272 InvDLTNaive(idataptr, bw, m, iminvfltres, transpose_seminaive_naive_table[m]);
275 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
276 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
283 memset(rfourdata + (bw * size), 0,
sizeof(
double) * size);
284 memset(ifourdata + (bw * size), 0,
sizeof(
double) * size);
296 for (
int m = bw + 1; m <= size - cutoff; ++m) {
297 InvDLTNaive(rdataptr, bw, size - m, rinvfltres, transpose_seminaive_naive_table[size - m]);
298 InvDLTNaive(idataptr, bw, size - m, iminvfltres, transpose_seminaive_naive_table[size - m]);
302 for (
int i = 0; i < size; ++i) {
303 rinvfltres[i] *= -1.0;
304 iminvfltres[i] *= -1.0;
307 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
308 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
310 rdataptr += bw - (size - m);
311 idataptr += bw - (size - m);
314 for (
int m = size - cutoff + 1; m < size; ++m) {
315 InvDLTSemi(rdataptr, bw, size - m, rinvfltres, transpose_seminaive_naive_table[size - m],
316 sin_values, scratchpad, inv_DCT_plan);
317 InvDLTSemi(idataptr, bw, size - m, iminvfltres, transpose_seminaive_naive_table[size - m],
318 sin_values, scratchpad, inv_DCT_plan);
322 for (
int i = 0; i < size; ++i) {
323 rinvfltres[i] *= -1.0;
324 iminvfltres[i] *= -1.0;
327 memcpy(rfourdata + (m * size), rinvfltres,
sizeof(
double) * size);
328 memcpy(ifourdata + (m * size), iminvfltres,
sizeof(
double) * size);
330 rdataptr += bw - (size - m);
331 idataptr += bw - (size - m);
334 for (
int m = bw + 1; m < size; ++m) {
335 memcpy(rfourdata + (m * size), rfourdata + ((size - m) * size),
sizeof(
double) * size);
336 memcpy(ifourdata + (m * size), ifourdata + ((size - m) * size),
sizeof(
double) * size);
338 for (
int i = 0; i < size; ++i)
339 ifourdata[(m * size) + i] *= -1.0;
344 double normed_coeff = 1. / (sqrt(2. * M_PI));
345 for (
int i = 0; i < 4 * bw * bw; i++) {
346 rfourdata[i] *= normed_coeff;
347 ifourdata[i] *= normed_coeff;
350 fftw_execute_split_dft(*inv_FFT_plan, ifourdata, rfourdata, idata, rdata);
374 void FZTSemiMemo(
double* rdata,
double* idata,
double* rres,
double* ires,
const int bw,
double* cos_pml_table,
375 double* workspace,
const DataFormat data_format, fftw_plan* DCT_plan,
double* weights) {
378 double* r0 = workspace;
379 double* i0 = r0 + size;
380 double* scratchpad = i0 + size;
384 double dsize = sqrt(2. * M_PI) / size;
386 for (
int i = 0; i < size; ++i) {
387 double tmpreal = 0.0;
388 double tmpimag = 0.0;
390 for (
int j = 0; j < size; ++j) {
391 tmpreal += rdata[(i * size) + j];
392 tmpimag += idata[(i * size) + j];
396 r0[i] = tmpreal * dsize;
397 i0[i] = tmpimag * dsize;
401 DLTSemi(r0, bw, 0, rres, scratchpad, cos_pml_table, weights, DCT_plan);
404 DLTSemi(i0, bw, 0, ires, scratchpad, cos_pml_table, weights, DCT_plan);
406 memset(ires, 0,
sizeof(
double) * size);
439 void ConvOn2SphereSemiMemo(
double* rdata,
double* idata,
double* rfilter,
double* ifilter,
double* rres,
double* ires,
440 const int bw,
double* workspace) {
446 double* spharmonic_result_space = workspace;
447 double* transpose_spharmonic_result_space = spharmonic_result_space + legendre_size;
449 double* frres = transpose_spharmonic_result_space + legendre_size;
450 double* fires = frres + (bw * bw);
451 double* trres = fires + (bw * bw);
452 double* tires = trres + (bw * bw);
453 double* filtrres = tires + (bw * bw);
454 double* filtires = filtrres + bw;
455 double* scratchpad = filtires + bw;
457 double* weights = (
double*)malloc(
sizeof(
double) * 4 * bw);
461 fftw_plan DCT_plan = fftw_plan_r2r_1d(size, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE);
462 fftw_plan inv_DCT_plan = fftw_plan_r2r_1d(size, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE);
467 fftw_iodim dims[rank];
472 int howmany_rank = 1;
473 fftw_iodim howmany_dims[howmany_rank];
474 howmany_dims[0].n = size;
475 howmany_dims[0].is = size;
476 howmany_dims[0].os = 1;
478 fftw_plan FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
479 workspace + (4 * bw * bw), FFTW_ESTIMATE);
490 howmany_dims[0].n = size;
491 howmany_dims[0].is = 1;
492 howmany_dims[0].os = size;
494 fftw_plan inv_FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
495 workspace + (4 * bw * bw), FFTW_ESTIMATE);
499 double** transpose_spharmonic_pml_table =
502 FSTSemiMemo(rdata, idata, frres, fires, bw, spharmonic_pml_table, scratchpad, 1, bw, &DCT_plan, &FFT_plan, weights);
503 FZTSemiMemo(rfilter, ifilter, filtrres, filtires, bw, spharmonic_pml_table[0], scratchpad, 1, &DCT_plan, weights);
505 TransMult(frres, fires, filtrres, filtires, trres, tires, bw);
507 InvFSTSemiMemo(trres, tires, rres, ires, bw, transpose_spharmonic_pml_table, scratchpad, 1, bw, &inv_DCT_plan,
511 free(spharmonic_pml_table);
512 free(transpose_spharmonic_pml_table);
514 fftw_destroy_plan(inv_FFT_plan);
515 fftw_destroy_plan(FFT_plan);
516 fftw_destroy_plan(inv_DCT_plan);
517 fftw_destroy_plan(DCT_plan);