S2kit  1.1
Toolkit for working with functions defined on the sphere
FST_semi_fly.c
Go to the documentation of this file.
1 
11 #include "s2kit/FST_semi_fly.h"
12 
13 #include <math.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include <fftw3.h>
18 
19 #include "s2kit/cospml.h"
20 #include "s2kit/pml.h"
21 #include "s2kit/naive.h"
22 #include "s2kit/seminaive.h"
23 #include "s2kit/weights.h"
24 #include "s2kit/chebyshev_nodes.h"
25 #include "s2kit/util.h"
26 
67 void FSTSemiFly(double* rdata, double* idata, double* rcoeffs, double* icoeffs, const int bw, double* workspace,
68  DataFormat data_format, const int cutoff, fftw_plan* DCT_plan, fftw_plan* FFT_plan, double* weights) {
69  // TODO add check that cutoff<bw?
70  int size = 2 * bw;
71 
72  double* rres = workspace; // needs (size * size) = (4 * bw^2)
73  double* ires = rres + (size * size); // needs (size * size) = (4 * bw^2)
74  double* fltres = ires + (size * size); // needs bw
75  double* sin_values = fltres + bw; // needs (2 * bw)
76  double* eval_pts = sin_values + (size); // needs (2 * bw)
77  double* pmls = eval_pts + (size); // needs (2 * bw * bw)
78  double* scratchpad = pmls + (2 * bw * bw); // needs (16 * bw)
79 
80  // do the FFTs along phi
81  fftw_execute_split_dft(*FFT_plan, rdata, idata, rres, ires);
82 
83  // normalize
84  double normed_coeff = sqrt(2. * M_PI) / size;
85  for (int i = 0; i < size * size; ++i) {
86  rres[i] *= normed_coeff;
87  ires[i] *= normed_coeff;
88  }
89 
90  // point to start of output data buffers
91  double* rdataptr = rcoeffs;
92  double* idataptr = icoeffs;
93 
94  for (int m = 0; m < cutoff; ++m) { // semi-naive part
95  // generate cosine series of pmls
96  GenerateCosPmlTable(bw, m, pmls, scratchpad);
97 
98  // real part
99  DLTSemi(rres + (m * size), bw, m, fltres, scratchpad, pmls, weights, DCT_plan);
100  // load real part of coefficients into output space
101  memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
102  rdataptr += bw - m;
103 
104  // imaginary part
105  DLTSemi(ires + (m * size), bw, m, fltres, scratchpad, pmls, weights, DCT_plan);
106  // load imaginary part of coefficients into output space
107  memcpy(idataptr, fltres, sizeof(double) * (bw - m));
108  idataptr += bw - m;
109  }
110 
111  for (int m = cutoff; m < bw; ++m) { // naive part
112  // generate pmls, note we are using CosPmls array
113  GeneratePmlTable(bw, m, pmls, scratchpad);
114 
115  // real part
116  DLTNaive(rres + (m * size), bw, m, weights, fltres, pmls, scratchpad);
117  memcpy(rdataptr, fltres, sizeof(double) * (bw - m));
118  rdataptr += bw - m;
119 
120  // imaginary part
121  DLTNaive(ires + (m * size), bw, m, weights, fltres, pmls, scratchpad);
122  memcpy(idataptr, fltres, sizeof(double) * (bw - m));
123  idataptr += bw - m;
124  }
125 
126  // upper coefficients
127  /*
128  If the data is real, we don't have to compute the coeffs whose order is less than 0,
129  i.e. since the data is real, we know that
130 
131  f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)),
132 
133  so use that to get the rest of the coefficients
134  */
135  if (data_format == REAL) {
136  double coeff = 1.;
137  for (int i = 1; i < bw; ++i) {
138  coeff *= -1.0;
139  for (int j = i; j < bw; ++j) {
140  int index0 = IndexOfHarmonicCoeff(i, j, bw);
141  int index1 = IndexOfHarmonicCoeff(-i, j, bw);
142 
143  rcoeffs[index1] = coeff * rcoeffs[index0];
144  icoeffs[index1] = -coeff * icoeffs[index0];
145  }
146  }
147 
148  return;
149  }
150 
151 
152  // complex data
153  /*
154  Note that `m` is greater than `bw` here, but this is for
155  purposes of indexing the input data arrays. The "true"
156  value of `m` as a parameter for Pml is `size-m`
157  */
158  for (int m = bw + 1; m <= size - cutoff; ++m) { // naive part
159  // generate pmls, note we are using CosPmls array
160  GeneratePmlTable(bw, size - m, pmls, scratchpad);
161 
162  // real part
163  DLTNaive(rres + (m * size), bw, size - m, weights, fltres, pmls, scratchpad);
164  // load real part of coefficients into output space
165  if (m % 2) {
166  for (int i = 0; i < m - bw; ++i)
167  rdataptr[i] = -fltres[i];
168  } else {
169  memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
170  }
171  rdataptr += m - bw;
172 
173  // imaginary part
174  DLTNaive(ires + (m * size), bw, size - m, weights, fltres, pmls, scratchpad);
175  // load imaginary part of coefficients into output space
176  if (m % 2) {
177  for (int i = 0; i < m - bw; ++i)
178  idataptr[i] = -fltres[i];
179  } else {
180  memcpy(idataptr, fltres, sizeof(double) * (m - bw));
181  }
182  idataptr += m - bw;
183  }
184 
185  for (int m = size - cutoff + 1; m < size; ++m) { // semi-naive part
186  // generate cosine series of pmls
187  GenerateCosPmlTable(bw, size - m, pmls, scratchpad);
188 
189  // real part
190  DLTSemi(rres + (m * size), bw, size - m, fltres, scratchpad, pmls, weights, DCT_plan);
191  // load real part of coefficients into output space
192  if (m % 2) {
193  for (int i = 0; i < m - bw; ++i)
194  rdataptr[i] = -fltres[i];
195  } else {
196  memcpy(rdataptr, fltres, sizeof(double) * (m - bw));
197  }
198  rdataptr += m - bw;
199 
200  // imaginary part
201  DLTSemi(ires + (m * size), bw, size - m, fltres, scratchpad, pmls, weights, DCT_plan);
202  // load imaginary part of coefficients into output space
203  if (m % 2) {
204  for (int i = 0; i < m - bw; ++i)
205  idataptr[i] = -fltres[i];
206  } else {
207  memcpy(idataptr, fltres, sizeof(double) * (m - bw));
208  }
209  idataptr += m - bw;
210  }
211 }
212 
235 void InvFSTSemiFly(double* rcoeffs, double* icoeffs, double* rdata, double* idata, const int bw, double* workspace,
236  DataFormat data_format, const int cutoff, fftw_plan* inv_DCT_plan, fftw_plan* inv_FFT_plan) {
237  int size = 2 * bw;
238 
239  double* rfourdata = workspace; // needs (size * size)
240  double* ifourdata = rfourdata + (size * size); // needs (size * size)
241  double* rinvfltres = ifourdata + (size * size); // needs (2 * bw)
242  double* iminvfltres = rinvfltres + (size); // needs (2 * bw)
243  double* sin_values = iminvfltres + (size); // needs (2 * bw)
244  double* eval_pts = sin_values + (size); // needs (2 * bw)
245  double* pmls = eval_pts + (size); // needs (2 * bw * bw)
246  double* scratchpad = pmls + (2 * bw * bw); // needs (16 * bw)
247 
248  // load up the sin_values array
249  AcosOfChebyshevNodes(size, eval_pts);
250  for (int i = 0; i < size; ++i)
251  sin_values[i] = sin(eval_pts[i]);
252 
253  // do all of the inverse Legendre transforms
254  double* rdataptr = rcoeffs;
255  double* idataptr = icoeffs;
256 
257  for (int m = 0; m < cutoff; ++m) { // semi-naive part
258  // generate cosine series of pmls
259  GenerateCosPmlTable(bw, m, pmls, scratchpad);
260  // take transpose
261  TransposeCosPmlTable(bw, m, pmls, pmls + TableSize(m, bw));
262 
263  // real part
264  InvDLTSemi(rdataptr, bw, m, rinvfltres, pmls + TableSize(m, bw), sin_values, scratchpad, inv_DCT_plan);
265  // imaginary part
266  InvDLTSemi(idataptr, bw, m, iminvfltres, pmls + TableSize(m, bw), sin_values, scratchpad, inv_DCT_plan);
267 
268  // store normal, then tranpose before doing inverse fft
269  memcpy(rfourdata + (m * size), rinvfltres, sizeof(double) * size);
270  memcpy(ifourdata + (m * size), iminvfltres, sizeof(double) * size);
271 
272  rdataptr += bw - m;
273  idataptr += bw - m;
274  }
275 
276  for (int m = cutoff; m < bw; ++m) { // naive part
277  // generate pmls
278  // Note we are using CosPmls array and don't have to transpose
279  GeneratePmlTable(bw, m, pmls, scratchpad);
280 
281  InvDLTNaive(rdataptr, bw, m, rinvfltres, pmls); // real part
282  InvDLTNaive(idataptr, bw, m, iminvfltres, pmls); // imaginary part
283 
284  // store normal, then tranpose before doing inverse fft
285  memcpy(rfourdata + (m * size), rinvfltres, sizeof(double) * size);
286  memcpy(ifourdata + (m * size), iminvfltres, sizeof(double) * size);
287 
288  rdataptr += bw - m;
289  idataptr += bw - m;
290  }
291 
292  // fill in zero values where m = bw (from problem definition)
293  memset(rfourdata + (bw * size), 0, sizeof(double) * size);
294  memset(ifourdata + (bw * size), 0, sizeof(double) * size);
295 
296  /*
297  If the data is real, we don't have to compute the coeffs whose order is less than 0,
298  i.e. since the data is real, we know that
299 
300  invf-hat(l,-m) = conjugate(invf-hat(l,m)),
301 
302  so use that to get the rest of the coefficients
303  */
304  // TODO swap if and for? (almost same 2nd part)
305  if (data_format == COMPLEX) {
306  for (int m = bw + 1; m <= size - cutoff; ++m) { // naive part
307  // Note we are using CosPmls array and don't have to transpose
308  GeneratePmlTable(bw, size - m, pmls, scratchpad);
309 
310  InvDLTNaive(rdataptr, bw, size - m, rinvfltres, pmls);
311  InvDLTNaive(idataptr, bw, size - m, iminvfltres, pmls);
312 
313  // store normal, then tranpose before doing inverse fft
314  if (m % 2)
315  for (int i = 0; i < size; ++i) {
316  rinvfltres[i] *= -1.0;
317  iminvfltres[i] *= -1.0;
318  }
319 
320  memcpy(rfourdata + (m * size), rinvfltres, sizeof(double) * size);
321  memcpy(ifourdata + (m * size), iminvfltres, sizeof(double) * size);
322 
323  rdataptr += bw - (size - m);
324  idataptr += bw - (size - m);
325  }
326 
327  for (int m = size - cutoff + 1; m < size; ++m) { // semi-naive part
328  // generate cosine series of pmls
329  GenerateCosPmlTable(bw, size - m, pmls, scratchpad);
330  // take transpose
331  TransposeCosPmlTable(bw, size - m, pmls, pmls + TableSize(size - m, bw));
332 
333  InvDLTSemi(rdataptr, bw, size - m, rinvfltres, pmls + TableSize(size - m, bw), sin_values,
334  scratchpad, inv_DCT_plan);
335  InvDLTSemi(idataptr, bw, size - m, iminvfltres, pmls + TableSize(size - m, bw), sin_values,
336  scratchpad, inv_DCT_plan);
337 
338  // store normal, then tranpose before doing inverse fft
339  if (m % 2)
340  for (int i = 0; i < size; ++i) {
341  rinvfltres[i] *= -1.0;
342  iminvfltres[i] *= -1.0;
343  }
344 
345  memcpy(rfourdata + (m * size), rinvfltres, sizeof(double) * size);
346  memcpy(ifourdata + (m * size), iminvfltres, sizeof(double) * size);
347 
348  rdataptr += bw - (size - m);
349  idataptr += bw - (size - m);
350  }
351  } else { // real data
352  for (int m = bw + 1; m < size; ++m) {
353  memcpy(rfourdata + (m * size), rfourdata + ((size - m) * size), sizeof(double) * size);
354  memcpy(ifourdata + (m * size), ifourdata + ((size - m) * size), sizeof(double) * size);
355 
356  for (int i = 0; i < size; ++i)
357  ifourdata[(m * size) + i] *= -1.0;
358  }
359  }
360 
361  // normalize
362  double normed_coeff = 1. / (sqrt(2. * M_PI));
363  for (int i = 0; i < 4 * bw * bw; ++i) {
364  rfourdata[i] *= normed_coeff;
365  ifourdata[i] *= normed_coeff;
366  }
367 
368  fftw_execute_split_dft(*inv_FFT_plan, ifourdata, rfourdata, idata, rdata);
369 }
370 
390 void FZTSemiFly(double* rdata, double* idata, double* rres, double* ires, const int bw, double* workspace,
391  DataFormat data_format, fftw_plan* DCT_plan, double* weights) {
392  int size = 2 * bw;
393 
394  double* r0 = workspace; // needs (2 * bw)
395  double* i0 = r0 + size; // needs (2 * bw)
396  double* pmls = i0 + size; // needs (2 * bw * bw)
397  double* scratchpad = pmls + (2 * bw * bw); // needs (4 * bw)
398 
399  double dsize = sqrt(2. * M_PI) / size;
400  // compute the m = 0 components
401  for (int i = 0; i < size; ++i) {
402  double tmpreal = 0.0;
403  double tmpimag = 0.0;
404 
405  for (int j = 0; j < size; ++j) {
406  tmpreal += rdata[(i * size) + j];
407  tmpimag += idata[(i * size) + j];
408  }
409 
410  // normalize
411  r0[i] = tmpreal * dsize;
412  i0[i] = tmpimag * dsize;
413  }
414 
415  // generate cosine series of pmls
416  GenerateCosPmlTable(bw, 0, pmls, scratchpad);
417 
418  // real part
419  DLTSemi(r0, bw, 0, rres, scratchpad, pmls, weights, DCT_plan);
420 
421  if (data_format == COMPLEX) // if data is complex, do imaginary part
422  DLTSemi(i0, bw, 0, ires, scratchpad, pmls, weights, DCT_plan);
423  else // otherwise set coefficients to zero
424  memset(ires, 0, sizeof(double) * size);
425 }
426 
454 void ConvOn2SphereSemiFly(double* rdata, double* idata, double* rfilter, double* ifilter, double* rres, double* ires,
455  const int bw, double* workspace) {
456  int size = 2 * bw;
457 
458  double* frres = workspace; // needs (bw * bw)
459  double* fires = frres + (bw * bw); // needs (bw * bw)
460  double* trres = fires + (bw * bw); // needs (bw * bw)
461  double* tires = trres + (bw * bw); // needs (bw * bw)
462  double* filtrres = tires + (bw * bw); // needs bw
463  double* filtires = filtrres + bw; // needs bw
464  double* scratchpad = filtires + bw; // needs (10 * bw^2) + (24 * bw)
465 
466  double* weights = (double*)malloc(sizeof(double) * 4 * bw);
467  GenerateWeightsForDLT(bw, weights);
468 
469  // Make DCT plans. Note that I will be using the GURU interface to execute these plans within the routines
470  fftw_plan DCT_plan = fftw_plan_r2r_1d(size, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE);
471  fftw_plan inv_DCT_plan = fftw_plan_r2r_1d(size, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE);
472 
473  // fftw "preamble"
474  // Note that FFT plan places the output in a transposed array
475  int rank = 1;
476  fftw_iodim dims[rank];
477  dims[0].n = size;
478  dims[0].is = 1;
479  dims[0].os = size;
480 
481  int howmany_rank = 1;
482  fftw_iodim howmany_dims[howmany_rank];
483  howmany_dims[0].n = size;
484  howmany_dims[0].is = size;
485  howmany_dims[0].os = 1;
486 
487  fftw_plan FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
488  workspace + (4 * bw * bw), FFTW_ESTIMATE);
489 
490  // Note that FFT plans assumes that I'm working with a transposed array, e.g. the inputs for a length 2*bw transform
491  // are placed every 2*bw apart, the output will be consecutive entries in the array
492 
493  rank = 1;
494  dims[0].n = size;
495  dims[0].is = size;
496  dims[0].os = 1;
497 
498  howmany_rank = 1;
499  howmany_dims[0].n = size;
500  howmany_dims[0].is = 1;
501  howmany_dims[0].os = size;
502 
503  fftw_plan inv_FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
504  workspace + (4 * bw * bw), FFTW_ESTIMATE);
505 
506  FSTSemiFly(rdata, idata, frres, fires, bw, scratchpad, 1, bw, &DCT_plan, &FFT_plan, weights);
507  FZTSemiFly(rfilter, ifilter, filtrres, filtires, bw, scratchpad, 1, &DCT_plan, weights);
508 
509  TransMult(frres, fires, filtrres, filtires, trres, tires, bw);
510 
511  InvFSTSemiFly(trres, tires, rres, ires, bw, scratchpad, 1, bw, &inv_DCT_plan, &inv_FFT_plan);
512 
513  free(weights);
514 
515  fftw_destroy_plan(inv_FFT_plan);
516  fftw_destroy_plan(FFT_plan);
517  fftw_destroy_plan(inv_DCT_plan);
518  fftw_destroy_plan(DCT_plan);
519 }
TransposeCosPmlTable
void TransposeCosPmlTable(const int, const int, double *, double *)
Definition: cospml.c:301
FST_semi_fly.h
TransMult
void TransMult(double *, double *, double *, double *, double *, double *, const int)
Multiplies harmonic coefficients of a function and a filter.
Definition: util.c:68
weights.h
GenerateCosPmlTable
void GenerateCosPmlTable(const int, const int, double *, double *)
Definition: cospml.c:161
chebyshev_nodes.h
IndexOfHarmonicCoeff
int IndexOfHarmonicCoeff(const int, const int, const int)
Gives the position of the coefficient f-hat(m,l) in the one-row array.
Definition: util.c:42
COMPLEX
Definition: util.h:11
naive.h
FZTSemiFly
void FZTSemiFly(double *rdata, double *idata, double *rres, double *ires, const int bw, double *workspace, DataFormat data_format, fftw_plan *DCT_plan, double *weights)
Computes zonal harmonic transform using seminaive algorithm.
Definition: FST_semi_fly.c:390
InvDLTSemi
void InvDLTSemi(double *, const int, const int, double *, double *, double *, double *, fftw_plan *)
Computes the inverse Legendre transform using the transposed seminaive algorithm.
Definition: seminaive.c:56
InvFSTSemiFly
void InvFSTSemiFly(double *rcoeffs, double *icoeffs, double *rdata, double *idata, const int bw, double *workspace, DataFormat data_format, const int cutoff, fftw_plan *inv_DCT_plan, fftw_plan *inv_FFT_plan)
Computes inverse spherical harmonic transform.
Definition: FST_semi_fly.c:235
InvDLTNaive
void InvDLTNaive(double *, const int, const int, double *, double *)
The inverse discrete Legendre transform.
Definition: naive.c:78
GenerateWeightsForDLT
void GenerateWeightsForDLT(const int, double *)
Generates weights for both even and odd order Legendre transforms for a given bandwidth.
Definition: weights.c:32
ConvOn2SphereSemiFly
void ConvOn2SphereSemiFly(double *rdata, double *idata, double *rfilter, double *ifilter, double *rres, double *ires, const int bw, double *workspace)
Convolves two functions defined on the 2-sphere.
Definition: FST_semi_fly.c:454
REAL
Definition: util.h:12
AcosOfChebyshevNodes
void AcosOfChebyshevNodes(const int, double *)
Generates an array of the angular arguments of n Chebyshev nodes.
Definition: chebyshev_nodes.c:16
DLTNaive
void DLTNaive(double *, const int, const int, double *, double *, double *, double *)
The naive forward discrete Legendre transform.
Definition: naive.c:34
DataFormat
DataFormat
Result data format for FST functions.
Definition: util.h:10
TableSize
int TableSize(const int, const int)
Computes the number of non-zero entries of coeffs in a table for seminaive transform.
Definition: cospml.c:39
cospml.h
GeneratePmlTable
void GeneratePmlTable(const int, const int, double *, double *)
Generates all of the Pmls for a specified value of m.
Definition: pml.c:41
DLTSemi
void DLTSemi(double *, const int, const int, double *, double *, double *, double *, fftw_plan *)
Computes the Legendre transform of data using the seminaive algorithm.
Definition: seminaive.c:153
FSTSemiFly
void FSTSemiFly(double *rdata, double *idata, double *rcoeffs, double *icoeffs, const int bw, double *workspace, DataFormat data_format, const int cutoff, fftw_plan *DCT_plan, fftw_plan *FFT_plan, double *weights)
Computes spherical harmonic transform using the seminaive and naive algorithms.
Definition: FST_semi_fly.c:67
pml.h
util.h
seminaive.h