62 int main(
int argc,
char** argv) {
64 fprintf(stdout,
"Usage: test_s2_semi_memo bw loops [error_file]\n");
68 int bw = atoi(argv[1]);
69 int loops = atoi(argv[2]);
77 double* workspace = (
double*)malloc(
sizeof(
double) * ((8 * (bw * bw)) + (16 * bw)));
78 double* seminaive_naive_tablespace = (
double*)malloc(
80 double* trans_seminaive_naive_tablespace = (
double*)malloc(
84 fprintf(stdout,
"Generating seminaive_naive tables...\n");
87 fprintf(stdout,
"Generating trans_seminaive_naive tables...\n");
89 seminaive_naive_table, bw, cutoff, trans_seminaive_naive_tablespace, workspace);
95 double* weights = (
double*)malloc(
sizeof(
double) * 4 * bw);
96 double* rdata = (
double*)malloc(
sizeof(
double) * (size * size));
97 double* idata = (
double*)malloc(
sizeof(
double) * (size * size));
100 fftw_plan DCT_plan = fftw_plan_r2r_1d(2 * bw, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE);
101 fftw_plan inv_DCT_plan = fftw_plan_r2r_1d(2 * bw, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE);
106 fftw_iodim dims[rank];
111 int howmany_rank = 1;
112 fftw_iodim howmany_dims[howmany_rank];
113 howmany_dims[0].n = 2 * bw;
114 howmany_dims[0].is = 2 * bw;
115 howmany_dims[0].os = 1;
117 fftw_plan FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
118 workspace + (4 * bw * bw), FFTW_ESTIMATE);
129 howmany_dims[0].n = 2 * bw;
130 howmany_dims[0].is = 1;
131 howmany_dims[0].os = 2 * bw;
133 fftw_plan inv_FFT_plan = fftw_plan_guru_split_dft(rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace,
134 workspace + (4 * bw * bw), FFTW_ESTIMATE);
138 double* rcoeffs = (
double*)malloc(
sizeof(
double) * (bw * bw));
139 double* icoeffs = (
double*)malloc(
sizeof(
double) * (bw * bw));
141 double* rresult = (
double*)malloc(
sizeof(
double) * (bw * bw));
142 double* iresult = (
double*)malloc(
sizeof(
double) * (bw * bw));
144 double* relerror = (
double*)malloc(
sizeof(
double) * loops);
145 double* curmax = (
double*)malloc(
sizeof(
double) * loops);
147 double fwd_time = 0.0;
148 double inv_time = 0.0;
149 double total_error = 0.0;
150 double total_relerror = 0.0;
152 fprintf(stdout,
"about to enter loop\n\n");
153 for (
int i = 0; i < loops; ++i) {
156 for (
int m = 0; m < bw; ++m)
157 for (
int l = m; l < bw; ++l) {
158 double x = 2.0 * (drand48() - 0.5);
159 double y = 2.0 * (drand48() - 0.5);
166 rcoeffs[index] = pow(-1.0, m) * x;
167 icoeffs[index] = pow(-1.0, m + 1) * y;
171 for (
int m = 0; m < bw; ++m)
176 InvFSTSemiMemo(rcoeffs, icoeffs, rdata, idata, bw, trans_seminaive_naive_table, workspace, data_format, cutoff,
177 &inv_DCT_plan, &inv_FFT_plan);
179 double duration =
csecond() - time_start;
180 inv_time += duration;
181 fprintf(stdout,
"inv time \t = %.4e\n", duration);
185 FSTSemiMemo(rdata, idata, rresult, iresult, bw, seminaive_naive_table, workspace, data_format, cutoff,
186 &DCT_plan, &FFT_plan, weights);
188 duration =
csecond() - time_start;
189 fwd_time += duration;
190 fprintf(stdout,
"forward time \t = %.4e\n", duration);
196 for (
int j = 0; j < (bw * bw); ++j) {
197 double rtmp = rresult[j] - rcoeffs[j];
198 double itmp = iresult[j] - icoeffs[j];
199 double origmag = sqrt((rcoeffs[j] * rcoeffs[j]) + (icoeffs[j] * icoeffs[j]));
200 double tmpmag = sqrt((rtmp * rtmp) + (itmp * itmp));
201 relerror[i] = fmax(relerror[i], tmpmag / (origmag + pow(10.0, -50.0)));
202 curmax[i] = fmax(curmax[i], tmpmag);
205 fprintf(stdout,
"r-o error\t = %.12f\n", curmax[i]);
206 fprintf(stdout,
"(r-o)/o error\t = %.12f\n\n", relerror[i]);
208 total_error += curmax[i];
209 total_relerror += relerror[i];
212 double avg_error = total_error / loops;
213 double avg_relerror = total_relerror / loops;
214 double stddev_error = 0.0;
215 double stddev_relerror = 0.0;
217 for (
int i = 0; i < loops; ++i) {
218 stddev_error += pow(avg_error - curmax[i], 2.0);
219 stddev_relerror += pow(avg_relerror - relerror[i], 2.0);
223 stddev_error = sqrt(stddev_error / (loops - 1));
224 stddev_relerror = sqrt(stddev_relerror / (loops - 1));
227 fprintf(stdout,
"Program: test_s2_semi_memo\n");
228 fprintf(stdout,
"Bandwidth = %d\n", bw);
230 const char* timing_object =
"cpu";
232 timing_object =
"wall";
235 double total_time = inv_time + fwd_time;
237 fprintf(stdout,
"Total elapsed %s time :\t\t %.4e seconds.\n", timing_object, total_time);
238 fprintf(stdout,
"Average %s forward per iteration:\t %.4e seconds.\n", timing_object, fwd_time / loops);
239 fprintf(stdout,
"Average %s inverse per iteration:\t %.4e seconds.\n", timing_object, inv_time / loops);
241 fprintf(stdout,
"Average r-o error:\t\t %.4e\t", total_error / loops);
242 fprintf(stdout,
"std dev: %.4e\n", stddev_error);
243 fprintf(stdout,
"Average (r-o)/o error:\t\t %.4e\t", total_relerror / loops);
244 fprintf(stdout,
"std dev: %.4e\n\n", stddev_relerror);
247 FILE* errorsfp = fopen(argv[3],
"w");
249 for (
int m = 0; m < bw; ++m) {
250 for (
int l = m; l < bw; ++l) {
252 fprintf(errorsfp,
"index = %d\t m = %d\tl = %d\t%.10f %.10f\n", index, m, l,
253 fabs(rcoeffs[index] - rresult[index]), fabs(icoeffs[index] - iresult[index]));
256 fprintf(errorsfp,
"index = %d\t m = %d\tl = %d\t%.10f %.10f\n", index, -m, l,
257 fabs(rcoeffs[index] - rresult[index]), fabs(icoeffs[index] - iresult[index]));
264 fftw_destroy_plan(inv_FFT_plan);
265 fftw_destroy_plan(FFT_plan);
266 fftw_destroy_plan(inv_DCT_plan);
267 fftw_destroy_plan(DCT_plan);
270 free(trans_seminaive_naive_table);
271 free(seminaive_naive_table);
275 free(trans_seminaive_naive_tablespace);
276 free(seminaive_naive_tablespace);