// S2kit/html/cospml_8c_source.html

#include "s2kit/cospml.h"


#include <math.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>


#include <fftw3.h>


#include "s2kit/chebyshev_nodes.h"

#include "s2kit/pml.h"

#include "s2kit/pmm.h"


#include "util/l2_norms.h"

#include "util/vector_funcs.h"


// Bandwidths up to and including this value take the closed-form branch of
// `Spharmonic_TableSize()`; larger bandwidths are sized by explicit summation.
const int BW_LIMIT = 512;

/*
    Number of entries in the zero-stripped cosine-series table of order `m`
    at bandwidth `bw` (the size expected by `GenerateCosPmlTable()` for its
    `tablespace` argument).

    The count is the sum of two parity-dependent terms minus a term that
    depends only on `m / 2`.
 */
int TableSize(const int m, const int bw) {
    const int half_bw = bw / 2;
    const int half_m = m / 2;
    const int order_term = half_m * (half_m + 1);

    if (bw % 2) {
        // odd bandwidth: the parity term follows (m + 1)
        const int parity = (m + 1) % 2;
        return half_bw * (half_bw + 1) + parity * (half_bw + 1) - order_term;
    }

    // even bandwidth: the parity term follows m
    const int parity = m % 2;
    const int base = half_bw - parity;
    return base * (base + 1) + parity * half_bw - order_term;
}


/*
    Amount of space to reserve for the complete set of cosine-series tables
    (all orders below `bw`).

    Bandwidths up to `BW_LIMIT` use a closed-form cubic expression; larger
    bandwidths fall back to summing the per-order sizes through
    `Reduced_SpharmonicTableSize(bw, bw)`.
 */
int Spharmonic_TableSize(const int bw) {
    // TODO check bw > 512 (~750 ok?)
    if (bw > BW_LIMIT) {
        return Reduced_SpharmonicTableSize(bw, bw);
    }

    const int cubic = 4 * bw * bw * bw;
    const int quadratic = 6 * bw * bw;
    const int linear = 8 * bw;

    return ((cubic + quadratic - linear) / 24) + bw;
}


/*
    A "reduced" version of `Spharmonic_TableSize(bw)`.

    Returns the amount of space necessary to fill out a spharmonic table
    when one is interested in using it only for orders up to (but NOT
    including) order `m`, i.e. the sum of `TableSize(i, bw)` for
    `i = 0 .. m-1`.

    Intended for the hybrid algorithm's call of the semi-naive algorithm,
    which does not need the full table, in order to cut down on memory
    usage.

    The transposed table is exactly the same size.
 */
int Reduced_SpharmonicTableSize(const int bw, const int m) {
    // TODO optimize? or just use Spharmonic_TableSize? (say no to economy)

    int total = 0;
    int order = 0;

    while (order < m) {
        total += TableSize(order, bw);
        ++order;
    }

    return total;
}


/*
    Computes the location of the first coefficient of Pml in an array
    containing the cosine series coefficients of Pml or Gml functions.

    Assumes the table was generated by `GenerateCosPmlTable()`.

    m - order of the table;
    l - degree whose first coefficient is wanted.
 */
int TableOffset(int m, int l) {
    // for odd orders both indices are shifted down by one before counting
    const int shift = (m % 2) ? 1 : 0;
    const int order = m - shift;
    const int degree = l - shift;

    const int half_order = order / 2;
    const int half_degree = degree / 2;

    int offset = (half_degree * (half_degree + 1)) - (half_order * (half_order + 1));

    if (degree % 2) {
        offset += half_degree + 1;
    }

    return offset;
}


/*
    Generates all of the cosine series for L2-normalized Pmls or Gmls for
    a specified value of `m`. The series are zero-striped, and all of the
    zeroes are removed before storing.

    bw         - bandwidth;
    m          - order;
    tablespace - double array of size `TableSize(m, bw)` receiving the table;
    workspace  - scratch array of `9 * bw` doubles (nine vectors of length `bw`).

    Let P(m,l,j) represent the j-th coefficient of the cosine series
    representation of Pml. The array stored in `tablespace` is organized
    as follows:

    P(m,m,0)    P(m,m,2)   ... P(m,m,m)
    P(m,m+1,1)  P(m,m+1,3) ... P(m,m+1,m+1)
    P(m,m+2,0)  P(m,m+2,2) ... P(m,m+2,m+2)

    etc. Appropriate modifications are made for `m` odd (Gml functions).

    NOTE that the Pmls or Gmls are sampled at bw-many points, and not at
    2*bw-many points. Because the seminaive transform of bandwidth `bw`
    expects input sampled at 2*bw-many points, the coefficients carry an
    extra sqrt(2) scaling factor to compensate.
 */
void GenerateCosPmlTable(const int bw, const int m, double* tablespace, double* workspace) {
    // carve the workspace into nine consecutive vectors of length `bw`
    double* older = workspace;            // function two degrees back
    double* newer = workspace + bw;       // function one degree back
    double* term_c = workspace + 2 * bw;  // L2_cn-scaled `older`
    double* product = workspace + 3 * bw; // `newer` combined with the nodes
    double* term_a = workspace + 4 * bw;  // L2_an-scaled `product`
    double* current = workspace + 5 * bw; // input buffer of the cosine transform
    double* nodes = workspace + 6 * bw;
    double* angles = workspace + 7 * bw;
    double* series = workspace + 8 * bw;  // output buffer of the cosine transform

    const int odd_order = (m % 2) != 0;
    const size_t vec_bytes = sizeof(double) * bw;

    // evaluation nodes and their arc cosines
    ChebyshevNodes(bw, nodes);
    AcosOfChebyshevNodes(bw, angles);

    // starting values of the recurrence
    for (int n = 0; n < bw; ++n) {
        older[n] = 0.0;
    }

    if (m != 0) {
        Pmm_L2(m, angles, bw, newer);
    } else {
        for (int n = 0; n < bw; ++n) {
            newer[n] = M_SQRT1_2; // sqrt(1/2)
        }
    }

    // odd orders: divide out one factor of sin
    if (odd_order) {
        for (int n = 0; n < bw; ++n) {
            newer[n] /= sin(angles[n]);
        }
    }

    // highest degree coefficient stored for the current function
    int degree = odd_order ? m - 1 : m;

    // cosine transform of the first function; the plan is reused in the loop below
    memcpy(current, newer, vec_bytes);
    fftw_plan dct = fftw_plan_r2r_1d(bw, current, series, FFTW_REDFT10, FFTW_ESTIMATE);
    fftw_execute(dct);

    const double scale = 1. / sqrt(bw);
    series[0] *= M_SQRT1_2;
    for (int n = 0; n < bw; ++n) {
        series[n] *= scale;
    }

    // store the even-indexed coefficients obtained so far
    double* out = tablespace;
    for (int j = 0; 2 * j <= degree; ++j) {
        out[j] = series[2 * j];
    }
    out += degree / 2 + 1;

    // generate the remaining functions
    const int remaining = bw - m - 1;
    for (int step = 0; step < remaining; ++step) {
        const int l = m + step;

        vec_mul(L2_cn(m, l), older, term_c, bw);
        vec_dot(newer, nodes, product, bw);
        vec_mul(L2_an(m, l), product, term_a, bw);
        vec_add(term_a, term_c, current, bw); // `current` now contains P(m,m+step+1)

        // cosine transform, scaled exactly like the first one
        fftw_execute(dct);
        series[0] *= M_SQRT1_2;
        for (int n = 0; n < bw; ++n) {
            series[n] *= scale;
        }

        ++degree;

        // the parity of the kept coefficients alternates from one function to the next
        const int first = (step % 2) ? 0 : 1;
        for (int j = first; j <= degree; j += 2) {
            out[j / 2] = series[j];
        }
        out += degree / 2 + 1;

        // shift the recurrence window by one degree
        memcpy(older, newer, vec_bytes);
        memcpy(newer, current, vec_bytes);
    }

    fftw_destroy_plan(dct);
}


/*
    Returns the number of non-zero coefficients in a row of the cospml
    table if it were really in matrix form. Helpful in transpose
    computations: think of the parameter `l` as the row of the
    corresponding matrix.

    Rows with `l < m` are empty.
*/
int RowSize(const int m, const int l) {
    if (l < m) {
        return 0;
    }

    // odd orders hold one coefficient fewer at odd `l`, hence the shift by one
    const int top = (m % 2) ? l - 1 : l;

    return (top / 2) + 1;
}


/*
    Returns the number of non-zero coefficients in a row of the
    transposition of the matrix representing a cospml table.
    Used for generating arrays for the inverse seminaive transform.

    Unlike `RowSize()`, the bandwidth `bw` is required, and the row of
    the transposed matrix has to be given explicitly.

    row - row of the transposed table;
    m   - order of the table;
    bw  - bandwidth.

    Rows at or beyond `bw` are empty; for odd `m` the row `bw - 1` is
    empty as well.
*/
int Transpose_RowSize(const int row, const int m, const int bw) {
    if (row >= bw) {
        return 0;
    }

    if (m % 2) {
        // odd order: derived from the even order below it, one row further down
        if (row == (bw - 1)) {
            return 0;
        }

        const int shifted = Transpose_RowSize(row + 1, m - 1, bw);

        return (row >= m) ? shifted : shifted - (row % 2);
    }

    // even order
    return (row <= m) ? (bw - m) / 2 : ((bw - row - 1) / 2) + 1;
}


/*
    The inverse transform is the transposition of the forward transform,
    so a transposed version of the table produced by
    `GenerateCosPmlTable()` is required. This function takes a
    `cos_pml_table` for a particular `bw` and `m` and loads `result` with
    a transposed, decimated version of it for use by an inverse seminaive
    transform computation.

    bw            - bandwidth;
    m             - order of the table;
    cos_pml_table - table generated by `GenerateCosPmlTable()`;
    result        - output, needs to be of size `TableSize(m, bw)`.
*/
void TransposeCosPmlTable(const int bw, const int m, double* cos_pml_table, double* result) {
    // Recall that `cos_pml_table` has had all the zeroes stripped out, and that
    // for odd `m` it really holds Gml functions, which affects the indexing a bit.
    // The number of non-zero entries is the same as in the non-transposed case.

    // the table of the highest order is copied unchanged
    if (m == bw - 1) {
        memcpy(result, cos_pml_table, sizeof(double) * TableSize(m, bw));
        return;
    }

    const int odd_order = (m % 2) != 0;

    // for odd `m` the last row is all zeroes and is not visited
    const int row_count = odd_order ? bw - 1 : bw;

    double* dst = result;

    for (int row = 0; row < row_count; ++row) {
        const double* src; // first value of this transposed row inside `cos_pml_table`
        int stride;        // initial jump between consecutive values of this row

        if (row > m) {
            // the highest degree coefficient of P(m,row) is the first one loaded
            // into the transposed row: add up the row sizes up to and including
            // that row, then step back one element (hence the start at -1)
            const int last = odd_order ? row + 1 : row;
            int offset = -1;
            for (int degree = m; degree <= last; ++degree) {
                offset += RowSize(m, degree);
            }

            src = cos_pml_table + offset;
            stride = row + 2;
        } else {
            const int start = (row % 2) ? (m / 2) + 1 + (row / 2) : (row / 2);

            src = cos_pml_table + start;
            stride = m + 2 - (m % 2) + (row % 2);
        }

        // length of this row in the transposed table
        const int count = Transpose_RowSize(row, m, bw);

        // walk down a column of `cos_pml_table`; the jump grows by 2 after every value
        int pos = 0;
        for (int col = 0; col < count; ++col) {
            dst[col] = src[pos];
            pos += stride;
            stride += 2;
        }

        dst += count;
    }
}


/*
    Returns all of the (cosine transforms of) Pmls and Gmls necessary
    to do a full spherical harmonic transform, i.e., it calls
    `GenerateCosPmlTable()` for each value of `m` less than `bw`, returning
    a table of tables: a (double**) pointing to an array of `bw` pointers,
    each of them pointing to a set of CosPml or CosGml values, which are
    the (decimated) cosine series representations of Pml (even `m`) or
    Gml (odd `m`) functions. See `GenerateCosPmlTable()` for further
    clarification.

    bw          - bandwidth of the problem;
    resultspace - need to allocate `Spharmonic_TableSize(bw)` for storing results;
    workspace   - scratch space handed to `GenerateCosPmlTable()`, which uses
                  `9 * bw` doubles of it (earlier documentation asked for `16*bw`).

    Returns the pointer array, allocated with `malloc()`; the caller releases
    it with `free()`. Returns NULL when `bw <= 0` or when the allocation fails.

    Note that `resultspace` holds the actual values the returned pointers
    refer to, so one should be careful about when it is OK to re-use this
    space. `workspace`, though, does not have any meaning after this
    function has finished executing.
*/
double** Spharmonic_Pml_Table(const int bw, double* resultspace, double* workspace) {
    // without at least one order there is no slot 0 to write to
    if (bw <= 0)
        return NULL;

    double** spharmonic_pml_table = (double**)malloc(sizeof(double*) * (size_t)bw);
    if (spharmonic_pml_table == NULL)
        return NULL;

    // traverse the orders, assigning a location in the `resultspace` to each pointer
    // and loading that location with CosPml or CosGml values
    double* next = resultspace;
    for (int i = 0; i < bw; ++i) {
        spharmonic_pml_table[i] = next;
        GenerateCosPmlTable(bw, i, next, workspace);

        next += TableSize(i, bw); // tables are packed back to back
    }

    return spharmonic_pml_table;
}


/*
    For the inverse semi-naive spharmonic transform, the "transpose"
    of the `spharmonic_pml_table` is needed. Care is required because the
    entries in the `spharmonic_pml_table` have been decimated, i.e.,
    the zeroes have been stripped out.

    spharmonic_pml_table - generated by `Spharmonic_Pml_Table()`;
    bw                   - bandwidth of the problem;
    resultspace          - need to allocate `Spharmonic_TableSize(bw)` for storing results.

    Returns an array of `bw` pointers into `resultspace`, allocated with
    `malloc()`; the caller releases it with `free()`. Returns NULL when
    `spharmonic_pml_table` is NULL, when `bw <= 0`, or when the allocation fails.
*/
double** Transpose_Spharmonic_Pml_Table(double** spharmonic_pml_table, const int bw, double* resultspace) {
    // nothing to transpose, or no slot 0 to write to
    if (spharmonic_pml_table == NULL || bw <= 0)
        return NULL;

    double** transpose_spharmonic_pml_table = (double**)malloc(sizeof(double*) * (size_t)bw);
    if (transpose_spharmonic_pml_table == NULL)
        return NULL;

    // load up the `transpose_spharmonic_pml_table` by transposing the tables in `spharmonic_pml_table`
    double* next = resultspace;
    for (int i = 0; i < bw; ++i) {
        transpose_spharmonic_pml_table[i] = next;
        TransposeCosPmlTable(bw, i, spharmonic_pml_table[i], next);

        next += TableSize(i, bw); // a transposed table has the same size as the original
    }

    return transpose_spharmonic_pml_table;
}


/*
    Returns the amount of space necessary to fill out a reduced naive
    table of Pmls when one is interested in using it only for orders `m`
    through `bw-1`: every order `i` in that range contributes
    `2 * bw * (bw - i)` entries.
*/
int Reduced_Naive_TableSize(const int bw, const int m) {
    int degrees = 0;
    int order = m;

    while (order < bw) {
        degrees += bw - order;
        ++order;
    }

    return 2 * bw * degrees;
}


/*
    Just like `Spharmonic_Pml_Table()`, except that it generates a table for
    use with the semi-naive and naive algorithms together.

    bw          - bandwidth of the problem;
    m           - the cutoff order, where to switch from semi-naive to naive
                  algorithms: orders below `m` get a cosine-series table from
                  `GenerateCosPmlTable()`, orders `m` through `bw-1` get a table
                  from `GeneratePmlTable()`. Values outside `[0, bw]` are
                  clamped to that range;
    resultspace - stores results, must be of size
        `Reduced_Naive_TableSize(bw, m) + Reduced_SpharmonicTableSize(bw, m)`;
    workspace   - scratch space passed on to both generators.

    Returns an array of `bw + 1` pointers into `resultspace`, allocated with
    `malloc()`; the caller releases it with `free()`. Entries `0 .. bw-1`
    point at the tables of the respective orders, entry `bw` points just
    past the last table. Returns NULL when `bw <= 0` or when the allocation
    fails.
*/
double** SemiNaive_Naive_Pml_Table(const int bw, const int m, double* resultspace, double* workspace) {
    if (bw <= 0)
        return NULL;

    // an array of pointers: size it by the pointer type, not by `double`
    double** seminaive_naive_table = (double**)malloc(sizeof(double*) * ((size_t)bw + 1));
    if (seminaive_naive_table == NULL)
        return NULL;

    // keep the cutoff inside the range of existing orders so that neither
    // loop can index outside the pointer array
    int cutoff = m;
    if (cutoff < 0)
        cutoff = 0;
    if (cutoff > bw)
        cutoff = bw;

    double* next = resultspace;

    // semi-naive portion: load up with CosPml and CosGml values
    for (int i = 0; i < cutoff; ++i) {
        seminaive_naive_table[i] = next;
        GenerateCosPmlTable(bw, i, next, workspace);

        next += TableSize(i, bw);
    }

    // naive portion: load up with Pml values. This starts at the cutoff itself,
    // so with a cutoff of 0 order 0 is a naive table as well (its slot is sized
    // as one), and with a cutoff of `bw` no naive table is generated at all
    // (the documented result space has no room for one).
    for (int i = cutoff; i < bw; ++i) {
        seminaive_naive_table[i] = next;
        GeneratePmlTable(bw, i, next, workspace);

        next += 2 * bw * (bw - i);
    }

    // one-past-the-end position; matches the entry formerly set for m == bw
    seminaive_naive_table[bw] = next;

    return seminaive_naive_table;
}


/*
    For the inverse seminaive_naive transform, the "transpose" of the
    seminaive_naive_pml_table is needed. Care is required because the
    entries in the seminaive portion have been decimated, i.e.,
    the zeroes have been stripped out.

    seminaive_naive_pml_table - generated by `SemiNaive_Naive_Pml_Table()`;
    bw          - bandwidth of the problem;
    m           - the cutoff order, where to switch from semi-naive to naive
                  algorithms: tables of orders below `m` are transposed with
                  `TransposeCosPmlTable()`, tables of orders `m` through `bw-1`
                  are generated afresh with `GeneratePmlTable()`. Values
                  outside `[0, bw]` are clamped to that range;
    resultspace - need to allocate
        `Reduced_Naive_TableSize(bw, m) + Reduced_SpharmonicTableSize(bw, m)`
        for storing results;
    workspace   - scratch space passed on to `GeneratePmlTable()`
                  (documented so far as being of size `16*bw`).

    Returns an array of `bw + 1` pointers into `resultspace`, allocated with
    `malloc()`; the caller releases it with `free()`. Entries `0 .. bw-1`
    point at the tables of the respective orders, entry `bw` points just
    past the last table. Returns NULL when `seminaive_naive_pml_table` is
    NULL, when `bw <= 0`, or when the allocation fails.
*/
double** Transpose_SemiNaive_Naive_Pml_Table(double** seminaive_naive_pml_table, const int bw, const int m,
                                             double* resultspace, double* workspace) {
    if (seminaive_naive_pml_table == NULL || bw <= 0)
        return NULL;

    double** trans_seminaive_naive_pml_table = (double**)malloc(sizeof(double*) * ((size_t)bw + 1));
    if (trans_seminaive_naive_pml_table == NULL)
        return NULL;

    // keep the cutoff inside the range of existing orders so that neither
    // loop can index outside the pointer arrays
    int cutoff = m;
    if (cutoff < 0)
        cutoff = 0;
    if (cutoff > bw)
        cutoff = bw;

    double* next = resultspace;

    // semi-naive portion: transpose the CosPml and CosGml tables of the input
    for (int i = 0; i < cutoff; ++i) {
        trans_seminaive_naive_pml_table[i] = next;
        TransposeCosPmlTable(bw, i, seminaive_naive_pml_table[i], next);

        next += TableSize(i, bw);
    }

    // naive portion: load up with Pml values. This starts at the cutoff itself,
    // so with a cutoff of 0 order 0 is a naive table as well (its slot is sized
    // as one), and with a cutoff of `bw` no naive table is generated at all
    // (the documented result space has no room for one).
    for (int i = cutoff; i < bw; ++i) {
        trans_seminaive_naive_pml_table[i] = next;
        GeneratePmlTable(bw, i, next, workspace);

        next += 2 * bw * (bw - i);
    }

    // one-past-the-end position; matches the entry formerly set for m == bw
    trans_seminaive_naive_pml_table[bw] = next;

    return trans_seminaive_naive_pml_table;
}