runPca.js

import * as gc from "./gc.js";
import * as utils from "./utils.js";

/** 
 * Wrapper for the PCA results on the Wasm heap, typically created by {@linkcode runPca}.
 * @hideconstructor
 */
export class RunPcaResults {
    #id;
    #results;

    #filledComponents;
    #filledVariances;
    #filledTotalVariance;

    constructor(id, raw, filled = true) {
        this.#id = id;
        this.#results = raw;

        this.#filledComponents = filled;
        this.#filledVariances = filled;
        this.#filledTotalVariance = filled;

        return;
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @param {boolean} [options.fillable=false] - Whether to return a fillable array, to write to this object.
     * If `true`, this method automatically sets `copy = false` if `copy` was previously true.
     * If `false` and the array was not previously filled, `null` is returned.
     * 
     * @return {?(Float64Array|Float64Wasmarray)} Array containing the principal components for all cells.
     * This should be treated as a column-major array where the rows are the PCs and columns are the cells.
     * Alternatively `null`, if `fillable = false` and the array was not already filled.
     */
    principalComponents({ copy = true, fillable = false } = {}) {
        return utils.checkFillness(
            fillable, 
            copy, 
            this.#filledComponents, 
            () => { this.#filledComponents = true }, 
            COPY => utils.possibleCopy(this.#results.pcs(), COPY)
        );
    }

    /**
     * @param {number} total - Total variance in the dataset,
     * equal to the sum of the variances across all PCs (including those that were not explicitly computed).
     *
     * @return Total varaiance in this object is set to `total`.
     * This is primarily intended for use with {@linkcode emptyRunPcaResults}.
     */
    setTotalVariance(total) {
        if (!this.#filledTotalVariance) {
            this.#filledTotalVariance = true;
        }
        this.#results.set_total_variance(total);
        return;
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @param {boolean} [options.fillable=false] - Whether to return a fillable array, to write to this object.
     * If `true`, this method automatically sets `copy = false` if `copy` was previously true.
     * If `false` and the array was not previously filled, `null` is returned.
     * 
     * @return {?(Float64Array|Float64WasmArray)} Array containing the variance explained for each requested PC.
     * Alternatively `null`, if `fillable = false` and the array was not already filled.
     */
    varianceExplained({ copy = true, fillable = false } = {}) {
        return utils.checkFillness(
            fillable, 
            copy, 
            this.#filledVariances, 
            () => { this.#filledVariances = true }, 
            COPY => utils.possibleCopy(this.#results.variance_explained(), COPY)
        );
    }

    /**
     * @return {?number} The total variance in the dataset,
     * typically used with {@linkcode PCAResults#varianceExplained varianceExplained} to compute the proportion of variance explained.
     * Alternatively `null`, if this value has not been filled by {@linkcode ClusterKmeansResults#setTotalVariance setTotalVariance}.
     */
    totalVariance() {
        if (!this.#filledTotalVariance) {
            return null;
        } else {
            return this.#results.total_variance();
        }
    }

    /**
     * @return {number} Number of PCs available in these results.
     */
    numberOfPCs() {
        return this.#results.num_pcs();
    }

    /**
     * @return {number} Number of cells used to compute these results.
     */
    numberOfCells() {
        // TODO: switch to this.#results.num_cells();
        return this.principalComponents({ copy: false }).length / this.numberOfPCs();

    }

    /**
     * @return Frees the memory allocated on the Wasm heap for this object.
     * This invalidates this object and all references to it.
     */
    free() {
        if (this.#results !== null) {
            gc.release(this.#id);
            this.#results = null;
        }
        return;
    }
}

/**
 * Run a principal components analysis on the log-expression matrix.
 * This is usually done on a subset of features, and possibly with some kind of blocking on a per-cell batch factor.
 *
 * @param {ScranMatrix} x - The log-normalized expression matrix.
 * @param {object} [options={}] - Optional parameters. 
 * @param {?(Uint8WasmArray|Array|TypedArray)} [options.features=null] - Array specifying which features should be retained (e.g., HVGs).
 * This should be of length equal to the number of rows in `x`; elements should be `true` to retain each row.
 * If `null`, all features are retained.
 * @param {number} [options.numberOfPCs=25] - Number of top principal components to compute.
 * @param {boolean} [options.scale=false] - Whether to scale each feature to unit variance.
 * @param {?(Int32WasmArray|Array|TypedArray)} [options.block=null] - Array containing the block assignment for each cell.
 * This should have length equal to the number of cells and contain all values from 0 to `n - 1` at least once, where `n` is the number of blocks.
 * This is used to segregate cells in order to compute filters within each block.
 * Alternatively, this may be `null`, in which case all cells are assumed to be in the same block.
 * @param {string} [options.blockMethod="regress"] - How to adjust the PCA for the blocking factor.
 *
 * - `"regress"` will regress out the factor, effectively performing a PCA on the residuals.
 *   This only makes sense in limited cases, e.g., inter-block differences are linear and the composition of each block is the same.
 * - `"project"` will compute the rotation vectors from the residuals but will project the cells onto the PC space.
 *   This focuses the PCA on within-block variance while avoiding any assumptions about the nature of the inter-block differences.
 * - `"none"` will ignore any blocking factor, i.e., as if `block = null`.
 *   Any inter-block differences will both contribute to the determination of the rotation vectors and also be preserved in the PC space.
 *
 * This option is only used if `block` is not `null`.
 * @param {bool} [options.blockWeights=true] Whether to weight each block so that it contributes the same number of effective observations to the covariance matrix.
 * This ensures that, past a certain size (default 1000 cells), larger blocks do not dominate the definition of the PC space.
 * Only used if `block` is not `null`.
 * @param {?number} [options.numberOfThreads=null] - Number of threads to use.
 * If `null`, defaults to {@linkcode maximumThreads}.
 *
 * @return {RunPcaResults} Object containing the computed PCs.
 */
export function runPca(x, { features = null, numberOfPCs = 25, scale = false, block = null, blockMethod = "regress", blockWeights = true, numberOfThreads = null } = {}) {
    var feat_data;
    var block_data;
    var output;

    utils.matchOptions("blockMethod", blockMethod, ["none", "regress", "project" ]);
    let nthreads = utils.chooseNumberOfThreads(numberOfThreads);

    try {
        var use_feat = false;
        var fptr = 0;

        if (features !== null) {
            feat_data = utils.wasmifyArray(features, "Uint8WasmArray");
            if (feat_data.length != x.numberOfRows()) {
                throw new Error("length of 'features' should be equal to number of rows in 'x'");
            }
            use_feat = true;
            fptr = feat_data.offset;
        }

        // Avoid asking for more PCs than is possible.
        // Remember that centering removes one df, so we subtract 1 from the dimensions.
        numberOfPCs = Math.min(numberOfPCs, x.numberOfRows() - 1, x.numberOfColumns() - 1);

        if (block === null || (blockMethod == 'none' && !blockWeights)) {
            output = gc.call(
                module => module.run_pca(x.matrix, numberOfPCs, use_feat, fptr, scale, nthreads),
                RunPcaResults
            );

        } else {
            block_data = utils.wasmifyArray(block, "Int32WasmArray");
            if (block_data.length != x.numberOfColumns()) {
                throw new Error("length of 'block' should be equal to the number of columns in 'x'");
            }

            if (blockMethod == "regress") {
                output = gc.call(
                    module => module.run_residual_pca(x.matrix, numberOfPCs, use_feat, fptr, scale, block_data.offset, blockWeights, nthreads),
                    RunPcaResults
                );
            } else {
                output = gc.call(
                    module => module.run_multibatch_pca(x.matrix, numberOfPCs, use_feat, fptr, scale, block_data.offset, (blockMethod == "project"), blockWeights, nthreads),
                    RunPcaResults
                );
            }
        }

    } catch (e) {
        utils.free(output);
        throw e;

    } finally {
        utils.free(feat_data);
        utils.free(block_data);
    }

    return output;
}