import * as gc from "./gc.js";
import * as utils from "./utils.js";
/**
* Wrapper for the PCA results on the Wasm heap, typically created by {@linkcode runPca}.
* @hideconstructor
*/
export class RunPcaResults {
    // Identifier handed to `gc.release()` when this wrapper is freed.
    #id;
    // Raw results object; set to `null` once `free()` has been called.
    #results;

    /**
     * @param id - Identifier passed to `gc.release()` on {@linkcode RunPcaResults#free free}.
     * @param raw - Raw results object that every accessor forwards to.
     * @param {boolean} [filled=true] - Accepted but not read by this class.
     */
    constructor(id, raw, filled = true) {
        this.#id = id;
        this.#results = raw;
    }

    /**
     * Fetch the principal component scores.
     *
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @return {Float64Array|Float64WasmArray} Array containing the principal components for all cells.
     * This should be treated as a column-major array where the rows are the PCs and columns are the cells.
     */
    principalComponents(options = {}) {
        const { copy = true, ...extra } = options;
        utils.checkOtherOptions(extra);
        const onHeap = this.#results.components();
        return utils.possibleCopy(onHeap, copy);
    }

    /**
     * Fetch the rotation matrix.
     *
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @return {Float64Array|Float64WasmArray} Array containing the rotation matrix.
     * This should be treated as a column-major array where the rows are the genes and the columns are the PCs.
     */
    rotation(options = {}) {
        const { copy = true, ...extra } = options;
        utils.checkOtherOptions(extra);
        // NOTE(review): this reads `pcs()` whereas principalComponents() reads `components()`;
        // confirm that `pcs()` is the binding that yields the rotation matrix.
        const onHeap = this.#results.pcs();
        return utils.possibleCopy(onHeap, copy);
    }

    /**
     * Fetch the variance explained by each PC.
     *
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @return {Float64Array|Float64WasmArray} Array containing the variance explained for each requested PC.
     */
    varianceExplained(options = {}) {
        const { copy = true, ...extra } = options;
        utils.checkOtherOptions(extra);
        const onHeap = this.#results.variance_explained();
        return utils.possibleCopy(onHeap, copy);
    }

    /**
     * @return {number} The total variance in the dataset,
     * typically used with {@linkcode RunPcaResults#varianceExplained varianceExplained} to compute the proportion of variance explained.
     */
    totalVariance() {
        const raw = this.#results;
        return raw.total_variance();
    }

    /**
     * @return {number} Number of PCs available in these results.
     */
    numberOfPCs() {
        const raw = this.#results;
        return raw.num_pcs();
    }

    /**
     * @return {number} Number of cells used to compute these results.
     */
    numberOfCells() {
        const raw = this.#results;
        return raw.num_cells();
    }

    /**
     * Frees the memory allocated on the Wasm heap for this object.
     * This invalidates this object and all references to it.
     * Calling this method more than once is harmless; only the first call releases anything.
     */
    free() {
        // Already released: nothing left to do.
        if (this.#results === null) {
            return;
        }
        gc.release(this.#id);
        this.#results = null;
    }
}
/**
* Run a principal components analysis on the log-expression matrix.
* This is usually done on a subset of features, and possibly with some kind of blocking on a per-cell batch factor.
*
* @param {ScranMatrix} x - The log-normalized expression matrix.
* @param {object} [options={}] - Optional parameters.
* @param {?(Uint8WasmArray|Array|TypedArray)} [options.features=null] - Array specifying which features should be retained (e.g., HVGs).
* This should be of length equal to the number of rows in `x`; elements should be `true` to retain each row.
* If `null`, all features are retained.
* @param {number} [options.numberOfPCs=25] - Number of top principal components to compute.
* @param {boolean} [options.scale=false] - Whether to scale each feature to unit variance.
* @param {?(Int32WasmArray|Array|TypedArray)} [options.block=null] - Array containing the block assignment for each cell.
* This should have length equal to the number of cells and contain all values from 0 to `n - 1` at least once, where `n` is the number of blocks.
* This is used to segregate cells so that the PCA can be adjusted for differences between blocks, see `blockMethod`.
* Alternatively, this may be `null`, in which case all cells are assumed to be in the same block.
* @param {string} [options.blockMethod="regress"] - How to adjust the PCA for the blocking factor.
*
* - `"regress"` will regress out the factor, effectively performing a PCA on the residuals.
* This only makes sense in limited cases, e.g., inter-block differences are linear and the composition of each block is the same.
* - `"project"` will compute the rotation vectors from the residuals but will project the cells onto the PC space.
* This focuses the PCA on within-block variance while avoiding any assumptions about the nature of the inter-block differences.
* - `"none"` will ignore any blocking factor, i.e., as if `block = null`.
* Any inter-block differences will both contribute to the determination of the rotation vectors and also be preserved in the PC space.
*
* This option is only used if `block` is not `null`.
* @param {string} [options.blockWeightPolicy="variable"] - The policy for weighting each block so that it contributes the same number of effective observations to the covariance matrix.
*
* - `"variable"` ensures that, past a certain size (default 1000 cells), larger blocks do not dominate the definition of the PC space.
* Below the threshold size, blocks are weighted in proportion to their size to reduce the influence of very small blocks.
* - `"equal"` uses the same weight for each block, regardless of size.
* - `"none"` does not apply any extra weighting, i.e., the contribution of each block is proportional to its size.
*
* This option is only used if `block` is not `null`.
* @param {?boolean} [options.realizeMatrix=null] - Whether to realize the submatrix into its own memory.
* This is more efficient but consumes more memory.
* Defaults to true if `features` is supplied, otherwise it is false.
* @param {?number} [options.numberOfThreads=null] - Number of threads to use.
* If `null`, defaults to {@linkcode maximumThreads}.
*
* @return {RunPcaResults} Object containing the computed PCs.
*/
export function runPca(x, options = {}) {
    let {
        features = null,
        numberOfPCs = 25,
        scale = false,
        block = null,
        blockMethod = "regress",
        blockWeightPolicy = "variable",
        realizeMatrix = null,
        numberOfThreads = null,
        ...others
    } = options;
    utils.checkOtherOptions(others);

    // Validate both string-valued options up front, before anything is placed on the Wasm heap,
    // so that a typo is reported consistently instead of being forwarded to the Wasm module.
    utils.matchOptions("blockMethod", blockMethod, ["none", "regress", "project"]);
    utils.matchOptions("blockWeightPolicy", blockWeightPolicy, ["variable", "equal", "none"]);
    const nthreads = utils.chooseNumberOfThreads(numberOfThreads);

    // Declared outside the `try` so that the `catch`/`finally` clauses can release them.
    let feat_data;
    let block_data;
    let output;

    try {
        // An offset of 0 together with `use_feat = false` is passed when no feature filter is given.
        let use_feat = false;
        let fptr = 0;
        if (features !== null) {
            feat_data = utils.wasmifyArray(features, "Uint8WasmArray");
            if (feat_data.length !== x.numberOfRows()) {
                throw new Error("length of 'features' should be equal to number of rows in 'x'");
            }
            use_feat = true;
            fptr = feat_data.offset;
        }

        // Unless the caller decided explicitly, realize the matrix only when a feature filter is in use.
        if (realizeMatrix === null) {
            realizeMatrix = use_feat;
        }

        // Avoid asking for more PCs than is possible.
        // Remember that centering removes one df, so we subtract 1 from the dimensions.
        numberOfPCs = Math.min(numberOfPCs, x.numberOfRows() - 1, x.numberOfColumns() - 1);

        // Blocking is skipped entirely for `blockMethod = "none"`, exactly as if `block` were `null`.
        let use_block = false;
        let bptr = 0;
        let comp_as_resid = false;
        if (block !== null && blockMethod !== "none") {
            block_data = utils.wasmifyArray(block, "Int32WasmArray");
            if (block_data.length !== x.numberOfColumns()) {
                throw new Error("length of 'block' should be equal to the number of columns in 'x'");
            }
            use_block = true;
            bptr = block_data.offset;
            // Only "regress" sets this flag; "project" leaves it false.
            comp_as_resid = (blockMethod === "regress");
        }

        output = gc.call(
            module => module.run_pca(
                x.matrix,
                numberOfPCs,
                use_feat,
                fptr,
                scale,
                use_block,
                bptr,
                blockWeightPolicy,
                comp_as_resid,
                realizeMatrix,
                nthreads
            ),
            RunPcaResults
        );
    } catch (e) {
        // `output` is only assigned by the last statement of the `try` block, so this is purely defensive.
        utils.free(output);
        throw e;
    } finally {
        // The temporary arrays are no longer needed, whether or not the call succeeded.
        utils.free(feat_data);
        utils.free(block_data);
    }

    return output;
}