// steps/feature_set_enrichment.js

import * as scran from "scran.js";
import * as bioc from "bioconductor";
import * as gesel from "gesel";

import * as utils from "./utils/general.js";
import * as mutils from "./utils/markers.js";
import * as rutils from "../readers/index.js";
import * as inputs_module from "./inputs.js";
import * as filter_module from "./cell_filtering.js";
import * as norm_module from "./rna_normalization.js";
import * as markers_module from "./marker_detection.js";

// Identifier for this analysis step. The same string is the `feature_set_enrichment`
// property name that the documentation of FeatureSetEnrichmentState#compute refers to.
export const step_name = "feature_set_enrichment";

/********************************************
 ******** Internals for collections *********
 ********************************************/

/**
 * Internal helper that assembles feature set collections from **gesel** for a set of feature annotations,
 * and runs enrichment and per-cell scoring calculations against them.
 *
 * All derived data is held in a private cache with the following entries, populated by
 * {@linkcode FeatureSetManager#buildCollections buildCollections}:
 *
 * - `universe`: sorted Int32Array of row indices that matched at least one gene in any species.
 * - `sets`: object with `names`, `descriptions`, `sets` (row indices per set), `sizes` and `collections`.
 * - `collections`: object with `names`, `descriptions` and `species`.
 * - `mapping_to_sets`: array with one entry per row, listing the indices of the sets containing that row.
 * - `set_buffer`: scratch buffer allocated lazily by {@linkcode FeatureSetManager#computePerCellScores computePerCellScores}.
 */
class FeatureSetManager {
    #cache;

    constructor() {
        this.#cache = {};
    }

    /**
     * Release the scratch buffer (if any) and drop all cached collections.
     */
    free() {
        utils.freeCache(this.#cache.set_buffer);
        this.#cache = {};
    }

    /**
     * Placeholder for flushing remote caches; currently does nothing.
     */
    static flush() {
        // TODO: call a gesel flush() function.
        return;
    }

    /**
     * Retained for backwards compatibility only; emits a warning and ignores `fun`.
     *
     * @param {function} fun - Ignored.
     */
    static setDownload(fun) {
        console.warn("'FeatureSetState.setDownload' is a no-op, uses 'gesel.setReferenceDownload' and 'gesel.setGeneDownload' instead");
        return;
    }

    /**
     * Rebuild the cached universe, set details, collection details and gene-to-set mapping.
     *
     * @param {object} feats - Feature annotations exposing `rowNames()`, `column()` and `numberOfRows()`.
     * @param {Array} species - Species identifiers, each passed as-is to the **gesel** functions.
     * @param {?(string|number)} gene_id_column - Column of `feats` holding the gene identifiers.
     * If `null`, the row names are used instead.
     * @param {string} gene_id_type - Identifier type; lower-cased before being given to `gesel.searchGenes`.
     *
     * @return {Promise<undefined>} The cache is updated as a side effect.
     */
    async #prepare(feats, species, gene_id_column, gene_id_type) {
        let data_id_col;
        let active_species = species;
        if (gene_id_column == null) {
            data_id_col = feats.rowNames();
            if (data_id_col == null) {
                // If there truly is no annotation, then we avoid throwing,
                // and we just make the rest of this function a no-op.
                active_species = [];
            }
        } else {
            data_id_col = feats.column(gene_id_column);
        }

        const search_options = { types: [ gene_id_type.toLowerCase() ] };

        // To avoid repeated reallocations on array resizing, we create
        // preallocated arrays within each species and then do a single COMBINE
        // across species. We provide an initial element so that COMBINE works
        // correctly when there are no species.
        let collection_offset = 0;
        const all_collection_names = [[]];
        const all_collection_descriptions = [[]];
        const all_collection_species = [[]];

        let set_offset = 0;
        const all_set_names = [[]];
        const all_set_descriptions = [[]];
        const all_set_indices = [[]];
        const all_set_sizes = [new Int32Array];
        const all_set_collections = [new Int32Array];

        const mapped_genes = new Set;
        const remapped = new Array(feats.numberOfRows());
        for (let r = 0; r < remapped.length; r++) {
            remapped[r] = [];
        }

        for (const spec of active_species) {
            // Mapping our features to those in the gesel database.
            const gene_mapping = await gesel.searchGenes(spec, data_id_col, search_options);
            for (let i = 0; i < gene_mapping.length; i++) {
                if (gene_mapping[i].length > 0) {
                    mapped_genes.add(i);
                }
            }

            // Formatting the details for each set. This includes reindexing
            // the gesel gene IDs to refer to row indices of 'feats'.
            const all_sets2genes = await gesel.fetchGenesForAllSets(spec);
            const set_indices = gesel.reindexGenesForAllSets(gene_mapping, all_sets2genes);

            const all_sets = await gesel.fetchAllSets(spec);
            const nsets = all_sets.length;
            const set_names = new Array(nsets);
            const set_descriptions = new Array(nsets);
            const set_sizes = new Int32Array(nsets);
            const set_collections = new Int32Array(nsets);

            for (let i = 0; i < nsets; i++) {
                const current = all_sets[i];
                set_names[i] = current.name;
                set_descriptions[i] = current.description;
                set_collections[i] = current.collection + collection_offset; // offset effectively "namespaces" collections from different species.
                set_sizes[i] = set_indices[i].length;
            }

            all_set_names.push(set_names);
            all_set_descriptions.push(set_descriptions);
            all_set_indices.push(set_indices);
            all_set_sizes.push(set_sizes);
            all_set_collections.push(set_collections);

            // Updating the gene->set mapping for input features.
            const all_genes2sets = await gesel.fetchSetsForAllGenes(spec);
            const current_remapped = gesel.reindexSetsForAllGenes(gene_mapping, all_genes2sets);
            for (let i = 0; i < gene_mapping.length; i++) {
                const current = current_remapped[i];
                for (let j = 0; j < current.length; j++) {
                    current[j] += set_offset; // offset effectively "namespaces" sets from different species.
                }
                remapped[i].push(current);
            }

            // Sticking the collection details somewhere.
            const all_collections = await gesel.fetchAllCollections(spec);
            const ncollections = all_collections.length;
            const collection_names = new Array(ncollections);
            const collection_descriptions = new Array(ncollections);
            const collection_species = new Array(ncollections);

            for (let i = 0; i < ncollections; i++) {
                collection_names[i] = all_collections[i].title;
                collection_descriptions[i] = all_collections[i].description;
                collection_species[i] = spec;
            }

            all_collection_names.push(collection_names);
            all_collection_descriptions.push(collection_descriptions);
            all_collection_species.push(collection_species);

            set_offset += nsets;
            collection_offset += ncollections;
        }

        // The universe is the sorted set of rows that matched a gene in at least one species.
        this.#cache.universe = (new Int32Array(mapped_genes)).sort();

        this.#cache.sets = {
            names: bioc.COMBINE(all_set_names),
            descriptions: bioc.COMBINE(all_set_descriptions),
            sets: bioc.COMBINE(all_set_indices),
            sizes: bioc.COMBINE(all_set_sizes),
            collections: bioc.COMBINE(all_set_collections)
        };

        this.#cache.collections = {
            names: bioc.COMBINE(all_collection_names),
            descriptions: bioc.COMBINE(all_collection_descriptions),
            species: bioc.COMBINE(all_collection_species)
        };

        // Each row accumulated one array per species; collapse them into a single array per row.
        // With no species there is nothing to combine, so we fall back to empty arrays.
        if (active_species.length > 0) {
            for (let r = 0; r < remapped.length; r++) {
                remapped[r] = bioc.COMBINE(remapped[r]);
            }
        } else {
            for (let r = 0; r < remapped.length; r++) {
                remapped[r] = new Uint32Array;
            }
        }
        this.#cache.mapping_to_sets = remapped;

        return;
    }

    /**
     * Rebuild the collections if the relevant parameters differ from those used previously.
     *
     * A rebuild happens when `guess_ids` differs from `old_parameters.guess_ids`, or when `guess_ids` is `false`
     * and any of `gene_id_column`, `gene_id_type` or `species` differ from their values in `old_parameters`.
     *
     * @param {object} old_parameters - Parameters from the previous build; an empty object forces a rebuild.
     * @param {boolean} guess_ids - Whether to derive `species`, `gene_id_column` and `gene_id_type` from `guessfun()`.
     * @param {Array} species - Species identifiers, ignored if `guess_ids = true`.
     * @param {?(string|number)} gene_id_column - Identifier column, ignored if `guess_ids = true`.
     * @param {string} gene_id_type - Identifier type, ignored if `guess_ids = true`.
     * @param {function} annofun - Called without arguments to obtain the feature annotations; only invoked on a rebuild.
     * @param {function} guessfun - Called without arguments to obtain the identifier guesses; only invoked on a rebuild with `guess_ids = true`.
     *
     * @return {Promise<boolean>} Whether the collections were rebuilt.
     */
    async buildCollections(old_parameters, guess_ids, species, gene_id_column, gene_id_type, annofun, guessfun) {
        const needs_rebuild = (
            guess_ids !== old_parameters.guess_ids ||
            (
                !guess_ids &&
                (
                    old_parameters.gene_id_column !== gene_id_column ||
                    old_parameters.gene_id_type !== gene_id_type ||
                    utils.changedParameters(old_parameters.species, species)
                )
            )
        );
        if (!needs_rebuild) {
            return false;
        }

        let gene_id_column2 = gene_id_column;
        let gene_id_type2 = gene_id_type;
        let species2 = species;

        if (guess_ids) {
            const auto = configure_feature_parameters(guessfun());
            gene_id_column2 = auto.gene_id_column;
            gene_id_type2 = auto.gene_id_type;
            species2 = auto.species;
        }

        await this.#prepare(annofun(), species2, gene_id_column2, gene_id_type2);
        return true;
    }

    /**
     * @return {object} Cached collection details, with `names`, `descriptions` and `species`.
     */
    fetchCollectionDetails() {
        return this.#cache.collections;
    }

    /**
     * @return {object} Cached set details, with `names`, `descriptions`, `sizes` and `collections`.
     * The per-set row indices are deliberately omitted; see {@linkcode FeatureSetManager#fetchFeatureSetIndices fetchFeatureSetIndices}.
     */
    fetchSetDetails() {
        return {
            names: this.#cache.sets.names,
            descriptions: this.#cache.sets.descriptions,
            sizes: this.#cache.sets.sizes,
            collections: this.#cache.sets.collections
        };
    }

    /**
     * @return {number} Number of rows in the universe.
     */
    fetchUniverseSize() {
        return this.#cache.universe.length;
    }

    /**
     * Test each feature set for enrichment among the top markers of a group.
     *
     * @param {number} group - Group of interest, passed to the effect size getter of `markers`.
     * @param {string} effect_size - Name of the effect size getter on `markers`; `"delta_detected"` is translated to `"deltaDetected"`.
     * @param {string} summary - Name of the summary statistic, translated via `mutils.summaries2int`.
     * For `"min_rank"`, smaller values are treated as better; for everything else, larger values are better.
     * @param {object} markers - Marker results exposing one method per effect size.
     * @param {number} top_markers - Number of top markers used to define the selection threshold.
     *
     * @return {object} Object with `set_ids`, `counts` and `pvalues` for every set overlapping the selected markers,
     * ordered by increasing p-value, plus `num_markers`, the number of selected markers.
     */
    computeEnrichment(group, effect_size, summary, markers, top_markers) {
        const effect_method = (effect_size === "delta_detected" ? "deltaDetected" : effect_size);

        // Avoid picking down-regulated genes in the marker set.
        const min_threshold = (effect_method === "auc" ? 0.5 : 0);

        // Larger is better except for the 'min_rank' summary, where a smaller rank
        // indicates a stronger marker. Note that 'min_rank' is a summary, not an effect size.
        const use_largest = summary !== "min_rank";
        const sumidx = mutils.summaries2int[summary];

        const stats = markers[effect_method](group, { summary: sumidx, copy: false });
        const universe = this.#cache.universe;
        const curstats = bioc.SLICE(stats, universe);
        let threshold = scran.computeTopThreshold(curstats, top_markers, { largest: use_largest });

        // The lower bound only makes sense when larger statistics are better.
        if (use_largest && threshold < min_threshold) {
            threshold = min_threshold;
        }

        const mapping = this.#cache.mapping_to_sets;
        const in_set = [];
        curstats.forEach((x, i) => {
            const selected = (use_largest ? x >= threshold : x <= threshold);
            if (selected) {
                in_set.push(mapping[universe[i]]);
            }
        });

        const overlaps = gesel.countSetOverlaps(in_set);
        const noverlaps = overlaps.length;
        const set_ids = new Int32Array(noverlaps);
        const counts = new Int32Array(noverlaps);
        const sizes = new Int32Array(noverlaps);
        const indices = new Int32Array(noverlaps);
        for (let i = 0; i < noverlaps; i++) {
            const x = overlaps[i];
            indices[i] = i;
            set_ids[i] = x.id;
            counts[i] = x.count;
            sizes[i] = this.#cache.sets.sizes[x.id];
        }

        const num_top = in_set.length;
        const pvalues = scran.hypergeometricTest(counts, num_top, sizes, universe.length);
        indices.sort((a, b) => pvalues[a] - pvalues[b]); // Sorting by p-value.
        return {
            set_ids: bioc.SLICE(set_ids, indices),
            counts: bioc.SLICE(counts, indices),
            pvalues: bioc.SLICE(pvalues, indices),
            num_markers: num_top
        };
    }

    /**
     * @param {number} set_id - Index of the set in the arrays of {@linkcode FeatureSetManager#fetchSetDetails fetchSetDetails}.
     * @return Row indices of the members of the requested set.
     */
    fetchFeatureSetIndices(set_id) {
        return this.#cache.sets.sets[set_id];
    }

    /**
     * Score the activity of a feature set in each cell.
     *
     * @param {number} set_id - Index of the set in the arrays of {@linkcode FeatureSetManager#fetchSetDetails fetchSetDetails}.
     * @param {object} normalized - Expression matrix exposing `numberOfRows()`, passed to `scran.scoreFeatureSet`.
     * @param block - Blocking factor, forwarded as the `block` option of `scran.scoreFeatureSet`.
     *
     * @return The result of `scran.scoreFeatureSet`.
     */
    computePerCellScores(set_id, normalized, block) {
        const indices = this.fetchFeatureSetIndices(set_id);

        // Reusing a cached buffer as a 0/1 membership mask over the rows of the matrix.
        const features = utils.allocateCachedArray(normalized.numberOfRows(), "Uint8Array", this.#cache, "set_buffer");
        features.fill(0);
        const farr = features.array();
        indices.forEach(x => { farr[x] = 1; });

        return scran.scoreFeatureSet(normalized, features, { block: block });
    }
}

/*******************************************
 ******** Internals for parameters *********
 *******************************************/

/**
 * Create a fresh object holding the default parameters for this step.
 * A new object (and a new `species` array) is produced on every call.
 *
 * @return {object} Default values for `skip`, `guess_ids`, `species`, `gene_id_column`, `gene_id_type` and `top_markers`.
 */
function all_defaults() {
    const defaults = {};
    defaults.skip = false;
    defaults.guess_ids = true;
    defaults.species = [];
    defaults.gene_id_column = null;
    defaults.gene_id_type = "ENSEMBL";
    defaults.top_markers = 100;
    return defaults;
}

/**
 * Pick the identifier source with the highest confidence among the supplied guesses.
 *
 * The row names (if present under `row_names`) are considered first, followed by each entry of `columns`.
 * A candidate only replaces the current best if its confidence is strictly greater,
 * so earlier candidates win ties and candidates with non-positive confidence are never chosen.
 * If nothing is chosen, the result falls back to human symbols taken from the row names.
 *
 * @param {object} guesses - Object with a `columns` property mapping column names to guesses, and optionally a `row_names` guess.
 * Each guess is expected to have `type`, `species` and `confidence` properties.
 *
 * @return {object} Object with `gene_id_column` (`null` for the row names), the upper-cased `gene_id_type`,
 * and `species` as a single-element array.
 */
function configure_feature_parameters(guesses) {
    // Candidates are (column, guess) pairs in order of consideration; a null column denotes the row names.
    const candidates = [];
    if ("row_names" in guesses) {
        candidates.push([null, guesses.row_names]);
    }
    for (const entry of Object.entries(guesses.columns)) {
        candidates.push(entry);
    }

    let chosen_column = null;
    let chosen = { type: "symbol", species: "human", confidence: 0 };
    for (const [column, guess] of candidates) {
        if (guess.confidence > chosen.confidence) {
            chosen_column = column;
            chosen = guess;
        }
    }

    const gene_id_type = chosen.type.toUpperCase();
    return {
        gene_id_column: chosen_column,
        gene_id_type: gene_id_type,
        species: [chosen.species]
    };
}

/**
 * Write the supplied parameter values into an existing parameter object, modifying it in place.
 * Properties of `parameters` other than the five set here are left untouched.
 *
 * @param {object} parameters - Object to be updated.
 * @param {boolean} guess_ids - Value for `parameters.guess_ids`.
 * @param {Array} species - Value for `parameters.species`; a clone is stored rather than the array itself.
 * @param {?(string|number)} gene_id_column - Value for `parameters.gene_id_column`.
 * @param {string} gene_id_type - Value for `parameters.gene_id_type`.
 * @param {number} top_markers - Value for `parameters.top_markers`.
 */
function transplant_parameters(parameters, guess_ids, species, gene_id_column, gene_id_type, top_markers) {
    parameters.guess_ids = guess_ids;

    // Cloning so that the stored parameters do not share the caller's array.
    const species_copy = bioc.CLONE(species);
    Object.assign(parameters, {
        species: species_copy,
        gene_id_column,
        gene_id_type,
        top_markers
    });
}

/**
 * Produce a copy of a parameter object that can be handed to callers without exposing the original.
 * The object itself is copied shallowly, and `species` is additionally cloned.
 *
 * @param {object} parameters - Parameter object to copy.
 * @return {object} Copy of `parameters` with a cloned `species`.
 */
function fetch_parameters(parameters) {
    const copy = Object.assign({}, parameters);
    // The shallow copy still shares 'species' with the original, so it is replaced with a clone.
    copy.species = bioc.CLONE(copy.species);
    return copy;
}

/************************
 ******** State *********
 ************************/

/**
 * This step tests for enrichment of particular feature sets in the set of top marker genes,
 * based on marker rankings from {@linkplain MarkerDetectionState}.
 * It wraps the [`testFeatureSetEnrichment`](https://kanaverse.github.io/scran.js/global.html#testFeatureSetEnrichment) 
 * and [`scoreFeatureSet`](https://kanaverse.github.io/scran.js/global.html#scoreFeatureSet) functions
 * from [**scran.js**](https://github.com/kanaverse/scran.js).
 *
 * This class uses the [**gesel**](https://npmjs.org/package/gesel) package to download the default gene set databases for the relevant organisms.
 * Its behavior can be tuned by setting global **gesel** variables, e.g., using the [`referenceDownload`](https://ltla.github.io/gesel/global.html#referenceDownload) function.
 * 
 * Methods not documented here are not part of the stable API and should not be used by applications.
 * @hideconstructor
 */
export class FeatureSetEnrichmentState {
    #inputs;
    #filter;
    #normalized;

    #parameters;
    #manager;

    constructor(inputs, filter, normalized, markers, parameters = null, cache = null) {
        // Each upstream state is validated before it is stored.
        if (!(inputs instanceof inputs_module.InputsState)) {
            throw new Error("'inputs' should be a State object from './inputs.js'");
        }
        this.#inputs = inputs;

        if (!(filter instanceof filter_module.CellFilteringState)) {
            throw new Error("'filter' should be a CellFilteringState object");
        }
        this.#filter = filter;

        if (!(normalized instanceof norm_module.RnaNormalizationState)) {
            throw new Error("'normalized' should be a RnaNormalizationState object from './rna_normalization.js'");
        }
        this.#normalized = normalized;

        this.#parameters = parameters ?? {};
        this.#manager = new FeatureSetManager();
        this.changed = false;
    }

    /**
     * Release all resources held by this instance.
     */
    free() {
        this.#manager.free();
    }

    // This step is only applicable when the count matrix has an RNA modality.
    valid() {
        return this.#inputs.fetchCountMatrix().has("RNA");
    }

    /**
     * Retrieve details on the feature set collections in the reference database.
     * This assumes that {@linkcode runAnalysis} has already been run on this FeatureSetEnrichmentState instance.
     *
     * @return {object} Object with the following properties:
     *
     * - `names`: Array of strings of length equal to the number of feature set collections, containing the names of the collections.
     * - `descriptions`: Array of strings of length equal to `names`, containing the descriptions for all collections.
     * - `species`: Array of strings of length equal to `names`, containing the taxonomy IDs for all collections.
     */
    fetchCollectionDetails() {
        return this.#manager.fetchCollectionDetails();
    }

    /**
     * Retrieve details on the feature sets in the reference database.
     * This assumes that {@linkcode runAnalysis} has already been run on this FeatureSetEnrichmentState instance.
     *
     * @return {object} Object with the following properties:
     *
     * - `names`: Array of strings of length equal to the number of feature sets across all collections, containing the names of those sets.
     * - `descriptions`: Array of strings of length equal to `names`, containing the set descriptions.
     * - `sizes`: Int32Array of length equal to `names`, containing the set sizes.
     *   The size of a set is the number of features in the dataset that are successfully mapped to a member of the set.
     * - `collections`: Int32Array of length equal to `names`, specifying the collection to which the set belongs.
     *   Each value is an index into the arrays in {@linkcode fetchCollectionDetails}.
     */
    fetchSetDetails() {
        return this.#manager.fetchSetDetails();
    }

    /**
     * Retrieve the size of the universe, i.e., the features that were successfully mapped to the reference database.
     * This assumes that {@linkcode runAnalysis} has already been run on this FeatureSetEnrichmentState instance.
     *
     * @return {number} Number of features from the input dataset that were successfully mapped to at least one gene in the reference database.
     */
    fetchUniverseSize() {
        return this.#manager.fetchUniverseSize();
    }

    /**
     * Test for enrichment of the top markers in each feature set.
     * This assumes that {@linkcode runAnalysis} has already been run on this FeatureSetEnrichmentState instance.
     *
     * @param {external:ScoreMarkersResults} markers - Arbitrary marker detection results for an RNA modality, with the same order and identity of genes as from the upstream {@linkplain InputsState}.
     * This is most typically the output from {@linkcode MarkerDetectionState#fetchResults MarkerDetectionState.fetchResults} or equivalents from {@linkplain CustomSelectionsState}.
     * @param {number} group - Index of the group of interest inside `markers`.
     * @param {string} effect_size - Effect size to use for ranking.
     * This should be one of `"cohen"`, `"auc"`, `"lfc"` or `"delta_detected"`.
     * @param {string} summary - Summary statistic to use for ranking.
     * This should be one of `"min"`, `"mean"` or `"min_rank"`.
     *
     * @return {object} Object containing the following properties:
     *
     * - `set_ids`: Int32Array of length equal to the number of sets, containing the set IDs.
     *   Each entry is an index into the arrays returned by {@linkcode FeatureSetEnrichmentState#fetchSetDetails fetchSetDetails}.
     * - `counts`: Int32Array of length equal to `set_ids`, containing the number of markers present in each set.
     * - `pvalues`: Float64Array of length equal to `counts`, containing the enrichment p-values for each set.
     * - `num_markers`: number of markers selected for testing.
     *
     * Sets are guaranteed to be sorted by increasing p-value in `pvalues`.
     */
    computeEnrichment(markers, group, effect_size, summary) {
        // The number of top markers comes from the parameters of the last compute() call.
        const top_markers = this.#parameters.top_markers;
        return this.#manager.computeEnrichment(group, effect_size, summary, markers, top_markers);
    }

    /**
     * Retrieve the row indices of the members of a feature set of interest.
     * This assumes that {@linkcode runAnalysis} has already been run on this FeatureSetEnrichmentState instance.
     *
     * @param {number} set_id - Feature set ID, defined as an index into the arrays returned by {@linkcode FeatureSetEnrichmentState#fetchSetDetails fetchSetDetails}.
     *
     * @return {Int32Array} Array containing the row indices of the RNA count matrix corresponding to the genes in the specified set.
     */
    fetchFeatureSetIndices(set_id) {
        return this.#manager.fetchFeatureSetIndices(set_id);
    }

    /**
     * Score the activity of a feature set in each cell.
     * This assumes that {@linkcode runAnalysis} has already been run on this FeatureSetEnrichmentState instance.
     *
     * @param {number} set_id - Feature set ID, defined as an index into the arrays returned by {@linkcode FeatureSetEnrichmentState#fetchSetDetails fetchSetDetails}.
     *
     * @return {Object} Object containing:
     *
     * - `indices`: Int32Array containing the row indices of the genes in the set, relative to the RNA count matrix.
     * - `weights`: Float64Array containing the weights of each gene in the set.
     * - `scores`: Float64Array containing the feature set score for each cell.
     */
    computePerCellScores(set_id) {
        const matrix = this.#normalized.fetchNormalizedMatrix();
        const block = this.#filter.fetchFilteredBlock();
        return this.#manager.computePerCellScores(set_id, matrix, block);
    }

    // Soft-deprecated.
    // NOTE(review): computePerCellScores() declares a single 'set_id' parameter, so 'set_index' is forwarded
    // but has no effect and 'collection' is used as the set ID - confirm that legacy callers expect this.
    fetchPerCellScores(collection, set_index) {
        return this.computePerCellScores(collection, set_index);
    }

    /**
     * @return {object} Object containing the parameters.
     */
    fetchParameters() {
        return fetch_parameters(this.#parameters);
    }

    /****************************
     ******** Defaults **********
     ****************************/

    /**
     * @return {object} Default parameters that may be modified and fed into {@linkcode FeatureSetEnrichmentState#compute compute}.
     */
    static defaults() {
        return all_defaults();
    }

    /***************************
     ******** Remotes **********
     ***************************/

    // Currently a no-op.
    static flush() {
        return;
    }

    // Forwards to the manager, which treats this as a no-op beyond emitting a warning.
    static setDownload(fun) {
        return FeatureSetManager.setDownload(fun);
    }

    /***************************
     ******** Compute **********
     ***************************/

    /**
     * This method should not be called directly by users, but is instead invoked by {@linkcode runAnalysis}.
     *
     * @param {object} parameters - Parameter object, equivalent to the `feature_set_enrichment` property of the `parameters` of {@linkcode runAnalysis}.
     * @param {boolean} parameters.skip - Whether to skip the preparation of feature set collections.
     * If `true`, none of the other methods (e.g., {@linkcode computeEnrichment}, {@linkcode computePerCellScores}) should be called.
     * @param {boolean} parameters.guess_ids - Automatically choose feature-based parameters based on the feature annotation for the RNA modality.
     * If `true`, the column of the annotation that best matches human/mouse Ensembl/symbols is identified and used to set `species`, `gene_id_column`, `gene_id_type`.
     * @param {Array} parameters.species - Array of strings specifying zero, one or more species involved in this dataset.
     * Each entry should be a taxonomy ID (e.g. `"9606"`, `"10090"`) supported by **gesel**.
     * This is used internally to filter `collections` to the entries relevant to these species.
     * Ignored if `guess_ids = true`.
     * @param {?(string|number)} parameters.gene_id_column - Name or index of the column of the RNA entry of {@linkcode InputsState#fetchFeatureAnnotations InputsState.fetchFeatureAnnotations} containing the identity of each gene.
     * If `null`, identifiers are taken from the row names.
     * Ignored if `guess_ids = true`.
     * @param {string} parameters.gene_id_type - Type of feature identifier in `gene_id_column`.
     * This should be one of `"ENSEMBL"`, `"SYMBOL"` or `"ENTREZ"`
     * Ignored if `guess_ids = true`.
     * @param {number} parameters.top_markers - Number of top markers to use when testing for enrichment.
     *
     * @return The state is updated with new results.
     */
    async compute(parameters) {
        // Start from the upstream status; later checks can only switch this to true.
        this.changed = Boolean(this.#inputs.changed);

        const { skip, guess_ids, species, gene_id_column, gene_id_type, top_markers } = parameters;
        if (this.#parameters.skip !== skip) {
            this.changed = true;
        }

        const runnable = this.valid() && !skip;
        if (runnable) {
            if (this.changed) {
                // Discarding the old parameters forces the manager to rebuild its collections.
                this.#parameters = {};
            }

            const fetch_annotations = () => this.#inputs.fetchFeatureAnnotations().RNA;
            const fetch_guesses = () => this.#inputs.guessRnaFeatureTypes();
            const rebuilt = await this.#manager.buildCollections(
                this.#parameters,
                guess_ids,
                species,
                gene_id_column,
                gene_id_type,
                fetch_annotations,
                fetch_guesses
            );

            if (rebuilt || top_markers !== this.#parameters.top_markers) {
                this.changed = true;
            }
        }

        // Parameters are recorded regardless of whether the collections were prepared.
        transplant_parameters(this.#parameters, guess_ids, species, gene_id_column, gene_id_type, top_markers);
        this.#parameters.skip = skip;
    }
}

/*****************************
 ******** Standalone *********
 *****************************/

/**
 * Standalone version of {@linkplain FeatureSetEnrichmentState} that provides the same functionality outside of {@linkcode runAnalysis}.
 * Users can supply their own annotations to prepare the collections for enrichment calculations.
 * Users should await on the return value of the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method after construction.
 * Once resolved, other methods in this class may be used.
 *
 * This class uses the [**gesel**](https://npmjs.org/package/gesel) package to download the default gene set databases for the relevant organisms.
 * Its behavior can be tuned by setting global **gesel** variables, e.g., using the [`referenceDownload`](https://ltla.github.io/gesel/global.html#referenceDownload) function.
 */
export class FeatureSetEnrichmentStandalone {
    #annotations;
    #guesses;

    #normalized;
    #block;
    #backmap;
    #original_columns;

    #pre_parameters;
    #parameters;
    #manager;

    /**
     * @param {external:DataFrame} annotations - A {@linkplain external:DataFrame DataFrame} of per-gene annotations, where each row corresponds to a gene.
     * @param {object} [options={}] - Optional parameters.
     * @param {?(external:ScranMatrix)} [options.normalized=null] - A {@linkcode external:ScranMatrix ScranMatrix} of log-normalized expression values,
     * to be used in {@linkcode FeatureSetEnrichmentStandalone#computePerCellScores FeatureSetEnrichmentStandalone.computePerCellScores}.
     * Each row corresponds to a gene in the same order as `annotations`. 
     * The instance stores its own clone (or column subset) of this matrix, which is released by {@linkcode FeatureSetEnrichmentStandalone#free free}.
     * @param {?(Array|TypedArray)} [options.block=null] - Array of length equal to the number of columns in `normalized`, containing the block assignments for each column. 
     * If `null`, all columns are assigned to the same block.
     * Ignored if `normalized = null`.
     */
    constructor(annotations, { normalized = null, block = null } = {}) {
        this.#annotations = annotations;
        this.#guesses = null;

        this.#normalized = null;
        this.#block = null;
        this.#backmap = null;
        this.#original_columns = null;

        if (normalized !== null) {
            if (normalized.numberOfRows() !== this.#annotations.numberOfRows()) {
                throw new Error("number of rows of 'annotations' and 'normalized' should be identical");
            }

            // Remember the size of the caller's matrix so that per-cell
            // results can be mapped back to it, even after column filtering.
            this.#original_columns = normalized.numberOfColumns();

            if (block !== null) {
                if (this.#original_columns !== block.length) {
                    throw new Error("number of columns of 'normalized' should equal the length of 'block'");
                }

                // Columns with an invalid block assignment are dropped; 'retain'
                // holds the original indices of the columns that are kept.
                let dump = utils.subsetInvalidFactors([ block ]);
                if (dump.retain !== null) {
                    this.#normalized = scran.subsetColumns(normalized, dump.retain);
                    this.#backmap = dump.retain;
                } else {
                    this.#normalized = normalized.clone();
                }

                this.#block = dump.arrays[0].ids;
            } else {
                this.#normalized = normalized.clone();
            }
        }

        this.#pre_parameters = FeatureSetEnrichmentStandalone.defaults();
        this.#parameters = {};
        this.#manager = new FeatureSetManager();
    }

    // Lazily guess the feature types from the annotations, caching the result for later calls.
    #guessFeatureTypes() {
        if (this.#guesses == null) {
            this.#guesses = utils.guessFeatureTypes(this.#annotations);
        }
        return this.#guesses;
    }

    // Testing functions to check that the sanitization worked correctly.
    _peekMatrices() {
        return this.#normalized;
    }

    _peekBlock() {
        return this.#block;
    }

    /**
     * Frees all resources associated with this instance,
     * i.e., the sanitized blocking factor, the internal copy of the normalized matrix and the feature set manager's cache.
     */
    free() {
        scran.free(this.#block);
        scran.free(this.#normalized);
        this.#manager.free();
        return;
    }

    /**
     * @return {object} Default parameters that may be modified and fed into {@linkcode FeatureSetEnrichmentStandalone#setParameters setParameters}.
     */
    static defaults() {
        return all_defaults();
    }

    /**
     * If this method is not called, the parameters default to those in {@linkcode FeatureSetEnrichmentStandalone#defaults FeatureSetEnrichmentStandalone.defaults}.
     *
     * @param {object} parameters - Parameter object.
     * @param {boolean} parameters.guess_ids - Automatically choose feature-based parameters based on the feature annotation for the RNA modality.
     * If `true`, the column of the annotation that best matches human/mouse Ensembl/symbols is identified and used to set `species`, `gene_id_column`, `gene_id_type`.
     * For back-compatibility, `parameters.automatic` is used if `guess_ids` is not supplied.
     * @param {Array} parameters.species - Array of strings specifying zero, one or more species involved in this dataset.
     * Each entry should be a taxonomy ID (e.g. `"9606"`, `"10090"`) supported by **gesel**.
     * This is used internally to filter `collections` to the entries relevant to these species. 
     * Ignored if `guess_ids = true`.
     * @param {?(string|number)} parameters.gene_id_column - Name or index of the column of the `annotations` (supplied in the constructor) containing the identity of each gene. 
     * If `null`, identifiers are taken from the row names.
     * Ignored if `guess_ids = true`.
     * @param {string} parameters.gene_id_type - Type of feature identifier in `gene_id_column`.
     * This should be one of `"ENSEMBL"`, `"SYMBOL"` or `"ENTREZ"`.
     * Ignored if `guess_ids = true`.
     * @param {number} parameters.top_markers - Number of top markers to use when testing for enrichment.
     *
     * @return The object is updated with new parameters.
     * Note that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method should be called in order for the new parameters to take effect.
     */
    setParameters(parameters) {
        let { guess_ids, species, gene_id_column, gene_id_type, top_markers } = parameters;

        // For some back-compatibility.
        if (typeof guess_ids == "undefined") {
            guess_ids = parameters.automatic;
        }

        transplant_parameters(this.#pre_parameters, guess_ids, species, gene_id_column, gene_id_type, top_markers);
    }

    /**
     * This should be called after construction and/or {@linkcode FeatureSetEnrichmentStandalone#setParameters setParameters}. 
     * Users should wait for the return value to resolve before calling any other methods of this class.
     * 
     * @return Feature set collections are loaded into memory. 
     * @async
     */
    async ready() {
        let { guess_ids, species, gene_id_column, gene_id_type, top_markers } = this.#pre_parameters;

        // For some back-compatibility.
        if (typeof guess_ids == "undefined") {
            guess_ids = this.#pre_parameters.automatic;
        }

        await this.#manager.buildCollections(
            this.#parameters,
            guess_ids, 
            species, 
            gene_id_column, 
            gene_id_type, 
            () => this.#annotations,
            () => this.#guessFeatureTypes()
        );

        // Copy rather than alias the pending parameters. Otherwise, later calls
        // to setParameters() would silently modify the "current" parameters,
        // and the next ready() would never detect that anything had changed.
        transplant_parameters(this.#parameters, guess_ids, species, gene_id_column, gene_id_type, top_markers);
    }

    /**
     * Obtain the details about the feature set collections in the reference database.
     * It is assumed that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method has already resolved before calling this method.
     *
     * @return {object} Object containing the details about the available feature set collections,
     * see {@linkcode FeatureSetEnrichmentState#fetchCollectionDetails FeatureSetEnrichmentState.fetchCollectionDetails} for more details.
     */
    fetchCollectionDetails() {
        return this.#manager.fetchCollectionDetails();
    }

    /**
     * Obtain the details about the feature sets in the reference database.
     * It is assumed that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method has already resolved before calling this method.
     *
     * @return {object} Object containing the details about the available feature sets,
     * see {@linkcode FeatureSetEnrichmentState#fetchSetDetails FeatureSetEnrichmentState.fetchSetDetails} for more details.
     */
    fetchSetDetails() {
        return this.#manager.fetchSetDetails();
    }

    /**
     * Obtain the size of the universe of features that were successfully mapped to features in the reference database.
     * It is assumed that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method has already resolved before calling this method.
     *
     * @return {number} Number of features from the input dataset that were successfully mapped to at least one gene in the reference database.
     */
    fetchUniverseSize() {
        return this.#manager.fetchUniverseSize();
    }

    /**
     * Compute enrichment of top markers in each feature set.
     * It is assumed that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method has already resolved before calling this method.
     * The number of top markers is taken from the parameters that were in effect at the last call to `ready`.
     *
     * @param {external:ScoreMarkersResults} markers - Marker detection results for an RNA modality.
     * @param {number} group - Group index of interest.
     * @param {string} effect_size - Effect size to use for ranking.
     * This should be one of `"cohen"`, `"auc"`, `"lfc"` or `"delta_detected"`.
     * @param {string} summary - Summary statistic to use for ranking.
     * This should be one of `"min"`, `"mean"` or `"min_rank"`.
     *
     * @return {object} Object containing statistics for the enrichment of the top marker genes in each feature set.
     * See {@linkcode FeatureSetEnrichmentState#computeEnrichment FeatureSetEnrichmentState.computeEnrichment} for more details.
     */
    computeEnrichment(markers, group, effect_size, summary) {
        return this.#manager.computeEnrichment(group, effect_size, summary, markers, this.#parameters.top_markers);
    }

    /**
     * Extract row indices of the members of a desired feature set of interest.
     * It is assumed that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method has already resolved before calling this method.
     *
     * @param {number} set_id - Feature set ID, defined as an index into the arrays returned by {@linkcode FeatureSetEnrichmentStandalone#fetchSetDetails fetchSetDetails}.
     *
     * @return {Int32Array} Array containing the row indices of the RNA count matrix corresponding to the genes in the specified set.
     */
    fetchFeatureSetIndices(set_id) {
        return this.#manager.fetchFeatureSetIndices(set_id);
    }

    /**
     * @return {object} Object containing the parameters, including any set by
     * {@linkcode FeatureSetEnrichmentStandalone#setParameters setParameters} that have not yet been applied by
     * {@linkcode FeatureSetEnrichmentStandalone#ready ready}.
     */
    fetchParameters() {
        return fetch_parameters(this.#pre_parameters);
    }

    /**
     * Compute per-cell scores for the activity of a feature set.
     * It is assumed that the {@linkcode FeatureSetEnrichmentStandalone#ready ready} method has already resolved before calling this method.
     *
     * @param {number} set_id - Feature set ID, defined as an index into the arrays returned by {@linkcode FeatureSetEnrichmentStandalone#fetchSetDetails fetchSetDetails}.
     *
     * @return {Object} Object containing the per-cell scores for the feature set activity.
     * See {@linkcode FeatureSetEnrichmentState#computePerCellScores FeatureSetEnrichmentState.computePerCellScores} for more details.
     * If some columns of `normalized` were removed in the constructor due to invalid `block` assignments,
     * `scores` is a Float64Array with one entry per column of the original `normalized`, where the removed columns are set to `NaN`.
     *
     * @throws {Error} If no normalized matrix was supplied in the constructor.
     */
    computePerCellScores(set_id) {
        if (this.#normalized == null) {
            throw new Error("no normalized matrix supplied in constructor");
        }

        let output = this.#manager.computePerCellScores(set_id, this.#normalized, this.#block);

        if (this.#backmap !== null) {
            // The back-mapping refers to columns of the original matrix, so the
            // output must be sized accordingly; using the number of retained
            // columns would silently drop scores for high-index cells.
            let backfilled = new Float64Array(this.#original_columns);
            backfilled.fill(Number.NaN);
            this.#backmap.forEach((x, i) => {
                backfilled[x] = output.scores[i];
            });
            output.scores = backfilled;
        }

        return output;
    }
}