steps/cell_labelling.js - Documentation

import * as scran from "scran.js";
import * as bioc from "bioconductor";
import * as utils from "./utils/general.js";
import * as rutils from "../readers/index.js";
import * as inputs_module from "./inputs.js";
import * as norm_module from "./rna_normalization.js";

const baseUrl = "https://github.com/kanaverse/singlepp-references/releases/download/2023-04-28";

export const step_name = "cell_labelling";

/************************************
 ****** Internals for loading *******
 ************************************/

var download_fun  = utils.defaultDownload;

function set_download(fun) {
    let previous = download_fun;
    download_fun = fun;
    return previous;
}

async function acquire_file(name, suffix) {
    let full = name + "_" + suffix;
    let b = await download_fun(baseUrl + "/" + full);
    return new rutils.SimpleFile(b, { name: full })
}

const all_loaded = {};

function flush_prepared(cache) {
    if ("prepared" in cache) {
        for (const v of Object.values(cache.prepared)) {
            v.built.raw.free();
        }
        delete cache.prepared;
    }
}

async function process_genes(file) {
    let gene_lines = await rutils.readLines2(file.content(), { compression: "gz" }); // gene names
    let acquired = [];

    for (const x of gene_lines) {
        let val = null;
        if (x !== "") {
            val = x.split("\t");
            if (val.length == 1) {
                val = val[0];
            }
        }
        acquired.push(val);
    }

    return acquired;
}

async function load_reference(name, gene_id_type) {
    let gene_suffix = "genes_" + gene_id_type.toLowerCase() + ".csv.gz";

    if (name in all_loaded) {
        let output = all_loaded[name];
        let known_genes = output.genes;
        if (!(gene_id_type in known_genes)) {
            known_genes[gene_id_type] = await process_genes(await acquire_file(name, gene_suffix));
        }
        return output;
    }

    const suffixes = [ 
        "labels_fine.csv.gz",
        "label_names_fine.csv.gz",
        "markers_fine.gmt.gz",
        "matrix.csv.gz",
        gene_suffix
    ];

    let contents = await Promise.all(suffixes.map(x => acquire_file(name, x)));

    let loaded;
    let stored;
    try {
        loaded = scran.loadLabelledReferenceFromBuffers(
            contents[3].buffer(), // rank matrix
            contents[2].buffer(), // markers
            contents[0].buffer()  // label per sample
        );

        let labels = await rutils.readLines2(contents[1].content(), { compression: "gz" }); // full label names
        stored = {
            "raw": loaded, 
            "labels": labels,
            "genes": {}
        };

        stored.genes[gene_id_type] = await process_genes(contents[4]);
        all_loaded[name] = stored;

    } catch (e) {
        utils.freeCache(loaded);
        throw e;
    }

    return stored;
}

function flush_loaded() {
    for (const [k, v] of Object.entries(all_loaded)) {
        v.raw.free();
        delete all_loaded[k];
    }
}

/*************************************
 ****** Internals for building *******
 *************************************/

const available_references = {
    "9606": [ "BlueprintEncode", "DatabaseImmuneCellExpression", "HumanPrimaryCellAtlas", "MonacoImmune", "NovershternHematopoietic" ],
    "10090": [ "ImmGen", "MouseRNAseq" ]
};

function internal_build_reference(name, gene_ids, gene_id_type) {
    let built;
    let output;
    try {
        let current = all_loaded[name];
        let loaded = current.raw;

        if (!(gene_id_type in current.genes)) {
            throw new Error("unknown gene type '" + gene_id_type + "'");
        }
        let chosen_ids = current.genes[gene_id_type];

        built = scran.buildLabelledReference(gene_ids, loaded, chosen_ids); 
        output = {
            "loaded": current,
            "built": {
                "features": chosen_ids,
                "raw": built
            }
        };

    } catch (e) {
        utils.freeCache(built);
        throw e;
    }

    return output;
}

async function build_reference(cache, references, guess_ids, species, gene_id_column, gene_id_type, old_parameters, annofun, guessfun) {
    if (
        guess_ids !== old_parameters.guess_ids ||
        utils.changedParameters(references, old_parameters.references) ||
        (
            !guess_ids &&
            (
                species !== old_parameters.species ||
                gene_id_column !== old_parameters.gene_id_column ||
                gene_id_type !== old_parameters.gene_id_type
            )
        )
    ) {
        let species2 = species;
        let gene_id_column2 = gene_id_column;
        let gene_id_type2 = gene_id_type;

        if (guess_ids) {
            let auto = CellLabellingState.configureFeatureParameters(guessfun());
            species2 = auto.species;
            gene_id_column2 = auto.gene_id_column;
            gene_id_type2 = auto.gene_id_type;
        }

        let allowable = new Set;
        for (const s of species2) {
            if (s in available_references) {
                available_references[s].forEach(x => { allowable.add(x); });
            }
        }

        // Building each individual reference.
        let feats = annofun();
        let gene_ids = (gene_id_column2 == null ? feats.rowNames() : feats.column(gene_id_column2));
        cache.gene_ids = gene_ids;

        let valid = {};
        if (gene_ids !== null) {
            if (references == null) {
                references = Array.from(allowable);
            }
            for (const ref of references) {
                if (allowable.has(ref)) {
                    await load_reference(ref, gene_id_type2);
                    valid[ref] = internal_build_reference(ref, gene_ids, gene_id_type2);
                }
            }
        }

        flush_prepared(cache);
        cache.prepared = valid;

        // Building an integrated reference, if necessary.
        let used_refs = Object.keys(valid);
        if (used_refs.length > 1) {
            let arr = Object.values(valid);
            let loaded = arr.map(x => x.loaded.raw);
            let feats = arr.map(x => x.built.features);
            let built = arr.map(x => x.built.raw);

            utils.freeCache(cache.integrated);
            cache.integrated = scran.integrateLabelledReferences(gene_ids, loaded, feats, built);
        } else {
            utils.freeCache(cache.integrated);
            delete cache.integrated;
        }
        cache.used_refs = used_refs;

       return true;
    }

    return false;
}

function create_defaults() {
    return {
        references: null,
        guess_ids: true,
        species: [],
        gene_id_column: null,
        gene_id_type: "ENSEMBL"
    };
}

function transplant_parameters(references, guess_ids, species, gene_id_column, gene_id_type, parameters) {
    parameters.references = bioc.CLONE(references); // make a copy to avoid pass-by-reference behavior.
    parameters.guess_ids = guess_ids;
    parameters.species = bioc.CLONE(species);
    parameters.gene_id_column = gene_id_column;
    parameters.gene_id_type = gene_id_type;
}

function fetch_parameters(parameters) {
    // Avoid pass-by-reference behavior.
    let out = { ...parameters };
    out.references = bioc.CLONE(out.references);
    out.species = bioc.CLONE(out.species);
    return out;
}

/************************************
 ****** Internals for compute *******
 ************************************/

function transform_results(names, results, assigned) {
    let nclusters = results.numberOfCells();
    let ntargets = names.length;
    let output = new Array(nclusters);

    for (var r = 0; r < nclusters; r++) {
        let all_scores = {};
        let cscores = results.scoresForCell(r);
        for (var l = 0; l < ntargets; l++) {
            all_scores[names[l]] = cscores[l];
        }
        output[r] = { best: names[assigned[r]], all: all_scores };
    }

    return output;
}

function assign_labels_internal(x, cache) {
    let matrix = x;
    let temp_cluster_means;
    let temp_matrix;

    // Converting marker results into means.
    if (x instanceof scran.ScoreMarkersResults) {
        let ngroups = x.numberOfGroups();

        if (cache.gene_ids === null) {
            matrix = null;                
        } else {
            let ngenes = cache.gene_ids.length;

            // Creating a column-major array of mean vectors for each cluster.
            temp_cluster_means = scran.createFloat64WasmArray(ngroups * ngenes);
            for (var g = 0; g < ngroups; g++) {
                let means = x.means(g, { copy: false }); // Warning: direct view in wasm space - be careful.
                if (means.length !== ngenes) {
                    throw new Error("unexpected number of genes in marker results");
                }
                let cluster_array = temp_cluster_means.array();
                cluster_array.set(means, g * ngenes);
            }

            temp_matrix = scran.ScranMatrix.createDenseMatrix(ngenes, ngroups, temp_cluster_means, { columnMajor: true, copy: false });
            matrix = temp_matrix;
        }
    } else {
        if (cache.gene_ids !== null && x.numberOfRows() !== cache.gene_ids.length) {
            throw new Error("unexpected number of genes in the input matrix"); 
        }
    }

    // Running classifications; this is a no-op if gene_ids = null as 'valid' should be empty.
    let valid = cache.prepared;
    let results = { per_reference: {} };
    let raw = {};
    for (const [key, ref] of Object.entries(valid)) {
        let current = scran.labelCells(matrix, ref.built.raw);
        raw[key] = current;
        results.per_reference[key] = transform_results(ref.loaded.labels, current, current.predictedLabels({ copy: false }));
    }

    if ("integrated" in cache) {
        let single_results = [];
        for (const key of cache.used_refs) { // enforce correct order.
            single_results.push(raw[key]);
        }

        let current = scran.integrateCellLabels(matrix, single_results, cache.integrated);
        results.integrated = transform_results(cache.used_refs, current, current.predictedReferences({ copy: false }));
        current.free();
    }

    for (const v of Object.values(raw)) {
        v.free();
    }
    utils.freeCache(temp_matrix);
    utils.freeCache(temp_cluster_means);

    return results;
}

function assign_labels(x, group, cache) {
    if (group === null) {
        return assign_labels_internal(x, cache);
    }

    let to_collect = [];
    let output;
    try {
        let dump = utils.subsetInvalidFactors([group]);
        to_collect.push(dump.arrays[0].ids);

        let mat = x;
        if (dump.retain !== null) {
            let sub = scran.subsetColumns(x, dump.retain);
            to_collect.push(sub);
            mat = sub;
        }

        let aggr = scran.aggregateAcrossCells(mat, dump.arrays[0].ids, { average: true });
        to_collect.push(aggr);

        let aggrmat = scran.ScranMatrix.createDenseMatrix(
            mat.numberOfRows(), 
            aggr.numberOfGroups(), 
            aggr.sums(null, { copy: "view" }),
            { columnMajor: true, copy: false }
        );
        to_collect.push(aggrmat);

        output = assign_labels_internal(aggrmat, cache);
        output.groups = dump.arrays[0].levels;
    } finally {
        to_collect.forEach(utils.freeCache);
    }

    return output;
}

/********************
 ****** State *******
 ********************/

/**
 * Cell labelling involves assigning cell type labels to clusters using the [**SingleR** algorithm](https://github.com/LTLA/CppSingleR),
 * based on [pre-formatted reference expression profiles](https://github.com/clusterfork/singlepp-references).
 * This wraps [`labelCells`](https://kanaverse.github.io/scran.js/global.html#labelCells)
 * and related functions from [**scran.js**](https://github.com/kanaverse/scran.js).
 *
 * In theory, we could do this at the single-cell level, but we use clusters instead to expedite the computation and simplify interpretation.
 * If multiple references are requested, we will use each for assignment before attempting to choose the best label for each cluster across references.
 *
 * Methods not documented here are not part of the stable API and should not be used by applications.
 * @hideconstructor
 */
export class CellLabellingState {
    #inputs;
    #parameters;
    #cache;

    constructor(inputs, parameters = null, cache = null) {
        if (!(inputs instanceof inputs_module.InputsState)) {
            throw new Error("'inputs' should be a State object from './inputs.js'");
        }
        this.#inputs = inputs;

        this.#parameters = (parameters === null ? {} : parameters);
        this.#cache = (cache === null ? {} : cache);
        this.changed = false;
    }

    free() {
        flush_prepared(this.#cache);
    }

    /***************************
     ******** Getters **********
     ***************************/

    valid() {
        let mat = this.#inputs.fetchCountMatrix();
        return mat.has("RNA");
    }

    /**
     * @return {object} Object containing the parameters.
     */
    fetchParameters() {
        // Avoid any pass-by-reference activity.
        let out = { ...this.#parameters };
        out.references = bioc.CLONE(out.references);
        out.species = bioc.CLONE(out.species);
        return out;
    }

    /**
     * @return {object} Object where each key is the name of a reference and each value is the number of shared features between the test and reference daatasets.
     */
    fetchNumberOfSharedFeatures() {
        let output = {};
        for (const key of this.#cache.used_refs) {
            output[key] = this.#cache.prepared[key].built.raw.sharedFeatures();
        }
        return output;
    }

    /****************************
     ******** Defaults **********
     ****************************/

    /**
     * @return {object} Default parameters that may be modified and fed into {@linkcode CellLabellingCore#compute compute}.
     */
    static defaults() {
        return create_defaults();
    }

    static configureFeatureParameters(guesses) {
        let best_key = null;
        let best = { type: "symbol", species: "human", confidence: 0 };

        if ("row_names" in guesses) {
            let val = guesses.row_names;
            if (val.confidence > best.confidence) {
                best = val;
            }
        }

        for (const [key, val] of Object.entries(guesses.columns)) {
           if (val.confidence > best.confidence) {
                best = val;
                best_key = key;
            }
        }

        return {
            gene_id_column: best_key,
            species: [best.species],
            gene_id_type: best.type.toUpperCase()
        };
    }

    /**
     * @return {object} Object containing the parameters.
     */
    fetchParameters() {
        return fetch_parameters(this.#parameters);
    }

    /***************************
     ******** Remotes **********
     ***************************/

    /**
     * Available references for each species.
     * Each key is a taxonomy ID and each value is an array of strings containing the names of references for that species.
     * @type {object}
     */
    static availableReferences = available_references;

    /**
     * Flush all cached references.
     *
     * By default, {@linkcode CellLabellingState#compute compute} will cache the loaded references in a global cache for re-use across {@linkplain CellLabellingState} instances.
     * These cached references are not tied to any single instance and will not be removed by garbage collectors or by {@linkcode freeAnalysis}.
     * Rather, this function should be called to release the relevant memory.
     */
    static flush() {
        flush_loaded();
        return;
    }

    /**
     * Specify a function to download references for the cell labelling step.
     *
     * @param {function} fun - Function that accepts a single string containing a URL and returns any value that can be used in the {@linkplain SimpleFile} constructor.
     * This is most typically a Uint8Array of that URL's contents, but it can also be a path to a locally cached file on Node.js.
     *
     * @return `fun` is set as the global downloader for this step. 
     * The _previous_ value of the downloader is returned.
     */
    static setDownload(fun) {
        return set_download(fun);
    }

    /***************************
     ******** Compute **********
     ***************************/

    /**
     * This method should not be called directly by users, but is instead invoked by {@linkcode runAnalysis}.
     *
     * @param {object} parameters - Parameter object, equivalent to the `cell_labelling` property of the `parameters` of {@linkcode runAnalysis}.
     * @param {?Array} parameters.references - Array of strings specifying the names of the reference datasets, see {@linkcode CellLabellingState.availableReferences availableReferences} for more details.
     * If `null`, all reference datasets from all species are used.
     * @param {boolean} parameters.guess_ids - Automatically choose feature-based parameters based on the feature annotation for the RNA modality.
     * If `true`, the column of the annotation that best matches human/mouse Ensembl/symbols is identified and used to set `species`, `gene_id_column` and `gene_id_type`.
     * @param {Array} parameters.species - Array of strings specifying zero, one or more species involved in this dataset.
     * Each entry should be a taxonomy ID (e.g. `"9606"`, `"10090"`) as specified in {@linkcode CellLabellingState.availableReferences availableReferences}.
     * This is used internally to filter `references` to the entries relevant to these species. 
     * Ignored if `guess_ids = true`.
     * @param {?(string|number)} parameters.gene_id_column - Name or index of the column of the RNA entry of {@linkcode InputsState#fetchFeatureAnnotations InputsState.fetchFeatureAnnotations} containing the identity of each gene. 
     * If `null`, identifiers are taken from the row names.
     * Ignored if `guess_ids = true`.
     * @param {string} parameters.gene_id_type - Type of feature identifier in `gene_id_column`.
     * This should be one of `"ENSEMBL"`, `"SYMBOL"` or `"ENTREZ"`
     * Ignored if `guess_ids = true`.
     *
     * @return The object is updated with the new results.
     * @async
     */
    async compute(parameters) {
        let references;
        let guess_ids;
        let species;
        let gene_id_column;
        let gene_id_type;

        if ("references" in parameters) {
            references = parameters.references;
            guess_ids = parameters.guess_ids;
            species = parameters.species;
            gene_id_column = parameters.gene_id_column;
            gene_id_type = parameters.gene_id_type;
        } else {
            references = null;
            guess_ids = true;
            let def = CellLabellingState.defaults();
            species = def.species;
            gene_id_column = def.gene_id_column;
            gene_id_type = def.gene_id_type;
        }

        this.changed = false;

        if (this.valid()) {
            this.changed = await build_reference(
                this.#cache, 
                references, 
                guess_ids, 
                species, 
                gene_id_column, 
                gene_id_type, 
                this.#parameters, 
                () => this.#inputs.fetchFeatureAnnotations()["RNA"],
                () => this.#inputs.guessRnaFeatureTypes()
            );
        }

        transplant_parameters(
            references, 
            guess_ids, 
            species, 
            gene_id_column, 
            gene_id_type, 
            this.#parameters
        );
    }

    /**
     * @param {external:ScranMatrix|external:ScoreMarkersResults} x - A matrix of (normalized or unnormalized) expression values, with genes in rows and cells/clusters in columns.
     * Alternatively, an object containing marker results, e.g., as computed by {@linkcode MarkerDetectionState}. 
     *
     * In both cases, the identity of genes should correspond to that in the upstream {@linkcode InputsState}.
     * @param {object} [options={}] - Optional parameters.
     * @param {?(Array|TypedArray)} [options.group=null] - Array of length equal to the number of columns of `x`, containing grouping assignments.
     * If provided, the average expression profile of each group is used for cell type labelling.
     * This is only used if `x` is a {@linkplain external:ScranMatrix ScranMatrix}.
     * 
     * @return {object} Object containing:
     *
     * - `per_reference`: an object where each key is the name of a reference dataset and its value is an array.
     *   Each array is of length equal to the number of columns of `x` (if matrix), groups in `x` (if marker results), or groups in `group`.
     *   Each entry is an object containing `best`, the name of the best label assigned to a column/group in this reference;
     *   and `all`, an object where each key is a label in this reference dataset and its value is the score for assigning that label to this column/group.
     * - (optional) `integrated`: an array of length equal to the number of columns/groups.
     *   Each entry is an object containing `best`, the name of the best reference for this column/group;
     *   and `all`, an object where each key is the name of a reference dataset and its value is the score for this column/group.
     *   This property is only reported if multiple references are used.
     * - (optional) `groups`: an array of length equal to the number of groups, containing the identity of each group.
     *   Only reported if an input `group` is supplied and `x` is a {@linkplain externl:ScranMatrix ScranMatrix}.
     */
    computeLabels(x, { group = null } = {}) {
        return assign_labels(x, group, this.#cache);
    }
}

/*****************************
 ******** Standalone *********
 *****************************/

/**
 * Standalone version of {@linkplain CellLabellingState} that provides the same functionality outside of {@linkcode runAnalysis}.
 * Users can supply their own feature annotations to build the reference datasets prior to label assignment.
 * Users should await on the return value of the {@linkcode CellLabellingStandalone#ready ready} method after construction.
 * Once resolved, other methods in this class may be used.
 */
export class CellLabellingStandalone {
    #parameters;
    #cache;
    #annotations;
    #guesses;
    #pre_parameters;

    /**
     * @param {external:DataFrame} annotations - Feature annotations for the dataset.
     */
    constructor(annotations) {
        this.#parameters = {};
        this.#pre_parameters = CellLabellingStandalone.defaults();
        this.#annotations = annotations;
        this.#cache = {};
        this.#guesses = null;
    }

    free() {
        flush_prepared(this.#cache);
    }

    /**
     * @return {object} Default parameters that may be modified and fed into {@linkcode CellLabellingCore#compute compute}.
     */
    static defaults() {
        return create_defaults();
    }

    /**
     * @return {object} Object containing the parameters.
     */
    fetchParameters() {
        return fetch_parameters(this.#pre_parameters);
    }

    /***************************
     ******** Remotes **********
     ***************************/

    /**
     * Available references for each species.
     * Each key is a taxonomy ID and each value is an array of strings containing the names of references for that species.
     * @type {object}
     */
    static availableReferences = available_references;

    /**
     * Flush all cached references.
     *
     * By default, this class will cache the loaded references in a global cache for re-use across {@linkplain CellLabellingStandlone} instances.
     * These cached references are not tied to any single instance and will not be removed by garbage collectors or by {@linkcode CellLabellingStandalone#free free}.
     * Rather, this function should be called to release the relevant memory.
     */
    static flush() {
        flush_loaded();
        return;
    }

    /**
     * Specify a function to download references for the cell labelling step.
     *
     * @param {function} fun - Function that accepts a single string containing a URL and returns any value that can be used in the {@linkplain SimpleFile} constructor.
     * This is most typically a Uint8Array of that URL's contents, but it can also be a path to a locally cached file on Node.js.
     *
     * @return `fun` is set as the global downloader for this step. 
     * The _previous_ value of the downloader is returned.
     */
    static setDownload(fun) {
        return set_download(fun);
    }

    /***************************
     ******** Compute **********
     ***************************/

    #guessFeatureTypes() {
        if (this.#guesses == null) {
            this.#guesses = utils.guessFeatureTypes(this.#annotations);
        }
        return this.#guesses;
    }

    /**
     * @param {object} parameters - Parameter object, equivalent to the `cell_labelling` property of the `parameters` of {@linkcode runAnalysis}.
     * @param {?Array} parameters.references - Array of strings specifying the names of the reference datasets, see {@linkcode CellLabellingState.availableReferences availableReferences} for more details.
     * If `null`, all reference datasets from all species are used.
     * @param {boolean} parameters.guess_ids - Automatically choose feature-based parameters based on the feature annotation for the RNA modality.
     * If `true`, the column of the annotation that best matches human/mouse Ensembl/symbols is identified and used to set `species`, `gene_id_column` and `gene_id_type`.
     * @param {Array} parameters.species - Array of strings specifying zero, one or more species involved in this dataset.
     * Each entry should be a taxonomy ID (e.g. `"9606"`, `"10090"`) as specified in {@linkcode CellLabellingState.availableReferences availableReferences}.
     * This is used internally to filter `references` to the entries relevant to these species. 
     * Ignored if `guess_ids = true`.
     * @param {?(string|number)} parameters.gene_id_column - Name or index of the column of the RNA entry of {@linkcode InputsState#fetchFeatureAnnotations InputsState.fetchFeatureAnnotations} containing the identity of each gene. 
     * If `null`, identifiers are taken from the row names.
     * Ignored if `guess_ids = true`.
     * @param {string} parameters.gene_id_type - Type of feature identifier in `gene_id_column`.
     * This should be one of `"ENSEMBL"`, `"SYMBOL"` or `"ENTREZ"`
     * Ignored if `guess_ids = true`.
     *
     * @return The object is updated with the new results.
     * @async
     */
    async setParameters(parameters) {
        let { references, guess_ids, species, gene_id_column, gene_id_type } = parameters;
        transplant_parameters(references, guess_ids, species, gene_id_column, gene_id_type, this.#pre_parameters);
    }

    /**
     * This should be called after construction and/or {@linkcode FeatureSetEnrichmenStandalone#setParameters setParameters}. 
     * Users should wait for the return value to resolve before calling any other methods of this class.
     * 
     * @return Reference datasets are loaded into memory. 
     * @async
     */
    async ready() {
        let { references, guess_ids, species, gene_id_column, gene_id_type } = this.#pre_parameters;

        await build_reference(
            this.#cache, 
            references, 
            guess_ids, 
            species, 
            gene_id_column, 
            gene_id_type, 
            this.#parameters, 
            () => this.#annotations,
            () => this.#guessFeatureTypes()
        );

        this.#parameters = this.#pre_parameters;
    }

    /**
     * @param {external:ScranMatrix|external:ScoreMarkersResults} x - A matrix of (normalized or unnormalized) expression values, with genes in rows and cells/clusters in columns.
     * Alternatively, an object containing marker results, e.g., as computed by {@linkcode MarkerDetectionState}. 
     *
     * In both cases, the identity of genes should correspond to that in `annotations` in the constructor of this instance.
     * @param {object} [options={}] - Optional parameters.
     * @param {?(Array|TypedArray)} [options.group=null] - Array of length equal to the number of columns of `x`, containing grouping assignments.
     * If provided, the average expression profile of each group is used for cell type labelling.
     * This is only used if `x` is a {@linkplain external:ScranMatrix ScranMatrix}.
     * 
     * @return {object} Object containing:
     *
     * - `per_reference`: an object where each key is the name of a reference dataset and its value is an array.
     *   Each array is of length equal to the number of columns of `x` (if matrix), groups in `x` (if marker results), or groups in `group`.
     *   Each entry is an object containing `best`, the name of the best label assigned to a column/group in this reference;
     *   and `all`, an object where each key is a label in this reference dataset and its value is the score for assigning that label to this column/group.
     * - (optional) `integrated`: an array of length equal to the number of columns/groups.
     *   Each entry is an object containing `best`, the name of the best reference for this column/group;
     *   and `all`, an object where each key is the name of a reference dataset and its value is the score for this column/group.
     *   This property is only reported if multiple references are used.
     * - (optional) `groups`: an array of length equal to the number of groups, containing the identity of each group.
     *   Only reported if an input `group` is supplied and `x` is a {@linkplain external:ScranMatrix ScranMatrix}.
     */
    computeLabels(x, { group = null } = {}) {
        return assign_labels(x, group, this.#cache);
    }
}