import { Navigator } from "./Navigator.js";
import { readReducedDimensions, readSingleCellExperiment } from "./readers/SingleCellExperiment.js";
import { readAssay } from "./readers/SummarizedExperiment.js";
import * as scran from "scran.js";
import * as bioc from "bioconductor";
/**
* Pre-computed analysis results stored as a SummarizedExperiment object (or one of its subclasses) in the [**takane** format](https://github.com/ArtifactDB/takane).
* This is intended as a virtual base class; applications should define their own subclasses with appropriate getter and listing methods.
*/
export class AbstractResult {
    // Lower-cased column names that are recognized as size factors when `sizeFactors = true`.
    static #size_factor_names = new Set(["sizefactor", "size_factor", "sizefactors", "size_factors"]);

    #path;
    #navigator;
    #raw_components;
    #options;

    /**
     * @param {string} path - Some kind of path to the SummarizedExperiment.
     * The exact interpretation of this argument is left to subclasses.
     * @param {function} getter - A (possibly `async`) function that accepts a string containing the relative path to the file of interest, and returns a Uint8Array of that file's contents.
     * Each path is created by adding unix-style file separators to `path`.
     * @param {function} lister - A (possibly `async`) function that accepts a string containing the relative path to the directory of interest, and returns an array of the contents of that directory (non-recursive).
     * Each path is created by adding unix-style file separators to `path`.
     */
    constructor(path, getter, lister) {
        this.#path = path;
        this.#navigator = new Navigator(getter, lister);
        this.#raw_components = null;
        this.#options = AbstractResult.defaults();
    }

    /**
     * @return {object} Default options, see {@linkcode AbstractResult#setOptions setOptions} for more details.
     */
    static defaults() {
        return {
            primaryAssay: 0,
            isPrimaryNormalized: true,
            reducedDimensionNames: null,
            sizeFactors: true,
        };
    }

    /**
     * @return {object} Object containing all options used for loading.
     * This is a deep copy, so modifying the returned object (or any per-modality object nested inside it) does not affect this instance.
     */
    options() {
        // Deep copy so that nested per-modality objects are not shared with the caller,
        // mirroring the cloning performed in setOptions().
        return bioc.CLONE(this.#options);
    }

    /**
     * @param {object} options - Optional parameters that affect {@linkcode AbstractResult#load load} (but not {@linkcode AbstractResult#summary summary}).
     * @param {object|string|number} [options.primaryAssay] - Assay containing the relevant data for each modality.
     *
     * - If a string, this is used as the name of the assay across all modalities.
     * - If a number, this is used as the index of the assay across all modalities.
     * - If an object, the key should be the name of a modality and the value may be either a string or number specifying the assay to use for that modality.
     *   Modalities absent from this object will not be loaded.
     * @param {object|boolean} [options.isPrimaryNormalized] - Whether or not the assay for a particular modality has already been log-normalized.
     *
     * - If a boolean, this is used to indicate normalization status of assays across all modalities.
     *   If `false`, that modality's assay is assumed to contain count data and is subjected to scaling normalization and log-transformation.
     * - If an object, the key should be the name of a modality and the value should be a boolean indicating whether that modality's assay has been log-normalized.
     *   Modalities absent from this object are assumed to have been normalized.
     * @param {object|string|boolean} [options.sizeFactors] - What size factors to use for scaling normalization.
     *
     * - If `false`, the library size (i.e., total sum of counts) is used to normalize each assay.
     *   No size factors are used from the column data of any experiment.
     * - If `true`, each experiment's column data is searched for the `sizefactor`, `size_factor`, `sizefactors` or `size_factors` columns (ignoring any case).
     *   If found and the column contains only numeric values, it is used to supply the size factors.
     *   Otherwise, the library sizes are used.
     * - If a string, the column of the same name is assumed to store the size factors for each experiment.
     *   If no column exists with this name, the library sizes are used.
     * - If an object, the key should be the name of a modality and the value should be a boolean or string specifying the size factors to use for that modality based on the points mentioned above.
     *   For any modality absent from this object, the value is assumed to be `true`.
     *
     * Note that this option only has an effect on the assays that need log-normalization, according to `isPrimaryNormalized`.
     * @param {?Array} [options.reducedDimensionNames] - Array of names of the reduced dimensions to load.
     * If `null`, all reduced dimensions found in the file are loaded.
     */
    setOptions(options) {
        // Cloning to avoid pass-by-reference links.
        for (const [k, v] of Object.entries(options)) {
            this.#options[k] = bioc.CLONE(v);
        }
    }

    /**
     * Destroy caches if present, releasing the associated memory.
     * This may be called at any time but only has an effect if `cache = true` in {@linkcode AbstractResult#load load} or {@linkcode AbstractResult#summary summary}.
     */
    clear() {
        this.#raw_components = null;
        this.#navigator.clear();
    }

    /**
     * Read the experiment components, re-using the cached read if one exists.
     * The in-flight promise is cached (not the resolved value) so that overlapping callers share one read.
     * A failed read is evicted from the cache so that a later call can retry instead of replaying the same rejection.
     */
    async #load_components() {
        if (this.#raw_components === null) {
            this.#raw_components = readSingleCellExperiment(this.#path, this.#navigator, { includeAlternativeExperimentColumnData: true });
        }
        const pending = this.#raw_components;
        try {
            return await pending;
        } catch (e) {
            // Only evict our own entry; clear() or a newer read may have replaced it in the meantime.
            if (this.#raw_components === pending) {
                this.#raw_components = null;
            }
            throw e;
        }
    }

    /**
     * Name of the main modality, or an empty string if the components do not provide one.
     */
    #get_main_name(comp) {
        return comp.main_experiment_name ?? "";
    }

    /**
     * Resolve an option that is either a single value for all modalities or an object keyed by modality name.
     * For the keyed form, `fallback` is returned if `name` is absent.
     */
    static #for_modality(value, name, fallback) {
        if (typeof value === "object") {
            return (name in value) ? value[name] : fallback;
        }
        return value;
    }

    /**
     * Choose the size factors from `column_data` according to `choice` (a column name or a boolean).
     * Returns `null` if no usable column is found, which is passed on as-is to `scran.logNormCounts`.
     */
    static #find_size_factors(column_data, choice) {
        let fac = null;
        if (typeof choice === "string") {
            if (column_data.hasColumn(choice)) {
                fac = column_data.column(choice);
            }
        } else if (choice) {
            // Take the first column whose lower-cased name is one of the recognized names.
            for (const n of column_data.columnNames()) {
                if (AbstractResult.#size_factor_names.has(n.toLowerCase())) {
                    fac = column_data.column(n);
                    break;
                }
            }
        }

        // Generic arrays may hold anything, so reject the column if any entry is not a number.
        if (fac instanceof Array) {
            for (const x of fac) {
                if (typeof x !== "number") {
                    return null;
                }
            }
        }
        return fac;
    }

    /**
     * Load the assay for one modality into `output.matrix` (log-normalizing it if required) and record its feature annotations in `output.features`.
     * `settings` holds the `assay`, `normalized` and `sizeFactors` options as captured by the caller.
     * The modality is skipped entirely if `settings.assay` is a per-modality object without an entry for `name`.
     */
    async #add_experiment(name, info, settings, output) {
        let curassay = settings.assay;
        if (typeof curassay === "object") {
            if (!(name in curassay)) {
                return;
            }
            curassay = curassay[name];
        }
        const curnormalized = AbstractResult.#for_modality(settings.normalized, name, true);
        const cursf = AbstractResult.#for_modality(settings.sizeFactors, name, true);

        const loaded = await readAssay(info["_path"], curassay, this.#navigator);
        // Register the raw matrix before normalizing, so that the caller's cleanup covers it if normalization throws.
        output.matrix.add(name, loaded);

        if (!curnormalized) {
            const fac = AbstractResult.#find_size_factors(info.column_data, cursf);
            const normed = scran.logNormCounts(loaded, { allowZeros: true, sizeFactors: fac });
            output.matrix.add(name, normed);
        }

        output.features[name] = info.row_data;
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.cache=false] - Whether to cache the results for re-use in subsequent calls to this method or {@linkcode AbstractResult#load load}.
     * If `true`, users should consider calling {@linkcode AbstractResult#clear clear} to release the memory once this dataset instance is no longer needed.
     * If `false`, the caches are cleared before this method returns or throws.
     *
     * @return {object} Object containing the per-feature and per-cell annotations.
     * This has the following properties:
     *
     * - `modality_features`: an object where each key is a modality name and each value is a {@linkplain external:DataFrame DataFrame} of per-feature annotations for that modality.
     * - `cells`: a {@linkplain external:DataFrame DataFrame} of per-cell annotations.
     * - `modality_assay_names`: an object where each key is a modality name and each value is an Array containing the names of available assays for that modality.
     *   Unnamed assays are represented as `null` names.
     * - `reduced_dimension_names`: an Array of strings containing names of dimensionality reduction results.
     * - `other_metadata`: an object containing other metadata.
     *
     * @async
     */
    async summary({ cache = false } = {}) {
        try {
            const comp = await this.#load_components();
            const main_name = this.#get_main_name(comp);

            const features = { [main_name]: comp.row_data };
            const assays = { [main_name]: comp.assay_names };
            if ("alternative_experiments" in comp) {
                for (const { name, experiment } of comp.alternative_experiments) {
                    features[name] = experiment.row_data;
                    assays[name] = experiment.assay_names;
                }
            }

            return {
                modality_features: features,
                cells: comp.column_data,
                modality_assay_names: assays,
                reduced_dimension_names: comp.reduced_dimension_names,
                other_metadata: comp.metadata,
            };
        } finally {
            // Release the caches on failure as well as on success.
            if (!cache) {
                this.clear();
            }
        }
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean} [options.cache=false] - Whether to cache the results for re-use in subsequent calls to this method or {@linkcode AbstractResult#summary summary}.
     * If `true`, users should consider calling {@linkcode AbstractResult#clear clear} to release the memory once this dataset instance is no longer needed.
     * If `false`, the caches are cleared before this method returns or throws.
     *
     * @return {object} Object containing the loaded results.
     * This has the following properties:
     *
     * - `features`: an object where each key is a modality name and each value is a {@linkplain external:DataFrame DataFrame} of per-feature annotations for that modality.
     * - `cells`: a {@linkplain external:DataFrame DataFrame} containing per-cell annotations.
     * - `matrix`: a {@linkplain external:MultiMatrix MultiMatrix} containing one {@linkplain external:ScranMatrix ScranMatrix} per modality.
     * - `reduced_dimensions`: an object containing the dimensionality reduction results.
     *   Each value is an array of arrays, where each inner array contains the coordinates for one dimension.
     * - `other_metadata`: an object containing other metadata.
     *
     * @async
     */
    async load({ cache = false } = {}) {
        try {
            const comp = await this.#load_components();
            const main_name = this.#get_main_name(comp);

            const output = {
                matrix: new scran.MultiMatrix(),
                features: {},
                cells: comp.column_data,
                reduced_dimensions: {},
                other_metadata: comp.metadata,
            };

            try {
                if ("reduced_dimension_names" in comp) {
                    // A null/undefined option means "load everything listed in the file".
                    const reddims = this.#options.reducedDimensionNames ?? comp.reduced_dimension_names;
                    for (const k of reddims) {
                        output.reduced_dimensions[k] = (await readReducedDimensions(this.#path, k, this.#navigator)).values;
                    }
                }

                // Now fetching the assay matrix. Options are captured once so that every modality
                // is processed with the same settings.
                const settings = {
                    assay: this.#options.primaryAssay,
                    normalized: this.#options.isPrimaryNormalized,
                    sizeFactors: this.#options.sizeFactors,
                };

                await this.#add_experiment(main_name, comp, settings, output);
                if ("alternative_experiments" in comp) {
                    for (const { name, experiment } of comp.alternative_experiments) {
                        await this.#add_experiment(name, experiment, settings, output);
                    }
                }
            } catch (e) {
                // Don't leak matrices that were already loaded for earlier modalities.
                scran.free(output.matrix);
                throw e;
            }

            return output;
        } finally {
            // Release the caches on failure as well as on success.
            if (!cache) {
                this.clear();
            }
        }
    }
}