import * as utils from "./utils.js";
import * as gc from "./gc.js";
import { RunPcaResults } from "./runPca.js";
/**
 * Wrapper around the k-means clustering results on the Wasm heap, produced by {@linkcode clusterKmeans}.
 *
 * Each array/scalar result carries a private "filled" flag.
 * Getters return `null` for a result whose flag is still unset, unless the caller asks for a fillable array.
 * @hideconstructor
 */
export class ClusterKmeansResults {
    #id;
    #results;

    // One flag per result, recording whether it holds meaningful values yet.
    #filledClusters;
    #filledSizes;
    #filledCenters;
    #filledWcss;
    #filledIterations;
    #filledStatus;

    /**
     * @param {number} id - Identifier later handed to `gc.release()` by {@linkcode ClusterKmeansResults#free free}.
     * @param {object} raw - Underlying results object that every method delegates to.
     * @param {boolean} [filled=true] - Initial value of all "filled" flags.
     */
    constructor(id, raw, filled = true) {
        this.#id = id;
        this.#results = raw;

        this.#filledClusters = filled;
        this.#filledSizes = filled;
        this.#filledCenters = filled;
        this.#filledWcss = filled;
        this.#filledIterations = filled;
        this.#filledStatus = filled;
    }

    /**
     * @return {number} Number of cells in the results.
     */
    numberOfCells() {
        return this.#results.num_obs();
    }

    /**
     * @return {number} Number of clusters in the results.
     */
    numberOfClusters() {
        return this.#results.num_clusters();
    }

    /**
     * @param {number} iterations - Number of iterations.
     * @return The specified number of iterations is set in this object.
     * Typically only used after {@linkcode emptyClusterKmeansResults}.
     */
    setIterations(iterations) {
        // Mark as filled first, so the flag is set even if the setter below throws.
        this.#filledIterations = true;
        this.#results.set_iterations(iterations);
    }

    /**
     * @param {number} status - Status of the k-means clustering.
     * @return The status is set in this object.
     * Typically only used after {@linkcode emptyClusterKmeansResults}.
     */
    setStatus(status) {
        // Mark as filled first, so the flag is set even if the setter below throws.
        this.#filledStatus = true;
        this.#results.set_status(status);
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean|string} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @param {boolean} [options.fillable=false] - Whether to return a fillable array, to write to this object.
     * If `true`, this method automatically sets `copy = false` if `copy` was previously true.
     * If `false` and the array was not previously filled, `null` is returned.
     *
     * @return {?(Int32Array|Int32WasmArray)} Array containing the cluster assignment for each cell.
     * Alternatively `null`, if `fillable = false` and the array was not already filled.
     */
    clusters({ copy = true, fillable = false } = {}) {
        return utils.checkFillness(
            fillable,
            copy,
            this.#filledClusters,
            () => { this.#filledClusters = true; },
            COPY => utils.possibleCopy(this.#results.clusters(), COPY),
            "clusters"
        );
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean|string} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @param {boolean} [options.fillable=false] - Whether to return a fillable array, to write to this object.
     * If `true`, this method automatically sets `copy = false` if `copy` was previously true.
     * If `false` and the array was not previously filled, `null` is returned.
     *
     * @return {?(Int32Array|Int32WasmArray)} Array containing the number of cells in each cluster.
     * Alternatively `null`, if `fillable = false` and the array was not already filled.
     */
    clusterSizes({ copy = true, fillable = false } = {}) {
        return utils.checkFillness(
            fillable,
            copy,
            this.#filledSizes,
            () => { this.#filledSizes = true; },
            COPY => utils.possibleCopy(this.#results.cluster_sizes(), COPY),
            "clusterSizes"
        );
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean|string} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @param {boolean} [options.fillable=false] - Whether to return a fillable array, to write to this object.
     * If `true`, this method automatically sets `copy = false` if `copy` was previously true.
     * If `false` and the array was not previously filled, `null` is returned.
     *
     * @return {?(Float64Array|Float64WasmArray)} Array containing the within-cluster sum of squares in each cluster.
     * Alternatively `null`, if `fillable = false` and the array was not already filled.
     */
    withinClusterSumSquares({ copy = true, fillable = false } = {}) {
        return utils.checkFillness(
            fillable,
            copy,
            this.#filledWcss,
            () => { this.#filledWcss = true; },
            COPY => utils.possibleCopy(this.#results.wcss(), COPY),
            // Method name, supplied for consistency with clusters() and clusterSizes().
            "withinClusterSumSquares"
        );
    }

    /**
     * @param {object} [options={}] - Optional parameters.
     * @param {boolean|string} [options.copy=true] - Whether to copy the results from the Wasm heap, see {@linkcode possibleCopy}.
     * @param {boolean} [options.fillable=false] - Whether to return a fillable array, to write to this object.
     * If `true`, this method automatically sets `copy = false` if `copy` was previously true.
     * If `false` and the array was not previously filled, `null` is returned.
     *
     * @return {?(Float64Array|Float64WasmArray)} Array containing the cluster centers in column-major format,
     * where rows are dimensions and columns are the clusters.
     * Alternatively `null`, if `fillable = false` and the array was not already filled.
     */
    clusterCenters({ copy = true, fillable = false } = {}) {
        return utils.checkFillness(
            fillable,
            copy,
            this.#filledCenters,
            () => { this.#filledCenters = true; },
            COPY => utils.possibleCopy(this.#results.centers(), COPY),
            // Method name, supplied for consistency with clusters() and clusterSizes().
            "clusterCenters"
        );
    }

    /**
     * @return {?number} Number of refinement iterations performed by the algorithm.
     * Alternatively `null`, if this value has not been filled by {@linkcode ClusterKmeansResults#setIterations setIterations}.
     */
    iterations() {
        return this.#filledIterations ? this.#results.iterations() : null;
    }

    /**
     * @return {?number} Status of the algorithm - anything other than zero usually indicates a problem with convergence.
     * Alternatively `null`, if this value has not been filled by {@linkcode ClusterKmeansResults#setStatus setStatus}.
     */
    status() {
        return this.#filledStatus ? this.#results.status() : null;
    }

    /**
     * @return Frees the memory allocated on the Wasm heap for this object.
     * This invalidates this object and all references to it.
     * Calling this method more than once is a no-op after the first call.
     */
    free() {
        if (this.#results !== null) {
            gc.release(this.#id);
            // Drop the handle so that repeated calls do not release twice.
            this.#results = null;
        }
    }
}
/**
 * Cluster cells using k-means.
 *
 * @param {(RunPcaResults|Float64WasmArray|Array|TypedArray)} x - Numeric coordinates of each cell in the dataset.
 * Array inputs should be column-major, with variables in the rows and cells in the columns.
 * When a {@linkplain RunPcaResults} is supplied, its principal components are used as the coordinates.
 * @param {number} clusters - Number of clusters to create.
 * This should not be greater than the number of cells.
 * @param {object} [options={}] - Optional parameters.
 * @param {?number} [options.numberOfDims=null] - Number of variables/dimensions per cell.
 * Only used (and required) for array-like `x`.
 * @param {?number} [options.numberOfCells=null] - Number of cells.
 * Only used (and required) for array-like `x`.
 * @param {string} [options.initMethod="pca-part"] - Initialization method.
 * Setting `"random"` will randomly select `clusters` cells as centers.
 * Setting `"kmeans++"` will use the weighted sampling approach of Arthur and Vassilvitskii (2007).
 * Setting `"pca-part"` will use PCA partitioning.
 * @param {number} [options.initSeed=5768] - Seed to use for random number generation during initialization.
 * @param {number} [options.initPCASizeAdjust=1] - Adjustment factor for the cluster sizes, used when `initMethod = "pca-part"`.
 * Larger values (up to 1) will prioritize partitioning of clusters with more cells.
 * @param {?number} [options.numberOfThreads=null] - Number of threads to use.
 * If `null`, defaults to {@linkcode maximumThreads}.
 *
 * @return {ClusterKmeansResults} Object containing the clustering results.
 */
export function clusterKmeans(x, clusters, { numberOfDims = null, numberOfCells = null, initMethod = "pca-part", initSeed = 5768, initPCASizeAdjust = 1, numberOfThreads = null } = {}) {
    const nthreads = utils.chooseNumberOfThreads(numberOfThreads);

    // Both are declared outside the 'try' so the cleanup clauses can see them.
    let staged;
    let results;

    try {
        let coordsPtr;

        if (!(x instanceof RunPcaResults)) {
            // Array-like input: the caller has to describe the matrix shape.
            const shapeMissing = numberOfDims === null || numberOfCells === null;
            if (shapeMissing) {
                throw new Error("'numberOfDims' and 'numberOfCells' must be specified when 'x' is an Array");
            }

            staged = utils.wasmifyArray(x, "Float64WasmArray");
            const expectedLength = numberOfDims * numberOfCells;
            if (staged.length != expectedLength) {
                throw new Error("length of 'x' must be the product of 'numberOfDims' and 'numberOfCells'");
            }

            coordsPtr = staged.offset;
        } else {
            // PCA input: take the shape from the results and point at the uncopied principal components.
            numberOfDims = x.numberOfPCs();
            numberOfCells = x.numberOfCells();
            const components = x.principalComponents({ copy: false });
            coordsPtr = components.byteOffset;
        }

        const runKmeans = wasm => wasm.cluster_kmeans(
            coordsPtr,
            numberOfDims,
            numberOfCells,
            clusters,
            initMethod,
            initSeed,
            initPCASizeAdjust,
            nthreads
        );
        results = gc.call(runKmeans, ClusterKmeansResults);

    } catch (err) {
        // Release whatever was produced before the failure, then propagate the error.
        utils.free(results);
        throw err;

    } finally {
        // The staging buffer is only needed for the duration of the call.
        utils.free(staged);
    }

    return results;
}