/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
import { MLTelemetry } from "chrome://global/content/ml/MLTelemetry.sys.mjs";
/**
* @import { MLEngineChild } from "./MLEngineChild.sys.mjs"
* @import { RemoteSettingsClient } from "resource://services-settings/RemoteSettingsClient.sys.mjs"
* @import {
* ChunkResponse,
* EngineFeatureIds,
* EngineResponses,
* EngineRequests,
* ParsedModelHubUrl,
* RecordsML,
* RemoteSettingsInferenceOptions,
* StatusByEngineId,
* SyncEvent,
* } from "../ml.d.ts"
* @import { WasmRecord } from "../../translations/translations.d.ts"
* @import { ModelHub } from "chrome://global/content/ml/ModelHub.sys.mjs"
* @import { ProgressAndStatusCallbackParams } from "chrome://global/content/ml/Utils.sys.mjs"
* @import { PipelineOptions } from "chrome://global/content/ml/EngineProcess.sys.mjs";
*/
/**
 * Lazily-resolved modules, XPCOM services, and preferences used by this actor.
 *
 * NOTE: `RemoteSettings` and `Utils` are required by #getRemoteClient() and
 * downloadRSAttachment() respectively; without these declarations the
 * references to `lazy.RemoteSettings` / `lazy.Utils` below would be undefined
 * at runtime.
 */
const lazy = XPCOMUtils.declareLazy({
  TranslationsParent: "resource://gre/actors/TranslationsParent.sys.mjs",
  RemoteSettings: "resource://services-settings/remote-settings.sys.mjs",
  Utils: "resource://services-settings/Utils.sys.mjs",
  setTimeout: "resource://gre/modules/Timer.sys.mjs",
  clearTimeout: "resource://gre/modules/Timer.sys.mjs",
  ModelHub: "chrome://global/content/ml/ModelHub.sys.mjs",
  Progress: "chrome://global/content/ml/Utils.sys.mjs",
  OPFS: "chrome://global/content/ml/OPFS.sys.mjs",
  BACKENDS: "chrome://global/content/ml/EngineProcess.sys.mjs",
  stringifyForLog: "chrome://global/content/ml/Utils.sys.mjs",
  console: () =>
    console.createInstance({
      maxLogLevelPref: "browser.ml.logLevel",
      prefix: "GeckoMLEngineParent",
    }),
  mlUtils: { service: "@mozilla.org/ml-utils;1", iid: Ci.nsIMLUtils },
  CHECK_FOR_MEMORY: { pref: "browser.ml.checkForMemory" },
  MINIMUM_PHYSICAL_MEMORY: { pref: "browser.ml.minimumPhysicalMemory" },
});
// One gibibyte, in bytes. Used for memory-requirement math and log formatting.
const ONE_GiB = 1024 * 1024 * 1024;

// Remote Settings collection holding the inference runtime wasm attachments.
const RS_RUNTIME_COLLECTION = "ml-onnx-runtime";

// Remote Settings collection holding per-feature/per-task inference options.
const RS_INFERENCE_OPTIONS_COLLECTION = "ml-inference-options";

// Remote Settings collection holding the model allow/deny list.
const RS_ALLOW_DENY_COLLECTION = "ml-model-allow-deny-list";

// How long (ms) to wait for an engine to terminate gracefully.
const TERMINATE_TIMEOUT = 5000;

// Fallback CDN base URL for Remote Settings attachments, used when
// lazy.Utils.baseAttachmentsURL() cannot be resolved (see
// downloadRSAttachment). The original value was lost in this copy of the
// file; restored from the Remote Settings attachments CDN.
const RS_FALLBACK_BASE_URL =
  "https://firefox-settings-attachments.cdn.mozilla.net";

// Root directory in OPFS under which runtime wasm files are cached.
const RUNTIME_ROOT_IN_OPFS = "mlRuntimeFiles";
/**
* Custom error class for handling insufficient memory errors.
*
* @augments Error
*/
export class NotEnoughMemoryError extends Error {
  /**
   * Creates an instance of NotEnoughMemoryError.
   *
   * @param {object} options - The error options.
   * @param {string} [options.message="Not enough physical memory available"] - The error message.
   * @param {number} options.requiredMemory - The amount of memory required in bytes.
   * @param {number} options.availableMemory - The amount of available memory in bytes.
   */
  constructor({
    message = "Not enough physical memory available",
    requiredMemory,
    availableMemory,
  }) {
    super(message);
    this.name = "NotEnoughMemoryError";
    // Record the byte counts so callers (and getDetails) can report how far
    // short the machine fell.
    this.requiredMemory = requiredMemory;
    this.availableMemory = availableMemory;
    // TypeScript doesn't support this yet.
    // @ts-expect-error - Property 'captureStackTrace' does not exist on type 'ErrorConstructor'.ts(2339)
    Error.captureStackTrace(this, this.constructor);
  }

  /**
   * Returns a formatted string with details about the memory issue.
   *
   * @returns {string} A string describing the required and available memory in GiB.
   */
  getDetails() {
    // Convert a byte count into a two-decimal GiB string.
    const inGiB = bytes => (bytes / ONE_GiB).toFixed(2);
    return `Required Memory: ${inGiB(this.requiredMemory)} GiB, Available Memory: ${inGiB(this.availableMemory)} GiB`;
  }
}
/**
* The ML engine is in its own content process. This actor handles the
* marshalling of the data such as the engine payload.
*/
export class MLEngineParent extends JSProcessActorParent {
  /**
   * The RemoteSettingsClient that downloads the wasm binaries.
   *
   * @type {Record<string, RemoteSettingsClient>}
   */
  static #remoteClients = {};

  /** @type {Record<string, Promise<WasmRecord> | null>} */
  static #wasmRecord = {};

  /**
   * Locks to prevent race conditions when creating engines.
   *
   * @type {Map<string, Promise<void>>}
   */
  static engineLocks = new Map();

  /**
   * AbortSignal to potentially cancel the engine creation.
   *
   * @type {Map<string, ?AbortSignal>}
   */
  static engineCreationAbortSignal = new Map();

  /**
   * AbortControllers used to cancel ongoing operations initiated by the engine worker
   * but not owned by it (e.g. operations executed in the parent process such as
   * model downloads). This ensures that when the engine is terminated, any related
   * in-flight work started on its behalf can also be aborted.
   *
   * Key: engineId
   *
   * @type {Map<string, AbortController>}
   */
  static engineCreationAbortControllers = new Map();

  /**
   * The ChildID gets set by EngineProcess.sys.mjs
   *
   * @type {null | number}
   */
  childID = null;

  /**
   * The following constant controls the major and minor version for onnx wasm downloaded from
   * Remote Settings.
   *
   * When a breaking change is introduced, increment this value and add a corresponding version
   *
   * onnx:
   * - 1 => Transformers 2.x
   * - 2 => Transformers < 3.1
   * - 3 => Transformers < 3.4
   * - 4 => Transformers >= 3.4.0
   * - 5 => Transformers >= 3.5.1
   *
   * wllama:
   * - 3 => wllama 2.2.x
   * - 4 => wllama 2.3.x
   *
   * @type {Record<string, number>}
   */
  static WASM_MAJOR_VERSION = {
    [lazy.BACKENDS.onnx]: 5,
    [lazy.BACKENDS.wllama]: 4,
  };

  /**
   * This wasm file supports CPU, WebGPU and WebNN.
   *
   * Since SIMD is supported by all major JavaScript engines, non-SIMD build is no longer provided.
   * We also serve the threaded build since we can simply set numThreads to 1 to disable multi-threading.
   *
   * @type {Record<string, string>}
   */
  static WASM_FILENAME = {
    [lazy.BACKENDS.onnx]: "ort-wasm-simd-threaded.jsep.wasm",
    [lazy.BACKENDS.wllama]: "wllama.wasm",
  };

  /**
   * The default backend to use when none is specified.
   */
  static DEFAULT_BACKEND = lazy.BACKENDS.onnx;

  /**
   * The modelhub used to retrieve files.
   *
   * @type {ModelHub | null}
   */
  modelHub = null;

  /**
   * Tracks the most recent revision for each task and model pair that are marked for deletion.
   * Keys are task names and model names. Values contain their respective revisions.
   *
   * @type {Map<string, ParsedModelHubUrl & { taskName: string }>}
   */
  #modelFilesInUse = new Map();

  /**
   * The callback to call for updating about notifications such as download progress status.
   *
   * @type {?function(ProgressAndStatusCallbackParams):void}
   */
  notificationsCallback = null;

  /**
   * Set by EngineProcess when creating the MLEngineParent.
   * Keeps the "inference" process alive until it is cleared.
   *
   * NOTE: Invalidating this keepAlive does not guarantee that the process will
   * exit, and this actor may be re-used if it does not (e.g. because the
   * inference process was kept alive by TranslationsEngine).
   *
   * @type {nsIContentParentKeepAlive | null}
   */
  processKeepAlive = null;

  /**
   * Remote settings isn't available in tests, so provide mocked responses.
   *
   * @param {Record<string, RemoteSettingsClient>} remoteClients
   */
  static mockRemoteSettings(remoteClients) {
    lazy.console.log("Mocking remote settings in MLEngineParent.");
    MLEngineParent.#remoteClients = remoteClients;
    MLEngineParent.#wasmRecord = {};
  }

  /**
   * Remove anything that could have been mocked.
   */
  static removeMocks() {
    lazy.console.log("Removing mocked remote client in MLEngineParent.");
    MLEngineParent.#remoteClients = {};
    MLEngineParent.#wasmRecord = {};
  }

  /**
   * Creates a new MLEngine.
   *
   * @template {EngineFeatureIds} FeatureId
   *
   * If there's an existing engine with the same pipelineOptions, it will be reused.
   *
   * @param {object} params Parameters object.
   * @param {PipelineOptions} params.pipelineOptions
   * @param {?function(ProgressAndStatusCallbackParams):void} params.notificationsCallback A function to call to indicate progress status.
   * @param {?AbortSignal} params.abortSignal - AbortSignal to cancel the download.
   * @returns {Promise<MLEngine<FeatureId>>}
   */
  async getEngine({ pipelineOptions, notificationsCallback, abortSignal }) {
    if (
      lazy.CHECK_FOR_MEMORY &&
      lazy.mlUtils.totalPhysicalMemory < lazy.MINIMUM_PHYSICAL_MEMORY * ONE_GiB
    ) {
      throw new NotEnoughMemoryError({
        availableMemory: lazy.mlUtils.totalPhysicalMemory,
        requiredMemory: lazy.MINIMUM_PHYSICAL_MEMORY * ONE_GiB,
      });
    }
    const { featureId, engineId } = pipelineOptions;
    if (!engineId) {
      throw new Error("Expected to receive an engineId in the PipelineOptions");
    }
    // Allow notifications callback changes even when reusing engine.
    this.notificationsCallback = notificationsCallback;
    if (MLEngineParent.engineLocks.has(engineId)) {
      // Wait for the existing lock to resolve
      await MLEngineParent.engineLocks.get(engineId);
    }
    /** @type {PromiseWithResolvers<void>} */
    const { promise: lockPromise, resolve: resolveLock } =
      Promise.withResolvers();
    MLEngineParent.engineLocks.set(engineId, lockPromise);

    /** @type {?AbortController} */
    // Parent-owned controller used to cancel engine-related operations
    // (e.g. model downloads) when the engine is terminated.
    let abortController = null;

    try {
      const currentEngine = MLEngine.getInstance(engineId);
      if (currentEngine) {
        if (
          currentEngine.pipelineOptions.equals(pipelineOptions) &&
          currentEngine.engineStatus === "ready"
        ) {
          lazy.console.debug(`Reusing existing engine for ${engineId}`);
          // Coerce the return type since we can't key off of the engineId here.
          return /** @type {MLEngine<FeatureId>} */ (currentEngine);
        }
        lazy.console.debug(`Replacing existing engine for ${engineId}`);
        try {
          Services.obs.removeObserver(currentEngine, "ipc:content-shutdown");
        } catch (e) {
          lazy.console.error("Failed to remove observer", e);
        }
        await MLEngine.removeInstance(
          engineId,
          /* shutdown */ false,
          /* replacement */ true
        );
      }

      const start = ChromeUtils.now();

      // Parent-owned controller used to cancel engine-related operations
      // (e.g. model downloads) if the engine is terminated during creation.
      abortController = new AbortController();

      // Allow cancellation from either the parent or an optional caller-provided signal.
      const signals = [abortController.signal, abortSignal].filter(s =>
        AbortSignal.isInstance(s)
      );

      // Signal passed to operations initiated by the engine.
      MLEngineParent.engineCreationAbortSignal.set(
        engineId,
        AbortSignal.any(signals)
      );

      // Keep the controller so the parent can trigger cancellation.
      MLEngineParent.engineCreationAbortControllers.set(
        engineId,
        abortController
      );

      /** @type {MLEngine<any>} */
      const engine = await MLEngine.initialize({
        mlEngineParent: this,
        pipelineOptions,
        notificationsCallback,
      });

      // engine will observe ipc:content-shutdown to get notified if the inference process crashes
      Services.obs.addObserver(engine, "ipc:content-shutdown");

      const creationTime = ChromeUtils.now() - start;
      engine.telemetry.recordEngineCreationSuccessFlow({
        engineId,
        duration: creationTime,
      });

      // TODO - What happens if the engine is already killed here?
      return engine;
    } catch (error) {
      // Abort any pending operations as the engine creation failed
      abortController?.abort();
      const { modelId, taskName, flowId } = pipelineOptions;
      const telemetry = new MLTelemetry({ featureId, flowId });
      telemetry.recordEngineCreationFailure({
        modelId,
        featureId,
        taskName,
        engineId,
        error,
      });
      throw error;
    } finally {
      MLEngineParent.engineLocks.delete(engineId);
      resolveLock();
    }
  }

  /**
   * Validates a taskName
   *
   * Throws an exception if the task name is invalid.
   *
   * @param {string} taskName
   */
  checkTaskName(taskName) {
    // Define a regular expression to verify taskName pattern (alphanumeric and underscores/dashes)
    const validTaskNamePattern = /^[a-zA-Z0-9_\-]+$/;

    // Check if taskName matches the pattern
    if (!validTaskNamePattern.test(taskName)) {
      // Handle invalid taskName, e.g., throw an error or return null
      throw new Error(
        "Invalid task name. Task name should contain only alphanumeric characters and underscores/dashes."
      );
    }
  }

  /**
   * @see {MLEngineChild#sendQuery} for the message shapes.
   * @param {any} message
   */
  // eslint-disable-next-line consistent-return
  async receiveMessage(message) {
    switch (message.name) {
      case "MLEngine:GetWasmArrayBuffer":
        return MLEngineParent.getWasmArrayBuffer(message.data);

      case "MLEngine:GetModelFile":
        return this.getModelFile(message.data);

      case "MLEngine:NotifyModelDownloadComplete":
        return this.notifyModelDownloadComplete(message.data);

      case "MLEngine:GetWorkerConfig":
        return MLEngineParent.getWorkerConfig();

      case "MLEngine:ChooseBestBackend":
        return MLEngineParent.chooseBestBackend(message.data);

      case "MLEngine:DestroyEngineProcess":
        if (this.processKeepAlive) {
          ChromeUtils.addProfilerMarker(
            "EngineProcess",
            {},
            `Dropping MLEngine "inference" process keep-alive`
          );
          this.processKeepAlive.invalidateKeepAlive();
          this.processKeepAlive = null;
        }
        return null;

      case "MLEngine:GetInferenceOptions":
        this.checkTaskName(message.json.taskName);
        return MLEngineParent.getInferenceOptions(
          message.json.featureId,
          message.json.taskName,
          message.json.modelId
        );

      case "MLEngine:Removed":
        if (!message.json.replacement) {
          try {
            // when receiving this message from the child, we know it's not a replacement.
            await MLEngine.removeInstance(
              message.json.engineId,
              message.json.shutdown,
              /* replacement */ false
            );
          } catch (e) {
            lazy.console.error("Failed to remove instance", e);
          }
        }
        return null;
    }
  }

  /**
   * Deletes all previous revisions for the current task and model used by the engine.
   *
   * @returns {Promise<void>}
   */
  async deletePreviousModelRevisions() {
    if (this.modelHub === null) {
      lazy.console.debug(
        "Ignored attempt to delete previous models when the engine is not fully initialized."
      );
      return;
    }
    const modelHub = this.modelHub;
    await Promise.all(
      [...this.#modelFilesInUse].map(async ([key, entry]) => {
        await modelHub.deleteNonMatchingModelRevisions(
          entry.modelWithHostname,
          entry.taskName,
          entry.revision
        );
        this.#modelFilesInUse.delete(key);
      })
    );
  }

  /**
   * Retrieves a model file from the specified URL.
   * This function normalizes the URL, extracts the organization, model name, and file path,
   * then fetches the model file using the ModelHub API. The `modelHub` instance is created
   * only once and reused for subsequent calls to optimize performance.
   *
   * @param {object} config
   * @param {string} config.engineId - The engine id.
   * @param {string} config.taskName - name of the inference task.
   * @param {string} config.url - The URL of the model file to fetch. Can be a path relative to
   * the model hub root or an absolute URL.
   * @param {string} config.rootUrl - The URL of the model file to fetch. Can be a path relative to
   * the model hub root or an absolute URL.
   * @param {string} config.urlTemplate - The URL of the model file to fetch. Can be a path relative to
   * the model hub root or an absolute URL.
   * @param {string} config.featureId - The engine id.
   * @param {string} config.sessionId - Shared across the same model download session.
   * @returns {Promise<[string, object]>} The file local path and headers
   */
  async getModelFile({
    engineId,
    taskName,
    url,
    rootUrl,
    urlTemplate,
    featureId,
    sessionId,
  }) {
    let downloadStart = ChromeUtils.now();

    // Create the model hub instance if needed
    if (this.modelHub === null) {
      lazy.console.debug("Creating model hub instance");
      this.modelHub = new lazy.ModelHub({
        rootUrl,
        urlTemplate,
        allowDenyList: await MLEngineParent.getAllowDenyList(),
      });
    }

    if (url.startsWith(rootUrl)) {
      url = url.slice(rootUrl.length);
      // Make sure we get a front slash
      if (!url.startsWith("/")) {
        url = `/${url}`;
      }
    }

    // Parsing url to get model name, and file path.
    // if this errors out, it will be caught in the worker
    const parsedUrl = this.modelHub.parseUrl(url, { rootUrl, urlTemplate });

    const [data, headers] = await this.modelHub.getModelDataAsFile({
      engineId,
      taskName,
      model: parsedUrl.model,
      revision: parsedUrl.revision,
      file: parsedUrl.file,
      modelHubRootUrl: rootUrl,
      modelHubUrlTemplate: urlTemplate,
      progressCallback: this.notificationsCallback?.bind(this),
      abortSignal: MLEngineParent.engineCreationAbortSignal.get(engineId),
      featureId,
      sessionId,
    });

    // Keep the latest revision for each task, model
    this.#modelFilesInUse.set(`${taskName}-${parsedUrl.model}`, {
      taskName,
      ...parsedUrl,
    });

    const sizeMB = Math.round(headers.fileSize / (1024 * 1024));
    lazy.console.debug(
      `Model ${parsedUrl.model} was fetched from ${url}, size ${sizeMB}MiB`
    );
    ChromeUtils.addProfilerMarker(
      "MLEngineParent",
      { startTime: downloadStart },
      `Downloaded model ${parsedUrl.file}: ${sizeMB}MB`
    );

    return [data, headers];
  }

  /**
   * Notify that a model download is complete.
   *
   * @param {object} config
   * @param {string} config.engineId - The engine id.
   * @param {string} config.model - The model name (organization/name).
   * @param {string} config.revision - The model revision.
   * @param {string} config.featureId - The engine id.
   * @param {string} config.sessionId - Shared across the same model download session.
   * @returns {Promise<void>}
   */
  async notifyModelDownloadComplete({
    engineId,
    model,
    revision,
    featureId,
    sessionId,
  }) {
    if (this.modelHub === null) {
      lazy.console.debug(
        "Model hub instance not created, skipping notifyModelDownloadComplete"
      );
      return;
    }
    await this.modelHub.notifyModelDownloadComplete({
      engineId,
      sessionId,
      featureId,
      model,
      revision,
    });
  }

  /**
   * Gets the wasm file from remote settings.
   *
   * @param {RemoteSettingsClient} client
   * @param {string} backend - The ML engine for which the WASM buffer is requested.
   */
  static async #getWasmArrayRecord(client, backend) {
    const wasmRecords = /** @type {WasmRecord[]} */ (
      await lazy.TranslationsParent.getMaxSupportedVersionRecords(client, {
        filters: {
          name: MLEngineParent.WASM_FILENAME[
            backend || MLEngineParent.DEFAULT_BACKEND
          ],
        },
        minSupportedMajorVersion:
          MLEngineParent.WASM_MAJOR_VERSION[
            backend || MLEngineParent.DEFAULT_BACKEND
          ],
        maxSupportedMajorVersion:
          MLEngineParent.WASM_MAJOR_VERSION[
            backend || MLEngineParent.DEFAULT_BACKEND
          ],
      })
    );

    if (wasmRecords.length === 0) {
      // The remote settings client provides an empty list of records when there is
      // an error.
      throw new Error("Unable to get the ML engine from Remote Settings.");
    }

    if (wasmRecords.length > 1) {
      lazy.console.error(wasmRecords);
      throw new Error("Expected the ml engine to only have 1 record.");
    }
    const [record] = wasmRecords;
    lazy.console.debug(
      `Using runtime ${record.name}@${record.version}`,
      record
    );
    return record;
  }

  /**
   * Gets the configuration of the worker
   */
  static getWorkerConfig() {
    return {
      url: "chrome://global/content/ml/MLEngine.worker.mjs",
      options: { type: "module" },
    };
  }

  /**
   * Selects the most appropriate backend for the current environment.
   *
   * @static
   * @param {string} backend - Requested backend or an auto-select sentinel.
   * @returns {string} Resolved backend identifier.
   */
  static chooseBestBackend(backend) {
    let bestBackend = backend;
    if (backend === lazy.BACKENDS.bestLlama) {
      bestBackend = lazy.BACKENDS.wllama;
      if (lazy.mlUtils?.canUseLlamaCpp()) {
        bestBackend = lazy.BACKENDS.llamaCpp;
      }
      lazy.console.debug(
        `The best available llama backend detected for this machine is ${bestBackend}`
      );
    }

    ChromeUtils.addProfilerMarker(
      "MLEngineParent",
      null,
      `Backend selected: ${bestBackend} (requested: ${backend})`
    );

    return bestBackend;
  }

  /**
   * Gets the allow/deny list from remote settings
   *
   * @returns {Promise<RecordsML["ml-model-allow-deny-list"][]>}
   */
  static async getAllowDenyList() {
    return /** @type {Promise<RecordsML["ml-model-allow-deny-list"][]>} */ (
      MLEngineParent.#getRemoteClient(RS_ALLOW_DENY_COLLECTION).get()
    );
  }

  /**
   * Gets the inference options from remote settings given a feature id or task name.
   *
   * Each feature can store default options in Remote Settings.
   *
   * We fallback to taskName if there is no featureId provided.
   *
   * @param {string} featureId - id of the feature
   * @param {string} taskName - name of the inference task
   * @param {string|null} modelId - name of the model id
   * @returns {Promise<RemoteSettingsInferenceOptions | { runtimeFilename: string }>}
   */
  static async getInferenceOptions(featureId, taskName, modelId) {
    const client = MLEngineParent.#getRemoteClient(
      RS_INFERENCE_OPTIONS_COLLECTION
    );

    /** @type {Record<string, string>} */
    const filters = featureId ? { featureId } : { taskName };
    if (modelId) {
      filters.modelId = modelId;
    }

    /** @type {Array<RecordsML["ml-inference-options"]>} */
    let records = /** @type {any[]} */ (await client.get({ filters }));

    // If no records found and we searched by featureId, retry with taskName
    if (records.length === 0 && featureId) {
      lazy.console.debug(`No record for feature id "${featureId}"`);
      /** @type {Record<string, string>} */
      const fallbackFilters = { taskName };
      if (modelId) {
        fallbackFilters.modelId = modelId;
      }
      records = /** @type {any} */ (
        await client.get({ filters: fallbackFilters })
      );
      lazy.console.debug(`fallbackFilters: "${fallbackFilters}"`);
    }

    // Handle case where multiple records exist
    if (records.length > 1) {
      throw new Error(
        `Found more than one inference options record for "${featureId}" and "${taskName}", and no matching modelId in pipelineOptions`
      );
    }

    // If still no records, return default runtime options
    if (records.length === 0) {
      return {
        runtimeFilename:
          MLEngineParent.WASM_FILENAME[MLEngineParent.DEFAULT_BACKEND],
      };
    }

    const options = records[0];
    return {
      modelRevision: options.modelRevision,
      modelId: options.modelId,
      tokenizerRevision: options.tokenizerRevision,
      tokenizerId: options.tokenizerId,
      processorRevision: options.processorRevision,
      processorId: options.processorId,
      dtype: options.dtype,
      numThreads: options.numThreads,
      runtimeFilename:
        MLEngineParent.WASM_FILENAME[
          options.backend || MLEngineParent.DEFAULT_BACKEND
        ],
    };
  }

  /**
   * Downloads and verifies a Remote Settings attachment.
   *
   * This method fetches a file from a remote base URL (either resolved from `lazy.Utils.baseAttachmentsURL()` or a fallback CDN),
   * verifies its hash and size, and returns its binary data as an `ArrayBuffer`.
   *
   * @param {object} options - The input options.
   * @param {WasmRecord} options.wasmRecord - wasm records
   * @param {string} options.localRoot - The root where to save the attachment.
   *
   * @returns {Promise<ArrayBuffer>} A promise that resolves to the downloaded file's binary content as an ArrayBuffer.
   *
   * @throws {Error} If the content hash of the downloaded file does not match the expected hash.
   */
  static async downloadRSAttachment({ wasmRecord, localRoot }) {
    const { attachment, version } = wasmRecord;
    const { location, filename, hash, size } = attachment;

    let baseURL = RS_FALLBACK_BASE_URL;
    try {
      baseURL = await lazy.Utils.baseAttachmentsURL();
    } catch (error) {
      // Use the file-wide logger for consistency with the rest of this actor.
      lazy.console.error(
        `Error fetching remote settings base url from CDN. Falling back to ${RS_FALLBACK_BASE_URL}`,
        error
      );
    }

    // Validate inputs
    let checkError = lazy.ModelHub.checkInput(localRoot, version, filename);
    if (checkError) {
      throw checkError;
    }

    const fileObject = await lazy.OPFS.download({
      // The cache path includes the attachment's filename (the `$(unknown)`
      // placeholder here was a transcription artifact).
      savePath: `${RUNTIME_ROOT_IN_OPFS}/${localRoot}/${version}/${filename}`,
      deletePreviousVersions: true,
      skipIfExists: true,
      source: baseURL + location,
      sha256Hash: hash,
      fileSize: size,
    });
    return fileObject.arrayBuffer();
  }

  /**
   * Download the wasm for the ML inference engine.
   *
   * @param {string} backend - The ML engine for which the WASM buffer is requested.
   * @returns {Promise<ArrayBuffer>}
   */
  static async getWasmArrayBuffer(backend) {
    const client = MLEngineParent.#getRemoteClient(RS_RUNTIME_COLLECTION);
    backend = backend || MLEngineParent.DEFAULT_BACKEND;

    if (!MLEngineParent.#wasmRecord[backend]) {
      // Place the records into a promise to prevent any races.
      MLEngineParent.#wasmRecord[backend] = MLEngineParent.#getWasmArrayRecord(
        client,
        backend
      );
    }

    let wasmRecord;
    try {
      wasmRecord = await MLEngineParent.#wasmRecord[backend];
      if (!wasmRecord) {
        // Reject with an Error (not a bare string) so callers get a stack.
        throw new Error("Unable to get the ML engine from Remote Settings.");
      }
    } catch (error) {
      MLEngineParent.#wasmRecord[backend] = null;
      throw error;
    }

    /** @type {ArrayBuffer} */
    let buffer;
    if (wasmRecord.attachment) {
      buffer = await MLEngineParent.downloadRSAttachment({
        wasmRecord,
        localRoot: backend,
      });
    } else {
      // fallback for mocked unit tests.
      // @ts-expect-error - This API is not well-typed.
      buffer = (await client.attachments.download(wasmRecord)).buffer;
    }
    return buffer;
  }

  /**
   * Lazily initializes the RemoteSettingsClient for the downloaded wasm binary data.
   *
   * @param {string} collectionName - The name of the collection to use.
   * @returns {RemoteSettingsClient}
   */
  static #getRemoteClient(collectionName) {
    if (MLEngineParent.#remoteClients[collectionName]) {
      return MLEngineParent.#remoteClients[collectionName];
    }

    /** @type {RemoteSettingsClient} */
    const client = lazy.RemoteSettings(collectionName, {
      bucketName: "main",
    });

    MLEngineParent.#remoteClients[collectionName] = client;

    client.on(
      "sync",
      /**
       * @param {object} param
       * @param {SyncEvent} param.data
       */
      async ({ data }) => {
        const { created, updated, deleted } = data;
        lazy.console.debug(`"sync" event for ${collectionName}`, {
          created,
          updated,
          deleted,
        });

        // Remove all the deleted records.
        for (const record of deleted) {
          await client.attachments.deleteDownloaded(record);
        }

        // Remove any updated records, and download the new ones.
        for (const { old: oldRecord } of updated) {
          await client.attachments.deleteDownloaded(oldRecord);
        }

        // Do nothing for the created records.
      }
    );

    return client;
  }

  /**
   * Goes through the engines and determines their status. This is used by about:inference
   * to display debug information about the engines.
   *
   * @see MLEngineChild#getStatusByEngineId
   *
   * @returns {Promise<StatusByEngineId>}
   */
  getStatusByEngineId() {
    return this.sendQuery("MLEngine:GetStatusByEngineId");
  }

  /**
   * Send a message to gracefully shutdown all of the ML engines in the engine process.
   * This mostly exists for testing the shutdown paths of the code.
   */
  forceShutdown() {
    lazy.console.debug("MLEngine:ForceShutdown called");
    return this.sendQuery("MLEngine:ForceShutdown");
  }
}
/**
* A utility class that manages a main promise for the full response
* and a sequence of chunk promises for incremental parts of the response.
*
* @template T
*/
class ResponseOrChunkResolvers {
  /**
   * Resolver for the main promise (full response).
   *
   * @type {PromiseWithResolvers<T>}
   */
  mainResolvers;

  /**
   * The main promise for the full response.
   *
   * @type {Promise<T>}
   */
  promise;

  /**
   * Index tracking the next chunk resolver to be returned.
   *
   * @type {number}
   */
  nextchunkResolverIdx = 0;

  /**
   * Array of resolvers for incremental chunk promises.
   *
   * @type {Array<PromiseWithResolvers<ProgressAndStatusCallbackParams>>}
   */
  chunkResolvers = [];

  /**
   * Sets up the resolver for the full response and seeds the chunk queue
   * with the resolver for the first incremental chunk.
   */
  constructor() {
    lazy.console.debug("Initializing ResponseOrChunkResolvers ...");
    this.mainResolvers = Promise.withResolvers();
    this.promise = this.mainResolvers.promise;
    // Seed the queue so the first chunk can be awaited immediately.
    this.chunkResolvers.push(Promise.withResolvers());
  }

  /**
   * Settles the main promise successfully with the complete response.
   *
   * @param {*} value - The complete response data.
   */
  resolve(value) {
    this.mainResolvers.resolve(value);
  }

  /**
   * Settles the main promise with a failure.
   *
   * @param {*} reason - The error explaining why the full response failed.
   */
  reject(reason) {
    this.mainResolvers.reject(reason);
  }

  /**
   * Hands out the promise for the next incremental chunk, advancing the
   * internal cursor so each call yields a fresh chunk promise.
   *
   * @returns {Promise<ProgressAndStatusCallbackParams>} The promise for the next chunk of data.
   */
  getAndAdvanceChunkPromise() {
    const idx = this.nextchunkResolverIdx++;
    return this.chunkResolvers[idx].promise;
  }

  /**
   * Settles the most recent chunk promise with data, after queueing a fresh
   * resolver so a subsequent chunk can be awaited.
   *
   * @param {ProgressAndStatusCallbackParams} value - A part of the response data.
   */
  resolveChunk(value) {
    // Grab the currently-pending resolver before extending the queue.
    const pending = this.chunkResolvers.at(-1);
    this.chunkResolvers.push(Promise.withResolvers());
    pending.resolve(value);
  }

  /**
   * Settles the most recent chunk promise with an error, after queueing a
   * fresh resolver so later chunks can still be awaited.
   *
   * @param {*} reason - The error associated with this chunk.
   */
  rejectChunk(reason) {
    // Grab the currently-pending resolver before extending the queue.
    const pending = this.chunkResolvers.at(-1);
    this.chunkResolvers.push(Promise.withResolvers());
    pending.reject(reason);
  }
}
/**
* The interface to communicate to an MLEngine in the parent process. The engine manages
* its own lifetime, and is kept alive with a timeout. A reference to this engine can
* be retained, but once idle, the engine will be destroyed. If a new request to run
* is sent, the engine will be recreated on demand. This balances the cost of retaining
* potentially large amounts of memory to run models, with the speed and ease of running
* the engine.
*
* @template {EngineFeatureIds} FeatureID
*/
export class MLEngine {
/**
* The cached engines.
*
* @type {Map<string, MLEngine<any>>}
*/
static #instances = new Map();
/**
* @type {MessagePort | null}
*/
#port = null;
/**
* A monotonically increasing ID to track requests.
*/
#nextRequestId = 0;
/**
* Tie together a message id to a resolved response.
*
* @type {Map<number, PromiseWithResolvers<EngineResponses[FeatureID]> | ResponseOrChunkResolvers<EngineResponses[keyof EngineResponses]>>}
*/
#requests = new Map();
/**
* @type {"uninitialized" | "ready" | "error" | "closed" | "crashed"}
*/
engineStatus = "uninitialized";
/**
* Unique identifier for the engine.
*
* @type {string}
*/
engineId;
/**
* Callback to call when receiving an initializing progress status.
*
* @type {?function(ProgressAndStatusCallbackParams):void}
*/
notificationsCallback = null;
/**
* Removes an instance of the MLEngine with the given engineId.
*
* @param {string} engineId - The ID of the engine instance to be removed.
* @param {boolean} shutdown - Flag indicating whether to shutdown the engine.
* @param {boolean} replacement - Flag indicating whether the engine is being replaced.
* @returns {Promise<void>} A promise that resolves once the engine is removed.
*/
static async removeInstance(engineId, shutdown, replacement) {
for (const [id, engine] of MLEngine.#instances.entries()) {
if (engine.engineId == engineId) {
lazy.console.debug(`Removing engine ${engineId}`);
await engine.terminate(shutdown, replacement);
// Abort any pending operations for the engine
MLEngineParent.engineCreationAbortControllers.get(engineId)?.abort();
MLEngine.#instances.delete(id);
lazy.console.debug(`Removed engine ${engineId}`);
}
}
}
/**
* Retrieves an instance of the MLEngine with the given engineId.
*
* @param {string} engineId - The ID of the engine instance to retrieve.
* @returns {MLEngine<unknown> | null} The engine instance with the given ID, or null if not found.
*/
static getInstance(engineId) {
return MLEngine.#instances.get(engineId) || null;
}
/**
* Private constructor for an ML Engine.
*
* @param {object} config - The configuration object for the instance.
* @param {MLEngineParent} config.mlEngineParent - The parent machine learning engine associated with this instance.
* @param {PipelineOptions} config.pipelineOptions - The options for configuring the pipeline associated with this instance.
* @param {?function(ProgressAndStatusCallbackParams):void} config.notificationsCallback - The initialization progress callback function to call.
*/
constructor({ mlEngineParent, pipelineOptions, notificationsCallback }) {
const engineId = pipelineOptions.engineId;
if (!engineId) {
throw new Error("Expected to have an engineId on PipelineOptions");
}
/** @type {Record<string, Array<(data: unknown) => void>>} */
this.events = {};
this.engineId = engineId;
MLEngine.#instances.set(engineId, this);
/** @type {MLEngineParent} */
this.mlEngineParent = mlEngineParent;
/** @type {PipelineOptions} */
this.pipelineOptions = pipelineOptions;
this.notificationsCallback = notificationsCallback;
this.telemetry = new MLTelemetry({
featureId: pipelineOptions.featureId,
flowId: pipelineOptions.flowId,
});
}
/**
 * Validates an inference request before sending to child process.
 *
 * Currently a pass-through hook: every request is accepted unchanged.
 * Kept as an extension point so request filtering can be added later
 * without touching the call sites in run() / runWithGenerator().
 *
 * @param {object} request - The request to validate
 * @returns {object|null} The validated request, or null if blocked
 */
#validateRequest(request) {
  return request;
}
/**
 * Validates an inference response after receiving from child process.
 *
 * Currently a pass-through hook that only logs the response: every
 * response is accepted unchanged. Callers treat a null return as a
 * security-validation failure, so future filtering can return null here.
 *
 * @param {object} response - The response to validate
 * @returns {object|null} The validated response, or null if blocked
 */
#validateResponse(response) {
  lazy.console.debug("[MLSecurity] Validating response:", response);
  return response;
}
/**
* Observes shutdown events from the child process.
*
* When the inference process is shutdown, we want to set the port to null and throw an error.
*
* @param {any} aSubject
* @param {string} aTopic
*/
observe(aSubject, aTopic) {
aSubject.QueryInterface(Ci.nsIPropertyBag2);
if (!aSubject.get("abnormal")) {
// ignoring normal events
return;
}
const childID = aSubject.get("childID");
if (
aTopic === "ipc:content-shutdown" &&
childID === this.mlEngineParent.childID
) {
const pid = aSubject.get("osPid");
lazy.console.error(
`Got abnormal shutdown of the inference process (pid=${pid})`
);
this.#port = null;
const err = new Error(
`The inference process was shutdown (pid=${pid}), childId=${childID}`
);
this.setEngineStatus("crashed");
throw err;
}
}
/**
* Initialize the MLEngine.
*
* @template {EngineFeatureIds} FeatureId
*
* @param {object} config - The configuration object for the instance.
* @param {MLEngineParent} config.mlEngineParent - The parent machine learning engine associated with this instance.
* @param {PipelineOptions} config.pipelineOptions - The options for configuring the pipeline associated with this instance.
* @param {?function(ProgressAndStatusCallbackParams):void} config.notificationsCallback - The initialization progress callback function to call.
* @returns {Promise<MLEngine<FeatureId>>}
*/
static async initialize({
mlEngineParent,
pipelineOptions,
notificationsCallback,
}) {
lazy.console.debug("Initializing ML engine", pipelineOptions);
/** @type {MLEngine<FeatureId>} */
const mlEngine = new MLEngine({
mlEngineParent,
pipelineOptions,
notificationsCallback,
});
/**
* Helper to ensure we never leave resources dangling if something fails.
*
* @param {unknown} err
*/
const hardTeardown = err => {
try {
mlEngine.setEngineStatus?.("error");
} catch {}
try {
mlEngine.#port?.close?.();
} catch {}
// If you also keep a child port reference inside the instance, close it here too.
// try { mlEngine.#childPort?.close?.(); } catch {}
mlEngine.#port = null;
return err;
};
try {
await mlEngine.setupPortCommunication();
try {
// Only attempt this after comms are up.
await mlEngine.mlEngineParent.deletePreviousModelRevisions();
} catch (err) {
// Treat model cleanup failure as fatal for a clean init.
let message = err;
if (err && typeof err === "object" && "message" in err && err.message) {
message = err.message;
}
throw hardTeardown(
new Error(`Failed to delete previous model revisions: ${message}`)
);
}
return mlEngine;
} catch (err) {
// setupPortCommunication already tries to clean up, but make this idempotent.
throw hardTeardown(err);
}
}
/**
* Registers an event listener for the specified event.
*
* @param {string} event - The name of the event.
* @param {(...args: any[]) => void} listener - The callback function to execute when the event is triggered.
*/
on(event, listener) {
if (!this.events[event]) {
this.events[event] = [];
}
this.events[event].push(listener);
}
/**
* Removes an event listener for the specified event.
*
* @param {string} event - The name of the event.
* @param {(...args: any) => void} listenerToRemove - The callback function to remove.
*/
off(event, listenerToRemove) {
if (!this.events[event]) {
return;
}
this.events[event] = this.events[event].filter(
listener => listener !== listenerToRemove
);
}
/**
* Emits the specified event, invoking all registered listeners with the provided data.
*
* @param {string} event - The name of the event.
* @param {any} data - The data to pass to the event listeners.
*/
emit(event, data) {
if (!this.events[event]) {
return;
}
this.events[event].forEach(listener => listener(data));
}
/**
 * Sets the engine status and emits a statusChanged event so listeners
 * (e.g. #waitForStatus) can react to the transition.
 *
 * @param {"uninitialized" | "ready" | "error" | "closed" | "crashed"} status - The new status of the engine.
 */
setEngineStatus(status) {
  this.engineStatus = status;
  this.emit("statusChanged", status);
}
/**
* Create a MessageChannel to communicate with the engine directly.
* And ensure the engine is fully initialized with all required files for the current model version downloaded.
*/
async setupPortCommunication() {
if (this.#port !== null) {
throw new Error("Port already exists");
}
lazy.console.debug("Creating ML engine port");
const { port1: childPort, port2: parentPort } = new MessageChannel();
const transferables = [childPort];
this.#port = parentPort;
const newPortResolvers = Promise.withResolvers();
// Wire messages before attempting to send the port
// @ts-expect-error - The onmessage event isn't particularly well typed at this time.
this.#port.onmessage =
/**
* @param {MessageEvent} message
*/
message => this.handlePortMessage(message, newPortResolvers);
/**
* Helper to clean up on failure
*
* @param {unknown} err
*/
const cleanupOnError = err => {
try {
if (this.#port && this.#port.onmessage) {
this.#port.removeEventListener("message", this.#port.onmessage);
}
} catch {}
try {
this.#port?.close?.();
} catch {}
try {
childPort.close();
} catch {}
this.#port = null;
// Make sure the awaiting code is released
try {
newPortResolvers.reject(err);
} catch {}
};
try {
this.mlEngineParent.sendAsyncMessage(
"MLEngine:NewPort",
{
port: childPort,
pipelineOptions: this.pipelineOptions.getOptions(),
},
transferables
);
} catch (error) {
this.setEngineStatus("error");
cleanupOnError(error);
try {
await newPortResolvers.promise;
} catch {
// Avoid unhandledrejection noise in some runtimes
}
throw error;
}
try {
await newPortResolvers.promise;
this.setEngineStatus("ready");
} catch (error) {
// If the resolver rejected for any other reason, still tidy up
this.setEngineStatus("error");
cleanupOnError(error);
throw error;
}
}
/**
 * Handles messages received from the port.
 *
 * Dispatches on `data.type`:
 *  - "EnginePort:EngineReady": settles the port-setup promise.
 *  - "EnginePort:RunResponse": resolves or rejects the pending request that
 *    matches `requestId`, after response validation and telemetry recording.
 *  - "EnginePort:EngineTerminated": marks the engine closed, rejects any
 *    pending port setup, and discards the port.
 *  - "EnginePort:InitProgress": forwards inference progress to the matching
 *    request's chunk resolvers and to the notifications callback.
 *
 * @param {MessageEvent} event - The message event.
 * @param {PromiseWithResolvers<void>} newPortResolvers - An object containing a promise for mlEngine new port setup, along with two functions to resolve or reject it.
 */
handlePortMessage = (event, newPortResolvers) => {
  const { data } = event;
  switch (data.type) {
    case "EnginePort:EngineReady": {
      if (data.error) {
        newPortResolvers.reject(data.error);
      } else {
        newPortResolvers.resolve();
      }
      break;
    }
    case "EnginePort:RunResponse": {
      const { response, error, requestId, resourcesBefore, resourcesAfter } =
        data;
      const request = this.#requests.get(requestId);
      if (request) {
        if (error) {
          this.telemetry.recordRunInferenceFailure(error);
          request.reject(error);
        } else if (response) {
          // Validate response before returning to caller
          /** @type {any} */
          const validatedResponse = this.#validateResponse(response);
          if (!validatedResponse) {
            request.reject(new Error("Response failed security validation"));
          } else {
            this.telemetry.recordRunInferenceSuccessFlow(
              this.engineId,
              validatedResponse.metrics
            );
            // Attach resource metrics from the child process
            validatedResponse.resourcesBefore = resourcesBefore;
            validatedResponse.resourcesAfter = resourcesAfter;
            request.resolve(validatedResponse);
          }
        }
      } else {
        lazy.console.error(
          "Could not resolve response in the MLEngineParent",
          data
        );
      }
      // The request is finished either way; drop the map entry.
      this.#requests.delete(requestId);
      break;
    }
    case "EnginePort:EngineTerminated": {
      // The engine was terminated, and if a new run is needed a new port
      // will need to be requested.
      this.setEngineStatus("closed");
      newPortResolvers?.reject(
        new Error("Engine was terminated before initialization completed.")
      );
      this.discardPort();
      break;
    }
    case "EnginePort:InitProgress": {
      /** @type {ProgressAndStatusCallbackParams} */
      const statusResponse = data.statusResponse;
      if (data.statusResponse.type === lazy.Progress.ProgressType.INFERENCE) {
        // Inference progress carries chunk data for a specific request;
        // route it to that request's streaming resolvers when present.
        const requestId = data.statusResponse.metadata.requestId;
        const request = this.#requests.get(requestId);
        if (request) {
          if (data.statusResponse.ok) {
            if ("resolveChunk" in request) {
              request.resolveChunk(statusResponse);
            }
          } else if ("rejectChunk" in request) {
            request.rejectChunk(statusResponse);
          }
        } else {
          lazy.console.error(
            "Could not resolve response in the MLEngineParent",
            data.statusResponse
          );
        }
      }
      // TODO(aristide) Don't send the chunk data back to the callback
      this.notificationsCallback?.(data.statusResponse);
      break;
    }
    default:
      lazy.console.error("Unknown port message from engine", data);
      break;
  }
};
/**
* Discards the current port and closes the connection.
*/
discardPort() {
if (this.#port !== null) {
this.#port.postMessage({ type: "EnginePort:Discard" });
this.#port.close();
this.#port = null;
}
}
/**
* Terminates the engine.
*
* @param {boolean} [shutdown] - Flag indicating whether to shutdown the engine.
* @param {boolean} [replacement] - Flag indicating whether the engine is being replaced.
* @returns {Promise<void>} A promise that resolves once the engine is terminated.
*/
async terminate(shutdown, replacement) {
if (this.#port !== null) {
lazy.console.debug(`Terminating engine ${this.engineId}`);
this.#port.postMessage({
type: "EnginePort:Terminate",
shutdown,
replacement,
});
await this.#waitForStatus("closed");
}
}
/**
* Waits for the engine to reach the desired status.
*
* @param {string} desiredStatus - The desired engine status.
* @returns {Promise<string>} - A promise that resolves when the engine reaches the desired status.
*/
#waitForStatus(desiredStatus) {
return new Promise((resolve, reject) => {
// Initial check in case the status is already the desired one
if (this.engineStatus === desiredStatus) {
resolve(`Engine status is now ${desiredStatus} `);
}
/**
* @param {string} status
*/
const onStatusChanged = status => {
if (status === desiredStatus) {
this.off("statusChanged", onStatusChanged);
lazy.clearTimeout(timeoutId);
resolve(`Engine status is now ${desiredStatus} `);
}
};
// Set a timeout to reject the promise if the status doesn't change in time
const timeoutId = lazy.setTimeout(() => {
this.off("statusChanged", onStatusChanged);
reject(
`Timeout after ${TERMINATE_TIMEOUT} ms: Engine status did not reach ${desiredStatus} `
);
}, TERMINATE_TIMEOUT);
this.on("statusChanged", onStatusChanged);
});
}
/**
* Run the inference request
*
* @param {EngineRequests[FeatureID]} request
* @returns {Promise<EngineResponses[FeatureID]>}
*/
async run(request) {
/** @type {PromiseWithResolvers<EngineResponses[FeatureID]>} */
const resolvers = Promise.withResolvers();
const requestId = this.#nextRequestId++;
this.#requests.set(requestId, resolvers);
let transferables = [];
if (
request &&
typeof request === "object" &&
"data" in request &&
request.data instanceof ArrayBuffer
) {
transferables.push(request.data);
}
// If the port is null maybe the inference process has shut down
if (this.#port === null) {
throw new Error("Port does not exist");
}
// Validate request before sending to child process
const validatedRequest = this.#validateRequest(request);
if (!validatedRequest) {
throw new Error("Request failed security validation");
}
const beforeRun = ChromeUtils.now();
this.#port.postMessage(
{
type: "EnginePort:Run",
requestId,
request: validatedRequest,
engineRunOptions: { enableInferenceProgress: false },
},
transferables
);
const result = await resolvers.promise;
this.telemetry.recordEngineRun({
beforeRun,
resourcesBefore: result.resourcesBefore,
resourcesAfter: result.resourcesAfter,
engineId: this.engineId,
modelId: this.pipelineOptions.modelId,
backend: this.pipelineOptions.backend,
});
return result;
}
/**
* Run the inference request using an async generator function.
*
* @param {EngineRequests[FeatureID]} request - The inference request containing the input data.
* @returns {AsyncGenerator<ChunkResponse>} An async generator yielding chunks of generated responses.
*/
async *runWithGenerator(request) {
lazy.console.debug(`runWithGenerator called for request ${request}`);
const startTime = ChromeUtils.now();
// Create a promise to track when the engine has fully completed all runs
const responseChunkResolvers = new ResponseOrChunkResolvers();
const requestId = this.#nextRequestId++;
this.#requests.set(requestId, responseChunkResolvers);
let completed = false;
// Track when the engine is fully completed
const completionPromise = responseChunkResolvers.promise.finally(() => {
completed = true;
});
// Handle transferables for performance optimization
const transferables = [];
if (
request &&
typeof request === "object" &&
"data" in request &&
request.data instanceof ArrayBuffer
) {
transferables.push(request.data);
}
// If the port is null maybe the inference process has shut down
if (this.#port === null) {
throw new Error("The port is null");
}
// Validate request before sending to child process
const validatedRequest = this.#validateRequest(request);
if (!validatedRequest) {
throw new Error("Request failed security validation");
}
// Send the request to the engine via postMessage with optional transferables
this.#port.postMessage(
{
type: "EnginePort:Run",
requestId,
request: validatedRequest,
engineRunOptions: { enableInferenceProgress: true },
},
transferables
);
/**
* @param {number} delay
*/
const timeoutPromise = delay =>
new Promise(resolve =>
lazy.setTimeout(() => resolve({ timeout: true, ok: true }), delay)
);
// Collect both the token and text counts, as the tokens aren't always available.
let tokenCount = 0;
let characterCount = 0;
let chunkPromise = responseChunkResolvers.getAndAdvanceChunkPromise();
let chunkStartTime = ChromeUtils.now();
// Loop to yield chunks as they arrive
while (true) {
// Wait for the chunk with a timeout
const chunk = await Promise.race([chunkPromise, timeoutPromise(10)]);
// If there was no timeout we can yield the chunk and move to the next
if (!chunk.timeout) {
lazy.console.debug(
`Chunk received ${lazy.stringifyForLog(chunk.metadata)}`
);
tokenCount += chunk.metadata.tokens?.length ?? 0;
characterCount += chunk.metadata.text?.length ?? 0;
yield {
text: chunk.metadata.text,
tokens: chunk.metadata.tokens,
isPrompt: chunk.metadata.isPrompt,
toolCalls: chunk.metadata.toolCalls,
usage: chunk.metadata.usage,
};
// Be a bit defensive here in getting the metadata, as different engines may
// report different things back.
let markerText;
if (chunk.metadata.tokens?.length) {
markerText = `${chunk.metadata.tokens?.length} tokens`;
} else if (chunk.metadata.text?.length) {
markerText = `${chunk.metadata.text?.length} characters`;
} else {
markerText = "empty response";
}
ChromeUtils.addProfilerMarker(
"MLEngineParent",
{ startTime: chunkStartTime },
`chunk generated ${markerText}` +
` (${this.pipelineOptions.backend} ${this.pipelineOptions.modelId})`
);
chunkStartTime = ChromeUtils.now();
chunkPromise = responseChunkResolvers.getAndAdvanceChunkPromise();
} else if (this.#port === null) {
// in case of a timeout check if the inference process is still alive
lazy.console.error("The port was closed.");
if (this.engineStatus === "crashed") {
throw new Error(
"The inference process has crashed, the port is null. This was for the following request: " +
lazy.stringifyForLog(request)
);
}
break;
}
// Warn if the engine completed before receiving all chunks
if (completed) {
lazy.console.warn(
"Warning: The run completed before the last chunk was received. The full output may not have been received."
);
break;
}
// Check if this is the last chunk or if an error occurred
if (
chunk.statusText === lazy.Progress.ProgressStatusText.DONE ||
!chunk.ok
) {
break;
}
}
// Wait for the engine to fully complete before exiting
const result = await completionPromise;
// Tokens may not be available.
let markerText;
if (tokenCount) {
markerText = `${tokenCount} tokens`;
} else if (characterCount) {
markerText = `${characterCount} characters`;
} else {
markerText = "an empty response";
}
ChromeUtils.addProfilerMarker(
"MLEngineParent",
{ startTime },
`runWithGenerator generated ${markerText}` +
` (${this.pipelineOptions.backend} ${this.pipelineOptions.modelId})`
);
this.telemetry.recordEngineRun({
beforeRun: startTime,
resourcesBefore: result.resourcesBefore,
resourcesAfter: result.resourcesAfter,
engineId: this.engineId,
modelId: this.pipelineOptions.modelId,
backend: this.pipelineOptions.backend,
tokenCount,
characterCount,
});
return result;
}
}