/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */

requestLongerTimeout(2);
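// Pipeline options for the "moz-echo" test task used throughout this file;
// the negative timeoutMS keeps the engine from being shut down for idleness.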
const RAW_PIPELINE_OPTIONS = {
taskName: "moz-echo",
timeoutMS: -1,
modelId: "Mozilla/test",
featureId: "test-feature",
backend: "test-backend",
};
const { sinon } = ChromeUtils.importESModule(
  "resource://testing-common/Sinon.sys.mjs"
);
const { MLTelemetry } = ChromeUtils.importESModule(
"chrome://global/content/ml/MLTelemetry.sys.mjs"
);
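/**
 * Returns the current count for a Glean metric, for both event metrics
 * (counted by the number of recorded events) and labeled timing
 * distributions (counted by the samples recorded under the engine label).
 */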
function getGleanCount(metricsName, engineId = "default-engine") {
  const metrics = Glean.firefoxAiRuntime[metricsName];
  // Event metrics: count the number of recorded events.
  if (["runInferenceFailure", "engineCreationFailure"].includes(metricsName)) {
    return metrics.testGetValue()?.length || 0;
  }
  // Labeled timing distributions: count the samples for this engine.
  return metrics[engineId]?.testGetValue()?.count || 0;
}
/**
* Check that we record the engine creation and the inference run
*/
add_task(async function test_default_telemetry() {
const { cleanup, remoteClients } = await setup();
const engineCreationSuccessCount = getGleanCount("engineCreationSuccess");
const runInferenceSuccessCount = getGleanCount("runInferenceSuccess");
const runInferenceFailureCount = getGleanCount("runInferenceFailure");
const engineCreationFailureCount = getGleanCount("engineCreationFailure");
info("Get the engine");
const engineInstance = await createEngine(RAW_PIPELINE_OPTIONS);
info("Run the inference");
const inferencePromise = engineInstance.run({
data: "This gets echoed.",
});
info("Wait for the pending downloads.");
await remoteClients["ml-onnx-runtime"].resolvePendingDownloads(1);
const res = await inferencePromise;
  Assert.equal(
    res.output.echo,
    "This gets echoed.",
    "The text gets echoed, exercising the whole flow."
  );
{
info("Test the run_inference_success_flow event");
const inferenceFlowEvents =
Glean.firefoxAiRuntime.runInferenceSuccessFlow.testGetValue();
Assert.ok(
inferenceFlowEvents && !!inferenceFlowEvents.length,
"At least one run_inference_success_flow event was recorded"
);
const lastInferenceEvent = inferenceFlowEvents.at(-1);
const { extra: inferenceExtra } = lastInferenceEvent;
    // Helper to check that a numeric field parses to a number >= 0;
    // fields marked optional may be absent entirely.
const checkNumber = (key, isOptional = false) => {
const value = inferenceExtra[key];
if (isOptional && (value === null || value === undefined)) {
return; // Optional field not present is OK
}
Assert.notEqual(value, null, `${key} should be present`);
const number = Number(value); // Quantities are stored as strings
Assert.ok(!Number.isNaN(number), `${key} should be a number`);
Assert.greaterOrEqual(number, 0, `${key} should be >= 0`);
};
// Check flow_id is present
Assert.ok(inferenceExtra.flow_id, "flow_id should be present");
    // Check the timing/token metrics; each is optional but must be a
    // non-negative number when present.
checkNumber("tokenizing_time", true);
checkNumber("inference_time", true);
checkNumber("decoding_time", true);
checkNumber("input_tokens", true);
checkNumber("output_tokens", true);
checkNumber("time_to_first_token", true);
checkNumber("tokens_per_second", true);
checkNumber("time_per_output_token", true);
}
{
info("Test the engine_run event");
const value = Glean.firefoxAiRuntime.engineRun.testGetValue();
Assert.ok(
value && !!value.length,
"At least one engine_run event was recorded"
);
const lastEngineRunEvent = value.at(-1);
const { extra } = lastEngineRunEvent;
const checkNumber = key => {
const value = extra[key];
Assert.notEqual(value, null, `${key} should be present`);
const number = Number(value); // Quantities are stored as strings.
Assert.ok(!Number.isNaN(number), `${key} should be a number`);
Assert.greater(number, 0, `${key} should be greater than 0`);
};
checkNumber("cpu_milliseconds");
checkNumber("wall_milliseconds");
checkNumber("cores");
checkNumber("cpu_utilization");
checkNumber("memory_bytes");
Assert.equal(extra.feature_id, "test-feature");
Assert.equal(extra.engine_id, "default-engine");
Assert.equal(extra.model_id, "Mozilla/test");
Assert.equal(extra.backend, "test-backend");
}
Assert.equal(res.output.dtype, "q8", "The config was enriched by RS");
ok(
!EngineProcess.areAllEnginesTerminated(),
"The engine process is still active."
);
Assert.equal(
getGleanCount("engineCreationSuccess"),
engineCreationSuccessCount + 1
);
Assert.equal(
getGleanCount("runInferenceSuccess"),
runInferenceSuccessCount + 1
);
Assert.equal(getGleanCount("runInferenceFailure"), runInferenceFailureCount);
Assert.equal(
getGleanCount("engineCreationFailure"),
engineCreationFailureCount
);
await EngineProcess.destroyMLEngine();
await cleanup();
});
/**
* Check that we record the engine creation and the inference failure
*/
add_task(async function test_ml_engine_run_failure() {
const { cleanup, remoteClients } = await setup();
const engineCreationSuccessCount = getGleanCount("engineCreationSuccess");
const runInferenceSuccessCount = getGleanCount("runInferenceSuccess");
const runInferenceFailureCount = getGleanCount("runInferenceFailure");
const engineCreationFailureCount = getGleanCount("engineCreationFailure");
info("Get the engine");
const engineInstance = await createEngine(RAW_PIPELINE_OPTIONS);
info("Run the inference with a throwing example.");
const inferencePromise = engineInstance.run("throw");
info("Wait for the pending downloads.");
await remoteClients["ml-onnx-runtime"].resolvePendingDownloads(1);
let error;
try {
await inferencePromise;
} catch (e) {
error = e;
}
is(
error?.message,
'Error: Received the message "throw", so intentionally throwing an error.',
"The error is correctly surfaced."
);
Assert.equal(
getGleanCount("engineCreationSuccess"),
engineCreationSuccessCount + 1
);
Assert.equal(getGleanCount("runInferenceSuccess"), runInferenceSuccessCount);
Assert.equal(
getGleanCount("runInferenceFailure"),
runInferenceFailureCount + 1
);
Assert.equal(
getGleanCount("engineCreationFailure"),
engineCreationFailureCount
);
await EngineProcess.destroyMLEngine();
await cleanup();
});
/**
* Check that we record the engine creation failure
*/
add_task(async function test_engine_creation_failure() {
const { cleanup } = await setup();
const engineCreationSuccessCount = getGleanCount("engineCreationSuccess");
const engineCreationFailureCount = getGleanCount("engineCreationFailure");
const runInferenceSuccessCount = getGleanCount("runInferenceSuccess");
const runInferenceFailureCount = getGleanCount("runInferenceFailure");
  try {
    await createEngine({ taskName: "moz-echo", featureId: "I DONT EXIST" });
  } catch (e) {
    // Expected: the unknown featureId makes engine creation fail.
  }
Assert.equal(
getGleanCount("engineCreationSuccess"),
engineCreationSuccessCount
);
Assert.equal(getGleanCount("runInferenceSuccess"), runInferenceSuccessCount);
Assert.equal(getGleanCount("runInferenceFailure"), runInferenceFailureCount);
Assert.equal(
getGleanCount("engineCreationFailure"),
engineCreationFailureCount + 1
);
await EngineProcess.destroyMLEngine();
await cleanup();
});
/**
 * Check that model download telemetry records the full successful
 * download flow.
 */
add_task(async function test_model_download_telemetry_success() {
let initialModelDownloadsCount =
Glean.firefoxAiRuntime.modelDownload.testGetValue()?.length || 0;
  // Allow any URL for the model hub.
  Services.env.set("MOZ_ALLOW_EXTERNAL_ML_HUB", "true");
  // Mocking functions used in workers or child processes doesn't work,
  // so we stub the code that the worker runs.
const workerCode = `
// Inject the original worker code
${await getMLEngineWorkerCode()}
// Stub
ChromeUtils.defineESModuleGetters(
lazy,
{
createFileUrl: "chrome://global/content/ml/Utils.sys.mjs",
},
{ global: "current" }
);
// Change the getBackend to a mocked version that doesn't actually do inference
// but does initiate model downloads
lazy.getBackend = async function (
mlEngineWorker,
_,
{
modelHubUrlTemplate,
modelHubRootUrl,
modelId,
modelRevision,
modelFile,
engineId,
} = {}
) {
const url = lazy.createFileUrl({
model: modelId,
revision: modelRevision,
file: modelFile,
urlTemplate: modelHubUrlTemplate,
rootUrl: modelHubRootUrl,
});
  await mlEngineWorker.getModelFile({ url }).catch(() => {});
  // Download another file, using engineId as the revision.
  const url2 = lazy.createFileUrl({
    model: modelId,
    revision: engineId,
    file: modelFile,
    urlTemplate: modelHubUrlTemplate,
    rootUrl: modelHubRootUrl,
  });
  await mlEngineWorker.getModelFile({ url: url2 }).catch(() => {});
return {
run: () => {},
};
};
`;
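  // Package the patched worker source as a blob URL so the engine loads
  // our stubbed getBackend instead of the real backend.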
const blob = new Blob([workerCode], { type: "application/javascript" });
const blobURL = URL.createObjectURL(blob);
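  // Stub the wasm runtime fetch; the stubbed backend never touches it.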
let wasmBufferStub = sinon
.stub(MLEngineParent, "getWasmArrayBuffer")
.returns(new ArrayBuffer(16));
let promiseStub = sinon
.stub(MLEngineParent, "getWorkerConfig")
.callsFake(function () {
return { url: blobURL, options: { type: "module" } };
});
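  // Start from a clean model cache and a fresh engine process.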
await IndexedDBCache.init({ reset: true });
await EngineProcess.destroyMLEngine();
await createEngine({
engineId: "main",
taskName: "real-wllama-text-generation",
featureId: "link-preview",
backend: "wllama",
modelId: "acme/bert",
modelHubUrlTemplate: "{model}/resolve/{revision}",
modelRevision: "v0.1",
modelHubRootUrl:
modelFile: "onnx/config.json",
});
let observed = Glean.firefoxAiRuntime.modelDownload.testGetValue();
Assert.equal(observed?.length || 0, initialModelDownloadsCount + 6);
observed = observed.slice(-6);
Assert.equal(new Set(observed.map(obj => obj.extra.modelDownloadId)).size, 1);
Assert.deepEqual(
observed.map(obj => obj.extra.step),
[
"start_download",
"start_file_download",
"end_file_download_success",
"start_file_download",
"end_file_download_success",
"end_download_success",
]
);
await EngineProcess.destroyMLEngine();
await IndexedDBCache.init({ reset: true });
wasmBufferStub.restore();
promiseStub.restore();
});
/**
 * Check that model download telemetry records a failed download flow.
 */
add_task(async function test_model_download_telemetry_fail() {
let initialModelDownloadsCount =
Glean.firefoxAiRuntime.modelDownload.testGetValue()?.length || 0;
  // Allow any URL for the model hub.
  Services.env.set("MOZ_ALLOW_EXTERNAL_ML_HUB", "true");
  // Mocking functions used in workers or child processes doesn't work,
  // so we stub the code that the worker runs.
const workerCode = `
// Inject the original worker code
${await getMLEngineWorkerCode()}
// Stub
ChromeUtils.defineESModuleGetters(
lazy,
{
createFileUrl: "chrome://global/content/ml/Utils.sys.mjs",
},
{ global: "current" }
);
// Change the getBackend to a mocked version that doesn't actually do inference
// but does initiate model downloads
lazy.getBackend = async function (
mlEngineWorker,
_,
{
modelHubUrlTemplate,
modelHubRootUrl,
modelId,
modelRevision,
modelFile,
engineId,
} = {}
) {
const url = lazy.createFileUrl({
model: modelId,
revision: modelRevision,
file: modelFile,
urlTemplate: modelHubUrlTemplate,
rootUrl: modelHubRootUrl,
});
  await mlEngineWorker.getModelFile({ url }).catch(() => {});
  // Download another file, using engineId as the revision.
  const url2 = lazy.createFileUrl({
    model: modelId,
    revision: engineId,
    file: modelFile,
    urlTemplate: modelHubUrlTemplate,
    rootUrl: modelHubRootUrl,
  });
  await mlEngineWorker.getModelFile({ url: url2 }).catch(() => {});
return {
run: () => {},
};
};
`;
const blob = new Blob([workerCode], { type: "application/javascript" });
const blobURL = URL.createObjectURL(blob);
let wasmBufferStub = sinon
.stub(MLEngineParent, "getWasmArrayBuffer")
.returns(new ArrayBuffer(16));
let promiseStub = sinon
.stub(MLEngineParent, "getWorkerConfig")
.callsFake(function () {
return { url: blobURL, options: { type: "module" } };
});
await IndexedDBCache.init({ reset: true });
await EngineProcess.destroyMLEngine();
await createEngine({
engineId: "main",
taskName: "real-wllama-text-generation",
featureId: "link-preview",
backend: "wllama",
modelId: "acme-not-found/bert",
modelHubUrlTemplate: "{model}/resolve/{revision}",
modelRevision: "v0.1",
modelHubRootUrl:
modelFile: "onnx/config.json",
}).catch(() => {});
let observed = Glean.firefoxAiRuntime.modelDownload.testGetValue();
Assert.equal(observed?.length || 0, initialModelDownloadsCount + 6);
observed = observed.slice(-6);
Assert.equal(new Set(observed.map(obj => obj.extra.modelDownloadId)).size, 1);
Assert.deepEqual(
observed.map(obj => obj.extra.step),
[
"start_download",
"start_file_download",
"end_file_download_failed",
"start_file_download",
"end_file_download_failed",
"end_download_failed",
]
);
await EngineProcess.destroyMLEngine();
await IndexedDBCache.init({ reset: true });
wasmBufferStub.restore();
promiseStub.restore();
});
/**
 * Check that model download telemetry records a mixed flow where one
 * file download fails and the other succeeds.
 */
add_task(async function test_model_download_telemetry_mixed() {
let initialModelDownloadsCount =
Glean.firefoxAiRuntime.modelDownload.testGetValue()?.length || 0;
  // Allow any URL for the model hub.
  Services.env.set("MOZ_ALLOW_EXTERNAL_ML_HUB", "true");
  // Mocking functions used in workers or child processes doesn't work,
  // so we stub the code that the worker runs.
const workerCode = `
// Inject the original worker code
${await getMLEngineWorkerCode()}
// Stub
ChromeUtils.defineESModuleGetters(
lazy,
{
createFileUrl: "chrome://global/content/ml/Utils.sys.mjs",
},
{ global: "current" }
);
// Change the getBackend to a mocked version that doesn't actually do inference
// but does initiate model downloads
lazy.getBackend = async function (
mlEngineWorker,
_,
{
modelHubUrlTemplate,
modelHubRootUrl,
modelId,
modelRevision,
modelFile,
engineId,
} = {}
) {
const url = lazy.createFileUrl({
model: modelId,
revision: modelRevision,
file: modelFile,
urlTemplate: modelHubUrlTemplate,
rootUrl: modelHubRootUrl,
});
  await mlEngineWorker.getModelFile({ url }).catch(() => {});
  // Download another file, using engineId as the revision.
  const url2 = lazy.createFileUrl({
    model: modelId,
    revision: engineId,
    file: modelFile,
    urlTemplate: modelHubUrlTemplate,
    rootUrl: modelHubRootUrl,
  });
  await mlEngineWorker.getModelFile({ url: url2 }).catch(() => {});
return {
run: () => {},
};
};
`;
const blob = new Blob([workerCode], { type: "application/javascript" });
const blobURL = URL.createObjectURL(blob);
let wasmBufferStub = sinon
.stub(MLEngineParent, "getWasmArrayBuffer")
.returns(new ArrayBuffer(16));
let promiseStub = sinon
.stub(MLEngineParent, "getWorkerConfig")
.callsFake(function () {
return { url: blobURL, options: { type: "module" } };
});
await createEngine({
engineId: "main",
taskName: "real-wllama-text-generation",
featureId: "link-preview",
backend: "wllama",
modelId: "acme/bert",
modelHubUrlTemplate: "{model}/resolve/{revision}",
modelRevision: "v0.4",
modelHubRootUrl:
modelFile: "onnx/config.json",
}).catch(() => {});
let observed = Glean.firefoxAiRuntime.modelDownload.testGetValue();
Assert.equal(observed?.length || 0, initialModelDownloadsCount + 6);
observed = observed.slice(-6);
Assert.equal(new Set(observed.map(obj => obj.extra.modelDownloadId)).size, 1);
Assert.deepEqual(
observed.map(obj => obj.extra.step),
[
"start_download",
"start_file_download",
"end_file_download_failed",
"start_file_download",
"end_file_download_success",
"end_download_success",
]
);
await EngineProcess.destroyMLEngine();
await IndexedDBCache.init({ reset: true });
wasmBufferStub.restore();
promiseStub.restore();
});
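/**
 * Returns the most recent event recorded for a Glean event metric, or
 * null if none has been recorded.
 */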
function getLastEvent(gleanMetric) {
const events = gleanMetric.testGetValue() || [];
return events.length ? events.at(-1) : null;
}
// A helper that waits until a new event is recorded for the given Glean
// metric, then returns it.
async function waitForGleanEvent(gleanMetric) {
const originalEvent = getLastEvent(gleanMetric);
await TestUtils.waitForCondition(() => {
return getLastEvent(gleanMetric) !== originalEvent;
}, "Waiting for new Glean event");
return getLastEvent(gleanMetric);
}
/**
* Tests that the MLTelemetry constructor auto-generates a flowId
* if one is not provided.
*/
add_task(async function test_ml_telemetry_flow_id_auto_generated() {
info("Starting MLTelemetry test: Constructor auto-generates flowId");
const telemetry1 = new MLTelemetry({ featureId: "feature-auto-id" });
telemetry1.sessionStart({ interaction: "test-1" });
let recordedEvent = await waitForGleanEvent(
Glean.firefoxAiRuntime.sessionStart
);
Assert.ok(
recordedEvent.extra.flow_id,
"An event was recorded with a flow_id"
);
Assert.equal(
recordedEvent.extra.flow_id,
telemetry1.flowId,
"Glean's recorded flow_id matches the instance's flowId"
);
Assert.equal(
recordedEvent.extra.flow_id.length,
36,
"The auto-generated flow_id looks like a UUID"
);
});
/**
* Tests that the MLTelemetry constructor correctly uses a flowId
* when one is provided.
*/
add_task(async function test_ml_telemetry_flow_id_provided() {
info("Starting MLTelemetry test: Constructor accepts provided flowId");
const telemetry2 = new MLTelemetry({
featureId: "feature-custom-id",
flowId: "my-custom-flow-id-69420",
});
telemetry2.sessionStart({ interaction: "test-2" });
let recordedEvent = await waitForGleanEvent(
Glean.firefoxAiRuntime.sessionStart
);
Assert.ok(
recordedEvent.extra.flow_id,
"An event was recorded with a flow_id"
);
Assert.equal(
recordedEvent.extra.flow_id,
"my-custom-flow-id-69420",
"Glean's recorded flow_id matches the provided flowId"
);
Assert.equal(
recordedEvent.extra.flow_id,
telemetry2.flowId,
"Glean's recorded flow_id also matches the instance's flowId"
);
});
/**
 * Tests that the flowId set on the instance is used by all
 * telemetry methods (e.g., sessionStart and endSession).
 */
add_task(async function test_ml_telemetry_flow_id_persistent_on_instance() {
info("Starting MLTelemetry test: Instance flowId persists across methods");
const telemetry3 = new MLTelemetry({
featureId: "feature-persistent",
flowId: "my-instance-flow-id-789",
});
// Check sessionStart
telemetry3.sessionStart({ interaction: "test-3" });
let startEvent = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionStart);
Assert.equal(
startEvent.extra.flow_id,
"my-instance-flow-id-789",
"sessionStart event used the instance flowId"
);
// Check sessionEnd
telemetry3.endSession({
status: "ok",
});
let endEvent = await waitForGleanEvent(Glean.firefoxAiRuntime.sessionEnd);
Assert.ok(
endEvent.extra.flow_id,
"endSession event was recorded with a flow_id"
);
Assert.equal(
endEvent.extra.flow_id,
"my-instance-flow-id-789",
"endSession event used the *same* instance flowId"
);
// Final check that the instance property itself wasn't modified
Assert.equal(
telemetry3.flowId,
"my-instance-flow-id-789",
"The instance's flowId property remained unchanged"
);
});
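/**
 * Check that engine_run telemetry is recorded for streaming runs made
 * through runWithGenerator with the OpenAI backend.
 */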
add_task(async function test_run_with_generator_telemetry() {
const { cleanup } = await setup();
const { server: mockServer, port } = startMockOpenAI({
echo: "Streaming response.",
});
info("Create the engine with OpenAI backend");
const engineInstance = await createEngine({
taskName: "text-generation",
featureId: "about-inference",
backend: "openai",
modelId: "test-model",
apiKey: "test-key",
baseURL: `http://localhost:${port}/v1`,
});
info("Call runWithGenerator");
const generator = engineInstance.runWithGenerator({
args: [{ role: "user", content: "test streaming" }],
streamOptions: { enabled: true },
});
info("Manually iterate to capture both chunks and return value");
let iterResult;
while (true) {
iterResult = await generator.next();
if (iterResult.done) {
break;
}
}
{
info("Test the engine_run event for runWithGenerator");
const value = Glean.firefoxAiRuntime.engineRun.testGetValue();
Assert.ok(
value && !!value.length,
"At least one engine_run event was recorded"
);
const lastEngineRunEvent = value.at(-1);
const { extra } = lastEngineRunEvent;
info("Recorded Glean engine_run event: " + JSON.stringify(extra, null, 2));
const checkNumber = key => {
const value = extra[key];
Assert.notEqual(value, null, `${key} should be present`);
const number = Number(value);
Assert.ok(!Number.isNaN(number), `${key} should be a number`);
Assert.greater(number, 0, `${key} should be greater than 0`);
};
checkNumber("cpu_milliseconds");
checkNumber("wall_milliseconds");
checkNumber("cores");
checkNumber("cpu_utilization");
checkNumber("memory_bytes");
checkNumber("character_count");
Assert.ok(!extra.token_count, "Token count is not implemented yet.");
Assert.equal(extra.feature_id, "about-inference");
Assert.equal(extra.backend, "openai");
}
await EngineProcess.destroyMLEngine();
await cleanup();
await stopMockOpenAI(mockServer);
});