From f9d0d837cb7e23fdc1fdffd7f32906ad4bc2d404 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Wed, 30 Apr 2025 18:15:09 -0700 Subject: [PATCH 1/5] Infer expected inputs from prompt --- .../ai/src/methods/chrome-adapter.test.ts | 8 ++- packages/ai/src/methods/chrome-adapter.ts | 57 +++++++++++++++---- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/packages/ai/src/methods/chrome-adapter.test.ts b/packages/ai/src/methods/chrome-adapter.test.ts index f8ea80b0e09..275ab287b30 100644 --- a/packages/ai/src/methods/chrome-adapter.test.ts +++ b/packages/ai/src/methods/chrome-adapter.test.ts @@ -54,7 +54,7 @@ async function toStringArray( describe('ChromeAdapter', () => { describe('constructor', () => { - it('sets image as expected input type by default', async () => { + it('determines expected inputs by request inspection', async () => { const languageModelProvider = { availability: () => Promise.resolve(Availability.available) } as LanguageModel; @@ -70,7 +70,11 @@ describe('ChromeAdapter', () => { contents: [ { role: 'user', - parts: [{ text: 'hi' }] + parts: [ + { text: 'hi' }, + // Triggers image as expected type. + { inlineData: { mimeType: 'image/jpeg', data: 'asd' } } + ] } ] }); diff --git a/packages/ai/src/methods/chrome-adapter.ts b/packages/ai/src/methods/chrome-adapter.ts index e7bb39c34c8..e2631de4455 100644 --- a/packages/ai/src/methods/chrome-adapter.ts +++ b/packages/ai/src/methods/chrome-adapter.ts @@ -30,9 +30,11 @@ import { import { Availability, LanguageModel, + LanguageModelExpected, LanguageModelMessage, LanguageModelMessageContent, - LanguageModelMessageRole + LanguageModelMessageRole, + LanguageModelMessageType } from '../types/language-model'; /** @@ -48,13 +50,10 @@ export class ChromeAdapter { constructor( private languageModelProvider?: LanguageModel, private mode?: InferenceMode, - private onDeviceParams: OnDeviceParams = { - createOptions: { - // Defaults to support image inputs for convenience. - expectedInputs: [{ type: 'image' }] - } - } - ) {} + private onDeviceParams: OnDeviceParams = {} + ) { + this.onDeviceParams.createOptions ??= {}; + } /** * Checks if a given request can be made on-device. @@ -85,8 +84,10 @@ export class ChromeAdapter { return false; } + const expectedInputs = ChromeAdapter.extractExpectedInputs(request); + // Triggers out-of-band download so model will eventually become available. - const availability = await this.downloadIfAvailable(); + const availability = await this.downloadIfAvailable(expectedInputs); if (this.mode === 'only_on_device') { return true; @@ -158,6 +159,33 @@ export class ChromeAdapter { ); } + /** + * Maps + * + * Vertex's input mime types to + * + * Chrome's expected types. + * + *

Chrome's API checks availability by type. It's tedious to specify the types in advance, so + * this method infers the types.

+ */ + private static extractExpectedInputs( + request: GenerateContentRequest + ): LanguageModelExpected[] { + const inputSet = new Set(); + for (const content of request.contents) { + for (const part of content.parts) { + if (part.inlineData) { + const type = part.inlineData.mimeType.split( + '/' + )[0] as LanguageModelMessageType; + inputSet.add({ type }); + } + } + } + return Array.from(inputSet); + } + /** * Asserts inference for the given request can be performed by an on-device model. */ @@ -196,12 +224,21 @@ export class ChromeAdapter { /** * Encapsulates logic to get availability and download a model if one is downloadable. */ - private async downloadIfAvailable(): Promise { + private async downloadIfAvailable( + expectedInputs: LanguageModelExpected[] + ): Promise { + // Side-effect: updates construction-time params with request-time params. + // This is required because params are referenced through multiple flows. + // TODO: remove this side effect, since we need to also pass options when creating a session. + Object.assign(this.onDeviceParams.createOptions!, { expectedInputs }); + const availability = await this.languageModelProvider?.availability( this.onDeviceParams.createOptions ); if (availability === Availability.downloadable) { + // Side-effect: triggers out-of-band model download. + // This is required because Chrome manages the model download. this.download(); } From e002c407266bb655d65e919a21feb3d7ed94d091 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Wed, 4 Jun 2025 15:28:29 -0700 Subject: [PATCH 2/5] Refactor to clarify options merging --- packages/ai/src/methods/chrome-adapter.ts | 60 ++++++++++++++--------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/packages/ai/src/methods/chrome-adapter.ts b/packages/ai/src/methods/chrome-adapter.ts index e2631de4455..30f37fd7c36 100644 --- a/packages/ai/src/methods/chrome-adapter.ts +++ b/packages/ai/src/methods/chrome-adapter.ts @@ -30,12 +30,14 @@ import { import { Availability, LanguageModel, + LanguageModelCreateOptions, LanguageModelExpected, LanguageModelMessage, LanguageModelMessageContent, LanguageModelMessageRole, LanguageModelMessageType } from '../types/language-model'; +import { deepExtend } from '@firebase/util'; /** * Defines an inference "backend" that uses Chrome's on-device model, @@ -51,9 +53,7 @@ export class ChromeAdapter { private languageModelProvider?: LanguageModel, private mode?: InferenceMode, private onDeviceParams: OnDeviceParams = {} - ) { - this.onDeviceParams.createOptions ??= {}; - } + ) {} /** * Checks if a given request can be made on-device. @@ -84,10 +84,11 @@ export class ChromeAdapter { return false; } - const expectedInputs = ChromeAdapter.extractExpectedInputs(request); + const requestOptions = this.inferCreateOptions(request); + const mergedOptions = this.mergeCreateOptions(requestOptions); // Triggers out-of-band download so model will eventually become available. - const availability = await this.downloadIfAvailable(expectedInputs); + const availability = await this.downloadIfAvailable(mergedOptions); if (this.mode === 'only_on_device') { return true; @@ -119,7 +120,9 @@ export class ChromeAdapter { * @returns {@link Response}, so we can reuse common response formatting. */ async generateContent(request: GenerateContentRequest): Promise { - const session = await this.createSession(); + const requestOptions = this.inferCreateOptions(request); + const mergedOptions = this.mergeCreateOptions(requestOptions); + const session = await this.createSession(mergedOptions); const contents = await Promise.all( request.contents.map(ChromeAdapter.toLanguageModelMessage) ); @@ -141,7 +144,9 @@ export class ChromeAdapter { async generateContentStream( request: GenerateContentRequest ): Promise { - const session = await this.createSession(); + const inferredOptions = this.inferCreateOptions(request); + const mergedOptions = this.mergeCreateOptions(inferredOptions); + const session = await this.createSession(mergedOptions); const contents = await Promise.all( request.contents.map(ChromeAdapter.toLanguageModelMessage) ); @@ -164,14 +169,14 @@ export class ChromeAdapter { * * Vertex's input mime types to * - * Chrome's expected types. + * Chrome's expected input types. * *

Chrome's API checks availability by type. It's tedious to specify the types in advance, so * this method infers the types.

*/ - private static extractExpectedInputs( + private inferCreateOptions( request: GenerateContentRequest - ): LanguageModelExpected[] { + ): LanguageModelCreateOptions { const inputSet = new Set(); for (const content of request.contents) { for (const part of content.parts) { @@ -183,7 +188,23 @@ export class ChromeAdapter { } } } - return Array.from(inputSet); + + return { + expectedInputs: Array.from(inputSet) + }; + } + + /** + * Assembles a unified {@link LanguageModelCreateOptions} from create- and request-time options. + * Request-time options take priority over create-time options. + */ + private mergeCreateOptions( + requestOptions: LanguageModelCreateOptions + ): LanguageModelCreateOptions { + return deepExtend( + this.onDeviceParams.createOptions, + requestOptions + ) as LanguageModelCreateOptions; } /** @@ -225,15 +246,10 @@ export class ChromeAdapter { * Encapsulates logic to get availability and download a model if one is downloadable. */ private async downloadIfAvailable( - expectedInputs: LanguageModelExpected[] + createOptions: LanguageModelCreateOptions ): Promise { - // Side-effect: updates construction-time params with request-time params. - // This is required because params are referenced through multiple flows. - // TODO: remove this side effect, since we need to also pass options when creating a session. - Object.assign(this.onDeviceParams.createOptions!, { expectedInputs }); - const availability = await this.languageModelProvider?.availability( - this.onDeviceParams.createOptions + createOptions ); if (availability === Availability.downloadable) { @@ -328,16 +344,16 @@ export class ChromeAdapter { *

Chrome will remove a model from memory if it's no longer in use, so this method ensures a * new session is created before an old session is destroyed.

*/ - private async createSession(): Promise { + private async createSession( + createOptions: LanguageModelCreateOptions + ): Promise { if (!this.languageModelProvider) { throw new AIError( AIErrorCode.REQUEST_ERROR, 'Chrome AI requested for unsupported browser version.' ); } - const newSession = await this.languageModelProvider.create( - this.onDeviceParams.createOptions - ); + const newSession = await this.languageModelProvider.create(createOptions); if (this.oldSession) { this.oldSession.destroy(); } From e814bf912d42a32edbd38a2831862a5b800783ea Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Wed, 4 Jun 2025 16:39:54 -0700 Subject: [PATCH 3/5] Assert options extracted in availability and generate content paths --- .../ai/src/methods/chrome-adapter.test.ts | 77 +++++++++---------- packages/ai/src/methods/chrome-adapter.ts | 20 ++--- 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/packages/ai/src/methods/chrome-adapter.test.ts b/packages/ai/src/methods/chrome-adapter.test.ts index 275ab287b30..36ee8e30fbb 100644 --- a/packages/ai/src/methods/chrome-adapter.test.ts +++ b/packages/ai/src/methods/chrome-adapter.test.ts @@ -54,34 +54,6 @@ async function toStringArray( describe('ChromeAdapter', () => { describe('constructor', () => { - it('determines expected inputs by request inspection', async () => { - const languageModelProvider = { - availability: () => Promise.resolve(Availability.available) - } as LanguageModel; - const availabilityStub = stub( - languageModelProvider, - 'availability' - ).resolves(Availability.available); - const adapter = new ChromeAdapter( - languageModelProvider, - 'prefer_on_device' - ); - await adapter.isAvailable({ - contents: [ - { - role: 'user', - parts: [ - { text: 'hi' }, - // Triggers image as expected type. - { inlineData: { mimeType: 'image/jpeg', data: 'asd' } } - ] - } - ] - }); - expect(availabilityStub).to.have.been.calledWith({ - expectedInputs: [{ type: 'image' }] - }); - }); it('honors explicitly set expected inputs', async () => { const languageModelProvider = { availability: () => Promise.resolve(Availability.available) @@ -303,6 +275,34 @@ describe('ChromeAdapter', () => { }) ).to.be.false; }); + it('extracts expected inputs from the request', async () => { + const languageModelProvider = { + availability: () => Promise.resolve(Availability.available) + } as LanguageModel; + const availabilityStub = stub( + languageModelProvider, + 'availability' + ).resolves(Availability.available); + const adapter = new ChromeAdapter( + languageModelProvider, + 'prefer_on_device' + ); + await adapter.isAvailable({ + contents: [ + { + role: 'user', + parts: [ + { text: 'hi' }, + // Triggers image as expected type. + { inlineData: { mimeType: 'image/jpeg', data: 'asd' } } + ] + } + ] + }); + expect(availabilityStub).to.have.been.calledWith({ + expectedInputs: [{ type: 'image' }] + }); + }); }); describe('generateContent', () => { it('throws if Chrome API is undefined', async () => { @@ -382,14 +382,9 @@ describe('ChromeAdapter', () => { ); const promptOutput = 'hi'; const promptStub = stub(languageModel, 'prompt').resolves(promptOutput); - const createOptions = { - systemPrompt: 'be yourself', - expectedInputs: [{ type: 'image' }] - } as LanguageModelCreateOptions; const adapter = new ChromeAdapter( languageModelProvider, - 'prefer_on_device', - { createOptions } + 'prefer_on_device' ); const request = { contents: [ @@ -409,7 +404,9 @@ describe('ChromeAdapter', () => { } as GenerateContentRequest; const response = await adapter.generateContent(request); // Asserts initialization params are proxied. - expect(createStub).to.have.been.calledOnceWith(createOptions); + expect(createStub).to.have.been.calledOnceWith({ + expectedInputs: [{ type: 'image' }] + }); // Asserts Vertex input type is mapped to Chrome type. expect(promptStub).to.have.been.calledOnceWith([ { @@ -610,13 +607,9 @@ describe('ChromeAdapter', () => { } }) ); - const createOptions = { - expectedInputs: [{ type: 'image' }] - } as LanguageModelCreateOptions; const adapter = new ChromeAdapter( languageModelProvider, - 'prefer_on_device', - { createOptions } + 'prefer_on_device' ); const request = { contents: [ @@ -635,7 +628,9 @@ describe('ChromeAdapter', () => { ] } as GenerateContentRequest; const response = await adapter.generateContentStream(request); - expect(createStub).to.have.been.calledOnceWith(createOptions); + expect(createStub).to.have.been.calledOnceWith({ + expectedInputs: [{ type: 'image' }] + }); expect(promptStub).to.have.been.calledOnceWith([ { role: request.contents[0].role, diff --git a/packages/ai/src/methods/chrome-adapter.ts b/packages/ai/src/methods/chrome-adapter.ts index 30f37fd7c36..54c194032f4 100644 --- a/packages/ai/src/methods/chrome-adapter.ts +++ b/packages/ai/src/methods/chrome-adapter.ts @@ -84,8 +84,8 @@ export class ChromeAdapter { return false; } - const requestOptions = this.inferCreateOptions(request); - const mergedOptions = this.mergeCreateOptions(requestOptions); + const extractedOptions = this.extractCreateOptions(request); + const mergedOptions = this.mergeCreateOptions(extractedOptions); // Triggers out-of-band download so model will eventually become available. const availability = await this.downloadIfAvailable(mergedOptions); @@ -120,8 +120,8 @@ export class ChromeAdapter { * @returns {@link Response}, so we can reuse common response formatting. */ async generateContent(request: GenerateContentRequest): Promise { - const requestOptions = this.inferCreateOptions(request); - const mergedOptions = this.mergeCreateOptions(requestOptions); + const extractedOptions = this.extractCreateOptions(request); + const mergedOptions = this.mergeCreateOptions(extractedOptions); const session = await this.createSession(mergedOptions); const contents = await Promise.all( request.contents.map(ChromeAdapter.toLanguageModelMessage) @@ -144,8 +144,8 @@ export class ChromeAdapter { async generateContentStream( request: GenerateContentRequest ): Promise { - const inferredOptions = this.inferCreateOptions(request); - const mergedOptions = this.mergeCreateOptions(inferredOptions); + const extractedOptions = this.extractCreateOptions(request); + const mergedOptions = this.mergeCreateOptions(extractedOptions); const session = await this.createSession(mergedOptions); const contents = await Promise.all( request.contents.map(ChromeAdapter.toLanguageModelMessage) @@ -165,16 +165,18 @@ export class ChromeAdapter { } /** - * Maps + * Extracts session creation options specified at request-time. + * + *

In particular, this method maps * * Vertex's input mime types to * - * Chrome's expected input types. + * Chrome's expected input types.

* *

Chrome's API checks availability by type. It's tedious to specify the types in advance, so * this method infers the types.

*/ - private inferCreateOptions( + private extractCreateOptions( request: GenerateContentRequest ): LanguageModelCreateOptions { const inputSet = new Set(); From 316688cd57be16fb3e4f2561e820be7bb45e7d2b Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Wed, 4 Jun 2025 16:44:36 -0700 Subject: [PATCH 4/5] Pass create options into download method --- packages/ai/src/methods/chrome-adapter.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/ai/src/methods/chrome-adapter.ts b/packages/ai/src/methods/chrome-adapter.ts index 54c194032f4..446578d6891 100644 --- a/packages/ai/src/methods/chrome-adapter.ts +++ b/packages/ai/src/methods/chrome-adapter.ts @@ -257,7 +257,7 @@ export class ChromeAdapter { if (availability === Availability.downloadable) { // Side-effect: triggers out-of-band model download. // This is required because Chrome manages the model download. - this.download(); + this.download(createOptions); } return availability; @@ -267,18 +267,18 @@ export class ChromeAdapter { * Triggers out-of-band download of an on-device model. * *

Chrome only downloads models as needed. Chrome knows a model is needed when code calls - * LanguageModel.create.

+ * {@link LanguageModel.create}.

* *

Since Chrome manages the download, the SDK can only avoid redundant download requests by * tracking if a download has previously been requested.

*/ - private download(): void { + private download(createOptions: LanguageModelCreateOptions): void { if (this.isDownloading) { return; } this.isDownloading = true; this.downloadPromise = this.languageModelProvider - ?.create(this.onDeviceParams.createOptions) + ?.create(createOptions) .then(() => { this.isDownloading = false; }); From dae5fee4d82b5ccaca36bec62ef262de93816035 Mon Sep 17 00:00:00 2001 From: Erik Eldridge Date: Thu, 5 Jun 2025 18:04:02 -0700 Subject: [PATCH 5/5] Ensure create and request options are deeply merged --- packages/ai/package.json | 1 + packages/ai/src/methods/chrome-adapter.test.ts | 11 ++++++++--- packages/ai/src/methods/chrome-adapter.ts | 7 ++----- yarn.lock | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/packages/ai/package.json b/packages/ai/package.json index d159793b206..8382025a68e 100644 --- a/packages/ai/package.json +++ b/packages/ai/package.json @@ -52,6 +52,7 @@ "@firebase/component": "0.6.14", "@firebase/logger": "0.4.4", "@firebase/util": "1.11.1", + "deepmerge": "4.3.1", "tslib": "^2.1.0" }, "license": "Apache-2.0", diff --git a/packages/ai/src/methods/chrome-adapter.test.ts b/packages/ai/src/methods/chrome-adapter.test.ts index 36ee8e30fbb..5b245ac1ffb 100644 --- a/packages/ai/src/methods/chrome-adapter.test.ts +++ b/packages/ai/src/methods/chrome-adapter.test.ts @@ -275,7 +275,7 @@ describe('ChromeAdapter', () => { }) ).to.be.false; }); - it('extracts expected inputs from the request', async () => { + it('extracts and merges expected inputs from the request', async () => { const languageModelProvider = { availability: () => Promise.resolve(Availability.available) } as LanguageModel; @@ -285,7 +285,12 @@ describe('ChromeAdapter', () => { ).resolves(Availability.available); const adapter = new ChromeAdapter( languageModelProvider, - 'prefer_on_device' + 'prefer_on_device', + { + createOptions: { + expectedInputs: [{ type: 'text' }] + } + } ); await adapter.isAvailable({ contents: [ @@ -300,7 +305,7 @@ describe('ChromeAdapter', () => { ] }); expect(availabilityStub).to.have.been.calledWith({ - expectedInputs: [{ type: 'image' }] + expectedInputs: [{ type: 'text' }, { type: 'image' }] }); }); }); diff --git a/packages/ai/src/methods/chrome-adapter.ts b/packages/ai/src/methods/chrome-adapter.ts index 446578d6891..7f9cb2d7a75 100644 --- a/packages/ai/src/methods/chrome-adapter.ts +++ b/packages/ai/src/methods/chrome-adapter.ts @@ -37,7 +37,7 @@ import { LanguageModelMessageRole, LanguageModelMessageType } from '../types/language-model'; -import { deepExtend } from '@firebase/util'; +import deepMerge from 'deepmerge'; /** * Defines an inference "backend" that uses Chrome's on-device model, @@ -203,10 +203,7 @@ export class ChromeAdapter { private mergeCreateOptions( requestOptions: LanguageModelCreateOptions ): LanguageModelCreateOptions { - return deepExtend( - this.onDeviceParams.createOptions, - requestOptions - ) as LanguageModelCreateOptions; + return deepMerge(this.onDeviceParams.createOptions || {}, requestOptions); } /** diff --git a/yarn.lock b/yarn.lock index 51ede769d03..09d7a2eda0e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6250,7 +6250,7 @@ deep-is@^0.1.3: resolved "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz#a6f2dce612fadd2ef1f519b73551f17e85199831" integrity sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ== -deepmerge@^4.2.2: +deepmerge@4.3.1, deepmerge@^4.2.2: version "4.3.1" resolved "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz#44b5f2147cd3b00d4b56137685966f26fd25dd4a" integrity sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==