prompt-api/index.bs at 6e731c775635b3d5d23f207337def5bacfe3161d · webmachinelearning/prompt-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
<pre class='metadata'>
Title: Prompt API
Shortname: prompt
Level: None
Status: CG-DRAFT
Group: webml
Repository: webmachinelearning/prompt-api
URL: https://webmachinelearning.github.io/prompt-api
Editor: Domenic Denicola, Google https://google.com, d@domenic.me, https://domenic.me/
Abstract: The prompt API gives web pages the ability to directly prompt a language model
Markup Shorthands: markdown yes, css no
Complain About: accidental-2119 yes, missing-example-ids yes
Assume Explicit For: yes
Default Biblio Status: current
Boilerplate: omit conformance
Indent: 2
Die On: warning
</pre>

<h2 id="intro">Introduction</h2>

TODO

<h2 id="dependencies">Dependencies</h2>

This specification depends on the Infra Standard. [[!INFRA]]

As with the rest of the web platform, human languages are identified in these APIs by BCP 47 language tags, such as "`ja`", "`en-US`", "`sr-Cyrl`", or "`de-CH-1901-x-phonebk-extended`". The specific algorithms used for validation, canonicalization, and language tag matching are those from the <cite>ECMAScript Internationalization API Specification</cite>, which in turn defers some of its processing to <cite>Unicode Locale Data Markup Language (LDML)</cite>. [[BCP47]] [[!ECMA-402]] [[UTS35]].

These APIs are part of a family of APIs expected to be powered by machine learning models, which share common API surface idioms and specification patterns. Currently, the specification text for these shared parts lives in [[WRITING-ASSISTANCE-APIS#supporting]], and the common privacy and security considerations are discussed in [[WRITING-ASSISTANCE-APIS#privacy]] and [[WRITING-ASSISTANCE-APIS#security]]. Implementing these APIs requires implementing that shared infrastructure, and conforming to those privacy and security considerations. But it does not require implementing or exposing the actual writing assistance APIs. [[!WRITING-ASSISTANCE-APIS]]

<h2 id="api">The API</h2>

<xmp class="idl">
[Exposed=Window, SecureContext]
interface LanguageModel : EventTarget {
  static Promise<LanguageModel> create(optional LanguageModelCreateOptions options = {});
  static Promise<Availability> availability(optional LanguageModelCreateCoreOptions options = {});
  static Promise<LanguageModelParams?> params();

  // These will throw "NotSupportedError" DOMExceptions if role = "system"
  Promise<DOMString> prompt(
    LanguageModelPrompt input,
    optional LanguageModelPromptOptions options = {}
  );
  ReadableStream promptStreaming(
    LanguageModelPrompt input,
    optional LanguageModelPromptOptions options = {}
  );
  Promise<undefined> append(
    LanguageModelPrompt input,
    optional LanguageModelAppendOptions options = {}
  );

  Promise<double> measureInputUsage(
    LanguageModelPrompt input,
    optional LanguageModelPromptOptions options = {}
  );
  readonly attribute double inputUsage;
  readonly attribute unrestricted double inputQuota;
  attribute EventHandler onquotaoverflow;

  readonly attribute unsigned long topK;
  readonly attribute float temperature;

  Promise<LanguageModel> clone(optional LanguageModelCloneOptions options = {});
  undefined destroy();
};

[Exposed=Window, SecureContext]
interface LanguageModelParams {
  readonly attribute unsigned long defaultTopK;
  readonly attribute unsigned long maxTopK;
  readonly attribute float defaultTemperature;
  readonly attribute float maxTemperature;
};


callback LanguageModelToolFunction = Promise<DOMString> (any... arguments);

// A description of a tool call that a language model can invoke.
dictionary LanguageModelTool {
  required DOMString name;
  required DOMString description;
  LanguageModelExpected expectedOutputs;
  // JSON schema for the input parameters.
  required object inputSchema;
  // The function to be invoked by user agent on behalf of language model.
  required LanguageModelToolFunction execute;
};

dictionary LanguageModelCreateCoreOptions {
  // Note: these two have custom out-of-range handling behavior, not in the IDL layer.
  // They are unrestricted double so as to allow +Infinity without failing.
  unrestricted double topK;
  unrestricted double temperature;

  sequence<LanguageModelExpected> expectedInputs;
  sequence<LanguageModelExpected> expectedOutputs;
  sequence<LanguageModelTool> tools;
};

dictionary LanguageModelCreateOptions : LanguageModelCreateCoreOptions {
  AbortSignal signal;
  CreateMonitorCallback monitor;

  sequence<LanguageModelMessage> initialPrompts;
};

dictionary LanguageModelPromptOptions {
  object responseConstraint;
  boolean omitResponseConstraintInput = false;
  AbortSignal signal;
};

dictionary LanguageModelAppendOptions {
  AbortSignal signal;
};

dictionary LanguageModelCloneOptions {
  AbortSignal signal;
};

dictionary LanguageModelExpected {
  required LanguageModelMessageType type;
  sequence<DOMString> languages;
};

// The argument to the prompt() method and others like it

typedef (
  sequence<LanguageModelMessage>
  // Shorthand for `[{ role: "user", content: [{ type: "text", value: providedValue }] }]`
  or DOMString
) LanguageModelPrompt;

dictionary LanguageModelMessage {
  required LanguageModelMessageRole role;
  required LanguageModelMessageContent content;
  boolean prefix = false;
};

typedef (
  sequence<LanguageModelMessageContentChunk>
  // Shorthand for `[{ type: "text", value: providedValue }]`
  or DOMString
) LanguageModelMessageContent;

dictionary LanguageModelMessageContentChunk {
  required LanguageModelMessageType type;
  required LanguageModelMessageValue value;
};

enum LanguageModelMessageRole { "system", "user", "assistant" };

enum LanguageModelMessageType { "text", "image", "audio" };

typedef (
  ImageBitmapSource
  or AudioBuffer
  or BufferSource
  or DOMString
) LanguageModelMessageValue;
</xmp>

<h3 id="prompt-processing">Prompt processing</h3>

<p class="note">This will be incorporated into a proper part of the specification later. For now, we're just writing out this algorithm as a full spec, since it's complicated.</p>

<div algorithm>
  <!-- TODO remove noexport once there are actual references to this algorithm in the spec. It is only being used now to silence a build warning. -->
  To <dfn noexport>validate and canonicalize a prompt</dfn> given a {{LanguageModelPrompt}} |input|, a [=list=] of {{LanguageModelMessageType}}s |expectedTypes|, and a boolean |isInitial|, perform the following steps. The return value will be a non-empty [=list=] of {{LanguageModelMessage}}s in their "longhand" form.

  1. [=Assert=]: |expectedTypes| [=list/contains=] "{{LanguageModelMessageType/text}}".

  1. If |input| is a [=string=], then return <span style="white-space: pre-wrap">«
      «[
        "{{LanguageModelMessage/role}}" → "{{LanguageModelMessageRole/user}}",
        "{{LanguageModelMessage/content}}" → «
          «[
            "{{LanguageModelMessageContentChunk/type}}" → "{{LanguageModelMessageType/text}}",
            "{{LanguageModelMessageContentChunk/value}}" → |input|
          ]»
        »,
        "{{LanguageModelMessage/prefix}}" → false
      ]»
    »</span>.

  1. [=Assert=]: |input| is a [=list=] of {{LanguageModelMessage}}s.

  1. Let |seenNonSystemRole| be false.

  1. Let |messages| be an empty [=list=] of {{LanguageModelMessage}}s.

  1. [=list/For each=] |message| of |input|:

    1. If |message|["{{LanguageModelMessage/content}}"] is a [=string=], then set |message| to <span style="white-space: pre-wrap">«[
        "{{LanguageModelMessage/role}}" → |message|["{{LanguageModelMessage/role}}"],
        "{{LanguageModelMessage/content}}" → «
          «[
            "{{LanguageModelMessageContentChunk/type}}" → "{{LanguageModelMessageType/text}}",
            "{{LanguageModelMessageContentChunk/value}}" → |message|
          ]»
        »,
        "{{LanguageModelMessage/prefix}}" → |message|["{{LanguageModelMessage/prefix}}"]
      ]»</span>.

    1. If |message|["{{LanguageModelMessage/prefix}}"] is true, then:

      1. If |message|["{{LanguageModelMessage/role}}"] is not "{{LanguageModelMessageRole/assistant}}", then throw a "{{SyntaxError}}" {{DOMException}}.

          <p class="note">Per the below validation steps, this will also guarantee that |message|["{{LanguageModelMessage/content}}"] only contains text content.

      1. If |message| is not the last item in |messages|, then throw a "{{SyntaxError}}" {{DOMException}}.

    1. [=list/For each=] |content| of |message|["{{LanguageModelMessage/content}}"]:

      1. If |message|["{{LanguageModelMessage/role}}"] is "{{LanguageModelMessageRole/system}}", then:

        1. If |isInitial| is false, then throw a "{{NotSupportedError}}" {{DOMException}}.

        1. If |seenNonSystemRole| is true, then throw a "{{SyntaxError}}" {{DOMException}}.

      1. If |message|["{{LanguageModelMessage/role}}"] is not "{{LanguageModelMessageRole/system}}", then set |seenNonSystemRole| to true.

      1. If |message|["{{LanguageModelMessage/role}}"] is "{{LanguageModelMessageRole/assistant}}" and |content|["{{LanguageModelMessageContentChunk/type}}"] is not "{{LanguageModelMessageType/text}}", then throw a "{{NotSupportedError}}" {{DOMException}}.

      1. If |content|["{{LanguageModelMessageContentChunk/type}}"] is "{{LanguageModelMessageType/text}}" and |content|["{{LanguageModelMessageContentChunk/value}}"] is not a [=string=], then throw a {{TypeError}}.

      1. If |content|["{{LanguageModelMessageContentChunk/type}}"] is "{{LanguageModelMessageType/image}}", then:

        1. If |expectedTypes| does not [=list/contain=] "{{LanguageModelMessageType/image}}", then throw a "{{NotSupportedError}}" {{DOMException}}.

        1. If |content|["{{LanguageModelMessageContentChunk/value}}"] is not an {{ImageBitmapSource}} or {{BufferSource}}, then throw a {{TypeError}}.

      1. If |content|["{{LanguageModelMessageContentChunk/type}}"] is "{{LanguageModelMessageType/audio}}", then:

        1. If |expectedTypes| does not [=list/contain=] "{{LanguageModelMessageType/audio}}", then throw a "{{NotSupportedError}}" {{DOMException}}.

        1. If |content|["{{LanguageModelMessageContentChunk/value}}"] is not an {{AudioBuffer}}, {{BufferSource}}, or {{Blob}}, then throw a {{TypeError}}.

    1. Let |contentWithContiguousTextCollapsed| be an empty [=list=] of {{LanguageModelMessageContent}}s.

    1. Let |lastTextContent| be null.

    1. [=list/For each=] |content| of |message|["{{LanguageModelMessage/content}}"]:

      1. If |content|["{{LanguageModelMessageContentChunk/type}}"] is "{{LanguageModelMessageType/text}}":

        1. If |lastTextContent| is null:

          1. [=list/Append=] |content| to |contentWithContiguousTextCollapsed|.

          1. Set |lastTextContent| to |content|.

        1. Otherwise, set |lastTextContent|["{{LanguageModelMessageContentChunk/value}}"] to the concatenation of |lastTextContent|["{{LanguageModelMessageContentChunk/value}}"] and |content|["{{LanguageModelMessageContentChunk/value}}"].

          <p class="note">No space or other character is added. Thus, « «[ "{{LanguageModelMessageContentChunk/type}}" → "{{LanguageModelMessageType/text}}", "`foo`" ]», «[ "{{LanguageModelMessageContentChunk/type}}" → "{{LanguageModelMessageType/text}}", "`bar`" ]» » is canonicalized to « «[ "{{LanguageModelMessageContentChunk/type}}" → "{{LanguageModelMessageType/text}}", "`foobar`" ]».</p>

      1. Otherwise:

        1. [=list/Append=] |content| to |contentWithContiguousTextCollapsed|.

        1. Set |lastTextContent| to null.

      1. Set |message|["{{LanguageModelMessage/content}}"] to |contentWithContiguousTextCollapsed|.

    1. [=list/Append=] |message| to |messages|.

  1. If |messages| [=list/is empty=], then throw a "{{SyntaxError}}" {{DOMException}}.

  1. Return |messages|.
</div>

<h3 id="permissions-policy">Permissions policy integration</h3>

Access to the prompt API is gated behind the [=policy-controlled feature=] "<dfn permission>language-model</dfn>", which has a [=policy-controlled feature/default allowlist=] of <code>[=default allowlist/'self'=]</code>.

<h2 id="privacy">Privacy considerations</h2>

Please see [[WRITING-ASSISTANCE-APIS#privacy]] for a discussion of privacy considerations for the prompt API. That text was written to apply to all APIs sharing the same infrastructure, as noted in [[#dependencies]].

<h2 id="security">Security considerations</h2>

Please see [[WRITING-ASSISTANCE-APIS#security]] for a discussion of security considerations for the prompt API. That text was written to apply to all APIs sharing the same infrastructure, as noted in [[#dependencies]].