Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions crates/generate-types/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,11 @@ fn post_process_quicktype_output_for_google(quicktype_output: &str) -> String {
// "STRING" (Google native) and "string" (OpenAI JSON Schema) during deserialization
processed = add_type_enum_lowercase_aliases(&processed);

// Google's Discovery spec represents Schema int64 fields as protobuf-JSON
// strings. Accept numeric JSON Schema input too, while preserving string
// serialization in the generated Google type.
processed = add_google_schema_int64_deserializers(&processed);

processed
}

Expand Down Expand Up @@ -1750,3 +1755,75 @@ fn add_type_enum_lowercase_aliases(content: &str) -> String {

result_lines.join("\n")
}

/// Post-processes generated Google types so that the int64 bound fields of the
/// `Schema` struct can be deserialized from JSON numbers as well as strings.
///
/// Two textual edits are applied to `content`:
///
/// 1. The `deserialize_optional_i64_string` helper is inserted in front of the
///    first `#[derive(` occurrence, unless a function with that name is already
///    defined in `content`.
/// 2. Inside `pub struct Schema { ... }`, every line of the exact form
///    `pub <field>: Option<String>,` whose field is listed in
///    `INT64_SCHEMA_FIELDS` gets a
///    `#[serde(default, deserialize_with = "deserialize_optional_i64_string")]`
///    line placed directly above it, unless the preceding line already mentions
///    the helper.
///
/// Both edits are guarded, so running the function on its own output returns
/// that output unchanged.
///
/// The result is rebuilt from `str::lines` joined with `\n`, so a trailing
/// newline in `content` is not preserved.
fn add_google_schema_int64_deserializers(content: &str) -> String {
    const HELPER_NAME: &str = "deserialize_optional_i64_string";
    const INT64_SCHEMA_FIELDS: &[&str] = &[
        "max_items",
        "min_items",
        "min_properties",
        "max_properties",
        "min_length",
        "max_length",
    ];
    // Source text of the helper that is spliced into the generated file.
    const HELPER: &str = r#"
fn deserialize_optional_i64_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(serde::Deserialize)]
#[serde(untagged)]
enum I64String {
String(String),
I64(i64),
U64(u64),
}

Ok(Option::<I64String>::deserialize(deserializer)?.map(|value| match value {
I64String::String(value) => value,
I64String::I64(value) => value.to_string(),
I64String::U64(value) => value.to_string(),
}))
}
"#;

    let mut processed = content.to_string();

    // The helper itself contains a `#[derive(`; inserting it a second time would
    // splice the new copy into the body of the first one, so skip the insertion
    // when the helper is already defined.
    let insert_at = if processed.contains("fn deserialize_optional_i64_string") {
        None
    } else {
        processed.find("#[derive(")
    };
    if let Some(insert_at) = insert_at {
        processed.insert_str(insert_at, HELPER);
    }

    let mut result_lines: Vec<String> = Vec::new();
    let mut in_schema = false;

    for line in processed.lines() {
        let trimmed = line.trim();

        // Track whether the current line lies inside the `Schema` struct body.
        if trimmed.ends_with("pub struct Schema {") {
            in_schema = true;
        } else if in_schema && trimmed == "}" {
            in_schema = false;
        }

        // Matches exactly `pub <field>: Option<String>,` for a listed field.
        let is_int64_field = in_schema
            && trimmed
                .strip_prefix("pub ")
                .and_then(|rest| rest.strip_suffix(": Option<String>,"))
                .is_some_and(|name| INT64_SCHEMA_FIELDS.contains(&name));

        if is_int64_field {
            let already_has_attr = result_lines
                .last()
                .is_some_and(|prev| prev.contains(HELPER_NAME));
            if !already_has_attr {
                // Re-create the field's leading indentation width with spaces.
                let indent = line.len() - line.trim_start().len();
                result_lines.push(format!(
                    "{}#[serde(default, deserialize_with = \"deserialize_optional_i64_string\")]",
                    " ".repeat(indent)
                ));
            }
        }

        result_lines.push(line.to_string());
    }

    result_lines.join("\n")
}
78 changes: 74 additions & 4 deletions crates/lingua/src/providers/google/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -792,11 +792,15 @@ impl From<&FunctionDeclaration> for UniversalTool {
fn normalize_google_schema_types(mut value: Value) -> Value {
match &mut value {
Value::Object(map) => {
if let Some(Value::String(t)) = map.get_mut("type") {
*t = t.to_lowercase();
}
map.retain(|_, v| !v.is_null());
for v in map.values_mut() {
for (key, v) in map.iter_mut() {
if key == "type" {
if let Value::String(t) = v {
*t = t.to_lowercase();
}
} else if is_google_schema_int64_keyword(key) {
normalize_google_schema_int64_value(v);
Comment thread
knjiang marked this conversation as resolved.
}
*v = normalize_google_schema_types(std::mem::take(v));
}
}
Expand All @@ -810,6 +814,25 @@ fn normalize_google_schema_types(mut value: Value) -> Value {
value
}

/// Returns `true` when `key` is one of the length/size bound keywords
/// (`minLength`, `maxLength`, `minItems`, `maxItems`, `minProperties`,
/// `maxProperties`). The comparison is exact and case-sensitive.
fn is_google_schema_int64_keyword(key: &str) -> bool {
    const INT64_KEYWORDS: [&str; 6] = [
        "minLength",
        "maxLength",
        "minItems",
        "maxItems",
        "minProperties",
        "maxProperties",
    ];

    INT64_KEYWORDS.contains(&key)
}

/// Rewrites `value` in place from a JSON string to a JSON number when the
/// string parses as an `i64`.
///
/// Non-string values, and strings that do not parse as `i64`, are left
/// untouched.
fn normalize_google_schema_int64_value(value: &mut Value) {
    let parsed = match value {
        Value::String(text) => text.parse::<i64>().ok(),
        _ => None,
    };

    if let Some(number) = parsed {
        *value = Value::Number(number.into());
    }
}

/// Recursively strip `exclusiveMinimum` fields from a JSON schema value in-place.
///
/// Google's `Schema` proto does not recognise `exclusiveMinimum` (a JSON Schema Draft 6+
Expand Down Expand Up @@ -1567,6 +1590,53 @@ mod tests {
assert!(tool.is_function());
}

/// Converting a `FunctionDeclaration` to a `UniversalTool` must yield numeric
/// bound keywords whether the input gave them as JSON numbers or as numeric
/// strings.
#[test]
fn test_function_declaration_schema_int64_fields_normalize_to_json_numbers() {
    // Input mixes numeric (`minLength`, `minItems`) and string (`maxLength`,
    // `maxItems`) encodings of the bounds.
    let raw_declaration = json!({
        "name": "demo",
        "description": "demo tool",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "minLength": 1,
                    "maxLength": "9"
                },
                "tags": {
                    "type": "array",
                    "items": {"type": "string"},
                    "minItems": 1,
                    "maxItems": "3"
                }
            }
        }
    });
    let declaration: FunctionDeclaration = serde_json::from_value(raw_declaration).unwrap();

    // Every bound is expected to come out as a JSON number.
    let expected_parameters = json!({
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "minLength": 1,
                "maxLength": 9
            },
            "tags": {
                "type": "array",
                "items": {"type": "string"},
                "minItems": 1,
                "maxItems": 3
            }
        }
    });

    let converted = UniversalTool::from(&declaration);

    assert_eq!(converted.parameters, Some(expected_parameters));
}

#[test]
fn test_universal_tool_to_function_declaration() {
let tool = UniversalTool::function(
Expand Down
55 changes: 55 additions & 0 deletions crates/lingua/src/providers/google/detect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,61 @@ mod tests {
assert!(try_parse_google(&payload).is_ok());
}

/// A Google request whose tool schema gives the length/item bounds as JSON
/// numbers must parse successfully.
#[test]
fn test_try_parse_google_accepts_numeric_schema_int64_fields() {
    let request = json!({
        "contents": [{"role": "user", "parts": [{"text": "Hello"}]}],
        "tools": [{
            "functionDeclarations": [{
                "name": "demo",
                "description": "demo tool",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "minLength": 1,
                            "maxLength": 9
                        },
                        "tags": {
                            "type": "array",
                            "items": {"type": "string"},
                            "minItems": 1,
                            "maxItems": 3
                        }
                    }
                }
            }]
        }]
    });

    let parsed = try_parse_google(&request);

    assert!(parsed.is_ok());
}

/// A Google request whose tool schema gives a length bound as a numeric string
/// must still parse successfully.
#[test]
fn test_try_parse_google_accepts_string_schema_int64_fields() {
    let request = json!({
        "contents": [{"role": "user", "parts": [{"text": "Hello"}]}],
        "tools": [{
            "functionDeclarations": [{
                "name": "demo",
                "description": "demo tool",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "minLength": "1"
                        }
                    }
                }
            }]
        }]
    });

    let parsed = try_parse_google(&request);

    assert!(parsed.is_ok());
}

#[test]
fn test_try_parse_google_with_model_role() {
let payload = json!({
Expand Down
26 changes: 26 additions & 0 deletions crates/lingua/src/providers/google/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,26 @@ use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use ts_rs::TS;

fn deserialize_optional_i64_string<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
where
D: serde::Deserializer<'de>,
{
Comment on lines +24 to +27
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Regenerate Google types from the generator

This generated helper does not match the helper added to crates/generate-types/src/main.rs (the generator emits an untagged enum implementation, while this file contains a hand-written visitor with extra behavior such as visit_f64). Because generated.rs is regenerated from the generator, the next Google type regeneration will overwrite this implementation and make the committed output non-reproducible; please regenerate the file from the updated generator so the checked-in generated code exactly reflects the source-of-truth pipeline.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me regenerate before merging.

#[derive(serde::Deserialize)]
#[serde(untagged)]
enum I64String {
String(String),
I64(i64),
U64(u64),
}

Ok(
Option::<I64String>::deserialize(deserializer)?.map(|value| match value {
I64String::String(value) => value,
I64String::I64(value) => value.to_string(),
I64String::U64(value) => value.to_string(),
}),
)
}
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize, TS)]
#[ts(export_to = "google/")]
pub struct GoogleSchemas {
Expand Down Expand Up @@ -940,25 +960,31 @@ pub struct Schema {
pub maximum: Option<f64>,
/// Optional. Maximum number of the elements for Type.ARRAY.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, deserialize_with = "deserialize_optional_i64_string")]
pub max_items: Option<String>,
/// Optional. Maximum length of the Type.STRING
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, deserialize_with = "deserialize_optional_i64_string")]
pub max_length: Option<String>,
/// Optional. Maximum number of the properties for Type.OBJECT.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, deserialize_with = "deserialize_optional_i64_string")]
pub max_properties: Option<String>,
/// Optional. SCHEMA FIELDS FOR TYPE INTEGER and NUMBER Minimum value of the Type.INTEGER and
/// Type.NUMBER
#[serde(skip_serializing_if = "Option::is_none")]
pub minimum: Option<f64>,
/// Optional. Minimum number of the elements for Type.ARRAY.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, deserialize_with = "deserialize_optional_i64_string")]
pub min_items: Option<String>,
/// Optional. SCHEMA FIELDS FOR TYPE STRING Minimum length of the Type.STRING
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, deserialize_with = "deserialize_optional_i64_string")]
pub min_length: Option<String>,
/// Optional. Minimum number of the properties for Type.OBJECT.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default, deserialize_with = "deserialize_optional_i64_string")]
pub min_properties: Option<String>,
/// Optional. Indicates if the value may be null.
#[serde(skip_serializing_if = "Option::is_none")]
Expand Down
48 changes: 47 additions & 1 deletion payloads/cases/params.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
Modality,
MediaResolution,
} from "@google/genai";
import { TestCaseCollection } from "./types";
import { TestCase, TestCaseCollection } from "./types";
import {
OPENAI_CHAT_COMPLETIONS_MODEL,
OPENAI_RESPONSES_MODEL,
Expand Down Expand Up @@ -2249,6 +2249,52 @@ export const paramsCases: TestCaseCollection = {
bedrock: null,
},

// Google-only case: a tool whose parameter schema gives its length and item
// bounds as JSON numbers.
googleToolSchemaNumericInt64Param: ((): TestCase => {
  // Array parameter bounded to between 1 and 3 string items.
  // NOTE(review): the loose `Record<string, unknown>` typing looks deliberate,
  // presumably so numeric bounds pass the SDK's schema typing — confirm.
  const tagsBounds: Record<string, unknown> = {
    type: Type.ARRAY,
    items: { type: Type.STRING },
    minItems: 1,
    maxItems: 3,
  };
  // String parameter bounded to between 1 and 128 characters.
  const indexNameBounds: Record<string, unknown> = {
    type: Type.STRING,
    minLength: 1,
    maxLength: 128,
  };

  return {
    "chat-completions": null,
    responses: null,
    anthropic: null,
    google: {
      model: GOOGLE_MODEL,
      contents: [
        { role: "user", parts: [{ text: "Validate tool schema bounds." }] },
      ],
      tools: [
        {
          functionDeclarations: [
            {
              name: "validate_bounds",
              description: "Validate bounded string and array inputs.",
              parameters: {
                type: Type.OBJECT,
                properties: {
                  index_name: indexNameBounds,
                  tags: tagsBounds,
                },
                required: ["index_name", "tags"],
              },
            },
          ],
        },
      ],
    },
    bedrock: null,
  };
})(),

exclusiveMinimumToolParam: {
"chat-completions": {
model: OPENAI_CHAT_COMPLETIONS_MODEL,
Expand Down
Loading
Loading