Skip to content

Commit dc7007b

Browse files
authored
Fix small estimator/payload instruction mismatch in remote compaction (openai#10692)
## Summary

This PR fixes a deterministic mismatch in remote compaction where pre-trim estimation and the `/v1/responses/compact` payload could use different base instructions.

Before this change:
- pre-trim estimation used model-derived instructions (`model_info.get_model_instructions(...)`)
- the compact payload used session base instructions (`sess.get_base_instructions()`)

After this change:
- remote pre-trim estimation and the compact payload both use the same `BaseInstructions` instance from session state.

## Changes

- Added a shared estimator entry point in `ContextManager`:
  - `estimate_token_count_with_base_instructions(&self, base_instructions: &BaseInstructions) -> Option<i64>`
- Kept `estimate_token_count(&TurnContext)` as a thin wrapper that resolves model/personality instructions and delegates to the new helper.
- Updated the remote compaction flow to fetch base instructions once and reuse them for both:
  - trim preflight estimation
  - compact request payload construction
- Added regression coverage for parity and behavior:
  - a unit test verifying explicit-base estimator behavior
  - an integration test proving remote compaction uses session override instructions and trims accordingly

## Why this matters

This removes a source of deterministic divergence where the pre-trim estimate could conclude the request fit while the actual compact request exceeded the context window because its instructions were longer or different.

## Scope

In scope:
- estimator/payload base-instructions parity in remote compaction

Out of scope:
- retry on `context_length_exceeded`
- compaction threshold/headroom policy changes
- broader trimming policy changes

## Codex

author: `codex fork 019c2b24-c2df-7b31-a482-fb8cf7a28559`
1 parent cd5f49a commit dc7007b

5 files changed

Lines changed: 314 additions & 10 deletions

File tree

codex-rs/core/src/codex.rs

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2117,10 +2117,10 @@ impl Session {
21172117
}
21182118

21192119
pub(crate) async fn recompute_token_usage(&self, turn_context: &TurnContext) {
2120-
let Some(estimated_total_tokens) = self
2121-
.clone_history()
2122-
.await
2123-
.estimate_token_count(turn_context)
2120+
let history = self.clone_history().await;
2121+
let base_instructions = self.get_base_instructions().await;
2122+
let Some(estimated_total_tokens) =
2123+
history.estimate_token_count_with_base_instructions(&base_instructions)
21242124
else {
21252125
return;
21262126
};
@@ -4782,6 +4782,7 @@ mod tests {
47824782
use crate::turn_diff_tracker::TurnDiffTracker;
47834783
use codex_app_server_protocol::AppInfo;
47844784
use codex_app_server_protocol::AuthMode;
4785+
use codex_protocol::models::BaseInstructions;
47854786
use codex_protocol::models::ContentItem;
47864787
use codex_protocol::models::ResponseItem;
47874788
use std::path::Path;
@@ -5061,6 +5062,46 @@ mod tests {
50615062
assert_eq!(actual, Some(info2));
50625063
}
50635064

5065+
#[tokio::test]
5066+
async fn recompute_token_usage_uses_session_base_instructions() {
5067+
let (session, turn_context) = make_session_and_context().await;
5068+
5069+
let override_instructions = "SESSION_OVERRIDE_INSTRUCTIONS_ONLY".repeat(120);
5070+
{
5071+
let mut state = session.state.lock().await;
5072+
state.session_configuration.base_instructions = override_instructions.clone();
5073+
}
5074+
5075+
let item = user_message("hello");
5076+
session
5077+
.record_into_history(std::slice::from_ref(&item), &turn_context)
5078+
.await;
5079+
5080+
let history = session.clone_history().await;
5081+
let session_base_instructions = BaseInstructions {
5082+
text: override_instructions,
5083+
};
5084+
let expected_tokens = history
5085+
.estimate_token_count_with_base_instructions(&session_base_instructions)
5086+
.expect("estimate with session base instructions");
5087+
let model_estimated_tokens = history
5088+
.estimate_token_count(&turn_context)
5089+
.expect("estimate with model instructions");
5090+
assert_ne!(expected_tokens, model_estimated_tokens);
5091+
5092+
session.recompute_token_usage(&turn_context).await;
5093+
5094+
let actual_tokens = session
5095+
.state
5096+
.lock()
5097+
.await
5098+
.token_info()
5099+
.expect("token info")
5100+
.last_token_usage
5101+
.total_tokens;
5102+
assert_eq!(actual_tokens, expected_tokens.max(0));
5103+
}
5104+
50645105
#[tokio::test]
50655106
async fn record_initial_history_reconstructs_forked_transcript() {
50665107
let (session, turn_context) = make_session_and_context().await;

codex-rs/core/src/compact_remote.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::protocol::RolloutItem;
1212
use crate::protocol::TurnStartedEvent;
1313
use codex_protocol::items::ContextCompactionItem;
1414
use codex_protocol::items::TurnItem;
15+
use codex_protocol::models::BaseInstructions;
1516
use codex_protocol::models::ResponseItem;
1617
use tracing::info;
1718

@@ -49,8 +50,12 @@ async fn run_remote_compact_task_inner_impl(
4950
sess.emit_turn_item_started(turn_context, &compaction_item)
5051
.await;
5152
let mut history = sess.clone_history().await;
52-
let deleted_items =
53-
trim_function_call_history_to_fit_context_window(&mut history, turn_context.as_ref());
53+
let base_instructions = sess.get_base_instructions().await;
54+
let deleted_items = trim_function_call_history_to_fit_context_window(
55+
&mut history,
56+
turn_context.as_ref(),
57+
&base_instructions,
58+
);
5459
if deleted_items > 0 {
5560
info!(
5661
turn_id = %turn_context.sub_id,
@@ -71,7 +76,7 @@ async fn run_remote_compact_task_inner_impl(
7176
input: history.for_prompt(),
7277
tools: vec![],
7378
parallel_tool_calls: false,
74-
base_instructions: sess.get_base_instructions().await,
79+
base_instructions,
7580
personality: turn_context.personality,
7681
output_schema: None,
7782
};
@@ -107,14 +112,15 @@ async fn run_remote_compact_task_inner_impl(
107112
fn trim_function_call_history_to_fit_context_window(
108113
history: &mut ContextManager,
109114
turn_context: &TurnContext,
115+
base_instructions: &BaseInstructions,
110116
) -> usize {
111117
let mut deleted_items = 0usize;
112118
let Some(context_window) = turn_context.model_context_window() else {
113119
return deleted_items;
114120
};
115121

116122
while history
117-
.estimate_token_count(turn_context)
123+
.estimate_token_count_with_base_instructions(base_instructions)
118124
.is_some_and(|estimated_tokens| estimated_tokens > context_window)
119125
{
120126
let Some(last_item) = history.raw_items().last() else {

codex-rs/core/src/context_manager/history.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::truncate::approx_tokens_from_byte_count;
99
use crate::truncate::truncate_function_output_items_with_policy;
1010
use crate::truncate::truncate_text;
1111
use crate::user_shell_command::is_user_shell_command_text;
12+
use codex_protocol::models::BaseInstructions;
1213
use codex_protocol::models::ContentItem;
1314
use codex_protocol::models::FunctionCallOutputBody;
1415
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -88,8 +89,18 @@ impl ContextManager {
8889
pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
8990
let model_info = &turn_context.model_info;
9091
let personality = turn_context.personality.or(turn_context.config.personality);
91-
let base_instructions = model_info.get_model_instructions(personality);
92-
let base_tokens = i64::try_from(approx_token_count(&base_instructions)).unwrap_or(i64::MAX);
92+
let base_instructions = BaseInstructions {
93+
text: model_info.get_model_instructions(personality),
94+
};
95+
self.estimate_token_count_with_base_instructions(&base_instructions)
96+
}
97+
98+
pub(crate) fn estimate_token_count_with_base_instructions(
99+
&self,
100+
base_instructions: &BaseInstructions,
101+
) -> Option<i64> {
102+
let base_tokens =
103+
i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX);
93104

94105
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
95106
acc.saturating_add(estimate_item_token_count(item))

codex-rs/core/src/context_manager/history_tests.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use super::*;
22
use crate::truncate;
33
use crate::truncate::TruncationPolicy;
44
use codex_git::GhostCommit;
5+
use codex_protocol::models::BaseInstructions;
56
use codex_protocol::models::ContentItem;
67
use codex_protocol::models::FunctionCallOutputBody;
78
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -103,6 +104,10 @@ fn truncate_exec_output(content: &str) -> String {
103104
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
104105
}
105106

107+
fn approx_token_count_for_text(text: &str) -> i64 {
108+
i64::try_from(text.len().saturating_add(3) / 4).unwrap_or(i64::MAX)
109+
}
110+
106111
#[test]
107112
fn filters_non_api_messages() {
108113
let mut h = ContextManager::default();
@@ -250,6 +255,28 @@ fn get_history_for_prompt_drops_ghost_commits() {
250255
assert_eq!(filtered, vec![]);
251256
}
252257

258+
#[test]
259+
fn estimate_token_count_with_base_instructions_uses_provided_text() {
260+
let history = create_history_with_items(vec![assistant_msg("hello from history")]);
261+
let short_base = BaseInstructions {
262+
text: "short".to_string(),
263+
};
264+
let long_base = BaseInstructions {
265+
text: "x".repeat(1_000),
266+
};
267+
268+
let short_estimate = history
269+
.estimate_token_count_with_base_instructions(&short_base)
270+
.expect("token estimate");
271+
let long_estimate = history
272+
.estimate_token_count_with_base_instructions(&long_base)
273+
.expect("token estimate");
274+
275+
let expected_delta = approx_token_count_for_text(&long_base.text)
276+
- approx_token_count_for_text(&short_base.text);
277+
assert_eq!(long_estimate - short_estimate, expected_delta);
278+
}
279+
253280
#[test]
254281
fn remove_first_item_removes_matching_output_for_function_call() {
255282
let items = vec![

0 commit comments

Comments (0)