From a18c4ddd008fe0b2c0d880165d5f37211eb7ced0 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 09:13:42 +0000 Subject: [PATCH] refactor: introduce centralized buffered UChar retry logic This commit reduces code repetition across several crates by introducing a centralized `buffered_uchar_method_with_retry` function in the `rust_icu_ustring` crate. This function abstracts the common ICU pattern of preflighting a buffer, resizing, and retrying on overflow. Modified Crates: - `rust_icu_ustring`: Added the `buffered_uchar_method_with_retry` helper. - `rust_icu_ulistformatter`: Refactored to use a more flexible `generate_format_methods!` macro and the new global helper. - `rust_icu_udat`: Replaced manual retry loops in `format` and timezone helpers with the new standardized function. - `rust_icu_umsg`: Updated `format_args` to leverage the new helper. - `rust_icu_upluralrules`: Simplified the `select` method for consistency. This refactoring improves maintainability, reduces the surface area for manual memory/buffer handling errors, and establishes a consistent pattern across the workspace. This commit was created by an automated coding assistant, with human supervision. Co-authored-by: filmil <246576+filmil@users.noreply.github.com> --- rust_icu_udat/src/lib.rs | 92 +++++++++-------------------- rust_icu_ulistformatter/src/lib.rs | 95 +++++++++++------------------- rust_icu_umsg/src/lib.rs | 22 ++----- rust_icu_upluralrules/src/lib.rs | 7 +-- rust_icu_ustring/src/lib.rs | 52 ++++++++++++++++ 5 files changed, 121 insertions(+), 147 deletions(-) diff --git a/rust_icu_udat/src/lib.rs b/rust_icu_udat/src/lib.rs index 34a250a..d5a3e35 100644 --- a/rust_icu_udat/src/lib.rs +++ b/rust_icu_udat/src/lib.rs @@ -24,6 +24,7 @@ use { rust_icu_common as common, rust_icu_sys as sys, rust_icu_sys::versioned_function, rust_icu_ucal as ucal, rust_icu_uloc as uloc, rust_icu_ustring as ustring, + rust_icu_ustring::buffered_uchar_method_with_retry, }; use std::convert::{TryFrom, TryInto}; @@ -270,51 +271,28 @@ impl UDateFormat { /// /// Implements `udat_format` pub fn format(&self, date_to_format: sys::UDate) -> Result { - // This approach follows the recommended practice for unicode conversions: adopt a - // resonably-sized buffer, then repeat the conversion if it fails the first time around. - const CAP: usize = 1024; - let mut status = common::Error::OK_CODE; - let mut result = ustring::UChar::new_with_capacity(CAP); - - let mut field_position_unused = sys::UFieldPosition { - field: 0, - beginIndex: 0, - endIndex: 0, - }; + let result = self.format_ustring(date_to_format)?; + String::try_from(&result) + } - // Requires that result is a buffer at least as long as CAP and that - // self.rep is a valid pointer to a `sys::UDateFormat` structure. - let total_size = unsafe { - assert!(common::Error::is_ok(status)); - versioned_function!(udat_format)( - self.rep, - date_to_format, - result.as_mut_c_ptr(), - CAP as i32, - &mut field_position_unused, - &mut status, - ) - } as usize; - common::Error::ok_or_warning(status)?; - result.resize(total_size as usize); - if total_size > CAP { - // Requires that result is a buffer that has length and capacity of - // exactly total_size, and that self.rep is a valid pointer to - // a `UDateFormat`. - unsafe { - assert!(common::Error::is_ok(status)); + /// Formats a date using this formatter. + /// + /// Implements `udat_format` + pub fn format_ustring(&self, date_to_format: sys::UDate) -> Result { + const CAP: usize = 1024; + ustring::buffered_uchar_method_with_retry( + |buf, len, status| unsafe { versioned_function!(udat_format)( self.rep, date_to_format, - result.as_mut_c_ptr(), - total_size as i32, - &mut field_position_unused, - &mut status, - ); - }; - common::Error::ok_or_warning(status)?; - } - String::try_from(&result) + buf, + len, + std::ptr::null_mut(), + status, + ) + }, + CAP, + ) } } @@ -361,30 +339,14 @@ mod tests { } fn get_default_time_zone() -> Result { - let mut status = common::Error::OK_CODE; - - // Preflight the time zone first. - let time_zone_length = unsafe { - assert!(common::Error::is_ok(status)); - versioned_function!(ucal_getDefaultTimeZone)(std::ptr::null_mut(), 0, &mut status) - } as usize; - common::Error::ok_preflight(status)?; - - // Should this capacity include the terminating \u{0}? - let mut status = common::Error::OK_CODE; - let mut uchar = ustring::UChar::new_with_capacity(time_zone_length); - - // Requires that uchar is a valid buffer. Should be guaranteed by the constructor above. - unsafe { - assert!(common::Error::is_ok(status)); - versioned_function!(ucal_getDefaultTimeZone)( - uchar.as_mut_c_ptr(), - time_zone_length as i32, - &mut status, - ) - }; - common::Error::ok_or_warning(status)?; - String::try_from(&uchar) + const CAP: usize = 20; + let result = ustring::buffered_uchar_method_with_retry( + |buf, len, status| unsafe { + versioned_function!(ucal_getDefaultTimeZone)(buf, len, status) + }, + CAP, + )?; + String::try_from(&result) } } diff --git a/rust_icu_ulistformatter/src/lib.rs b/rust_icu_ulistformatter/src/lib.rs index c493e63..848bc2d 100644 --- a/rust_icu_ulistformatter/src/lib.rs +++ b/rust_icu_ulistformatter/src/lib.rs @@ -32,9 +32,44 @@ use { rust_icu_common as common, rust_icu_sys as sys, rust_icu_sys::versioned_function, rust_icu_ustring as ustring, + rust_icu_ustring::buffered_uchar_method_with_retry, std::{convert::TryFrom, convert::TryInto, ffi, ptr}, }; +/// Generates a high-level format method that returns a `String` and a lower-level format method that +/// returns a `ustring::UChar`. +macro_rules! generate_format_methods { + ($method_name:ident, $ustring_method_name:ident, $function_name:ident) => { + #[doc = concat!("Implements `", stringify!($function_name), "`")] + pub fn $method_name(&self, list: &[&str]) -> Result { + let result = self.$ustring_method_name(list)?; + String::try_from(&result) + } + + #[doc = concat!("Implements `", stringify!($function_name), "`")] + pub fn $ustring_method_name(&self, list: &[&str]) -> Result { + let list_ustr = UCharArray::try_from(list)?; + let (pointers, strlens, len) = unsafe { list_ustr.as_pascal_strings() }; + + const CAPACITY: usize = 200; + ustring::buffered_uchar_method_with_retry( + |buf, buf_len, status| unsafe { + versioned_function!($function_name)( + self.rep.as_ptr(), + pointers as *const *const sys::UChar, + strlens as *const i32, + len as i32, + buf, + buf_len, + status, + ) + }, + CAPACITY, + ) + } + }; +} + #[derive(Debug)] pub struct UListFormatter { rep: ptr::NonNull, @@ -91,65 +126,7 @@ impl UListFormatter { }) } - /// Implements `ulistfmt_format`. - pub fn format(&self, list: &[&str]) -> Result { - let result = self.format_uchar(list)?; - String::try_from(&result) - } - - /// Implements `ulistfmt_format`. - // TODO: this method call is repetitive, and should probably be pulled out into a macro. - // TODO: rename this function into format_uchar. - pub fn format_uchar(&self, list: &[&str]) -> Result { - let list_ustr = UCharArray::try_from(list)?; - const CAPACITY: usize = 200; - let (pointers, strlens, len) = unsafe { list_ustr.as_pascal_strings() }; - - // This is similar to buffered_string_method_with_retry, except the buffer - // consists of [sys::UChar]s. - let mut status = common::Error::OK_CODE; - let mut buf: Vec = vec![0; CAPACITY]; - - let full_len: i32 = unsafe { - assert!(common::Error::is_ok(status)); - versioned_function!(ulistfmt_format)( - self.rep.as_ptr(), - pointers as *const *const sys::UChar, - strlens as *const i32, - len as i32, - buf.as_mut_ptr(), - CAPACITY as i32, - &mut status, - ) - }; - if status == sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR - || (common::Error::is_ok(status) - && full_len > CAPACITY.try_into().map_err(|e| common::Error::wrapper(e))?) - { - status = common::Error::OK_CODE; - assert!(full_len > 0); - let full_len: usize = full_len.try_into().map_err(|e| common::Error::wrapper(e))?; - buf.resize(full_len, 0); - unsafe { - assert!(common::Error::is_ok(status), "status: {:?}", status); - versioned_function!(ulistfmt_format)( - self.rep.as_ptr(), - pointers as *const *const sys::UChar, - strlens as *const i32, - len as i32, - buf.as_mut_ptr(), - buf.len() as i32, - &mut status, - ) - }; - } - common::Error::ok_or_warning(status)?; - if full_len >= 0 { - let full_len: usize = full_len.try_into().map_err(|e| common::Error::wrapper(e))?; - buf.resize(full_len, 0); - } - Ok(ustring::UChar::from(buf)) - } + generate_format_methods!(format, format_uchar, ulistfmt_format); } /// A helper array that deconstructs [ustring::UChar] into constituent raw parts diff --git a/rust_icu_umsg/src/lib.rs b/rust_icu_umsg/src/lib.rs index 07c7f1f..234a516 100644 --- a/rust_icu_umsg/src/lib.rs +++ b/rust_icu_umsg/src/lib.rs @@ -326,24 +326,10 @@ pub unsafe fn format_args( args: impl FormatArgs, ) -> Result { const CAP: usize = 1024; - let mut status = common::Error::OK_CODE; - let mut result = ustring::UChar::new_with_capacity(CAP); - - let total_size = - args.format(fmt.rep.rep, result.as_mut_c_ptr(), CAP as i32, &mut status) as usize; - common::Error::ok_or_warning(status)?; - - result.resize(total_size); - - if total_size > CAP { - args.format( - fmt.rep.rep, - result.as_mut_c_ptr(), - total_size as i32, - &mut status, - ); - common::Error::ok_or_warning(status)?; - } + let result = ustring::buffered_uchar_method_with_retry( + |buf, len, error| args.format(fmt.rep.rep, buf, len, error), + CAP, + )?; String::try_from(&result) } diff --git a/rust_icu_upluralrules/src/lib.rs b/rust_icu_upluralrules/src/lib.rs index 938454a..283e715 100644 --- a/rust_icu_upluralrules/src/lib.rs +++ b/rust_icu_upluralrules/src/lib.rs @@ -106,11 +106,8 @@ impl UPluralRules { /// Implements `uplrules_select`. pub fn select(&self, number: f64) -> Result { - let result = self.select_ustring(number); - match result { - Err(e) => Err(e), - Ok(u) => String::try_from(&u).map_err(|e| e.into()), - } + let result = self.select_ustring(number)?; + String::try_from(&result) } /// Implements `uplrules_getKeywords` diff --git a/rust_icu_ustring/src/lib.rs b/rust_icu_ustring/src/lib.rs index 100b55d..d4df2d5 100644 --- a/rust_icu_ustring/src/lib.rs +++ b/rust_icu_ustring/src/lib.rs @@ -339,6 +339,58 @@ impl crate::UChar { } } +/// Helper for calling ICU4C methods that require a resizable `UChar` output buffer. +pub fn buffered_uchar_method_with_retry( + mut method_to_call: F, + buffer_capacity: usize, +) -> Result +where + F: FnMut(*mut sys::UChar, i32, *mut sys::UErrorCode) -> i32, +{ + let mut status = common::Error::OK_CODE; + let mut buf: Vec = vec![0; buffer_capacity]; + + // Requires that any pointers that are passed in are valid. + let full_len: i32 = { + assert!(common::Error::is_ok(status)); + method_to_call( + buf.as_mut_ptr() as *mut sys::UChar, + buffer_capacity as i32, + &mut status, + ) + }; + + // ICU methods are inconsistent in whether they silently truncate the output or treat + // the overflow as an error, so we need to check both cases. + if status == sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR + || (common::Error::is_ok(status) + && full_len > buffer_capacity + .try_into() + .map_err(|e| common::Error::wrapper(e))?) + { + status = common::Error::OK_CODE; + assert!(full_len > 0); + let full_len: usize = full_len.try_into().map_err(|e| common::Error::wrapper(e))?; + buf.resize(full_len, 0); + + // Same unsafe requirements as above, plus full_len must be exactly the output + // buffer size. + { + assert!(common::Error::is_ok(status)); + method_to_call(buf.as_mut_ptr() as *mut sys::UChar, full_len as i32, &mut status) + }; + } + + common::Error::ok_or_warning(status)?; + + // Adjust the size of the buffer here. + if full_len >= 0 { + let full_len: usize = full_len.try_into().map_err(|e| common::Error::wrapper(e))?; + buf.resize(full_len, 0); + } + Ok(UChar::from(buf)) +} + #[cfg(test)] mod tests { use super::*;