From 461aaba524e701bb8734f99f342ab11bacb7694f Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Tue, 31 Mar 2026 16:36:14 -0400
Subject: [PATCH 1/2] add utf8proc_free

---
 README.md            |  6 +++---
 bench/bench.c        |  4 ++--
 test/case.c          |  4 ++--
 test/fuzzer.c        | 44 ++++++++++++++++++++++----------------------
 test/graphemetest.c  |  4 ++--
 test/normtest.c      |  2 +-
 test/printproperty.c |  2 +-
 test/tests.c         |  2 +-
 utf8proc.c           |  4 ++++
 utf8proc.h           | 17 ++++++++++++++++-
 10 files changed, 54 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index 72fe0cd4..e9a7ba2f 100644
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ utf8proc_uint8_t *fold_str;
 utf8proc_map(str, 0, &fold_str, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
 printf("%s\n", fold_str);
 // ss
-free(fold_str);
+utf8proc_free(fold_str);
 ```
 
 ### Normalization Form C/D (NFC/NFD)
@@ -138,6 +138,6 @@ utf8proc_uint8_t *nfd= utf8proc_NFD(input); // = {0x61, 0xcc, 0x88, 0x6f, 0xcc,
 // Compose "a\u0308o\u0308u\u0308" into "\u00e4\u00f6\u00fc" (= "äöü" via precomposed characters)
 utf8proc_uint8_t *nfc= utf8proc_NFC(nfd);
 
-free(nfd);
-free(nfc);
+utf8proc_free(nfd);
+utf8proc_free(nfc);
 ```
diff --git a/bench/bench.c b/bench/bench.c
index 4932c6d4..b0cc92a8 100644
--- a/bench/bench.c
+++ b/bench/bench.c
@@ -9,7 +9,7 @@ int main(int argc, char **argv)
 {
 	 int i, j;
 	 int options = 0;
-	 
+
 	 for (i = 1; i < argc; ++i) {
 		  if (!strcmp(argv[i], "-nfkc")) {
 			   options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT;
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
 		  mytime start = gettime();
 		  for (j = 0; j < 100; ++j) {
 			   utf8proc_map(src, len, &dest, options);
-			   free(dest);
+			   utf8proc_free(dest);
 		  }
 		  printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
 		  free(src);
diff --git a/test/case.c b/test/case.c
index c1630f0d..e7a92d40 100644
--- a/test/case.c
+++ b/test/case.c
@@ -68,8 +68,8 @@ int main(int argc, char **argv)
      check(!strcmp((char*)s1, "ss") &&
            !strcmp((char*)s2, "ss"),
            "incorrect 0x00df/0x1e9e casefold normalization");
-     free(s1);
-     free(s2);
+     utf8proc_free(s1);
+     utf8proc_free(s2);
      printf("More up-to-date than OS unicode tables for %d tests.\n", better);
      printf("utf8proc case conversion tests SUCCEEDED.\n");
      return 0;
diff --git a/test/fuzzer.c b/test/fuzzer.c
index fad14cc9..1c216efc 100644
--- a/test/fuzzer.c
+++ b/test/fuzzer.c
@@ -16,13 +16,13 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
     utf8proc_ssize_t ret, bytes = 0;
     utf8proc_uint8_t *str = NULL;
     size_t len = strlen((const char*)data);
-    
+
     while(bytes != len)
     {
         ret = utf8proc_iterate(ptr, -1, &c);
-        
+
         if(ret < 0 || ret == 0) break;
-        
+
         bytes += ret;
         ptr += ret;
 
@@ -35,31 +35,31 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
         utf8proc_category(c);
         utf8proc_category_string(c);
         utf8proc_codepoint_valid(c);
-        
+
         utf8proc_grapheme_break(c_prev, c);
         utf8proc_grapheme_break_stateful(c_prev, c, &state);
-        
+
         c_prev = c;
     }
-    
+
     utf8proc_int32_t *copy = size >= 4 ? NULL : malloc(size);
-    
+
     if(copy)
     {
         size /= 4;
-        
+
         options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         options = UTF8PROC_STRIPCC;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
@@ -71,30 +71,30 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
         options = 0;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         free(copy);
     }
 
-    free(utf8proc_NFD(data));
-    free(utf8proc_NFC(data));
-    free(utf8proc_NFKD(data));
-    free(utf8proc_NFKC(data));
-    free(utf8proc_NFKC_Casefold(data));
+    utf8proc_free(utf8proc_NFD(data));
+    utf8proc_free(utf8proc_NFC(data));
+    utf8proc_free(utf8proc_NFKD(data));
+    utf8proc_free(utf8proc_NFKC(data));
+    utf8proc_free(utf8proc_NFKC_Casefold(data));
 
     utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_STRIPNA);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_LUMP | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_COMPOSE | UTF8PROC_STRIPMARK);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_DECOMPOSE);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_COMPOSE);
-    free(str);
+    utf8proc_free(str);
 
     return 0;
 }
diff --git a/test/graphemetest.c b/test/graphemetest.c
index 025cd5d9..7fb16332 100644
--- a/test/graphemetest.c
+++ b/test/graphemetest.c
@@ -58,7 +58,7 @@ void checkline(const char *_buf, bool verbose) {
             check(!strcmp((char*)g, (char*)src),
                 "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
         }
-        free(g);
+        utf8proc_free(g);
     }
 
     if (si) { /* test manual calls to utf8proc_grapheme_break_stateful */
@@ -112,7 +112,7 @@ int main(int argc, char **argv)
         glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
         check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
         check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
-        free(g);
+        utf8proc_free(g);
     };
 
     /* https://github.com/JuliaLang/julia/issues/37680 */
diff --git a/test/normtest.c b/test/normtest.c
index 2e0d30b8..5dfc4885 100644
--- a/test/normtest.c
+++ b/test/normtest.c
@@ -4,7 +4,7 @@
     unsigned char *src_norm = (unsigned char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src);      \
     check(!strcmp((char *) norm, (char *) src_norm),                                  \
           "normalization failed for %s -> %s", src, norm);          \
-    free(src_norm);                                                 \
+    utf8proc_free(src_norm);                                                 \
 }
 
 int main(int argc, char **argv)
diff --git a/test/printproperty.c b/test/printproperty.c
index 13f3115b..6c6ee4c4 100644
--- a/test/printproperty.c
+++ b/test/printproperty.c
@@ -62,7 +62,7 @@ int main(int argc, char **argv)
         p->boundclass,
         p->indic_conjunct_break,
         utf8proc_charwidth(c));
-        free(map);
+        utf8proc_free(map);
     }
     return 0;
 }
diff --git a/test/tests.c b/test/tests.c
index 8a47b85a..9dfdc7da 100644
--- a/test/tests.c
+++ b/test/tests.c
@@ -95,6 +95,6 @@ void check_compare(const char *transformation,
           print_string_and_escaped(f, expected);
      }
      fprintf(f, "\n");
-     if (free_received) free(received);
+     if (free_received) utf8proc_free(received);
      if (!passed) exit(1);
 }
diff --git a/utf8proc.c b/utf8proc.c
index e8fa207a..d2d45590 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -836,3 +836,7 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8
     UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE));
   return retval;
 }
+
+UTF8PROC_DLLEXPORT  void utf8proc_free(utf8proc_uint8_t *ptr) {
+  free(ptr);
+}
diff --git a/utf8proc.h b/utf8proc.h
index 8d9a2e41..83bb5f6c 100644
--- a/utf8proc.h
+++ b/utf8proc.h
@@ -750,6 +750,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
  *
  * @note The memory of the new UTF-8 string will have been allocated
  * with `malloc`, and should therefore be deallocated with `free`.
+ * However, it is safer to deallocate it with @ref utf8proc_free, in case
+ * your application is linked to a different C runtime library than utf8proc is.
  *
  * @note `utf8proc_map` simply calls `utf8proc_decompose` followed by `utf8proc_reencode`,
  * and applications requiring greater control over memory allocation should instead call
@@ -760,7 +762,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
 );
 
 /**
- * Like utf8proc_map(), but also takes a `custom_func` mapping function
+ * Like @ref utf8proc_map, but also takes a `custom_func` mapping function
  * that is called on each codepoint in `str` before any other transformations
  * (along with a `custom_data` pointer that is passed through to `custom_func`).
  * The `custom_func` argument is ignored if it is `NULL`.
@@ -776,6 +778,11 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
  * NFKC_Casefold normalized version of the null-terminated string `str`.  These
  * are shortcuts to calling utf8proc_map() with @ref UTF8PROC_NULLTERM
  * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
+ *
+ * @note The memory of the new UTF-8 string will have been allocated
+ * with `malloc`, and should therefore be deallocated with `free`.
+ * However, it is safer to deallocate it with @ref utf8proc_free, in case
+ * your application is linked to a different C runtime library than utf8proc is.
  */
 /** @{ */
 /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
@@ -793,6 +800,14 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
 /** @} */
 
+/**
+ * Deallocate memory allocated and returned by @ref utf8proc_map and similar functions.
+ * (This simply calls the `free` function from the underlying C library linked to utf8proc.)
+ * It is safer to call `utf8proc_free` than calling `free` directly, in case your application
+ * is linked to a different C library with incompatible `malloc` and `free` functions.
+ */
+UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr);
+
 #ifdef __cplusplus
 }
 #endif

From 514b7191fc113061f3fd29f52040e05a073d8f12 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Tue, 31 Mar 2026 17:31:28 -0400
Subject: [PATCH 2/2] Remove stray double space in utf8proc_free definition

---
 utf8proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utf8proc.c b/utf8proc.c
index d2d45590..ad3cae9f 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -837,6 +837,6 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8
   return retval;
 }
 
-UTF8PROC_DLLEXPORT  void utf8proc_free(utf8proc_uint8_t *ptr) {
+UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr) {
   free(ptr);
 }