From 6fa80d1e1d0af011620a6978fea89a324a9d8d3d Mon Sep 17 00:00:00 2001 From: Campbell Barton Date: Wed, 12 Apr 2023 11:24:12 +1000 Subject: [PATCH] Cleanup: add doc-strings to upper/lowercase functions Move detailed note into the implementation. --- source/blender/blenlib/BLI_string_utf8.h | 11 ++++++++--- source/blender/blenlib/intern/string_utf8.c | 20 +++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h index 1ac319a5c9d..b298415908a 100644 --- a/source/blender/blenlib/BLI_string_utf8.h +++ b/source/blender/blenlib/BLI_string_utf8.h @@ -173,11 +173,16 @@ int BLI_wcwidth(char32_t ucs) ATTR_WARN_UNUSED_RESULT; int BLI_wcswidth(const char32_t *pwcs, size_t n) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); /** - * Upper and lowercase for 32-bit characters for all scripts that distinguish case. One-to-one - * mappings so this doesn't work corectly for uppercase Σ (two lowercase forms) and lowercase ß - * won't become "SS". + * Return the uppercase of a 32-bit character or the character when no case change is needed. + * + * \note A 1:1 mapping doesn't account for multiple characters as part of conversion in some cases. */ char32_t BLI_str_utf32_char_to_upper(char32_t wc); +/** + * Return the lowercase of a 32-bit character or the character when no case change is needed. + * + * \note A 1:1 mapping doesn't account for multiple characters as part of conversion in some cases. + */ char32_t BLI_str_utf32_char_to_lower(char32_t wc); /** diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c index aa859e529a6..e1977427b68 100644 --- a/source/blender/blenlib/intern/string_utf8.c +++ b/source/blender/blenlib/intern/string_utf8.c @@ -399,7 +399,17 @@ int BLI_str_utf8_char_width_safe(const char *p) return (columns < 0) ? 
1 : columns; } -char32_t BLI_str_utf32_char_to_upper(char32_t wc) +/* -------------------------------------------------------------------- */ +/** \name UTF32 Case Conversion + * + * \warning the lower/uppercase forms of some characters use multiple characters. + * These cases are not accounted for by this conversion function. + * A common example is the German `eszett` / `scharfes S`. + * Supporting such cases would have to operate on a character array, with support for resizing. + * (for reference - Python's upper/lower functions support this). + * \{ */ + +char32_t BLI_str_utf32_char_to_upper(const char32_t wc) { if (wc < U'\xFF') { /* Latin. */ if ((wc <= U'z' && wc >= U'a') || (wc <= U'\xF6' && wc >= U'\xE0') || @@ -420,7 +430,7 @@ char32_t BLI_str_utf32_char_to_upper(char32_t wc) if (wc <= U'\x24E9' && wc >= U'\x24D0') { /* Enclosed Numerals. */ return wc - 26; } - if (wc <= U'\xFF5A' && wc >= U'\xFF41') { /* Fullwidth Forms. */ + if (wc <= U'\xFF5A' && wc >= U'\xFF41') { /* Full-width Forms. */ return wc - 32; } @@ -506,7 +516,7 @@ char32_t BLI_str_utf32_char_to_upper(char32_t wc) return wc; } -char32_t BLI_str_utf32_char_to_lower(char32_t wc) +char32_t BLI_str_utf32_char_to_lower(const char32_t wc) { if (wc < U'\xD8') { /* Latin. */ if ((wc <= U'Z' && wc >= U'A') || (wc <= U'\xD6' && wc >= U'\xC0')) { @@ -525,7 +535,7 @@ char32_t BLI_str_utf32_char_to_lower(char32_t wc) if (wc <= U'\x24CF' && wc >= U'\x24B6') { /* Enclosed Numerals. */ return wc + 26; } - if (wc <= U'\xFF3A' && wc >= U'\xFF21') { /* Fullwidth Forms. */ + if (wc <= U'\xFF3A' && wc >= U'\xFF21') { /* Full-width Forms. */ return wc + 32; } @@ -611,7 +621,7 @@ char32_t BLI_str_utf32_char_to_lower(char32_t wc) return wc; } -/* -------------------------------------------------------------------- */ +/** \} */ /* -------------------------------------------------------------------- */ /* copied from glib's gutf8.c, added 'Err' arg */