Cleanup: add doc-strings to upper/lowercase functions

Move detailed note into the implementation.
2023-04-12 11:24:12 +10:00 · 2023-04-12 11:24:12 +10:00 · 6fa80d1e1d
commit 6fa80d1e1d
parent ccea39b538
2 changed files with 23 additions and 8 deletions
--- a/source/blender/blenlib/BLI_string_utf8.h
+++ b/source/blender/blenlib/BLI_string_utf8.h
@ -173,11 +173,16 @@ int BLI_wcwidth(char32_t ucs) ATTR_WARN_UNUSED_RESULT;
 int BLI_wcswidth(const char32_t *pwcs, size_t n) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);

 /**
- * Upper and lowercase for 32-bit characters for all scripts that distinguish case. One-to-one
- * mappings so this doesn't work corectly for uppercase Σ (two lowercase forms) and lowercase ß
- * won't become "SS".
+ * Return the uppercase of a 32-bit character or the character when no case change is needed.
+ *
+ * \note This is a 1:1 mapping: conversions that produce multiple characters are not handled.
 */
 char32_t BLI_str_utf32_char_to_upper(char32_t wc);
+/**
+ * Return the lowercase of a 32-bit character or the character when no case change is needed.
+ *
+ * \note This is a 1:1 mapping: conversions that produce multiple characters are not handled.
+ */
 char32_t BLI_str_utf32_char_to_lower(char32_t wc);

 /**
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@ -399,7 +399,17 @@ int BLI_str_utf8_char_width_safe(const char *p)
  return (columns < 0) ? 1 : columns;
 }

-char32_t BLI_str_utf32_char_to_upper(char32_t wc)
+/* -------------------------------------------------------------------- */
+/** \name UTF32 Case Conversion
+ *
+ * \warning the lower/uppercase forms of some characters use multiple characters.
+ * These cases are not accounted for by these conversion functions.
+ * A common example is the German `eszett` / `scharfes S` (ß), whose uppercase form is "SS".
+ * Supporting such cases would require operating on a character array, with support for resizing
+ * (for reference, Python's upper/lower functions support this).
+ * \{ */
+
+char32_t BLI_str_utf32_char_to_upper(const char32_t wc)
 {
  if (wc < U'\xFF') { /* Latin. */
    if ((wc <= U'z' && wc >= U'a') || (wc <= U'\xF6' && wc >= U'\xE0') ||
@ -420,7 +430,7 @@ char32_t BLI_str_utf32_char_to_upper(char32_t wc)
  if (wc <= U'\x24E9' && wc >= U'\x24D0') { /* Enclosed Numerals. */
    return wc - 26;
  }
-  if (wc <= U'\xFF5A' && wc >= U'\xFF41') { /* Fullwidth Forms. */
+  if (wc <= U'\xFF5A' && wc >= U'\xFF41') { /* Full-width Forms. */
    return wc - 32;
  }

@ -506,7 +516,7 @@ char32_t BLI_str_utf32_char_to_upper(char32_t wc)
  return wc;
 }

-char32_t BLI_str_utf32_char_to_lower(char32_t wc)
+char32_t BLI_str_utf32_char_to_lower(const char32_t wc)
 {
  if (wc < U'\xD8') { /* Latin. */
    if ((wc <= U'Z' && wc >= U'A') || (wc <= U'\xD6' && wc >= U'\xC0')) {
@ -525,7 +535,7 @@ char32_t BLI_str_utf32_char_to_lower(char32_t wc)
  if (wc <= U'\x24CF' && wc >= U'\x24B6') { /* Enclosed Numerals. */
    return wc + 26;
  }
-  if (wc <= U'\xFF3A' && wc >= U'\xFF21') { /* Fullwidth Forms. */
+  if (wc <= U'\xFF3A' && wc >= U'\xFF21') { /* Full-width Forms. */
    return wc + 32;
  }

@ -611,7 +621,7 @@ char32_t BLI_str_utf32_char_to_lower(char32_t wc)
  return wc;
 }

-/* -------------------------------------------------------------------- */
+/** \} */ /* -------------------------------------------------------------------- */

 /* copied from glib's gutf8.c, added 'Err' arg */