From 32073993a8fcd235a5c7a2022dcdd7aef8a49687 Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Mon, 22 Feb 2021 18:29:52 +0100 Subject: [PATCH] i18n messages extraction script: fix handling of C unicode-escapes. rB1f5647c07d15 introduced, for the first time, a unicode escape in strings to be translated that are extracted directly from the C code itself. This revealed that this case was not properly handled by the current code; for now we work around it using Python's `raw_unicode_escape` encoding/decoding. --- release/scripts/modules/bl_i18n_utils/bl_extract_messages.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/release/scripts/modules/bl_i18n_utils/bl_extract_messages.py b/release/scripts/modules/bl_i18n_utils/bl_extract_messages.py index 9e59a245cc4..180f9f0a01c 100644 --- a/release/scripts/modules/bl_i18n_utils/bl_extract_messages.py +++ b/release/scripts/modules/bl_i18n_utils/bl_extract_messages.py @@ -735,7 +735,9 @@ def dump_src_messages(msgs, reports, settings): _clean_str = re.compile(settings.str_clean_re).finditer def clean_str(s): - return "".join(m.group("clean") for m in _clean_str(s)) + # The encode/decode to/from 'raw_unicode_escape' allows to transform the C-type unicode hexadecimal escapes + # (like '\u2715' for the '×' symbol) back into a proper unicode character. + return "".join(m.group("clean") for m in _clean_str(s)).encode('raw_unicode_escape').decode('raw_unicode_escape') def dump_src_file(path, rel_path, msgs, reports, settings): def process_entry(_msgctxt, _msgid):