From d67609697e7a857e5e129ed77cceee9b3140d9fb Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Mon, 24 Apr 2017 10:49:54 +0200 Subject: [PATCH] Add C internal version of msgfmt (similar to makesrna/dna binaries). Fixes performance issues of C++ one with Windows MSVC debug builds... Merely a translation from msgfmt.cc code by @sergey, using BLI libs instead of C++'s stdlib. Reviewers: sergey, campbellbarton, LazyDodo Subscribers: sergey Differential Revision: https://developer.blender.org/D2605 --- intern/locale/CMakeLists.txt | 19 - intern/locale/msgfmt.cc | 374 -------------- source/blender/blentranslation/CMakeLists.txt | 2 + .../blentranslation/msgfmt/CMakeLists.txt | 50 ++ .../blender/blentranslation/msgfmt/msgfmt.c | 464 ++++++++++++++++++ 5 files changed, 516 insertions(+), 393 deletions(-) delete mode 100644 intern/locale/msgfmt.cc create mode 100644 source/blender/blentranslation/msgfmt/CMakeLists.txt create mode 100644 source/blender/blentranslation/msgfmt/msgfmt.c diff --git a/intern/locale/CMakeLists.txt b/intern/locale/CMakeLists.txt index 6896702fcbf..cbc75d1ab1f 100644 --- a/intern/locale/CMakeLists.txt +++ b/intern/locale/CMakeLists.txt @@ -60,22 +60,3 @@ if(WITH_INTERNATIONAL) endif() blender_add_lib(bf_intern_locale "${SRC}" "${INC}" "${INC_SYS}") - -# ----------------------------------------------------------------------------- -# Build msgfmt executable - -if(CMAKE_COMPILER_IS_GNUCC) - # workaroud ld.gold linker bug - string(REPLACE "-fuse-ld=gold" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -endif() - -set(MSFFMT_SRC - msgfmt.cc -) -add_executable(msgfmt ${MSFFMT_SRC}) - -if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 3.4))) - # needed for clang 3.4+ - target_link_libraries(msgfmt ${PLATFORM_LINKLIBS}) -endif() - diff --git a/intern/locale/msgfmt.cc b/intern/locale/msgfmt.cc deleted file mode 100644 index 02c58ebc5bc..00000000000 --- a/intern/locale/msgfmt.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Written by 
Sergey Sharybin -// Added support for contexts -// -// Based on Python script msgfmt.py from Python source -// code tree, which was written by Written by -// Martin v. Löwis -// -// Generate binary message catalog from textual translation description. -// -// This program converts a textual Uniforum-style message catalog (.po file) into -// a binary GNU catalog (.mo file). This is essentially the same function as the -// GNU msgfmt program, however, it is a simpler implementation. -// -// Usage: msgfmt input.po output.po - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { - -std::map MESSAGES; - -bool starts_with(const std::string &str, - const std::string &prefix) { - const size_t prefix_length = prefix.length(); - if (prefix_length == 0) { - return true; - } - // TODO(sergey): Could be optimized if we calculate str.length() - // to maximum of prefix_length characters. - if (prefix_length > str.length()) { - return false; - } else { - return str.compare(0, prefix_length, prefix) == 0; - } -} - -std::string trim(const std::string &str) { - std::string result = str; - result.erase(0, result.find_first_not_of(" \t\r\n")); - result.erase(result.find_last_not_of(" \t\r\n") + 1); - return result; -} - -std::string unescape(const std::string &str) { - std::string result; - const size_t str_length = str.length(); - size_t i = 0; - while (i < str_length) { - char current_char = str[i++]; - if (current_char == '\\' && i < str_length - 1) { - char next_char = str[i++]; - if (next_char == '\\') { - current_char = '\\'; - } else if (next_char == 'n') { - current_char = '\n'; - } else if (next_char == 't') { - current_char = '\t'; - } else { - current_char = next_char; - } - } - result += current_char; - } - - const size_t result_length = result.length(); - if (result[0] == '"' && result[result_length - 1] == '"') { - result = result.substr(1, result_length - 2); - } - - return result; -} - -// Add a non-fuzzy translation 
to the dictionary. -void add(const std::string &msgctxt, - const std::string &msgid, - const std::string &msgstr, - bool fuzzy) { - if (fuzzy == false && msgstr.empty() == false) { - if (msgctxt.empty()) { - MESSAGES[msgid] = msgstr; - } else { - MESSAGES[msgctxt + (char)0x04 + msgid] = msgstr; - } - } -} - -template -void get_keys(std::map map, - std::vector *keys) { - keys->reserve(map.size()); - for (typename std::map::iterator it = map.begin(); - it != map.end(); - it++) { - keys->push_back(it->first); - } -} - -std::string intToBytes(int value) { - std::string result; - for (unsigned int i = 0; i < sizeof(value); i++) { - result += (unsigned char) ((value >> (i * 8)) & 0xff); - } - return result; -} - -typedef enum { - SECTION_NONE = 0, - SECTION_CTX = 1, - SECTION_ID = 2, - SECTION_STR = 3 -} eSectionType; - -struct Offset { - unsigned int o1, l1, o2, l2; -}; - -// Return the generated output. -std::string generate(void) { - // The keys are sorted in the .mo file - std::vector keys; - - // Get list of sorted keys. - get_keys(MESSAGES, &keys); - std::sort(keys.begin(), keys.end()); - - std::vector offsets; - offsets.reserve(keys.size()); - std::string ids = "", strs = ""; - for (std::vector::iterator it = keys.begin(); - it != keys.end(); - it++) { - std::string &id = *it; - // For each string, we need size and file offset. Each string is NUL - // terminated; the NUL does not count into the size. - Offset offset = {(unsigned int) ids.size(), - (unsigned int) id.size(), - (unsigned int) strs.size(), - (unsigned int) MESSAGES[id].size()}; - offsets.push_back(offset); - ids += id + '\0'; - strs += MESSAGES[id] + '\0'; - } - - // The header is 7 32-bit unsigned integers. We don't use hash tables, so - // the keys start right after the index tables. - // translated string. 
- int keystart = 7 * 4 + 16 * keys.size(); - // and the values start after the keys - int valuestart = keystart + ids.size(); - std::vector koffsets; - std::vector voffsets; - koffsets.reserve(offsets.size() * 2); - voffsets.reserve(offsets.size() * 2); - // The string table first has the list of keys, then the list of values. - // Each entry has first the size of the string, then the file offset. - for (std::vector::iterator it = offsets.begin(); - it != offsets.end(); - it++) { - Offset &offset = *it; - koffsets.push_back(offset.l1); - koffsets.push_back(offset.o1 + keystart); - voffsets.push_back(offset.l2); - voffsets.push_back(offset.o2 + valuestart); - } - - std::vector all_offsets; - all_offsets.reserve(koffsets.size() + voffsets.size()); - all_offsets.insert(all_offsets.end(), koffsets.begin(), koffsets.end()); - all_offsets.insert(all_offsets.end(), voffsets.begin(), voffsets.end()); - - std::string output = ""; - output += intToBytes(0x950412de); // Magic - output += intToBytes(0x0); // Version - output += intToBytes(keys.size()); // # of entries - output += intToBytes(7 * 4); // start of key index - output += intToBytes(7 * 4 + keys.size() * 8); // start of value index - output += intToBytes(0); // Size of hash table - output += intToBytes(0); // Offset of hash table - - for (std::vector::iterator it = all_offsets.begin(); - it != all_offsets.end(); - it++) { - int offset = *it; - output += intToBytes(offset); - } - - output += ids; - output += strs; - - return output; -} - -void make(const char *input_file_name, - const char *output_file_name) { - std::map messages; - - // Start off assuming Latin-1, so everything decodes without failure, - // until we know the exact encoding. - // TODO(sergey): Support encoding. 
- // const char *encoding = "latin-1"; - - eSectionType section = SECTION_NONE; - bool fuzzy = false; - bool is_plural = false; - std::string msgctxt, msgid, msgstr; - - std::ifstream input_file_stream(input_file_name); - - // Parse the catalog. - int lno = 0; - for (std::string l; getline(input_file_stream, l); ) { - lno++; - // If we get a comment line after a msgstr, this is a new entry. - if (l[0] == '#' && section == SECTION_STR) { - add(msgctxt, msgid, msgstr, fuzzy); - section = SECTION_NONE; - msgctxt = ""; - fuzzy = false; - } - // Record a fuzzy mark. - if (starts_with(l, "#,") && l.find("fuzzy") != std::string::npos) { - fuzzy = true; - } - // Skip comments - if (l[0] == '#') { - continue; - } - // Now we are in a msgid section, output previous section. - if (starts_with(l, "msgctxt")) { - if (section == SECTION_STR) { - add(msgctxt, msgid, msgstr, fuzzy); - } - section = SECTION_CTX; - l = l.substr(7, l.size() - 7); - msgctxt = msgid = msgstr = ""; - } - else if (starts_with(l, "msgid") && !starts_with(l, "msgid_plural")) { - if (section == SECTION_STR) { - add(msgctxt, msgid, msgstr, fuzzy); - msgctxt = ""; - if (msgid == "") { -#if 0 - // See whether there is an encoding declaration. - p = HeaderParser(); - charset = p.parsestr(msgstr.decode(encoding)).get_content_charset(); - if (charset) { - encoding = charset; - } -#else - // Not ported to C++ yet. - std::cerr << "Encoding declarations are not supported yet.\n" - << std::endl; - abort(); -#endif - } - } - section = SECTION_ID; - l = l.substr(5, l.size() - 5); - msgid = msgstr = ""; - is_plural = false; - } else if (starts_with(l, "msgid_plural")) { - // This is a message with plural forms. 
- if (section != SECTION_ID) { - std::cerr << "msgid_plural not preceeded by msgid on" - << input_file_name << ":" - << lno - << std::endl; - abort(); - } - l = l.substr(12, l.size() - 12); - msgid += '\0'; // separator of singular and plural - is_plural = true; - } else if (starts_with(l, "msgstr")) { - // Now we are in a msgstr section - section = SECTION_STR; - if (starts_with(l, "msgstr[")) { - if (is_plural == false) { - std::cerr << "plural without msgid_plural on " - << input_file_name << ":" - << lno - << std::endl; - abort(); - } - int bracket_position = l.find(']'); - if (bracket_position == std::string::npos) { - std::cerr << "Syntax error on " - << input_file_name << ":" - << lno - << std::endl; - abort(); - } - l = l.substr(bracket_position, l.size() - bracket_position); - if (msgstr != "") { - msgstr += '\0'; // Separator of the various plural forms; - } - } else { - if (is_plural) { - std::cerr << "indexed msgstr required for plural on " - << input_file_name << ":" - << lno - << std::endl; - abort(); - } - l = l.substr(6, l.size() - 6); - } - } - // Skip empty lines. - l = trim(l); - if (l.empty()) { - if (section == SECTION_STR) { - add(msgctxt, msgid, msgstr, fuzzy); - msgctxt = msgid = msgstr = ""; - section = SECTION_NONE; - fuzzy = false; - } - continue; - } - l = unescape(l); - if (section == SECTION_CTX) { - // TODO(sergey): Support encoding. - // msgid += l.encode(encoding); - msgctxt += l; - } - else if (section == SECTION_ID) { - // TODO(sergey): Support encoding. - // msgid += l.encode(encoding); - msgid += l; - } else if (section == SECTION_STR) { - // TODO(sergey): Support encoding. 
- // msgstr += l.encode(encoding) - msgstr += l; - } else { - std::cerr << "Syntax error on " - << input_file_name << ":" - << lno - << std::endl; - abort(); - } - // Add last entry - if (section == SECTION_STR) { - add(msgctxt, msgid, msgstr, fuzzy); - } - } - - // Compute output - std::string output = generate(); - - std::ofstream output_file_stream(output_file_name, - std::ios::out | std::ios::binary); - output_file_stream << output; -} - -} // namespace - -int main(int argc, char **argv) { - if (argc != 3) { - printf("Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - const char *input_file = argv[1]; - const char *output_file = argv[2]; - - make(input_file, output_file); - - return EXIT_SUCCESS; -} diff --git a/source/blender/blentranslation/CMakeLists.txt b/source/blender/blentranslation/CMakeLists.txt index a3e85344027..c0dce5b4f0d 100644 --- a/source/blender/blentranslation/CMakeLists.txt +++ b/source/blender/blentranslation/CMakeLists.txt @@ -60,3 +60,5 @@ if(WIN32) endif() blender_add_lib(bf_blentranslation "${SRC}" "${INC}" "${INC_SYS}") + +add_subdirectory(msgfmt) diff --git a/source/blender/blentranslation/msgfmt/CMakeLists.txt b/source/blender/blentranslation/msgfmt/CMakeLists.txt new file mode 100644 index 00000000000..d2cb6f5a03b --- /dev/null +++ b/source/blender/blentranslation/msgfmt/CMakeLists.txt @@ -0,0 +1,50 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# The Original Code is Copyright (C) 2017, Blender Foundation +# All rights reserved. +# +# The Original Code is: all of this file. +# +# Contributor(s): Bastien Montagne. +# +# ***** END GPL LICENSE BLOCK ***** + +# ----------------------------------------------------------------------------- +# Build msgfmt executable + +blender_include_dirs( + ../../../../intern/guardedalloc + ../../blenlib +) + +set(SRC + msgfmt.c +) + +add_cc_flags_custom_test(msgfmt) + +add_executable(msgfmt ${SRC}) + +target_link_libraries(msgfmt bf_blenlib) +target_link_libraries(msgfmt bf_intern_guardedalloc) + +if(WIN32) + target_link_libraries(msgfmt bf_intern_utfconv) +endif() + +target_link_libraries(msgfmt ${ZLIB_LIBRARIES}) +target_link_libraries(msgfmt ${PLATFORM_LINKLIBS}) diff --git a/source/blender/blentranslation/msgfmt/msgfmt.c b/source/blender/blentranslation/msgfmt/msgfmt.c new file mode 100644 index 00000000000..487d9fee7b4 --- /dev/null +++ b/source/blender/blentranslation/msgfmt/msgfmt.c @@ -0,0 +1,464 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2017 by Blender Foundation. + * All rights reserved. + * + * Contributor(s): Bastien Montagne + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/* + * Based on C++ version by Sergey Sharybin . + * Based on Python script msgfmt.py from Python source code tree, which was written by + * Martin v. Löwis + * + * Generate binary message catalog from textual translation description. + * + * This program converts a textual Uniforum-style message catalog (.po file) into a binary GNU catalog (.mo file). + * This is essentially the same function as the GNU msgfmt program, however, it is a simpler implementation. + * + * Usage: msgfmt input.po output.po + */ + +#include +#include + +#include "BLI_utildefines.h" +#include "BLI_dynstr.h" +#include "BLI_fileops.h" +#include "BLI_ghash.h" +#include "BLI_linklist.h" +#include "BLI_memarena.h" + +#include "MEM_guardedalloc.h" + + +/* Stupid stub necessary because some BLI files includes winstuff.h, which uses G a bit... */ +#ifdef WIN32 + typedef struct Global { + void *dummy; + } Global; + + Global G; +#endif + + +/* We cannot use NULL char until ultimate step, would give nightmare to our C string processing... 
+ * Using one of the UTF-8 invalid bytes (as per our BLI string_utf8.c) */ +#define NULLSEP_STR "\xff" +#define NULLSEP_CHR '\xff' + +typedef enum { + SECTION_NONE = 0, + SECTION_CTX = 1, + SECTION_ID = 2, + SECTION_STR = 3, +} eSectionType; + +typedef struct Message { + DynStr *ctxt; + DynStr *id; + DynStr *str; + + bool is_fuzzy; +} Message; + +static char *trim(char *str) +{ + const size_t len = strlen(str); + size_t i; + + if (len == 0) { + return str; + } + + for (i = 0; i < len && ELEM(str[0], ' ', '\t', '\n'); str++, i++); + + char *end = &str[len - 1 - i]; + for (i = len; i > 0 && ELEM(end[0], ' ', '\t', '\n'); end--, i--); + end[1] = '\0'; + + return str; +} + +static char *unescape(char *str) +{ + char *curr, *next; + for (curr = next = str; next[0] != '\0'; curr++, next++) { + if (next[0] == '\\') { + switch (next[1]) { + case '\0': + /* Get rid of trailing escape char... */ + curr--; + break; + case '\\': + *curr = '\\'; + next++; + break; + case 'n': + *curr = '\n'; + next++; + break; + case 't': + *curr = '\t'; + next++; + break; + default: + /* Get rid of useless escape char. 
*/ + next++; + *curr = *next; + } + } + else if (curr != next) { + *curr = *next; + } + } + *curr = '\0'; + + if (str[0] == '"' && *(curr - 1) == '"') { + *(curr - 1) = '\0'; + return str + 1; + } + return str; +} + +static int qsort_str_cmp(const void *a, const void *b) +{ + return strcmp(*(const char **)a, *(const char **)b); +} + +static char **get_keys_sorted(GHash *messages, const uint32_t num_keys) +{ + GHashIterator iter; + + char **keys = MEM_mallocN(sizeof(*keys) * num_keys, __func__); + char **k = keys; + + GHASH_ITER(iter, messages) { + *k = BLI_ghashIterator_getKey(&iter); + k++; + } + + qsort(keys, num_keys, sizeof(*keys), qsort_str_cmp); + + return keys; +} + +BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes) { + size_t i; + for (i = 0; i < sizeof(value); i++) { + bytes[i] = (char) ((value >> ((int)i * 8)) & 0xff); + } + return i; +} + +BLI_INLINE size_t msg_to_bytes(char *msg, char *bytes, uint32_t size) { + /* Note that we also perform replacing of our NULLSEP placeholder by real NULL char... */ + size_t i; + for (i = 0; i < size; i++, msg++, bytes++) { + *bytes = (*msg == NULLSEP_CHR) ? '\0' : *msg; + } + return i; +} + +typedef struct Offset { + uint32_t key_offset, key_len, val_offset, val_len; +} Offset; + +/* Return the generated binary output. */ +static char *generate(GHash *messages, size_t *r_output_size) { + const uint32_t num_keys = BLI_ghash_size(messages); + + /* Get list of sorted keys. */ + char **keys = get_keys_sorted(messages, num_keys); + char **vals = MEM_mallocN(sizeof(*vals) * num_keys, __func__); + uint32_t tot_keys_len = 0; + uint32_t tot_vals_len = 0; + + Offset *offsets = MEM_mallocN(sizeof(*offsets) * num_keys, __func__); + + for (int i = 0; i < num_keys; i++) { + Offset *off = &offsets[i]; + + vals[i] = BLI_ghash_lookup(messages, keys[i]); + + /* For each string, we need size and file offset. + * Each string is NULL terminated; the NULL does not count into the size. 
*/ + off->key_offset = tot_keys_len; + off->key_len = (uint32_t)strlen(keys[i]); + tot_keys_len += off->key_len + 1; + + off->val_offset = tot_vals_len; + off->val_len = (uint32_t)strlen(vals[i]); + tot_vals_len += off->val_len + 1; + } + + /* The header is 7 32-bit unsigned integers. then comes the keys index table, then the values index table. */ + const uint32_t idx_keystart = 7 * 4; + const uint32_t idx_valstart = idx_keystart + 8 * num_keys; + /* We don't use hash tables, so the keys start right after the index tables. */ + const uint32_t keystart = idx_valstart + 8 * num_keys; + /* and the values start after the keys */ + const uint32_t valstart = keystart + tot_keys_len; + + /* Final buffer representing the binary MO file. */ + *r_output_size = valstart + tot_vals_len; + char *output = MEM_mallocN(*r_output_size, __func__); + char *h = output; + char *ik = output + idx_keystart; + char *iv = output + idx_valstart; + char *k = output + keystart; + char *v = output + valstart; + + h += uint32_to_bytes(0x950412de, h); /* Magic */ + h += uint32_to_bytes(0x0, h); /* Version */ + h += uint32_to_bytes(num_keys, h); /* Number of entries */ + h += uint32_to_bytes(idx_keystart, h); /* Start of key index */ + h += uint32_to_bytes(idx_valstart, h); /* Start of value index */ + h += uint32_to_bytes(0, h); /* Size of hash table */ + h += uint32_to_bytes(0, h); /* Offset of hash table */ + + BLI_assert(h == ik); + + for (int i = 0; i < num_keys; i++) { + Offset *off = &offsets[i]; + + /* The index table first has the list of keys, then the list of values. + * Each entry has first the size of the string, then the file offset. 
*/ + ik += uint32_to_bytes(off->key_len, ik); + ik += uint32_to_bytes(off->key_offset + keystart, ik); + iv += uint32_to_bytes(off->val_len, iv); + iv += uint32_to_bytes(off->val_offset + valstart, iv); + + k += msg_to_bytes(keys[i], k, off->key_len + 1); + v += msg_to_bytes(vals[i], v, off->val_len + 1); + } + + BLI_assert(ik == output + idx_valstart); + BLI_assert(iv == output + keystart); + BLI_assert(k == output + valstart); + + MEM_freeN(keys); + MEM_freeN(vals); + MEM_freeN(offsets); + + return output; +} + +/* Add a non-fuzzy translation to the dictionary. */ +static void add(GHash *messages, MemArena *memarena, const Message *msg) +{ + const size_t msgctxt_len = (size_t)BLI_dynstr_get_len(msg->ctxt); + const size_t msgid_len = (size_t)BLI_dynstr_get_len(msg->id); + const size_t msgstr_len = (size_t)BLI_dynstr_get_len(msg->str); + const size_t msgkey_len = msgid_len + ((msgctxt_len == 0) ? 0 : msgctxt_len + 1); + + if (!msg->is_fuzzy && msgstr_len != 0) { + char *msgkey = BLI_memarena_alloc(memarena, sizeof(*msgkey) * (msgkey_len + 1)); + char *msgstr = BLI_memarena_alloc(memarena, sizeof(*msgstr) * (msgstr_len + 1)); + + if (msgctxt_len != 0) { + BLI_dynstr_get_cstring_ex(msg->ctxt, msgkey); + msgkey[msgctxt_len] = '\x04'; /* Context/msgid separator */ + BLI_dynstr_get_cstring_ex(msg->id, &msgkey[msgctxt_len + 1]); + } + else { + BLI_dynstr_get_cstring_ex(msg->id, msgkey); + } + + BLI_dynstr_get_cstring_ex(msg->str, msgstr); + + BLI_ghash_insert(messages, msgkey, msgstr); + } +} + + +static void clear(Message *msg) +{ + BLI_dynstr_clear(msg->ctxt); + BLI_dynstr_clear(msg->id); + BLI_dynstr_clear(msg->str); + msg->is_fuzzy = false; +} + +static int make(const char *input_file_name, const char *output_file_name) +{ + GHash *messages = BLI_ghash_new(BLI_ghashutil_strhash_p_murmur, BLI_ghashutil_strcmp, __func__); + MemArena *msgs_memarena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__); + + const char *msgctxt_kw = "msgctxt"; + const char *msgid_kw = 
"msgid"; + const char *msgid_plural_kw = "msgid_plural"; + const char *msgstr_kw = "msgstr"; + const size_t msgctxt_len = strlen(msgctxt_kw); + const size_t msgid_len = strlen(msgid_kw); + const size_t msgid_plural_len = strlen(msgid_plural_kw); + const size_t msgstr_len = strlen(msgstr_kw); + + /* Note: For now, we assume file encoding is always utf-8. */ + + eSectionType section = SECTION_NONE; + bool is_plural = false; + + Message msg = { + .ctxt = BLI_dynstr_new_memarena(), + .id = BLI_dynstr_new_memarena(), + .str = BLI_dynstr_new_memarena(), + .is_fuzzy = false, + }; + + LinkNode *input_file_lines = BLI_file_read_as_lines(input_file_name); + LinkNode *ifl = input_file_lines; + + /* Parse the catalog. */ + for (int lno = 1; ifl; ifl = ifl->next, lno++) { + char *l = ifl->link; + const bool is_comment = (l[0] == '#'); + /* If we get a comment line after a msgstr, this is a new entry. */ + if (is_comment) { + if (section == SECTION_STR) { + add(messages, msgs_memarena, &msg); + clear(&msg); + section = SECTION_NONE; + } + /* Record a fuzzy mark. */ + if (l[1] == ',' && strstr(l, "fuzzy") != NULL) { + msg.is_fuzzy = true; + } + /* Skip comments */ + continue; + } + if (strstr(l, msgctxt_kw) == l) { + if (section == SECTION_STR) { + /* New message, output previous section. */ + add(messages, msgs_memarena, &msg); + } + if (!ELEM(section, SECTION_NONE, SECTION_STR)) { + printf("msgctxt not at start of new message on %s:%d\n", input_file_name, lno); + return EXIT_FAILURE; + } + section = SECTION_CTX; + l = l + msgctxt_len; + clear(&msg); + } + else if (strstr(l, msgid_plural_kw) == l) { + /* This is a message with plural forms. 
*/ + if (section != SECTION_ID) { + printf("msgid_plural not preceeded by msgid on %s:%d\n", input_file_name, lno); + return EXIT_FAILURE; + } + l = l + msgid_plural_len; + BLI_dynstr_append(msg.id, NULLSEP_STR); /* separator of singular and plural */ + is_plural = true; + } + else if (strstr(l, msgid_kw) == l) { + if (section == SECTION_STR) { + add(messages, msgs_memarena, &msg); + } + if (section != SECTION_CTX) { + clear(&msg); + } + section = SECTION_ID; + l = l + msgid_len; + is_plural = false; + } + else if (strstr(l, msgstr_kw) == l) { + l = l + msgstr_len; + // Now we are in a msgstr section + section = SECTION_STR; + if (l[0] == '[') { + if (!is_plural) { + printf("plural without msgid_plural on %s:%d\n", input_file_name, lno); + return EXIT_FAILURE; + } + if ((l = strchr(l, ']')) == NULL) { + printf("Syntax error on %s:%d\n", input_file_name, lno); + return EXIT_FAILURE; + } + if (BLI_dynstr_get_len(msg.str) != 0) { + BLI_dynstr_append(msg.str, NULLSEP_STR); /* Separator of the various plural forms. */ + } + } + else { + if (is_plural) { + printf("indexed msgstr required for plural on %s:%d\n", input_file_name, lno); + return EXIT_FAILURE; + } + } + } + /* Skip empty lines. 
*/ + l = trim(l); + if (l[0] == '\0') { + if (section == SECTION_STR) { + add(messages, msgs_memarena, &msg); + clear(&msg); + } + section = SECTION_NONE; + continue; + } + l = unescape(l); + if (section == SECTION_CTX) { + BLI_dynstr_append(msg.ctxt, l); + } + else if (section == SECTION_ID) { + BLI_dynstr_append(msg.id, l); + } + else if (section == SECTION_STR) { + BLI_dynstr_append(msg.str, l); + } + else { + printf("Syntax error on %s:%d\n", input_file_name, lno); + return EXIT_FAILURE; + } + } + /* Add last entry */ + if (section == SECTION_STR) { + add(messages, msgs_memarena, &msg); + } + + BLI_dynstr_free(msg.ctxt); + BLI_dynstr_free(msg.id); + BLI_dynstr_free(msg.str); + BLI_file_free_lines(input_file_lines); + + /* Compute output */ + size_t output_size; + char *output = generate(messages, &output_size); + + FILE *fp = BLI_fopen(output_file_name, "wb"); + fwrite(output, 1, output_size, fp); + fclose(fp); + + MEM_freeN(output); + BLI_ghash_free(messages, NULL, NULL); + BLI_memarena_free(msgs_memarena); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) { + if (argc != 3) { + printf("Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + const char *input_file = argv[1]; + const char *output_file = argv[2]; + + return make(input_file, output_file); +}