python/utf8 compatibility fixes. (as discussed on the mailing list)

- User input has non-UTF-8 characters stripped from all text input other than file paths.

- Python has the same limitation: it will raise an error on non-UTF-8 strings, except for paths, which use unicode escape literals so it's possible to deal with saving to these file paths from Python.

- new string functions
  BLI_utf8_invalid_byte(str, len) returns the index of the first invalid utf8 byte, or -1 on success.
  BLI_utf8_invalid_strip(str, len) strips non utf-8 chars.
This commit is contained in:
Campbell Barton 2010-08-28 12:34:22 +00:00
parent 7a7076c878
commit f28b5e672e
9 changed files with 269 additions and 25 deletions

@ -132,6 +132,9 @@ size_t BLI_strnlen(const char *str, size_t maxlen);
void BLI_timestr(double _time, char *str); /* time var is global */ void BLI_timestr(double _time, char *str); /* time var is global */
/* Checks the first 'length' bytes of str for invalid utf-8.
 * Returns a byte offset (not a character count) locating the first
 * invalid sequence, or -1 when none is found. */
int BLI_utf8_invalid_byte(const char *str, int length);
/* Removes invalid utf-8 bytes from str in place ('length' is in bytes),
 * returns the number of bytes that were removed. */
int BLI_utf8_invalid_strip(char *str, int length);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

@ -348,3 +348,114 @@ size_t BLI_strnlen(const char *str, size_t maxlen)
const char *end = memchr(str, '\0', maxlen); const char *end = memchr(str, '\0', maxlen);
return end ? (size_t) (end - str) : maxlen; return end ? (size_t) (end - str) : maxlen;
} }
/* from libswish3, originally called u8_isvalid(),
* modified to return the index of the bad character (byte index not utf).
* http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
/* based on the valid_utf8 routine from the PCRE library by Philip Hazel
length is in bytes, since without knowing whether the string is valid
it's hard to know how many characters there are! */
/* Number of continuation bytes that must follow a given lead byte.
 * Indexed by the lead byte value; 0 for ASCII and for continuation bytes. */
static const char trailingBytesForUTF8[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/* Find the first invalid utf-8 sequence in the first 'length' bytes of 'str'.
 *
 * str:    buffer to check, does not need to be NUL terminated.
 * length: number of bytes to check.
 *
 * Returns -1 when every sequence is well formed, otherwise the byte index
 * (not the character index) of the FIRST byte of the first invalid sequence,
 * so the caller can remove that byte and scan again.
 *
 * A sequence is invalid when it starts with a continuation byte, is cut
 * short by the end of the buffer, has a trailing byte that is not 10xxxxxx,
 * is an overlong encoding, or starts with 0xfe / 0xff.
 *
 * NOTE(review): as before, the legacy 5 and 6 byte forms are accepted and
 * surrogates / values above U+10FFFF are not rejected (RFC 3629 forbids
 * both) - confirm if stricter checking is wanted. */
int BLI_utf8_invalid_byte(const char *str, int length)
{
	const unsigned char *start = (const unsigned char *)str;
	const unsigned char *pend = start + length;
	const unsigned char *p;
	const unsigned char *perr = start; /* first byte of the sequence being checked */
	unsigned char c;
	int ab;

	for (p = start; p < pend; p++) {
		c = *p;
		perr = p;

		if (c < 128)
			continue;

		/* a continuation byte (10xxxxxx) can't start a sequence */
		if ((c & 0xc0) != 0xc0)
			goto utf8_error;

		ab = trailingBytesForUTF8[c];

		/* sequence cut short by the end of the buffer,
		 * checked up front so nothing past 'pend' is ever read */
		if ((pend - p) - 1 < ab)
			goto utf8_error;

		p++;

		/* Check top bits in the second byte */
		if ((*p & 0xc0) != 0x80)
			goto utf8_error;

		/* Check for overlong sequences for each different length */
		switch (ab) {
			/* Check for xx00 000x */
			case 1:
				if ((c & 0x3e) == 0) goto utf8_error;
				continue; /* We know there aren't any more bytes to check */

			/* Check for 1110 0000, xx0x xxxx */
			case 2:
				if (c == 0xe0 && (*p & 0x20) == 0) goto utf8_error;
				break;

			/* Check for 1111 0000, xx00 xxxx */
			case 3:
				if (c == 0xf0 && (*p & 0x30) == 0) goto utf8_error;
				break;

			/* Check for 1111 1000, xx00 0xxx */
			case 4:
				if (c == 0xf8 && (*p & 0x38) == 0) goto utf8_error;
				break;

			/* Check for leading 0xfe or 0xff,
			   and then for 1111 1100, xx00 00xx */
			case 5:
				if (c == 0xfe || c == 0xff ||
				    (c == 0xfc && (*p & 0x3c) == 0)) goto utf8_error;
				break;

			default:
				break;
		}

		/* Check for valid bytes after the 2nd, if any; all must start 10 */
		while (--ab > 0) {
			p++;
			if ((*p & 0xc0) != 0x80) goto utf8_error;
		}
	}

	return -1;

utf8_error:
	/* always report the start of the offending sequence */
	return (int)(perr - start);
}

/* Remove every invalid utf-8 byte from 'str' in place.
 *
 * str:    NUL terminated string, modified in place.
 * length: length of 'str' in bytes, without the terminator
 *         (str[length] must be the terminating NUL, it is moved along
 *         with the remaining text).
 *
 * Returns the number of bytes that were removed. */
int BLI_utf8_invalid_strip(char *str, int length)
{
	int bad_char, tot= 0;

	while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) {
		/* skip the text that is already known to be valid */
		str += bad_char;
		length -= bad_char;

		/* 'length' counts the bad byte plus what follows it, so moving
		 * 'length' bytes from str + 1 shifts the tail and the terminator
		 * down by one, overwriting the bad byte */
		memmove(str, str + 1, length);
		length--; /* the string is now one byte shorter */
		tot++;
	}

	return tot;
}

@ -135,7 +135,7 @@ typedef struct uiLayout uiLayout;
#define UI_MAKE_RIGHT 8192 #define UI_MAKE_RIGHT 8192
/* button align flag, for drawing groups together */ /* button align flag, for drawing groups together */
#define UI_BUT_ALIGN (15<<14) #define UI_BUT_ALIGN (UI_BUT_ALIGN_TOP|UI_BUT_ALIGN_LEFT|UI_BUT_ALIGN_RIGHT|UI_BUT_ALIGN_DOWN)
#define UI_BUT_ALIGN_TOP (1<<14) #define UI_BUT_ALIGN_TOP (1<<14)
#define UI_BUT_ALIGN_LEFT (1<<15) #define UI_BUT_ALIGN_LEFT (1<<15)
#define UI_BUT_ALIGN_RIGHT (1<<16) #define UI_BUT_ALIGN_RIGHT (1<<16)
@ -151,9 +151,10 @@ typedef struct uiLayout uiLayout;
#define UI_BUT_UNDO (1<<25) #define UI_BUT_UNDO (1<<25)
#define UI_BUT_IMMEDIATE (1<<26) #define UI_BUT_IMMEDIATE (1<<26)
#define UI_BUT_NO_TOOLTIP (1<<27) #define UI_BUT_NO_TOOLTIP (1<<27)
#define UI_BUT_NO_UTF8 (1<<28)
#define UI_BUT_VEC_SIZE_LOCK (1<<28) /* used to flag if color hsv-circle should keep luminance */ #define UI_BUT_VEC_SIZE_LOCK (1<<29) /* used to flag if color hsv-circle should keep luminance */
#define UI_BUT_COLOR_CUBIC (1<<29) /* cubic saturation for the color wheel */ #define UI_BUT_COLOR_CUBIC (1<<30) /* cubic saturation for the color wheel */
#define UI_PANEL_WIDTH 340 #define UI_PANEL_WIDTH 340
#define UI_COMPACT_PANEL_WIDTH 160 #define UI_COMPACT_PANEL_WIDTH 160

@ -242,6 +242,20 @@ static int ui_is_a_warp_but(uiBut *but)
return FALSE; return FALSE;
} }
/* Tells if the text of this button must be valid utf-8.
 *
 * file selectors are exempt from utf-8 checks:
 * - buttons for RNA properties with a file path, directory or file name
 *   subtype return FALSE.
 * - other buttons return FALSE only when flagged with UI_BUT_NO_UTF8.
 *
 * Returns TRUE when invalid utf-8 bytes should be stripped from the text. */
static int ui_is_utf8_but(uiBut *but)
{
	if (but->rnaprop) {
		int subtype= RNA_property_subtype(but->rnaprop);

		if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
			/* paths may hold non utf-8 bytes, stripping would change which file they name */
			return FALSE;
		}
	}

	return !(but->flag & UI_BUT_NO_UTF8);
}
/* ********************** button apply/revert ************************/ /* ********************** button apply/revert ************************/
static ListBase UIAfterFuncs = {NULL, NULL}; static ListBase UIAfterFuncs = {NULL, NULL};
@ -1572,6 +1586,15 @@ static void ui_textedit_begin(bContext *C, uiBut *but, uiHandleButtonData *data)
static void ui_textedit_end(bContext *C, uiBut *but, uiHandleButtonData *data) static void ui_textedit_end(bContext *C, uiBut *but, uiHandleButtonData *data)
{ {
if(but) { if(but) {
if(ui_is_utf8_but(but)) {
int strip= BLI_utf8_invalid_strip(but->editstr, strlen(but->editstr));
/* not a file?, strip non utf-8 chars */
if(strip) {
/* wont happen often so isnt that annoying to keep it here for a while */
printf("invalid utf8 - stripped chars %d\n", strip);
}
}
if(data->searchbox) { if(data->searchbox) {
if(data->cancel==0) if(data->cancel==0)
ui_searchbox_apply(but, data->searchbox); ui_searchbox_apply(but, data->searchbox);

@ -177,11 +177,14 @@ void file_draw_buttons(const bContext *C, ARegion *ar)
params->dir, 0.0, (float)FILE_MAX-1, 0, 0, params->dir, 0.0, (float)FILE_MAX-1, 0, 0,
"File path."); "File path.");
uiButSetCompleteFunc(but, autocomplete_directory, NULL); uiButSetCompleteFunc(but, autocomplete_directory, NULL);
uiButSetFlag(but, UI_BUT_NO_UTF8);
but = uiDefBut(block, TEX, B_FS_FILENAME, "", but = uiDefBut(block, TEX, B_FS_FILENAME, "",
min_x, line2_y, line2_w-chan_offs, btn_h, min_x, line2_y, line2_w-chan_offs, btn_h,
params->file, 0.0, (float)FILE_MAXFILE-1, 0, 0, params->file, 0.0, (float)FILE_MAXFILE-1, 0, 0,
"File name."); "File name.");
uiButSetCompleteFunc(but, autocomplete_file, NULL); uiButSetCompleteFunc(but, autocomplete_file, NULL);
uiButSetFlag(but, UI_BUT_NO_UTF8);
} }
/* Filename number increment / decrement buttons. */ /* Filename number increment / decrement buttons. */

@ -291,8 +291,8 @@ static void rna_def_render_engine(BlenderRNA *brna)
static void rna_def_render_result(BlenderRNA *brna) static void rna_def_render_result(BlenderRNA *brna)
{ {
StructRNA *srna; StructRNA *srna;
PropertyRNA *prop;
FunctionRNA *func; FunctionRNA *func;
PropertyRNA *parm;
srna= RNA_def_struct(brna, "RenderResult", NULL); srna= RNA_def_struct(brna, "RenderResult", NULL);
RNA_def_struct_ui_text(srna, "Render Result", "Result of rendering, including all layers and passes"); RNA_def_struct_ui_text(srna, "Render Result", "Result of rendering, including all layers and passes");
@ -300,22 +300,22 @@ static void rna_def_render_result(BlenderRNA *brna)
func= RNA_def_function(srna, "load_from_file", "RE_result_load_from_file"); func= RNA_def_function(srna, "load_from_file", "RE_result_load_from_file");
RNA_def_function_ui_description(func, "Copies the pixels of this render result from an image file."); RNA_def_function_ui_description(func, "Copies the pixels of this render result from an image file.");
RNA_def_function_flag(func, FUNC_USE_REPORTS); RNA_def_function_flag(func, FUNC_USE_REPORTS);
prop= RNA_def_string(func, "filename", "", 0, "Filename", "Filename to load into this render tile, must be no smaller then the render result"); parm= RNA_def_string_file_name(func, "filename", "", FILE_MAX, "File Name", "Filename to load into this render tile, must be no smaller then the render result");
RNA_def_property_flag(prop, PROP_REQUIRED); RNA_def_property_flag(parm, PROP_REQUIRED);
RNA_define_verify_sdna(0); RNA_define_verify_sdna(0);
prop= RNA_def_property(srna, "resolution_x", PROP_INT, PROP_NONE); parm= RNA_def_property(srna, "resolution_x", PROP_INT, PROP_NONE);
RNA_def_property_int_sdna(prop, NULL, "rectx"); RNA_def_property_int_sdna(parm, NULL, "rectx");
RNA_def_property_clear_flag(prop, PROP_EDITABLE); RNA_def_property_clear_flag(parm, PROP_EDITABLE);
prop= RNA_def_property(srna, "resolution_y", PROP_INT, PROP_NONE); parm= RNA_def_property(srna, "resolution_y", PROP_INT, PROP_NONE);
RNA_def_property_int_sdna(prop, NULL, "recty"); RNA_def_property_int_sdna(parm, NULL, "recty");
RNA_def_property_clear_flag(prop, PROP_EDITABLE); RNA_def_property_clear_flag(parm, PROP_EDITABLE);
prop= RNA_def_property(srna, "layers", PROP_COLLECTION, PROP_NONE); parm= RNA_def_property(srna, "layers", PROP_COLLECTION, PROP_NONE);
RNA_def_property_struct_type(prop, "RenderLayer"); RNA_def_property_struct_type(parm, "RenderLayer");
RNA_def_property_collection_funcs(prop, "rna_RenderResult_layers_begin", "rna_iterator_listbase_next", "rna_iterator_listbase_end", "rna_iterator_listbase_get", 0, 0, 0); RNA_def_property_collection_funcs(parm, "rna_RenderResult_layers_begin", "rna_iterator_listbase_next", "rna_iterator_listbase_end", "rna_iterator_listbase_get", 0, 0, 0);
RNA_define_verify_sdna(1); RNA_define_verify_sdna(1);
} }

@ -1336,7 +1336,7 @@ static void rna_def_plugin(BlenderRNA *brna)
RNA_def_struct_ui_text(srna, "Plugin Sequence", "Sequence strip applying an effect, loaded from an external plugin"); RNA_def_struct_ui_text(srna, "Plugin Sequence", "Sequence strip applying an effect, loaded from an external plugin");
RNA_def_struct_sdna_from(srna, "PluginSeq", "plugin"); RNA_def_struct_sdna_from(srna, "PluginSeq", "plugin");
prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_FILEPATH); prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_FILENAME);
RNA_def_property_string_sdna(prop, NULL, "name"); RNA_def_property_string_sdna(prop, NULL, "name");
RNA_def_property_clear_flag(prop, PROP_EDITABLE); RNA_def_property_clear_flag(prop, PROP_EDITABLE);
RNA_def_property_ui_text(prop, "Filename", ""); RNA_def_property_ui_text(prop, "Filename", "");

@ -2047,12 +2047,12 @@ static void rna_def_fileselect_params(BlenderRNA *brna)
RNA_def_property_ui_text(prop, "Title", "Title for the file browser"); RNA_def_property_ui_text(prop, "Title", "Title for the file browser");
RNA_def_property_clear_flag(prop, PROP_EDITABLE); RNA_def_property_clear_flag(prop, PROP_EDITABLE);
prop= RNA_def_property(srna, "directory", PROP_STRING, PROP_NONE); prop= RNA_def_property(srna, "directory", PROP_STRING, PROP_DIRPATH);
RNA_def_property_string_sdna(prop, NULL, "dir"); RNA_def_property_string_sdna(prop, NULL, "dir");
RNA_def_property_ui_text(prop, "Directory", "Directory displayed in the file browser"); RNA_def_property_ui_text(prop, "Directory", "Directory displayed in the file browser");
RNA_def_property_update(prop, NC_SPACE|ND_SPACE_FILE_PARAMS, NULL); RNA_def_property_update(prop, NC_SPACE|ND_SPACE_FILE_PARAMS, NULL);
prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_NONE); prop= RNA_def_property(srna, "filename", PROP_STRING, PROP_FILENAME);
RNA_def_property_string_sdna(prop, NULL, "file"); RNA_def_property_string_sdna(prop, NULL, "file");
RNA_def_property_ui_text(prop, "File Name", "Active file in the file browser"); RNA_def_property_ui_text(prop, "File Name", "Active file in the file browser");
RNA_def_property_update(prop, NC_SPACE|ND_SPACE_FILE_PARAMS, NULL); RNA_def_property_update(prop, NC_SPACE|ND_SPACE_FILE_PARAMS, NULL);

@ -49,6 +49,7 @@
#include "ED_keyframing.h" #include "ED_keyframing.h"
#define USE_MATHUTILS #define USE_MATHUTILS
#define USE_STRING_COERCE
#ifdef USE_MATHUTILS #ifdef USE_MATHUTILS
#include "../generic/mathutils.h" /* so we can have mathutils callbacks */ #include "../generic/mathutils.h" /* so we can have mathutils callbacks */
@ -192,6 +193,62 @@ Mathutils_Callback mathutils_rna_matrix_cb = {
NULL NULL
}; };
#ifdef USE_STRING_COERCE
/* Get the bytes of a python string as a C string.
 *
 * First tries _PyUnicode_AsString(), when that fails the error is cleared
 * and the string is encoded as utf-8 with the "surrogateescape" handler
 * (mostly copied from fileio.c's, fileio_init).
 *
 * coerce: must point to a NULL PyObject pointer on entry. It is only written
 *         when the fallback encoding was used, and then receives the bytes
 *         object the returned pointer was taken from, which the caller has
 *         to release.
 *
 * Returns NULL on failure. */
static const char *py_safe_unicode_to_byte(PyObject *py_str, PyObject **coerce)
{
	PyObject *as_unicode;
	PyObject *as_bytes;
	char *direct= _PyUnicode_AsString(py_str);

	/* 99% of the time this is enough but we better support non unicode
	 * chars since blender doesnt limit this */
	if(direct != NULL)
		return direct;

	PyErr_Clear();

	/* coerce into unicode */
	as_unicode= PyUnicode_FromObject(py_str);
	if(as_unicode == NULL)
		return NULL;

	as_bytes= PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(as_unicode), PyUnicode_GET_SIZE(as_unicode), "surrogateescape");
	Py_DECREF(as_unicode);
	if(as_bytes == NULL)
		return NULL;

	/* this seems wrong but it works fine */
	if(!PyBytes_Check(as_bytes)) {
		Py_DECREF(as_bytes);
		return NULL;
	}

	*coerce= as_bytes;
	return PyBytes_AS_STRING(as_bytes);
}
/* Make a python string from a C string.
 *
 * PyUnicode_FromString() is tried first, when it fails the error is cleared
 * and the bytes are decoded as utf-8 with the "surrogateescape" handler.
 *
 * Returns the result of whichever call was made last (NULL when both fail). */
static PyObject *py_safe_byte_to_unicode(char *str)
{
	PyObject *py_str= PyUnicode_FromString(str);

	if(py_str == NULL) {
		/* rare case: blender doesnt limit strings to unicode chars,
		 * so retry with a decoder that escapes the bad bytes */
		PyErr_Clear();
		py_str= PyUnicode_DecodeUTF8(str, strlen(str), "surrogateescape");
	}

	return py_str;
}
#endif
/* same as RNA_enum_value_from_id but raises an exception */ /* same as RNA_enum_value_from_id but raises an exception */
int pyrna_enum_value_from_id(EnumPropertyItem *item, const char *identifier, int *value, const char *error_prefix) int pyrna_enum_value_from_id(EnumPropertyItem *item, const char *identifier, int *value, const char *error_prefix)
{ {
@ -768,9 +825,20 @@ PyObject * pyrna_prop_to_py(PointerRNA *ptr, PropertyRNA *prop)
break; break;
case PROP_STRING: case PROP_STRING:
{ {
int subtype= RNA_property_subtype(prop);
char *buf; char *buf;
buf = RNA_property_string_get_alloc(ptr, prop, NULL, -1); buf = RNA_property_string_get_alloc(ptr, prop, NULL, -1);
ret = PyUnicode_FromString( buf ); #ifdef USE_STRING_COERCE
/* only file paths get special treatment, they may contain non utf-8 chars */
if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
ret= py_safe_byte_to_unicode(buf);
}
else {
ret= PyUnicode_FromString(buf);
}
#else
ret= PyUnicode_FromString(buf);
#endif
MEM_freeN(buf); MEM_freeN(buf);
break; break;
} }
@ -971,16 +1039,31 @@ int pyrna_py_to_prop(PointerRNA *ptr, PropertyRNA *prop, ParameterList *parms, v
} }
case PROP_STRING: case PROP_STRING:
{ {
char *param = _PyUnicode_AsString(value); const char *param;
#ifdef USE_STRING_COERCE
PyObject *value_coerce= NULL;
int subtype= RNA_property_subtype(prop);
if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
param= py_safe_unicode_to_byte(value, &value_coerce);
}
else {
param= _PyUnicode_AsString(value);
}
#else
param= _PyUnicode_AsString(value);
#endif
if (param==NULL) { if (param==NULL) {
PyErr_Format(PyExc_TypeError, "%.200s %.200s.%.200s expected a string type", error_prefix, RNA_struct_identifier(ptr->type), RNA_property_identifier(prop)); PyErr_Format(PyExc_TypeError, "%.200s %.200s.%.200s expected a string type", error_prefix, RNA_struct_identifier(ptr->type), RNA_property_identifier(prop));
return -1; return -1;
} }
else { else {
if(data) *((char**)data)= param; if(data) *((char**)data)= param; /*XXX, this assignes a pointer, wouldnt it be better to copy??? */
else RNA_property_string_set(ptr, prop, param); else RNA_property_string_set(ptr, prop, param);
} }
#ifdef USE_STRING_COERCE
Py_XDECREF(value_coerce);
#endif
break; break;
} }
case PROP_ENUM: case PROP_ENUM:
@ -1307,7 +1390,7 @@ static PyObject *pyrna_prop_array_subscript_int(BPy_PropertyRNA *self, int keynu
return NULL; return NULL;
} }
static PyObject *pyrna_prop_collection_subscript_str(BPy_PropertyRNA *self, char *keyname) static PyObject *pyrna_prop_collection_subscript_str(BPy_PropertyRNA *self, const char *keyname)
{ {
PointerRNA newptr; PointerRNA newptr;
if(RNA_property_collection_lookup_string(&self->ptr, self->prop, keyname, &newptr)) if(RNA_property_collection_lookup_string(&self->ptr, self->prop, keyname, &newptr))
@ -3372,10 +3455,30 @@ PyObject *pyrna_param_to_py(PointerRNA *ptr, ParameterList *parms, PropertyRNA *
break; break;
case PROP_STRING: case PROP_STRING:
{ {
if(flag & PROP_THICK_WRAP) char *data_ch;
ret = PyUnicode_FromString( (char*)data ); PyObject *value_coerce= NULL;
int subtype= RNA_property_subtype(prop);
if (flag & PROP_THICK_WRAP)
data_ch= (char *)data;
else else
ret = PyUnicode_FromString( *(char**)data ); data_ch= *(char **)data;
#ifdef USE_STRING_COERCE
if(ELEM3(subtype, PROP_FILEPATH, PROP_DIRPATH, PROP_FILENAME)) {
ret= py_safe_byte_to_unicode(data_ch);
}
else {
ret= PyUnicode_FromString(data_ch);
}
#else
ret = PyUnicode_FromString(data_ch);
#endif
#ifdef USE_STRING_COERCE
Py_XDECREF(value_coerce);
#endif
break; break;
} }
case PROP_ENUM: case PROP_ENUM: