From e8878044bf4d5b248aba041ab6dbe7b461328f33 Mon Sep 17 00:00:00 2001 From: siontama Date: Tue, 16 Aug 2022 14:31:54 +0900 Subject: [PATCH 1/7] Feat: locale-module --- stdlib/src/lib.rs | 2 ++ stdlib/src/locale.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 stdlib/src/locale.rs diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index 521734a1143..3b07f73d61b 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -62,6 +62,7 @@ mod termios; target_arch = "wasm32" )))] mod uuid; +mod locale; use rustpython_common as common; use rustpython_vm as vm; @@ -107,6 +108,7 @@ pub fn get_module_inits() -> impl Iterator, StdlibInit "unicodedata" => unicodedata::make_module, "zlib" => zlib::make_module, "_statistics" => statistics::make_module, + "_locale" => locale::make_module, // crate::vm::sysmodule::sysconfigdata_name() => sysconfigdata::make_module, } // parser related modules: diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs new file mode 100644 index 00000000000..5476a5b68ca --- /dev/null +++ b/stdlib/src/locale.rs @@ -0,0 +1,55 @@ +pub(crate) use locale::make_module; + +#[pymodule] +mod locale { + use std::ptr; + + use num_traits::ToPrimitive; + use rustpython_vm::{PyObjectRef, VirtualMachine, builtins::{PyTypeRef, PyBaseExceptionRef, PyStr}, utils::ToCString}; + + use crate::vm::{ + builtins::PyIntRef, + PyResult, + }; + + struct LocaleState { + error: PyObjectRef, + } + + fn new_locale_error(msg: String, vm: &VirtualMachine) -> PyBaseExceptionRef { + vm.new_exception_msg(error_type(vm), msg) + } + + #[pyattr(once)] + fn error_type(vm: &VirtualMachine) -> PyTypeRef { + vm.ctx.new_exception_type( + "locale", + "error", + Some(vec![vm.ctx.exceptions.value_error.to_owned()]), + ) + } + + #[pyfunction] + fn setlocale(category: PyIntRef, locale: Option, vm: &VirtualMachine) -> PyResult<*mut i8> { + match locale { + /* set locale */ + Some(locale) => { + let result = unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), locale.to_cstring(vm).unwrap().as_ptr()) }; + if result == 0 as *mut i8 { + /* operation failed, no setting was changed */ + return Err(new_locale_error("unsupported locale setting".to_owned(), vm)); + } + Ok(result) + }, + None => { + /* get locale */ + let result = unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), ptr::null()) }; + if result == 0 as *mut i8 { + return Err(new_locale_error("locale query failed".to_owned(), vm)); + } + //let result_object = PyUnicode_DecodeLocale(result, NULL); + Ok(result) + } + } + } +} From d67719e64301270e886ca3e33ffcccd9e7c6db18 Mon Sep 17 00:00:00 2001 From: Sion Kang <31057849+Yaminyam@users.noreply.github.com> Date: Tue, 16 Aug 2022 16:09:21 +0900 Subject: [PATCH 2/7] Update stdlib/src/locale.rs Co-authored-by: Jeong YunWon <69878+youknowone@users.noreply.github.com> --- stdlib/src/locale.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 5476a5b68ca..821f5ca7cea 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -30,7 +30,7 @@ mod locale { } #[pyfunction] - fn setlocale(category: PyIntRef, locale: Option, vm: &VirtualMachine) -> PyResult<*mut i8> { + fn setlocale(category: PyIntRef, locale: Option, vm: &VirtualMachine) -> PyResult> { match locale { /* set locale */ Some(locale) => { From 2e886fffc2295d17eebe46e1e60398bd8ee501d0 Mon Sep 17 00:00:00 2001 From: siontama Date: Tue, 16 Aug 2022 17:08:39 +0900 Subject: [PATCH 3/7] Fix: return type --- Lib/_pycodecs.py | 162 +++++++++++++++++++++++-------------------- stdlib/src/locale.rs | 14 ++-- 2 files changed, 93 insertions(+), 83 deletions(-) diff --git a/Lib/_pycodecs.py b/Lib/_pycodecs.py index 0741504cc9e..ec8fec16f06 100644 --- a/Lib/_pycodecs.py +++ b/Lib/_pycodecs.py @@ -22,10 +22,10 @@ The builtin Unicode codecs use the following interface: - _encode(Unicode_object[,errors='strict']) -> + _encode(Unicode_object[,errors='strict']) -> (string object, bytes consumed) - _decode(char_buffer_obj[,errors='strict']) -> + _decode(char_buffer_obj[,errors='strict']) -> (Unicode object, bytes consumed) _encode() interfaces also accept non-Unicode object as @@ -233,7 +233,7 @@ def escape_decode(data, errors='strict'): i = 0 res = bytearray() while i < l: - + if data[i] == '\\': i += 1 if i >= l: @@ -295,11 +295,11 @@ def mbcs_encode( obj, errors='strict'): """ pass ## return (PyUnicode_EncodeMBCS( -## (obj), +## (obj), ## len(obj), ## errors), ## len(obj)) - + def ascii_encode( obj, errors='strict'): """None @@ -364,9 +364,9 @@ def utf_16_be_decode( data, errors='strict', byteorder=0, final = 0): ##import sys ##""" Python implementation of CPythons builtin unicode codecs. ## -## Generally the functions in this module take a list of characters an returns +## Generally the functions in this module take a list of characters an returns ## a list of characters. -## +## ## For use in the PyPy project""" @@ -376,7 +376,7 @@ def utf_16_be_decode( data, errors='strict', byteorder=0, final = 0): ## 1 - special ## 2 - whitespace (optional) ## 3 - RFC2152 Set O (optional) - + utf7_special = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -401,21 +401,21 @@ def B64CHAR(c): return (c.isalnum() or (c) == b'+' or (c) == b'/') def UB64(c): if (c) == b'+' : - return 62 + return 62 elif (c) == b'/': - return 63 + return 63 elif (c) >= b'a': - return ord(c) - 71 + return ord(c) - 71 elif (c) >= b'A': - return ord(c) - 65 - else: + return ord(c) - 65 + else: return ord(c) + 4 def ENCODE( ch, bits) : out = [] while (bits >= 6): out += B64(ch >> (bits-6)) - bits -= 6 + bits -= 6 return out, bits def PyUnicode_DecodeUTF7(s, size, errors): @@ -434,24 +434,24 @@ def PyUnicode_DecodeUTF7(s, size, errors): return '' i = 0 while i < size: - + ch = bytes([s[i]]) if (inShift): if ((ch == b'-') or not B64CHAR(ch)): inShift = 0 i += 1 - + while (bitsleft >= 16): outCh = ((charsleft) >> (bitsleft-16)) & 0xffff bitsleft -= 16 - + if (surrogate): ## We have already generated an error for the high surrogate - ## so let's not bother seeing if the low surrogate is correct or not + ## so let's not bother seeing if the low surrogate is correct or not surrogate = 0 elif (0xDC00 <= (outCh) and (outCh) <= 0xDFFF): - ## This is a surrogate pair. Unfortunately we can't represent - ## it in a 16-bit character + ## This is a surrogate pair. Unfortunately we can't represent + ## it in a 16-bit character surrogate = 1 msg = "code pairs are not supported" out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i) @@ -467,7 +467,7 @@ def PyUnicode_DecodeUTF7(s, size, errors): ## but that is not the case here */ msg = "partial character in shift sequence" out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i) - + ## /* According to RFC2152 the remaining bits should be zero. We ## choose to signal an error/insert a replacement character ## here so indicate the potential of a misencoded character. */ @@ -479,11 +479,11 @@ def PyUnicode_DecodeUTF7(s, size, errors): if ((i < size) and (s[i] == '-')) : p += '-' inShift = 1 - + elif SPECIAL(ch, 0, 0) : raise UnicodeDecodeError("unexpected special character") - - else: + + else: p.append(chr(ord(ch))) else: charsleft = (charsleft << 6) | UB64(ch) @@ -499,7 +499,7 @@ def PyUnicode_DecodeUTF7(s, size, errors): else: inShift = 1 bitsleft = 0 - + elif (SPECIAL(ch, 0, 0)): i += 1 raise UnicodeDecodeError("unexpected special character") @@ -511,7 +511,7 @@ def PyUnicode_DecodeUTF7(s, size, errors): #XXX This aint right endinpos = size raise UnicodeDecodeError("unterminated shift sequence") - + return p def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors): @@ -571,7 +571,7 @@ def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors): out.append(b'-') inShift = False i += 1 - + if (bitsleft): out.append(B64(charsleft << (6-bitsleft) ) ) out.append(b'-') @@ -602,30 +602,30 @@ def unicodeescape_string(s, size, quotes): elif (ord(ch) >= 0x10000): p.append(b'\\U%08x' % ord(ch)) pos += 1 - continue + continue #endif #/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ elif (ord(ch) >= 0xD800 and ord(ch) < 0xDC00): pos += 1 ch2 = s[pos] - + if (ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF): ucs = (((ord(ch) & 0x03FF) << 10) | (ord(ch2) & 0x03FF)) + 0x00010000 p.append(b'\\U%08x' % ucs) pos += 1 continue - + #/* Fall through: isolated surrogates are copied as-is */ pos -= 1 - + #/* Map 16-bit characters to '\uxxxx' */ if (ord(ch) >= 256): p.append(b'\\u%04x' % ord(ch)) - + #/* Map special whitespace to '\t', \n', '\r' */ elif (ch == '\t'): p.append(b'\\t') - + elif (ch == '\n'): p.append(b'\\n') @@ -661,7 +661,7 @@ def PyUnicode_DecodeASCII(s, size, errors): p += chr(c) pos += 1 else: - + res = unicode_call_errorhandler( errors, "ascii", "ordinal not in range(128)", s, pos, pos+1) @@ -693,7 +693,7 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru else: ihi = 0 ilo = 1 - + #/* Unpack UTF-16 encoded data */ @@ -725,10 +725,10 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru bo = -1 else: bo = 1 - + if (size == 0): return [''], 0, bo - + if (bo == -1): #/* force LE */ ihi = 1 @@ -740,7 +740,7 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru ilo = 1 while (q < len(s)): - + #/* remaining bytes at the end? (size should be even) */ if (len(s)-q<2): if not final: @@ -751,14 +751,14 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) # /* The remaining input chars are ignored if the callback ## chooses to skip the input */ - + ch = (s[q+ihi] << 8) | s[q+ilo] q += 2 - + if (ch < 0xD800 or ch > 0xDFFF): p.append(chr(ch)) continue - + #/* UTF-16 code pair: */ if (q >= len(s)): errmsg = "unexpected end of data" @@ -783,12 +783,12 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru startinpos = q-4 endinpos = startinpos+2 unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) - + errmsg = "illegal encoding" startinpos = q-2 endinpos = startinpos+2 unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) - + return p, q, bo # moved out of local scope, especially because it didn't @@ -806,14 +806,14 @@ def PyUnicode_EncodeUTF16(s, size, errors, byteorder='little'): # /* Offsets from p for storing byte pairs in the right order. */ - + p = [] bom = sys.byteorder if (byteorder == 'native'): - + bom = sys.byteorder p += STORECHAR(0xFEFF, bom) - + if (size == 0): return "" @@ -843,9 +843,9 @@ def PyUnicode_DecodeMBCS(s, size, errors): def PyUnicode_EncodeMBCS(p, size, errors): pass -def unicode_call_errorhandler(errors, encoding, +def unicode_call_errorhandler(errors, encoding, reason, input, startinpos, endinpos, decode=True): - + errorHandler = lookup_error(errors) if decode: exceptionObject = UnicodeDecodeError(encoding, input, startinpos, endinpos, reason) @@ -876,14 +876,14 @@ def PyUnicode_DecodeLatin1(s, size, errors): return p def unicode_encode_ucs1(p, size, errors, limit): - + if limit == 256: reason = "ordinal not in range(256)" encoding = "latin-1" else: reason = "ordinal not in range(128)" encoding = "ascii" - + if (size == 0): return [] res = bytearray() @@ -891,20 +891,20 @@ def unicode_encode_ucs1(p, size, errors, limit): while pos < len(p): #for ch in p: ch = p[pos] - + if ord(ch) < limit: res.append(ord(ch)) pos += 1 else: #/* startpos for collecting unencodable chars */ - collstart = pos - collend = pos+1 + collstart = pos + collend = pos+1 while collend < len(p) and ord(p[collend]) >= limit: collend += 1 x = unicode_call_errorhandler(errors, encoding, reason, p, collstart, collend, False) res += x[0].encode() pos = x[1] - + return res def PyUnicode_EncodeLatin1(p, size, errors): @@ -915,7 +915,7 @@ def PyUnicode_EncodeLatin1(p, size, errors): def hex_number_end(s, pos, digits): target_end = pos + digits - while pos < target_end and pos < len(s) and s[pos] in hexdigits: + while pos < target_end and pos < len(s) and s[pos] in hexdigits: pos += 1 return pos @@ -952,7 +952,7 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): if (size == 0): return '' - + if isinstance(s, str): s = s.encode() @@ -960,7 +960,7 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): p = [] pos = 0 - while (pos < size): + while (pos < size): ## /* Non-escape characters are interpreted as Unicode ordinals */ if (chr(s[pos]) != '\\') : p.append(chr(s[pos])) @@ -978,12 +978,12 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): if ch == '\n': pass elif ch == '\\': p += '\\' elif ch == '\'': p += '\'' - elif ch == '\"': p += '\"' - elif ch == 'b' : p += '\b' + elif ch == '\"': p += '\"' + elif ch == 'b' : p += '\b' elif ch == 'f' : p += '\014' #/* FF */ - elif ch == 't' : p += '\t' + elif ch == 't' : p += '\t' elif ch == 'n' : p += '\n' - elif ch == 'r' : p += '\r' + elif ch == 'r' : p += '\r' elif ch == 'v' : p += '\013' #break; /* VT */ elif ch == 'a' : p += '\007' # break; /* BEL, not classic C */ elif '0' <= ch <= '7': @@ -1007,7 +1007,7 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): x = hexescape(s, pos, digits, message, errors) p += x[0] pos = x[1] - + # /* \uXXXX */ elif ch == 'u': digits = 4 @@ -1015,7 +1015,7 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): x = hexescape(s, pos, digits, message, errors) p += x[0] pos = x[1] - + # /* \UXXXXXXXX */ elif ch == 'U': digits = 8 @@ -1046,12 +1046,12 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): except LookupError as e: x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) else: - x = chr_codec, look + 1 + x = chr_codec, look + 1 p.append(x[0]) pos = x[1] - else: + else: x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - else: + else: x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) else: if not found_invalid_escape: @@ -1062,7 +1062,7 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): return p def PyUnicode_EncodeRawUnicodeEscape(s, size): - + if (size == 0): return b'' @@ -1077,7 +1077,7 @@ def PyUnicode_EncodeRawUnicodeEscape(s, size): # /* Copy everything else as-is */ else: p.append(ord(ch)) - + #p += '\0' return p @@ -1136,7 +1136,7 @@ def PyUnicode_DecodeCharmap(s, size, mapping, errors): p = [] inpos = 0 while (inpos< len(s)): - + #/* Get mapping (char ordinal -> integer, Unicode char or None) */ ch = s[inpos] try: @@ -1175,7 +1175,7 @@ def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final): if (ch != '\\'): p.append(ch) pos += 1 - continue + continue startinpos = pos ## /* \u-escapes are only interpreted iff the number of leading ## backslashes is odd */ @@ -1185,7 +1185,7 @@ def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final): break p.append(chr(s[pos])) pos += 1 - + if (pos >= size): break if (((pos - bs) & 1) == 0 or @@ -1193,11 +1193,11 @@ def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final): p.append(chr(s[pos])) pos += 1 continue - + p.pop(-1) if s[pos] == ord('u'): - count = 4 - else: + count = 4 + else: count = 8 pos += 1 @@ -1236,3 +1236,17 @@ def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final): pos += count return p + +def PyUnicode_DecodeLocaleAndSize(s, size, errors): + + if (size == 0): + return '' + if isinstance(s, str): + s = s.encode() + return PyUnicode_DecodeLocaleAndSize(s, size, errors) + +def PyUnicode_DecodeLocale(s, errors): + + if isinstance(s, str): + s = s.encode() + return PyUnicode_DecodeLocaleAndSize(s, len(s), errors) diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 821f5ca7cea..517f4c00e8a 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -5,17 +5,13 @@ mod locale { use std::ptr; use num_traits::ToPrimitive; - use rustpython_vm::{PyObjectRef, VirtualMachine, builtins::{PyTypeRef, PyBaseExceptionRef, PyStr}, utils::ToCString}; + use rustpython_vm::{VirtualMachine, builtins::{PyTypeRef, PyBaseExceptionRef, PyStrRef}, utils::ToCString}; use crate::vm::{ builtins::PyIntRef, PyResult, }; - struct LocaleState { - error: PyObjectRef, - } - fn new_locale_error(msg: String, vm: &VirtualMachine) -> PyBaseExceptionRef { vm.new_exception_msg(error_type(vm), msg) } @@ -35,20 +31,20 @@ mod locale { /* set locale */ Some(locale) => { let result = unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), locale.to_cstring(vm).unwrap().as_ptr()) }; - if result == 0 as *mut i8 { + if result.is_null() { /* operation failed, no setting was changed */ return Err(new_locale_error("unsupported locale setting".to_owned(), vm)); } - Ok(result) + Ok(unsafe { Vec::from_raw_parts(result as *mut u8, libc::strlen(result), libc::strlen(result)) } ) }, None => { /* get locale */ let result = unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), ptr::null()) }; - if result == 0 as *mut i8 { + if result.is_null() { return Err(new_locale_error("locale query failed".to_owned(), vm)); } //let result_object = PyUnicode_DecodeLocale(result, NULL); - Ok(result) + Ok(unsafe { Vec::from_raw_parts(result as *mut u8, libc::strlen(result), libc::strlen(result)) } ) } } } From b11221ece2bc08e6dc8538374484e067ae343cf9 Mon Sep 17 00:00:00 2001 From: siontama Date: Tue, 16 Aug 2022 17:13:57 +0900 Subject: [PATCH 4/7] Refactor: fmt --- stdlib/src/lib.rs | 2 +- stdlib/src/locale.rs | 50 +++++++++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index 3b07f73d61b..e34b522851f 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -45,6 +45,7 @@ mod posixsubprocess; // libc is missing constants on redox #[cfg(all(unix, not(any(target_os = "android", target_os = "redox"))))] mod grp; +mod locale; #[cfg(all(unix, not(target_os = "redox")))] mod resource; #[cfg(target_os = "macos")] @@ -62,7 +63,6 @@ mod termios; target_arch = "wasm32" )))] mod uuid; -mod locale; use rustpython_common as common; use rustpython_vm as vm; diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 517f4c00e8a..2a76424ccc3 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -5,13 +5,14 @@ mod locale { use std::ptr; use num_traits::ToPrimitive; - use rustpython_vm::{VirtualMachine, builtins::{PyTypeRef, PyBaseExceptionRef, PyStrRef}, utils::ToCString}; - - use crate::vm::{ - builtins::PyIntRef, - PyResult, + use rustpython_vm::{ + builtins::{PyBaseExceptionRef, PyStrRef, PyTypeRef}, + utils::ToCString, + VirtualMachine, }; + use crate::vm::{builtins::PyIntRef, PyResult}; + fn new_locale_error(msg: String, vm: &VirtualMachine) -> PyBaseExceptionRef { vm.new_exception_msg(error_type(vm), msg) } @@ -26,25 +27,50 @@ mod locale { } #[pyfunction] - fn setlocale(category: PyIntRef, locale: Option, vm: &VirtualMachine) -> PyResult> { + fn setlocale( + category: PyIntRef, + locale: Option, + vm: &VirtualMachine, + ) -> PyResult> { match locale { /* set locale */ Some(locale) => { - let result = unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), locale.to_cstring(vm).unwrap().as_ptr()) }; + let result = unsafe { + libc::setlocale( + category.as_bigint().to_i32().unwrap(), + locale.to_cstring(vm).unwrap().as_ptr(), + ) + }; if result.is_null() { /* operation failed, no setting was changed */ - return Err(new_locale_error("unsupported locale setting".to_owned(), vm)); + return Err(new_locale_error( + "unsupported locale setting".to_owned(), + vm, + )); } - Ok(unsafe { Vec::from_raw_parts(result as *mut u8, libc::strlen(result), libc::strlen(result)) } ) - }, + Ok(unsafe { + Vec::from_raw_parts( + result as *mut u8, + libc::strlen(result), + libc::strlen(result), + ) + }) + } None => { /* get locale */ - let result = unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), ptr::null()) }; + let result = + unsafe { libc::setlocale(category.as_bigint().to_i32().unwrap(), ptr::null()) }; if result.is_null() { return Err(new_locale_error("locale query failed".to_owned(), vm)); } //let result_object = PyUnicode_DecodeLocale(result, NULL); - Ok(unsafe { Vec::from_raw_parts(result as *mut u8, libc::strlen(result), libc::strlen(result)) } ) + Ok(unsafe { + Vec::from_raw_parts( + result as *mut u8, + libc::strlen(result), + libc::strlen(result), + ) + }) } } } From c0e4d2b3aeb984562603e3eca7f335a7553b82e0 Mon Sep 17 00:00:00 2001 From: siontama Date: Tue, 16 Aug 2022 17:34:18 +0900 Subject: [PATCH 5/7] Feat: localeconv --- Lib/locale.py | 20 ++++++++++---------- stdlib/src/locale.rs | 13 +++++++++++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index f3d3973d038..06083ddefe2 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -42,6 +42,16 @@ def _strxfrm(s): """ return s +CHAR_MAX = 127 +LC_ALL = 6 +LC_COLLATE = 3 +LC_CTYPE = 0 +LC_MESSAGES = 5 +LC_MONETARY = 4 +LC_NUMERIC = 1 +LC_TIME = 2 +Error = ValueError + try: from _locale import * @@ -50,16 +60,6 @@ def _strxfrm(s): # Locale emulation - CHAR_MAX = 127 - LC_ALL = 6 - LC_COLLATE = 3 - LC_CTYPE = 0 - LC_MESSAGES = 5 - LC_MONETARY = 4 - LC_NUMERIC = 1 - LC_TIME = 2 - Error = ValueError - def localeconv(): """ localeconv() -> dict. Returns numeric and monetary locale-specific parameters. diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 2a76424ccc3..2383e9533cb 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -1,7 +1,7 @@ -pub(crate) use locale::make_module; +pub(crate) use _locale::make_module; #[pymodule] -mod locale { +mod _locale { use std::ptr; use num_traits::ToPrimitive; @@ -74,4 +74,13 @@ mod locale { } } } + + #[pyfunction] + fn localeconv(vm: &VirtualMachine) -> PyResult { + let result = unsafe { libc::localeconv() }; + if result.is_null() { + return Err(new_locale_error("locale query failed".to_owned(), vm)); + } + Ok(vm.ctx.new_str("".to_owned())) + } } From bf662c8400ad0c9448d970948c5cac31992c3d10 Mon Sep 17 00:00:00 2001 From: siontama Date: Sun, 28 Aug 2022 14:59:36 +0900 Subject: [PATCH 6/7] Feat: setlocale --- Lib/locale.py | 49 ++++++++++++++++++++++---------------------- stdlib/src/locale.rs | 12 ++--------- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index 06083ddefe2..4e7635605b7 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -59,31 +59,6 @@ def _strxfrm(s): except ImportError: # Locale emulation - - def localeconv(): - """ localeconv() -> dict. - Returns numeric and monetary locale-specific parameters. - """ - # 'C' locale default values - return {'grouping': [127], - 'currency_symbol': '', - 'n_sign_posn': 127, - 'p_cs_precedes': 127, - 'n_cs_precedes': 127, - 'mon_grouping': [], - 'n_sep_by_space': 127, - 'decimal_point': '.', - 'negative_sign': '', - 'positive_sign': '', - 'p_sep_by_space': 127, - 'int_curr_symbol': '', - 'p_sign_posn': 127, - 'thousands_sep': '', - 'mon_thousands_sep': '', - 'frac_digits': 127, - 'mon_decimal_point': '', - 'int_frac_digits': 127} - def setlocale(category, value=None): """ setlocale(integer,string=None) -> string. Activates/queries locale processing. @@ -92,6 +67,30 @@ def setlocale(category, value=None): raise Error('_locale emulation only supports "C" locale') return 'C' +def localeconv(): + """ localeconv() -> dict. + Returns numeric and monetary locale-specific parameters. + """ + # 'C' locale default values + return {'grouping': [127], + 'currency_symbol': '', + 'n_sign_posn': 127, + 'p_cs_precedes': 127, + 'n_cs_precedes': 127, + 'mon_grouping': [], + 'n_sep_by_space': 127, + 'decimal_point': '.', + 'negative_sign': '', + 'positive_sign': '', + 'p_sep_by_space': 127, + 'int_curr_symbol': '', + 'p_sign_posn': 127, + 'thousands_sep': '', + 'mon_thousands_sep': '', + 'frac_digits': 127, + 'mon_decimal_point': '', + 'int_frac_digits': 127} + # These may or may not exist in _locale, so be sure to set them. if 'strxfrm' not in globals(): strxfrm = _strxfrm diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 2383e9533cb..35a80db0ed0 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -36,9 +36,10 @@ mod _locale { /* set locale */ Some(locale) => { let result = unsafe { + libc::setlocale( category.as_bigint().to_i32().unwrap(), - locale.to_cstring(vm).unwrap().as_ptr(), + locale.to_cstring(vm)?.as_ptr(), ) }; if result.is_null() { @@ -74,13 +75,4 @@ mod _locale { } } } - - #[pyfunction] - fn localeconv(vm: &VirtualMachine) -> PyResult { - let result = unsafe { libc::localeconv() }; - if result.is_null() { - return Err(new_locale_error("locale query failed".to_owned(), vm)); - } - Ok(vm.ctx.new_str("".to_owned())) - } } From 5639575493bff75fb6712d2c8249235adc051c11 Mon Sep 17 00:00:00 2001 From: siontama Date: Sun, 28 Aug 2022 16:08:25 +0900 Subject: [PATCH 7/7] Fix: FromArgs --- stdlib/src/locale.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 35a80db0ed0..5efe86f05aa 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -2,11 +2,12 @@ pub(crate) use _locale::make_module; #[pymodule] mod _locale { - use std::ptr; + use std::{ptr, slice}; use num_traits::ToPrimitive; use rustpython_vm::{ builtins::{PyBaseExceptionRef, PyStrRef, PyTypeRef}, + function::OptionalArg, utils::ToCString, VirtualMachine, }; @@ -25,15 +26,20 @@ mod _locale { Some(vec![vm.ctx.exceptions.value_error.to_owned()]), ) } - - #[pyfunction] - fn setlocale( + #[derive(FromArgs)] + struct LocaleArgs { + #[pyarg(positional)] category: PyIntRef, + #[pyarg(positional, default = "None")] locale: Option, - vm: &VirtualMachine, - ) -> PyResult> { + } + + #[pyfunction] + fn setlocale(args: LocaleArgs, vm: &VirtualMachine) -> PyResult> { + let category = args.category; + let locale = args.locale; match locale { - /* set locale */ + /* set locale */ Some(locale) => { let result = unsafe { @@ -50,11 +56,10 @@ mod _locale { )); } Ok(unsafe { - Vec::from_raw_parts( + slice::from_raw_parts( result as *mut u8, libc::strlen(result), - libc::strlen(result), - ) + ).to_vec() }) } None => { @@ -66,11 +71,10 @@ mod _locale { } //let result_object = PyUnicode_DecodeLocale(result, NULL); Ok(unsafe { - Vec::from_raw_parts( + slice::from_raw_parts( result as *mut u8, libc::strlen(result), - libc::strlen(result), - ) + ).to_vec() }) } }