diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py
index eddfb740f8..49dafa8612 100644
--- a/mathics/builtin/strings.py
+++ b/mathics/builtin/strings.py
@@ -1652,7 +1652,7 @@ class ToString(Builtin):
     """
 
     options = {
-        "CharacterEncoding": '"Unicode"',
+        "CharacterEncoding": "$SystemCharacterEncoding",
         "FormatType": "OutputForm",
         "NumberMarks": "$NumberMarks",
         "PageHeight": "Infinity",
@@ -1668,6 +1668,9 @@ def apply_default(self, value, evaluation, options):
     def apply_form(self, value, form, evaluation, options):
         "ToString[value_, form_, OptionsPattern[ToString]]"
         encoding = options["System`CharacterEncoding"]
+        if not isinstance(encoding, String) or encoding.value not in _encodings:
+            evaluation.message("General", "charcode", encoding)
+            encoding = Symbol("$SystemCharacterEncoding")
         text = value.format(evaluation, form.get_name(), encoding=encoding)
         text = text.boxes_to_text(evaluation=evaluation)
         return String(text)
diff --git a/mathics/core/evaluation.py b/mathics/core/evaluation.py
index ac988ed659..71b08778dc 100644
--- a/mathics/core/evaluation.py
+++ b/mathics/core/evaluation.py
@@ -13,7 +13,8 @@ from mathics_scanner import TranslateError
 
 from mathics import settings
 
-from mathics.core.expression import ensure_context, KeyComparable, SymbolAborted, SymbolList, SymbolNull
+from mathics.core.expression import ensure_context, KeyComparable, SymbolAborted, SymbolList, SymbolNull, wl_strings_to_specific_encoding
+from mathics.settings import SYSTEM_CHARACTER_ENCODING
 
 FORMATS = [
     "StandardForm",
@@ -228,7 +229,12 @@ def display(self, data, metadata):
 
 class Evaluation(object):
     def __init__(
-        self, definitions=None, output=None, format="text", catch_interrupt=True
+        self,
+        definitions=None,
+        output=None,
+        format="text",
+        catch_interrupt=True,
+        default_encoding=SYSTEM_CHARACTER_ENCODING,
     ) -> None:
         from mathics.core.definitions import Definitions
         from mathics.core.expression import Symbol
@@ -249,6 +255,7 @@ def __init__(
         self.quiet_all = False
         self.format = format
         self.catch_interrupt = catch_interrupt
+        self.encoding = default_encoding
 
         self.SymbolNull = SymbolNull
 
@@ -510,6 +517,7 @@ def message(self, symbol, tag, *args) -> None:
             Expression("StringForm", text, *(from_python(arg) for arg in args)), "text"
         )
 
+        text = wl_strings_to_specific_encoding(text, encoding=self.encoding)
         self.out.append(Message(symbol_shortname, tag, text))
         self.output.out(self.out[-1])
 
diff --git a/mathics/core/expression.py b/mathics/core/expression.py
index 9a55368d0e..23d6191c37 100644
--- a/mathics/core/expression.py
+++ b/mathics/core/expression.py
@@ -17,6 +17,20 @@ from mathics.core.convert import sympy_symbol_prefix, SympyExpression
 import base64
 
 
+# In the future, this function should be replaced by one
+# handling different encodings.
+from mathics_scanner.characters import replace_wl_with_plain_text
+
+
+def wl_strings_to_specific_encoding(string, encoding=None):
+    """
+    Translate ``string``, given in the default unicode WL encoding, to
+    the form used by ``encoding``.  For now this is just a thin wrapper
+    around replace_wl_with_plain_text, whose second argument is True
+    only when ``encoding`` is "UTF8" or "UTF-8".
+    """
+    return replace_wl_with_plain_text(string, encoding in ("UTF8", "UTF-8"))
+
 # Imperical number that seems to work.
 # We have to be able to match mpmath values with sympy values
 COMPARE_PREC = 50
@@ -1969,7 +1983,8 @@ def do_copy(self) -> "Symbol":
         return Symbol(self.name)
 
     def boxes_to_text(self, **options) -> str:
-        return str(self.name)
+        encoding = options.get("encoding", "UTF-8")
+        return wl_strings_to_specific_encoding(str(self.name), encoding)
 
     def atom_to_boxes(self, f, evaluation) -> "String":
         return String(evaluation.definitions.shorten_name(self.name))
@@ -2825,7 +2840,8 @@ def boxes_to_text(self, show_string_characters=False, **options) -> str:
         ):
             value = value[1:-1]
 
-        return value
+        encoding = options.get("encoding", "UTF-8")
+        return wl_strings_to_specific_encoding(value, encoding)
 
     def boxes_to_mathml(self, show_string_characters=False, **options) -> str:
         from mathics.core.parser import is_symbol_name