From 62c0e636ad798055a42f697991a260d7b8ba55db Mon Sep 17 00:00:00 2001 From: mmatera Date: Sun, 14 Mar 2021 20:28:46 -0300 Subject: [PATCH 1/3] wip: implementing character encoding in boxes_to_text and ToString fix typo fix options in ToString improving MathicsSession restoring session.py from master this is how I think this should work Unicode is not an encoding... --- mathics/builtin/strings.py | 36 +++++++++++++++++++----------------- mathics/core/expression.py | 9 +++++++-- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py index 20682bbd5b..5962d2e8f1 100644 --- a/mathics/builtin/strings.py +++ b/mathics/builtin/strings.py @@ -1630,23 +1630,25 @@ class ToString(Builtin): >> "U" <> ToString[2] = U2 """ - - options = { - "CharacterEncoding": '"Unicode"', - "FormatType": "OutputForm", - "NumberMarks": "$NumberMarks", - "PageHeight": "Infinity", - "PageWidth": "Infinity", - "TotalHeight": "Infinity", - "TotalWidth": "Infinity", - } - - def apply(self, value, evaluation, **options): - "ToString[value_, OptionsPattern[ToString]]" - encoding = options["options"]["System`CharacterEncoding"] - text = value.format(evaluation, "System`OutputForm", encoding=encoding) - text = text.boxes_to_text(evaluation=evaluation) - return String(text) + options = { 'CharacterEncoding' : '$SystemCharacterEncoding', + 'FormatType' : 'OutputForm', + 'NumberMarks': '$NumberMarks', + 'PageHeight' : 'Infinity', + 'PageWidth' : 'Infinity', + 'TotalHeight' : 'Infinity', + 'TotalWidth' : 'Infinity'} + + def apply(self, value, evaluation, options): + 'ToString[value_, OptionsPattern[ToString]]' + encoding = options["System`CharacterEncoding"].evaluate(evaluation) + if not isinstance(encoding, String) or encoding.value not in encodings: + evaluation.message("General", "charcode", encoding) + encoding = Symbol("$SystemCharacterEncoding").evaluate(evaluation) + formattype = options["System`FormatType"].evaluate(evaluation) + res 
= value.format(evaluation, formattype) + res = res.boxes_to_text( + evaluation=evaluation, encoding=encoding.value) + return String(res) class ToExpression(Builtin): diff --git a/mathics/core/expression.py b/mathics/core/expression.py index 3dfacdfd19..84482dc812 100644 --- a/mathics/core/expression.py +++ b/mathics/core/expression.py @@ -17,6 +17,7 @@ from mathics.core.convert import sympy_symbol_prefix, SympyExpression import base64 +from mathics_scanner.characters import replace_wl_with_plain_text def fully_qualified_symbol_name(name) -> bool: return ( @@ -1906,7 +1907,9 @@ def do_copy(self) -> "Symbol": return Symbol(self.name) def boxes_to_text(self, **options) -> str: - return str(self.name) + if options.get("encoding", "UTF-8") in ("UTF-8", "UTF8"): + return str(self.name) + return replace_wl_with_plain_text(str(self.name), False) def atom_to_boxes(self, f, evaluation) -> "String": return String(evaluation.definitions.shorten_name(self.name)) @@ -2725,7 +2728,9 @@ def boxes_to_text(self, show_string_characters=False, **options) -> str: ): value = value[1:-1] - return value + if options.get("encoding", "UTF-8") in ("UTF-8", "UTF8"): + return value + return replace_wl_with_plain_text(value, False) def boxes_to_xml(self, show_string_characters=False, **options) -> str: from mathics.core.parser import is_symbol_name From f82607e259687e8d6d94fe759126eb0c03f04e59 Mon Sep 17 00:00:00 2001 From: mmatera Date: Tue, 30 Mar 2021 18:43:49 -0300 Subject: [PATCH 2/3] blacken + encoding --- mathics/builtin/strings.py | 26 ++++++++++++++------------ mathics/core/evaluation.py | 12 ++++++++++-- mathics/core/expression.py | 23 +++++++++++++++++------ 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py index 5962d2e8f1..95c5fd0aa4 100644 --- a/mathics/builtin/strings.py +++ b/mathics/builtin/strings.py @@ -1630,24 +1630,26 @@ class ToString(Builtin): >> "U" <> ToString[2] = U2 """ - options = { 
'CharacterEncoding' : '$SystemCharacterEncoding', - 'FormatType' : 'OutputForm', - 'NumberMarks': '$NumberMarks', - 'PageHeight' : 'Infinity', - 'PageWidth' : 'Infinity', - 'TotalHeight' : 'Infinity', - 'TotalWidth' : 'Infinity'} + + options = { + "CharacterEncoding": "$SystemCharacterEncoding", + "FormatType": "OutputForm", + "NumberMarks": "$NumberMarks", + "PageHeight": "Infinity", + "PageWidth": "Infinity", + "TotalHeight": "Infinity", + "TotalWidth": "Infinity", + } def apply(self, value, evaluation, options): - 'ToString[value_, OptionsPattern[ToString]]' + "ToString[value_, OptionsPattern[ToString]]" encoding = options["System`CharacterEncoding"].evaluate(evaluation) - if not isinstance(encoding, String) or encoding.value not in encodings: + if not isinstance(encoding, String) or encoding.value not in _encodings: evaluation.message("General", "charcode", encoding) encoding = Symbol("$SystemCharacterEncoding").evaluate(evaluation) - formattype = options["System`FormatType"].evaluate(evaluation) + formattype = options["System`FormatType"].evaluate(evaluation) res = value.format(evaluation, formattype) - res = res.boxes_to_text( - evaluation=evaluation, encoding=encoding.value) + res = res.boxes_to_text(evaluation=evaluation, encoding=encoding.value) return String(res) diff --git a/mathics/core/evaluation.py b/mathics/core/evaluation.py index c8f3ceaa34..ea76895662 100644 --- a/mathics/core/evaluation.py +++ b/mathics/core/evaluation.py @@ -13,7 +13,8 @@ from mathics_scanner import TranslateError from mathics import settings -from mathics.core.expression import ensure_context, KeyComparable, SymbolAborted +from mathics.core.expression import ensure_context, KeyComparable, SymbolAborted, wl_strings_to_specific_encoding +from mathics.settings import SYSTEM_CHARACTER_ENCODING FORMATS = [ "StandardForm", @@ -228,7 +229,12 @@ def display(self, data, metadata): class Evaluation(object): def __init__( - self, definitions=None, output=None, format="text", 
catch_interrupt=True + self, + definitions=None, + output=None, + format="text", + catch_interrupt=True, + default_encoding=SYSTEM_CHARACTER_ENCODING, ) -> None: from mathics.core.definitions import Definitions from mathics.core.expression import Symbol @@ -249,6 +255,7 @@ def __init__( self.quiet_all = False self.format = format self.catch_interrupt = catch_interrupt + self.encoding = default_encoding self.SymbolNull = Symbol("Null") @@ -510,6 +517,7 @@ def message(self, symbol, tag, *args) -> None: Expression("StringForm", text, *(from_python(arg) for arg in args)), "text" ) + text = wl_strings_to_specific_encoding(text, encoding=self.encoding) self.out.append(Message(symbol_shortname, tag, text)) self.output.out(self.out[-1]) diff --git a/mathics/core/expression.py b/mathics/core/expression.py index 84482dc812..a15808c119 100644 --- a/mathics/core/expression.py +++ b/mathics/core/expression.py @@ -17,8 +17,21 @@ from mathics.core.convert import sympy_symbol_prefix, SympyExpression import base64 +# In the future, this function should be replaced by one +# handling different encodings. from mathics_scanner.characters import replace_wl_with_plain_text + +def wl_strings_to_specific_encoding(string, enc=None): + """ + This function takes a string defined in the default + unicode WL encoding, and translate it to the corresponding + codepage. 
By now it is just an envelopment for + replace_wl_with_plain_text + """ + return replace_wl_with_plain_text(string, enc in ("UTF8", "UTF-8")) + + def fully_qualified_symbol_name(name) -> bool: return ( isinstance(name, str) @@ -1907,9 +1920,8 @@ def do_copy(self) -> "Symbol": return Symbol(self.name) def boxes_to_text(self, **options) -> str: - if options.get("encoding", "UTF-8") in ("UTF-8", "UTF8"): - return str(self.name) - return replace_wl_with_plain_text(str(self.name), False) + encoding = options.get("encoding", "UTF-8") + return wl_strings_to_specific_encoding(str(self.name), encoding) def atom_to_boxes(self, f, evaluation) -> "String": return String(evaluation.definitions.shorten_name(self.name)) @@ -2728,9 +2740,8 @@ def boxes_to_text(self, show_string_characters=False, **options) -> str: ): value = value[1:-1] - if options.get("encoding", "UTF-8") in ("UTF-8", "UTF8"): - return value - return replace_wl_with_plain_text(value, False) + encoding = options.get("encoding", "UTF-8") + return wl_strings_to_specific_encoding(value, encoding) def boxes_to_xml(self, show_string_characters=False, **options) -> str: from mathics.core.parser import is_symbol_name From 5441ee23ef96994938e7823e8ffc13e5e7ba258a Mon Sep 17 00:00:00 2001 From: mmatera Date: Sun, 18 Apr 2021 08:19:47 -0300 Subject: [PATCH 3/3] merge --- mathics/builtin/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py index 32c75ef3f8..49dafa8612 100644 --- a/mathics/builtin/strings.py +++ b/mathics/builtin/strings.py @@ -1670,7 +1670,7 @@ def apply_form(self, value, form, evaluation, options): encoding = options["System`CharacterEncoding"] if not isinstance(encoding, String) or encoding.value not in _encodings: evaluation.message("General", "charcode", encoding) - encoding = Symbol("$SystemCharacterEncoding").evaluate(evaluation) + encoding = Symbol("$SystemCharacterEncoding") text = value.format(evaluation, 
form.get_name(), encoding=encoding) text = text.boxes_to_text(evaluation=evaluation) return String(text)