From a61c053a33ba85adf92c5df07c673ed1c94238a4 Mon Sep 17 00:00:00 2001 From: RiviaAzusa <526556008@qq.com> Date: Tue, 9 Sep 2025 14:02:28 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E4=B8=AD=E6=96=87?= =?UTF-8?q?=E5=AD=97=E7=AC=A6=E5=9C=A8LangChain=20CallbackHandler=E4=B8=AD?= =?UTF-8?q?=E8=A2=AB=E8=BF=87=E5=BA=A6=E8=BD=AC=E4=B9=89=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在所有json.dumps调用中添加ensure_ascii=False参数,确保中文等非ASCII字符不会被转义为\uXXXX格式。 修改的文件: - langfuse/_client/attributes.py: 核心序列化函数 - langfuse/_utils/request.py: API请求数据序列化 - langfuse/_client/utils.py: 调试输出格式化 - langfuse/_task_manager/score_ingestion_consumer.py: 分数处理序列化 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- langfuse/_client/attributes.py | 2 +- langfuse/_client/utils.py | 1 + langfuse/_task_manager/score_ingestion_consumer.py | 4 ++-- langfuse/_utils/request.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/langfuse/_client/attributes.py b/langfuse/_client/attributes.py index 343c70cdb..4cd017920 100644 --- a/langfuse/_client/attributes.py +++ b/langfuse/_client/attributes.py @@ -174,7 +174,7 @@ def _serialize(obj: Any) -> Optional[str]: if obj is None or isinstance(obj, str): return obj - return json.dumps(obj, cls=EventSerializer) + return json.dumps(obj, cls=EventSerializer, ensure_ascii=False) def _flatten_and_serialize_metadata( diff --git a/langfuse/_client/utils.py b/langfuse/_client/utils.py index 16d963d88..2ef187507 100644 --- a/langfuse/_client/utils.py +++ b/langfuse/_client/utils.py @@ -59,6 +59,7 @@ def span_formatter(span: ReadableSpan) -> str: "instrumentationScope": instrumentationScope, }, indent=2, + ensure_ascii=False, ) + "\n" ) diff --git a/langfuse/_task_manager/score_ingestion_consumer.py b/langfuse/_task_manager/score_ingestion_consumer.py index 1a5b61f91..7ad781be0 100644 --- a/langfuse/_task_manager/score_ingestion_consumer.py +++ b/langfuse/_task_manager/score_ingestion_consumer.py @@ -85,7 +85,7 @@ def _next(self) -> list: # check for serialization errors try: - json.dumps(event, cls=EventSerializer) + json.dumps(event, cls=EventSerializer, ensure_ascii=False) except Exception as e: self._log.error( f"Data error: Failed to serialize score object for ingestion. Score will be dropped. Error: {e}" @@ -117,7 +117,7 @@ def _next(self) -> list: def _get_item_size(self, item: Any) -> int: """Return the size of the item in bytes.""" - return len(json.dumps(item, cls=EventSerializer).encode()) + return len(json.dumps(item, cls=EventSerializer, ensure_ascii=False).encode()) def run(self) -> None: """Run the consumer.""" diff --git a/langfuse/_utils/request.py b/langfuse/_utils/request.py index 182fe3ffe..e3dd0fe4a 100644 --- a/langfuse/_utils/request.py +++ b/langfuse/_utils/request.py @@ -60,7 +60,7 @@ def post(self, **kwargs: Any) -> httpx.Response: """Post the `kwargs` to the API""" log = logging.getLogger("langfuse") url = self._remove_trailing_slash(self._base_url) + "/api/public/ingestion" - data = json.dumps(kwargs, cls=EventSerializer) + data = json.dumps(kwargs, cls=EventSerializer, ensure_ascii=False) log.debug("making request: %s to %s", data, url) headers = self.generate_headers() res = self._session.post( From db52ae6fbe6dd00dc70841661bb4d5e805f45245 Mon Sep 17 00:00:00 2001 From: RiviaAzusa <526556008@qq.com> Date: Wed, 14 Jan 2026 17:31:02 +0800 Subject: [PATCH 2/2] fix: add test case for Chinese character escaping issue --- tests/test_serializer.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_serializer.py b/tests/test_serializer.py index 4faf7019b..26849cd77 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -174,3 +174,27 @@ def __init__(self): obj = SlotClass() serializer = EventSerializer() assert json.loads(serializer.encode(obj)) == {"field": "value"} + + +def test_non_ascii_characters_not_escaped(): + """Test that non-ASCII characters are serialized directly without \\uXXXX escaping.""" + data = { + "chinese": "你好世界", + "japanese": "こんにちは", + "korean": "안녕하세요", + "emoji": "🎉", + } + + result = json.dumps(data, cls=EventSerializer, ensure_ascii=False) + + # Verify non-ASCII characters appear directly in output + assert "你好世界" in result + assert "こんにちは" in result + assert "안녕하세요" in result + assert "🎉" in result + + # Verify no unicode escape sequences + assert "\\u" not in result + + # Verify JSON is still valid + assert json.loads(result) == data