Reject conversation samples whose message list is empty; bump version to 1.3.11.

_check_jsonl now raises InvalidFileFormatError (error_source="key_value") when
the messages column of a JSONL line is an empty list, and a unit test covers
that case.

NOTE(review): this patch was rebuilt from a copy whose newlines and indentation
had been collapsed. Line boundaries follow the hunk headers; the leading
whitespace inside the hunks is reconstructed -- verify context lines against
the target tree before applying.

diff --git a/pyproject.toml b/pyproject.toml
index 06dbae8a..eb9667c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.3.10"
+version = "1.3.11"
 authors = [
     "Together AI "
 ]
diff --git a/src/together/utils/files.py b/src/together/utils/files.py
index 6c5892f1..cc39fca0 100644
--- a/src/together/utils/files.py
+++ b/src/together/utils/files.py
@@ -177,6 +177,14 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
                             error_source="key_value",
                         )
 
+                    if len(json_line[message_column]) == 0:
+                        raise InvalidFileFormatError(
+                            message=f"Invalid format on line {idx + 1} of the input file. "
+                            f"Expected a non-empty list of messages. Found empty list",
+                            line_number=idx + 1,
+                            error_source="key_value",
+                        )
+
                     for turn_id, turn in enumerate(json_line[message_column]):
                         if not isinstance(turn, dict):
                             raise InvalidFileFormatError(
diff --git a/tests/unit/test_files_checks.py b/tests/unit/test_files_checks.py
index 7abae4ad..37c698d2 100644
--- a/tests/unit/test_files_checks.py
+++ b/tests/unit/test_files_checks.py
@@ -290,3 +290,16 @@ def test_check_jsonl_extra_column(tmp_path: Path):
     report = check_file(file)
     assert not report["is_check_passed"]
     assert "Found extra column" in report["message"]
+
+
+def test_check_jsonl_empty_messages(tmp_path: Path):
+    file = tmp_path / "empty_messages.jsonl"
+    content = [{"messages": []}]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+    assert not report["is_check_passed"]
+    assert (
+        "Expected a non-empty list of messages. Found empty list" in report["message"]
+    )