diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index fef4a1d21b4b..868dc9bf4916 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -90,7 +90,7 @@ impl TryFrom<(&ConfigOptions, &StatementOptions)> for CsvWriterOptions { "Unable to convert CSV delimiter into u8".into(), ) })?) - }, + }, _ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for CSV format!"))) } } diff --git a/datafusion/core/tests/sql/csv_files.rs b/datafusion/core/tests/sql/csv_files.rs deleted file mode 100644 index 5ed0068d6135..000000000000 --- a/datafusion/core/tests/sql/csv_files.rs +++ /dev/null @@ -1,125 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use super::*; - -#[tokio::test] -async fn csv_custom_quote() -> Result<()> { - let tmp_dir = TempDir::new()?; - let ctx = SessionContext::new(); - - let schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Utf8, false), - ])); - let filename = format!("partition.{}", "csv"); - let file_path = tmp_dir.path().join(filename); - let mut file = File::create(file_path)?; - - // generate some data - for index in 0..10 { - let text1 = format!("id{index:}"); - let text2 = format!("value{index:}"); - let data = format!("~{text1}~,~{text2}~\r\n"); - file.write_all(data.as_bytes())?; - } - ctx.register_csv( - "test", - tmp_dir.path().to_str().unwrap(), - CsvReadOptions::new() - .schema(&schema) - .has_header(false) - .quote(b'~'), - ) - .await?; - - let results = plan_and_collect(&ctx, "SELECT * from test").await?; - - let expected = vec![ - "+-----+--------+", - "| c1 | c2 |", - "+-----+--------+", - "| id0 | value0 |", - "| id1 | value1 |", - "| id2 | value2 |", - "| id3 | value3 |", - "| id4 | value4 |", - "| id5 | value5 |", - "| id6 | value6 |", - "| id7 | value7 |", - "| id8 | value8 |", - "| id9 | value9 |", - "+-----+--------+", - ]; - - assert_batches_sorted_eq!(expected, &results); - Ok(()) -} - -#[tokio::test] -async fn csv_custom_escape() -> Result<()> { - let tmp_dir = TempDir::new()?; - let ctx = SessionContext::new(); - - let schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Utf8, false), - ])); - let filename = format!("partition.{}", "csv"); - let file_path = tmp_dir.path().join(filename); - let mut file = File::create(file_path)?; - - // generate some data - for index in 0..10 { - let text1 = format!("id{index:}"); - let text2 = format!("value\\\"{index:}"); - let data = format!("\"{text1}\",\"{text2}\"\r\n"); - file.write_all(data.as_bytes())?; - } - - ctx.register_csv( - "test", - tmp_dir.path().to_str().unwrap(), - CsvReadOptions::new() - 
.schema(&schema) - .has_header(false) - .escape(b'\\'), - ) - .await?; - - let results = plan_and_collect(&ctx, "SELECT * from test").await?; - - let expected = vec![ - "+-----+---------+", - "| c1 | c2 |", - "+-----+---------+", - "| id0 | value\"0 |", - "| id1 | value\"1 |", - "| id2 | value\"2 |", - "| id3 | value\"3 |", - "| id4 | value\"4 |", - "| id5 | value\"5 |", - "| id6 | value\"6 |", - "| id7 | value\"7 |", - "| id8 | value\"8 |", - "| id9 | value\"9 |", - "+-----+---------+", - ]; - - assert_batches_sorted_eq!(expected, &results); - Ok(()) -} diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 6d783a503184..af3d46491726 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -74,7 +74,6 @@ macro_rules! test_expression { pub mod aggregates; pub mod create_drop; -pub mod csv_files; pub mod explain_analyze; pub mod expr; pub mod group_by;
diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs
index f5ab8f71aaaf..653f78b60629 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -104,6 +104,10 @@ impl TestContext {
                 info!("Registering metadata table tables");
                 register_metadata_tables(test_ctx.session_ctx()).await;
             }
+            "csv_files.slt" => {
+                info!("Registering CSV tables with custom quote/escape options");
+                register_csv_custom_tables(&mut test_ctx).await;
+            }
             _ => {
                 info!("Using default SessionContext");
             }
@@ -219,6 +223,47 @@ pub async fn register_partition_table(test_ctx: &mut TestContext) {
         .unwrap();
 }
 
+/// Registers the `test_custom_quote_escape` table queried by `csv_files.slt`.
+///
+/// Writes ten header-less rows shaped like `~id0~,~value0~` to
+/// `quote_escape.csv` in the directory returned by `test_ctx.testdir_path()`,
+/// then registers that file as CSV with `~` as the quote byte and `\` as the
+/// escape byte.
+///
+/// NOTE(review): none of the generated rows contains an escape sequence, so
+/// the `escape` option is set but not exercised by the current data.
+///
+/// # Panics
+///
+/// Panics if the file cannot be created or written, or if registration fails.
+pub async fn register_csv_custom_tables(test_ctx: &mut TestContext) {
+    test_ctx.enable_testdir();
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("c1", DataType::Utf8, false),
+        Field::new("c2", DataType::Utf8, false),
+    ]));
+    let file_path = test_ctx.testdir_path().join("quote_escape.csv");
+    let mut file = File::create(&file_path).unwrap();
+
+    // One quoted row per index, e.g. `~id0~,~value0~`, terminated by CRLF.
+    for index in 0..10 {
+        let data = format!("~id{index}~,~value{index}~\r\n");
+        file.write_all(data.as_bytes()).unwrap();
+    }
+    test_ctx
+        .ctx
+        .register_csv(
+            "test_custom_quote_escape",
+            file_path.to_str().unwrap(),
+            CsvReadOptions::new()
+                .schema(&schema)
+                .has_header(false)
+                .quote(b'~')
+                .escape(b'\\'),
+        )
+        .await
+        .unwrap();
+}
+
 // registers a LOCAL TEMPORARY table.
 pub async fn register_temp_table(ctx: &SessionContext) {
     struct TestTable(TableType);
diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt new file mode 100644 index 000000000000..d9e3f51f8544 --- /dev/null +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +############# +## Csv Files Tests +############# + +# TODO: https://github.com/apache/arrow-datafusion/issues/8310 +# statement ok +# CREATE EXTERNAL TABLE test_custom_quote_escape ( +# c1 VARCHAR DEFAULT NULL, +# c2 VARCHAR DEFAULT NULL +# ) +# STORED AS CSV +# WITH HEADER ROW +# DELIMITER ',' +# OPTIONS ('quote' '~', 'escape' '\') +# LOCATION 'test_custom_quote_escape.csv'; + +query TT +SELECT * from test_custom_quote_escape; +---- +id0 value0 +id1 value1 +id2 value2 +id3 value3 +id4 value4 +id5 value5 +id6 value6 +id7 value7 +id8 value8 +id9 value9