diff --git a/docs/modules/ROOT/pages/dataformats.adoc b/docs/modules/ROOT/pages/dataformats.adoc index 5fa576f8..0064226d 100644 --- a/docs/modules/ROOT/pages/dataformats.adoc +++ b/docs/modules/ROOT/pages/dataformats.adoc @@ -515,6 +515,113 @@ local params = { } ------------------------ +## XLSX Format + +### MIME types and identifiers +* `application/xlsx` +### `write` + +Creates an XLSX workbook from an array of JSON objects. The keys of the JSON objects are used as column headers. + +*Example* + +.Payload +------------------------ +[ + { + "First Name": "William", + "Last Name": "Shakespeare", + "Phone": "(123)456-7890" + }, + { + "First Name": "Christopher", + "Last Name": "Marlow", + "Phone": "(987)654-3210" + } +] +------------------------ +.DataSonnet map: +------------------------ +ds.write(payload, "application/xlsx") +------------------------ + +.Result +[%header,cols=3*a] +|=== +|First Name +|Last Name +|Phone + +|William +|Shakespeare +|(123)456-7890 + +|Christopher +|Marlow +|(987)654-3210 +|=== + +Providing an optional `params` object allows for more control over the format of the output XLSX. The XLSX can be created without headers - in this case the input can be an array of arrays. In addition, a list of columns can be specified to override the JSON object property names. 
The following XLSX output parameters are supported: + +[%header, cols=3*a] +|=== +|Parameter +|Description +|Default value + +|`UseHeader` +|If set to `true`, the first row of the XLSX will be a list of column headers, which will be mapped from the JSON object property names or from `Headers` +|`true` + +|`Headers` +|an array of strings to use as column names (has no effect if `UseHeader` is set to `false`) +|`"` + +|`UseTempFile` +|If set to `true`, a temporary file will be created to hold the output instead of a `byte[]` +|`false` +|=== + + + +*Example* + +.Payload +------------------------ +[ + [ + "William", + "Shakespeare", + "(123)456-7890" + ], + [ + "Christopher", + "Marlow", + "(987)654-3210" + ] +] +------------------------ +.DataSonnet map: +------------------------ +local params = { + "UseHeader": false +}; + +ds.write(payload, "application/xlsx", params) + +------------------------ +.Result +|=== +|William +|Shakespeare +|(123)456-7890 + +|Christopher +|Marlow +|(987)654-3210 +|=== + + ## Java Objects ### `read` diff --git a/pom.xml b/pom.xml index bdbe233e..e4b16657 100644 --- a/pom.xml +++ b/pom.xml @@ -77,7 +77,7 @@ UTF-8 UTF-8 - 2.11.2 + 2.13.0 5.6.2 1.68 0.9.4 @@ -123,6 +123,11 @@ jackson-dataformat-properties ${jackson.version} + + com.github.sett4 + jackson-dataformat-xlsx-lite + 2.13.0 + @@ -512,4 +517,11 @@ + + + + jitpack.io + https://jitpack.io + + diff --git a/src/main/java/com/datasonnet/document/MediaTypes.java b/src/main/java/com/datasonnet/document/MediaTypes.java index 8d2ebe1b..f6ae8d5f 100644 --- a/src/main/java/com/datasonnet/document/MediaTypes.java +++ b/src/main/java/com/datasonnet/document/MediaTypes.java @@ -1,7 +1,7 @@ package com.datasonnet.document; /*- - * Copyright 2019-2020 the original author or authors. + * Copyright 2019-2021 the original author or authors. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,22 +16,6 @@ * limitations under the License. */ -/* - * Copyright 2002-2020 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - import java.util.Optional; /** @@ -348,6 +332,10 @@ public class MediaTypes { public static final String APPLICATION_CSV_VALUE = "application/csv"; + public static final MediaType APPLICATION_XLSX; + + public static final String APPLICATION_XLSX_VALUE = "application/xlsx"; + // See Null Object pattern /** * Public constant media type for representing an unknown content type. 
This is meant to used to signal to Datasonnet @@ -390,6 +378,7 @@ public class MediaTypes { TEXT_XML = new MediaType("text", "xml"); APPLICATION_JAVA = new MediaType("application", "x-java-object"); APPLICATION_CSV = new MediaType("application", "csv"); + APPLICATION_XLSX = new MediaType("application", "xlsx"); UNKNOWN = new MediaType("unknown", "unknown"); } @@ -402,6 +391,8 @@ public static Optional forExtension(String ext) { return Optional.of(APPLICATION_XML); case "csv": return Optional.of(APPLICATION_CSV); + case "xlsx": + return Optional.of(APPLICATION_XLSX); case "txt": return Optional.of(TEXT_PLAIN); default: diff --git a/src/main/java/com/datasonnet/io/AutoDeleteFileInputStream.java b/src/main/java/com/datasonnet/io/AutoDeleteFileInputStream.java new file mode 100644 index 00000000..0a38695d --- /dev/null +++ b/src/main/java/com/datasonnet/io/AutoDeleteFileInputStream.java @@ -0,0 +1,39 @@ +package com.datasonnet.io; +/*- + * Copyright 2019-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * A {@link FileInputStream} that removes its backing file once the stream is closed.
 *
 * <p>Intended for temporary files whose lifetime should end with the stream that reads them.
 * Deletion is best-effort: a failure to delete is never reported to the caller.
 */
public class AutoDeleteFileInputStream extends FileInputStream {
    /** The file this stream reads from; deleted when the stream is closed. */
    private final File file;

    /**
     * Opens a stream over {@code file}.
     *
     * @param file the file to read and to delete on {@link #close()}
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public AutoDeleteFileInputStream(File file) throws FileNotFoundException {
        super(file);
        this.file = file;
    }

    /**
     * Closes the stream and then deletes the backing file.
     *
     * <p>The delete runs in a {@code finally} block so the file is removed even when closing the
     * underlying descriptor fails. Calling this method more than once is harmless.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            deleteBackingFile();
        }
    }

    /** Best-effort removal of the backing file; never throws. */
    private void deleteBackingFile() {
        try {
            // The handle is closed before deleting; if the delete still fails, retry at JVM exit.
            if (!file.delete() && file.exists()) {
                file.deleteOnExit();
            }
        } catch (SecurityException ignored) {
            // Deliberately quiet: cleanup must not mask the outcome of close().
        }
    }
}
+ */ + +import com.datasonnet.document.DefaultDocument; +import com.datasonnet.document.Document; +import com.datasonnet.document.MediaType; +import com.datasonnet.document.MediaTypes; +import com.datasonnet.io.AutoDeleteFileInputStream; +import com.datasonnet.spi.PluginException; +import com.datasonnet.spi.ujsonUtils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import com.github.sett4.dataformat.xlsx.XlsxMapper; +import com.github.sett4.dataformat.xlsx.XlsxParser; + +import ujson.Value; + +import javax.swing.text.html.Option; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class DefaultXLSXFormatPlugin extends BaseJacksonDataFormatPlugin { + public static final String DS_PARAM_USE_HEADER = "useheader"; + public static final String DS_PARAM_HEADERS = "headers"; + public static final String DS_PARAM_USE_TEMPFILE = "usetempfile"; + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final XlsxMapper XLSX_MAPPER = new XlsxMapper(); + + //static { + // XLSX_MAPPER.enable(XlsxParser.Feature.WRAP_AS_ARRAY); + //} + + public DefaultXLSXFormatPlugin() { + supportedTypes.add(MediaTypes.APPLICATION_XLSX); + //supportedTypes.add(MediaType.parseMediaType("text/csv")); + + writerParams.add(DS_PARAM_USE_HEADER); + writerParams.add(DS_PARAM_HEADERS); + writerParams.add(DS_PARAM_USE_TEMPFILE); + + writerSupportedClasses.add(InputStream.class); + writerSupportedClasses.add(byte[].class); + } + + private boolean isUseHeader(MediaType mediaType) { 
+ if (mediaType.getParameter(DS_PARAM_USE_HEADER) != null) { + return Boolean.parseBoolean(mediaType.getParameter(DS_PARAM_USE_HEADER)); + } + return true; + } + private boolean isUseTempfile(MediaType mediaType) { + if (mediaType.getParameter(DS_PARAM_USE_TEMPFILE) != null) { + return Boolean.parseBoolean(mediaType.getParameter(DS_PARAM_USE_TEMPFILE)); + } + return false; + } + + @SuppressWarnings("unchecked") + @Override + public Document write(Value input, MediaType mediaType, Class targetType) throws PluginException { + Map params = mediaType.getParameters(); + CsvSchema.Builder builder = this.getBuilder(mediaType); + + try { + final JsonNode jsonTree = OBJECT_MAPPER.valueToTree(ujsonUtils.javaObjectFrom(input)); + if (isUseHeader(mediaType)) { + if (params.containsKey(DS_PARAM_HEADERS)) { + String[] headers = params.get(DS_PARAM_HEADERS).split(","); + for (String header : headers) { + builder.addColumn(header); + } + } else { + JsonNode firstObject = jsonTree.elements().next(); + firstObject.fieldNames().forEachRemaining(builder::addColumn); + } + } + + CsvSchema csvSchema = builder.build(); + + if (targetType.isAssignableFrom(InputStream.class)) { + if (isUseTempfile(mediaType)) { + File f = File.createTempFile("datasonnet", "tmp"); + OutputStream out = new BufferedOutputStream(new FileOutputStream(f)); + XLSX_MAPPER.writerFor(JsonNode.class) + .with(csvSchema) + .writeValue(out, jsonTree); + out.flush(); + out.close(); + return (Document) new DefaultDocument<>(new BufferedInputStream(new AutoDeleteFileInputStream(f)), MediaTypes.APPLICATION_XLSX); + } + else { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + XLSX_MAPPER.writerFor(JsonNode.class) + .with(csvSchema) + .writeValue(out, jsonTree); + out.flush(); + out.close(); + return (Document) new DefaultDocument<>(new BufferedInputStream(new ByteArrayInputStream(baos.toByteArray())), MediaTypes.APPLICATION_XLSX); + } + } + + if 
(targetType.isAssignableFrom(byte[].class)) { + return (Document) new DefaultDocument<>(XLSX_MAPPER.writerFor(JsonNode.class) + .with(csvSchema) + .writeValueAsBytes(jsonTree), MediaTypes.APPLICATION_XLSX); + } + + throw new PluginException(new IllegalArgumentException("Unsupported document content class, use the test method canWrite before invoking write")); + + } catch (IOException e) { + throw new PluginException("Unable to processing XLSX", e); + } + } + + private CsvSchema.Builder getBuilder(MediaType mediaType) { + CsvSchema.Builder builder = CsvSchema.builder(); + + String useHeadrStr = mediaType.getParameter(DS_PARAM_USE_HEADER); + boolean useHeader = Boolean.parseBoolean(Optional.ofNullable(useHeadrStr).orElse("true")); + builder.setUseHeader(useHeader); + + return builder; + } +} diff --git a/src/main/java/com/datasonnet/spi/DataFormatService.java b/src/main/java/com/datasonnet/spi/DataFormatService.java index 25600d45..dff7d71d 100644 --- a/src/main/java/com/datasonnet/spi/DataFormatService.java +++ b/src/main/java/com/datasonnet/spi/DataFormatService.java @@ -1,7 +1,7 @@ package com.datasonnet.spi; /*- - * Copyright 2019-2020 the original author or authors. + * Copyright 2019-2021 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ import com.datasonnet.plugins.DefaultJSONFormatPlugin; import com.datasonnet.plugins.DefaultJavaFormatPlugin; import com.datasonnet.plugins.DefaultPlainTextFormatPlugin; +import com.datasonnet.plugins.DefaultXLSXFormatPlugin; import com.datasonnet.plugins.DefaultXMLFormatPlugin$; import ujson.Value; @@ -38,6 +39,7 @@ public class DataFormatService { new DefaultJavaFormatPlugin(), DefaultXMLFormatPlugin$.MODULE$, new DefaultCSVFormatPlugin(), + new DefaultXLSXFormatPlugin(), new DefaultPlainTextFormatPlugin())); public DataFormatService(List plugins) { diff --git a/src/main/scala/com/datasonnet/DS.scala b/src/main/scala/com/datasonnet/DS.scala index c2c1a8c5..c9a00561 100644 --- a/src/main/scala/com/datasonnet/DS.scala +++ b/src/main/scala/com/datasonnet/DS.scala @@ -398,7 +398,7 @@ object DSLowercase extends Library { (_, _, url: String) => url match { case str if str.startsWith("classpath://") => - val source = io.Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(str.replaceFirst("classpath://", ""))) + val source = scala.io.Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(str.replaceFirst("classpath://", ""))) val out = try { source.mkString diff --git a/src/test/java/com/datasonnet/JavaReaderTest.java b/src/test/java/com/datasonnet/JavaReaderTest.java index e1f3e890..0dd25cd0 100644 --- a/src/test/java/com/datasonnet/JavaReaderTest.java +++ b/src/test/java/com/datasonnet/JavaReaderTest.java @@ -1,7 +1,7 @@ package com.datasonnet; /*- - * Copyright 2019-2020 the original author or authors. + * Copyright 2019-2021 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,6 +33,7 @@ import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.HashMap; +import java.util.TimeZone; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -52,6 +53,7 @@ void testJavaReader() throws Exception { theGizmo.setManufacturer(manufacturer); SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd"); + df.setTimeZone(TimeZone.getTimeZone("UTC")); theGizmo.setDate(df.parse("2020-01-06")); Document data = new DefaultDocument<>(theGizmo, MediaTypes.APPLICATION_JAVA); diff --git a/src/test/java/com/datasonnet/XLSXWriterTest.java b/src/test/java/com/datasonnet/XLSXWriterTest.java new file mode 100644 index 00000000..f7c6529a --- /dev/null +++ b/src/test/java/com/datasonnet/XLSXWriterTest.java @@ -0,0 +1,108 @@ +package com.datasonnet; + +/*- + * Copyright 2019-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.datasonnet.document.DefaultDocument; +import com.datasonnet.document.Document; +import com.datasonnet.document.MediaTypes; +import com.datasonnet.util.TestResourceReader; + +import org.apache.commons.io.IOUtils; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.math.BigDecimal; +import java.net.URISyntaxException; +import java.util.Collections; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class XLSXWriterTest { + + @Test + void testXLSXWriter() throws URISyntaxException, IOException { + + Document data = new DefaultDocument<>( + TestResourceReader.readFileAsString("writeCSVTest.json"), + MediaTypes.APPLICATION_JSON + ); + + Mapper mapper = new Mapper("payload"); + + + Document mapped = mapper.transform(data, Collections.emptyMap(), MediaTypes.APPLICATION_XLSX, byte[].class); + assertEquals(MediaTypes.APPLICATION_XLSX, mapped.getMediaType()); + + + Workbook workbook = new XSSFWorkbook(new ByteArrayInputStream(mapped.getContent())); + int activeSheetIndex = workbook.getActiveSheetIndex(); + Sheet sheet = workbook.getSheetAt(activeSheetIndex); + + // Verify header + assertEquals("First Name", sheet.getRow(0).getCell(0).getStringCellValue()); + assertEquals("Last Name", sheet.getRow(0).getCell(1).getStringCellValue()); + assertEquals("Phone", sheet.getRow(0).getCell(2).getStringCellValue()); + // Verify row 1 + assertEquals("William", sheet.getRow(1).getCell(0).getStringCellValue()); + assertEquals("Shakespeare", sheet.getRow(1).getCell(1).getStringCellValue()); + assertEquals("(123)456-7890", sheet.getRow(1).getCell(2).getStringCellValue()); + // Verify row 2 + assertEquals("Christopher", 
sheet.getRow(2).getCell(0).getStringCellValue()); + assertEquals("Marlow", sheet.getRow(2).getCell(1).getStringCellValue()); + assertEquals("(987)654-3210", sheet.getRow(2).getCell(2).getStringCellValue()); + + workbook.close(); + + } + + @Test + void testXLSXWriterExt() throws IOException, URISyntaxException { + Document data = new DefaultDocument<>( + TestResourceReader.readFileAsString("writeCSVExtTest.json"), + MediaTypes.APPLICATION_JSON + ); + String datasonnet = TestResourceReader.readFileAsString("writeXLSXExtTest.ds"); + + Mapper mapper = new Mapper(datasonnet); + + + InputStream mapped = mapper.transform(data, Collections.emptyMap(), MediaTypes.APPLICATION_XLSX, InputStream.class).getContent(); + + Workbook workbook = new XSSFWorkbook(mapped); + int activeSheetIndex = workbook.getActiveSheetIndex(); + Sheet sheet = workbook.getSheetAt(activeSheetIndex); + + // Verify row 1 + assertEquals("William", sheet.getRow(0).getCell(0).getStringCellValue()); + assertEquals("Shakespeare", sheet.getRow(0).getCell(1).getStringCellValue()); + assertEquals("(123)456-7890", sheet.getRow(0).getCell(2).getStringCellValue()); + // Verify row 2 + assertEquals("Christopher", sheet.getRow(1).getCell(0).getStringCellValue()); + assertEquals("Marlow", sheet.getRow(1).getCell(1).getStringCellValue()); + assertEquals("(987)654-3210", sheet.getRow(1).getCell(2).getStringCellValue()); + + workbook.close(); + } + +} diff --git a/src/test/resources/writeXLSXExtTest.ds b/src/test/resources/writeXLSXExtTest.ds new file mode 100644 index 00000000..2ad59f4b --- /dev/null +++ b/src/test/resources/writeXLSXExtTest.ds @@ -0,0 +1,5 @@ +/** DataSonnet +version=2.0 +output application/xlsx; UseHeader=false; UseTempfile=true +*/ +payload \ No newline at end of file