From c5bb25ecd7cec0465b650591378888bcad58b426 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 9 Oct 2019 11:25:44 +0800 Subject: [PATCH 1/6] init pr --- spark/spark-tensorflow-connector/pom.xml | 15 ++++++++------- .../datasources/tfrecords/DefaultSource.scala | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/spark/spark-tensorflow-connector/pom.xml b/spark/spark-tensorflow-connector/pom.xml index 389ab173..748078aa 100644 --- a/spark/spark-tensorflow-connector/pom.xml +++ b/spark/spark-tensorflow-connector/pom.xml @@ -27,13 +27,14 @@ UTF-8 - 3.2.2 - 2.11 - 1.0 - 2.2.6 - 3.0 + 4.2.0 + 2.12.10 + 2.12 + 2.0.0 + 3.0.8 + 3.8.0 1.8 - 2.3.1 + 3.0.0-SNAPSHOT 2.7.3 4.11 @@ -86,7 +87,7 @@ incremental true - ${scala.binary.version} + ${scala.version} false diff --git a/spark/spark-tensorflow-connector/src/main/scala/org/tensorflow/spark/datasources/tfrecords/DefaultSource.scala b/spark/spark-tensorflow-connector/src/main/scala/org/tensorflow/spark/datasources/tfrecords/DefaultSource.scala index bea6623d..628ed65a 100644 --- a/spark/spark-tensorflow-connector/src/main/scala/org/tensorflow/spark/datasources/tfrecords/DefaultSource.scala +++ b/spark/spark-tensorflow-connector/src/main/scala/org/tensorflow/spark/datasources/tfrecords/DefaultSource.scala @@ -167,7 +167,7 @@ class DefaultSource extends DataSourceRegister } } -object DefaultSource { +object DefaultSource extends scala.Serializable { // The function run on each worker. // Writes the partition to a file and returns the number of records output. private def writePartitionLocal( From 0a6c41213eead5c81a607c0a02123cbe3c3be2df Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 16 Oct 2019 17:32:27 +0800 Subject: [PATCH 2/6] fix flaky test and update pom artifact --- spark/spark-tensorflow-connector/pom.xml | 2 +- .../spark/datasources/tfrecords/LocalWriteSuite.scala | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/spark/spark-tensorflow-connector/pom.xml b/spark/spark-tensorflow-connector/pom.xml index 748078aa..0d6fa576 100644 --- a/spark/spark-tensorflow-connector/pom.xml +++ b/spark/spark-tensorflow-connector/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 org.tensorflow - spark-tensorflow-connector_2.11 + spark-tensorflow-connector_2.12 jar 1.10.0 spark-tensorflow-connector diff --git a/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala b/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala index 2c039003..9eac202b 100644 --- a/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala +++ b/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types._ +import org.apache.commons.io.FileUtils + class LocalWriteSuite extends SharedSparkSessionSuite { val testRows: Array[Row] = Array( @@ -49,19 +51,18 @@ class LocalWriteSuite extends SharedSparkSessionSuite { // In a distributed setting though, two different machines would each hold a single // partition. 
val localPath = Files.createTempDirectory("spark-connector-propagate").toAbsolutePath.toString - // Delete the directory, the default mode is ErrorIfExists - Files.delete(Paths.get(localPath)) + val savePath = localPath + "/testResult" df.write.format("tfrecords") .option("recordType", "Example") .option("writeLocality", "local") - .save(localPath) + .save(savePath) // Read again this directory, this time using the Hadoop file readers, it should // return the same data. // This only works in this test and does not hold in general, because the partitions // will be written on the workers. Everything runs locally for tests. val df2 = spark.read.format("tfrecords").option("recordType", "Example") - .load(localPath).sort("id").select("id", "IntegerTypeLabel", "LongTypeLabel", + .load(savePath).sort("id").select("id", "IntegerTypeLabel", "LongTypeLabel", "FloatTypeLabel", "DoubleTypeLabel", "VectorLabel", "name") // Correct column order. assert(df2.collect().toSeq === testRows.toSeq) From 77bab802ccc3fd3d4d2a96ff4ea0da7334ed9119 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 17 Oct 2019 10:58:31 +0800 Subject: [PATCH 3/6] Revert "fix flaky test and update pom artifact" This reverts commit 0a6c41213eead5c81a607c0a02123cbe3c3be2df. --- spark/spark-tensorflow-connector/pom.xml | 2 +- .../spark/datasources/tfrecords/LocalWriteSuite.scala | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/spark/spark-tensorflow-connector/pom.xml b/spark/spark-tensorflow-connector/pom.xml index 0d6fa576..748078aa 100644 --- a/spark/spark-tensorflow-connector/pom.xml +++ b/spark/spark-tensorflow-connector/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 org.tensorflow - spark-tensorflow-connector_2.12 + spark-tensorflow-connector_2.11 jar 1.10.0 spark-tensorflow-connector diff --git a/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala b/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala index 9eac202b..2c039003 100644 --- a/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala +++ b/spark/spark-tensorflow-connector/src/test/scala/org/tensorflow/spark/datasources/tfrecords/LocalWriteSuite.scala @@ -22,8 +22,6 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types._ -import org.apache.commons.io.FileUtils - class LocalWriteSuite extends SharedSparkSessionSuite { val testRows: Array[Row] = Array( @@ -51,18 +49,19 @@ class LocalWriteSuite extends SharedSparkSessionSuite { // In a distributed setting though, two different machines would each hold a single // partition. val localPath = Files.createTempDirectory("spark-connector-propagate").toAbsolutePath.toString - val savePath = localPath + "/testResult" + // Delete the directory, the default mode is ErrorIfExists + Files.delete(Paths.get(localPath)) df.write.format("tfrecords") .option("recordType", "Example") .option("writeLocality", "local") - .save(savePath) + .save(localPath) // Read again this directory, this time using the Hadoop file readers, it should // return the same data. // This only works in this test and does not hold in general, because the partitions // will be written on the workers. Everything runs locally for tests. 
val df2 = spark.read.format("tfrecords").option("recordType", "Example") - .load(savePath).sort("id").select("id", "IntegerTypeLabel", "LongTypeLabel", + .load(localPath).sort("id").select("id", "IntegerTypeLabel", "LongTypeLabel", "FloatTypeLabel", "DoubleTypeLabel", "VectorLabel", "name") // Correct column order. assert(df2.collect().toSeq === testRows.toSeq) From 292ce871a92229c1947c108b9dc6c9cb2266ec8a Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 17 Oct 2019 10:59:22 +0800 Subject: [PATCH 4/6] update pom artifact --- spark/spark-tensorflow-connector/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/spark-tensorflow-connector/pom.xml b/spark/spark-tensorflow-connector/pom.xml index 748078aa..0d6fa576 100644 --- a/spark/spark-tensorflow-connector/pom.xml +++ b/spark/spark-tensorflow-connector/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 org.tensorflow - spark-tensorflow-connector_2.11 + spark-tensorflow-connector_2.12 jar 1.10.0 spark-tensorflow-connector From 2ffb7b4aed5c691b45e7fe3d2eea31c99d36d069 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Sun, 10 Nov 2019 14:49:09 +0800 Subject: [PATCH 5/6] update pom.xml --- spark/spark-tensorflow-connector/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/spark-tensorflow-connector/pom.xml b/spark/spark-tensorflow-connector/pom.xml index 0d6fa576..0d060ce2 100644 --- a/spark/spark-tensorflow-connector/pom.xml +++ b/spark/spark-tensorflow-connector/pom.xml @@ -34,7 +34,7 @@ 3.0.8 3.8.0 1.8 - 3.0.0-SNAPSHOT + 3.0.0-preview 2.7.3 4.11 From 0493f823cc86fc1f021f54f81d4c59ec20b6b3d0 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 2 Apr 2020 23:37:34 +0800 Subject: [PATCH 6/6] update to spark 3.0 preview2 --- spark/spark-tensorflow-connector/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/spark-tensorflow-connector/pom.xml b/spark/spark-tensorflow-connector/pom.xml index 0d060ce2..114ee6a2 100644 --- a/spark/spark-tensorflow-connector/pom.xml +++ b/spark/spark-tensorflow-connector/pom.xml @@ -34,7 +34,7 @@ 3.0.8 3.8.0 1.8 - 3.0.0-preview + 3.0.0-preview2 2.7.3 4.11
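Note on the one source-level change in this series: aside from the dependency bumps (Scala 2.11 -> 2.12, Spark 2.3.1 -> 3.0.0-preview2, scalatest 3.0.8) and the `_2.12` artifact rename, PATCH 1 also marks the `DefaultSource` companion object as `scala.Serializable`. Its `writePartitionLocal` helper is described in the diff as "the function run on each worker", i.e. it is invoked from Spark task closures, and extending `Serializable` is a safety net for the case where the object itself ends up captured in such a closure. Below is a minimal sketch of that pattern, assuming only what the diff shows; it is not the connector's actual code, and `LocalPartitionWriter` / `writePartition` are hypothetical names used purely for illustration.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical stand-in for the DefaultSource companion object: a singleton
// whose method runs inside Spark task closures. Extending Serializable means
// that if the object itself is captured by a closure, task serialization
// will not fail with a "Task not serializable" error.
object LocalPartitionWriter extends Serializable {
  // Runs on each executor; returns the number of records in the partition.
  def writePartition(rows: Iterator[String]): Int = rows.size
}

object SerializableObjectSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("serializable-object-sketch")
      .getOrCreate()

    // Each partition is handed to the singleton's method from within a task
    // closure, mirroring how writePartitionLocal is used per the diff comment.
    val perPartitionCounts = spark.sparkContext
      .parallelize(Seq("a", "b", "c", "d"), numSlices = 2)
      .mapPartitions(rows => Iterator(LocalPartitionWriter.writePartition(rows)))
      .collect()

    println(s"records per partition: ${perPartitionCounts.mkString(", ")}")
    spark.stop()
  }
}
```

In many cases a Scala `object` referenced from a lambda compiles down to a static module access and is never serialized at all, so the `extends scala.Serializable` added in PATCH 1 is best read as a low-cost guard for the cases where the object reference does get captured, rather than a behavioral change.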