diff --git a/.travis/hbase-install.sh b/.travis/hbase-install.sh
index bba6df207a..40e810657f 100755
--- a/.travis/hbase-install.sh
+++ b/.travis/hbase-install.sh
@@ -1,6 +1,6 @@
 #! /bin/bash
 
-if [ ! -f $HOME/downloads/hbase-1.3.0-bin.tar.gz ]; then sudo wget -O $HOME/downloads/hbase-1.3.0-bin.tar.gz http://www-eu.apache.org/dist/hbase/1.3.0/hbase-1.3.0-bin.tar.gz; fi
-sudo mv $HOME/downloads/hbase-1.3.0-bin.tar.gz hbase-1.3.0-bin.tar.gz && tar xzf hbase-1.3.0-bin.tar.gz
-sudo rm -f hbase-1.3.0/conf/hbase-site.xml && sudo mv .travis/hbase/hbase-site.xml hbase-1.3.0/conf
-sudo hbase-1.3.0/bin/start-hbase.sh
+if [ ! -f $HOME/downloads/hbase-1.3.1-bin.tar.gz ]; then sudo wget -O $HOME/downloads/hbase-1.3.1-bin.tar.gz http://www-eu.apache.org/dist/hbase/1.3.1/hbase-1.3.1-bin.tar.gz; fi
+sudo mv $HOME/downloads/hbase-1.3.1-bin.tar.gz hbase-1.3.1-bin.tar.gz && tar xzf hbase-1.3.1-bin.tar.gz
+sudo rm -f hbase-1.3.1/conf/hbase-site.xml && sudo mv .travis/hbase/hbase-site.xml hbase-1.3.1/conf
+sudo hbase-1.3.1/bin/start-hbase.sh
diff --git a/build.sbt b/build.sbt
index ac94f116d3..72780337b3 100644
--- a/build.sbt
+++ b/build.sbt
@@ -38,7 +38,7 @@ lazy val commonSettings = Seq(
 
   credentials += Credentials(Path.userHome / ".ivy2" / ".credentials"),
 
-  addCompilerPlugin("org.spire-math" % "kind-projector" % "0.9.3" cross CrossVersion.binary),
+  addCompilerPlugin("org.spire-math" % "kind-projector" % "0.9.4" cross CrossVersion.binary),
   addCompilerPlugin("org.scalamacros" %% "paradise" % "2.1.0" cross CrossVersion.full),
 
   pomExtra := (
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 32dd8cd1b8..6f9dd3a37f 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -19,8 +19,8 @@ import sbt._
 object Dependencies {
   val typesafeConfig = "com.typesafe" % "config" % "1.3.1"
   val logging = "com.typesafe.scala-logging" %% "scala-logging" % "3.5.0"
-  val scalatest = "org.scalatest" %% "scalatest" % "3.0.1"
-  val scalacheck = "org.scalacheck" %% "scalacheck" % "1.13.4"
+  val scalatest = "org.scalatest" %% "scalatest" % "3.0.3"
+  val scalacheck = "org.scalacheck" %% "scalacheck" % "1.13.5"
   val jts = "com.vividsolutions" % "jts-core" % "1.14.0"
 
   val monocleCore = "com.github.julien-truffaut" %% "monocle-core" % Version.monocle
@@ -34,20 +34,20 @@ object Dependencies {
 
   val apacheMath = "org.apache.commons" % "commons-math3" % "3.6.1"
 
-  val chronoscala = "jp.ne.opt" %% "chronoscala" % "0.1.2"
+  val chronoscala = "jp.ne.opt" %% "chronoscala" % "0.1.3"
 
-  val awsSdkS3 = "com.amazonaws" % "aws-java-sdk-s3" % "1.11.92"
+  val awsSdkS3 = "com.amazonaws" % "aws-java-sdk-s3" % "1.11.143"
 
   val scalazStream = "org.scalaz.stream" %% "scalaz-stream" % "0.8.6a"
 
   val sparkCore = "org.apache.spark" %% "spark-core" % Version.spark
   val hadoopClient = "org.apache.hadoop" % "hadoop-client" % Version.hadoop
 
-  val avro = "org.apache.avro" % "avro" % "1.8.1"
+  val avro = "org.apache.avro" % "avro" % "1.8.2"
 
-  val slickPG = "com.github.tminglei" %% "slick-pg" % "0.14.6"
+  val slickPG = "com.github.tminglei" %% "slick-pg" % "0.15.0"
 
-  val parserCombinators = "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.5"
+  val parserCombinators = "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.6"
 
   val jsonSchemaValidator = "com.networknt" % "json-schema-validator" % "0.1.7"
 }
diff --git a/project/Environment.scala b/project/Environment.scala
index 196a6c875b..27a0724d79 100644
--- a/project/Environment.scala
+++ b/project/Environment.scala
@@ -20,7 +20,7 @@ object Environment {
   def either(environmentVariable: String, default: String): String =
     Properties.envOrElse(environmentVariable, default)
 
-  lazy val hadoopVersion = either("SPARK_HADOOP_VERSION", "2.7.3")
-  lazy val sparkVersion = either("SPARK_VERSION", "2.1.0")
+  lazy val hadoopVersion = either("SPARK_HADOOP_VERSION", "2.8.0")
+  lazy val sparkVersion = either("SPARK_VERSION", "2.1.1")
   lazy val versionSuffix = either("GEOTRELLIS_VERSION_SUFFIX", "-SNAPSHOT")
 }
diff --git a/project/Version.scala b/project/Version.scala
index 97fd17b9b9..123e33971b 100644
--- a/project/Version.scala
+++ b/project/Version.scala
@@ -17,13 +17,13 @@ object Version {
   val geotrellis = "1.2.0" + Environment.versionSuffix
   val scala = "2.11.11"
-  val geotools = "16.1"
+  val geotools = "17.1"
   val sprayJson = "1.3.3"
   val monocle = "1.4.0"
-  val accumulo = "1.7.2"
-  val cassandra = "3.1.4"
-  val hbase = "1.3.0"
-  val geomesa = "1.2.7.3"
+  val accumulo = "1.7.3"
+  val cassandra = "3.2.0"
+  val hbase = "1.3.1"
+  val geomesa = "1.2.8"
   lazy val hadoop = Environment.hadoopVersion
   lazy val spark = Environment.sparkVersion
 }
diff --git a/spark/build.sbt b/spark/build.sbt
index e4dcd841b2..f584d7868c 100644
--- a/spark/build.sbt
+++ b/spark/build.sbt
@@ -4,7 +4,7 @@ name := "geotrellis-spark"
 libraryDependencies ++= Seq(
   sparkCore % "provided",
   hadoopClient % "provided",
-  "com.google.uzaygezen" % "uzaygezen-core" % "0.2",
+  "org.locationtech.sfcurve" %% "sfcurve-geowave-index" % "0.2.1-SNAPSHOT",
   "org.scalaj" %% "scalaj-http" % "2.3.0",
   avro,
   spire,
diff --git a/spark/src/main/scala/geotrellis/spark/io/index/GeowaveKeyIndexMethod.scala b/spark/src/main/scala/geotrellis/spark/io/index/GeowaveKeyIndexMethod.scala
new file mode 100644
index 0000000000..51424ce572
--- /dev/null
+++ b/spark/src/main/scala/geotrellis/spark/io/index/GeowaveKeyIndexMethod.scala
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2017 Azavea
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package geotrellis.spark.io.index
+
+import geotrellis.spark._
+import geotrellis.spark.io.index.geowave._
+
+private[index] trait GeowaveKeyIndexMethod
+
+object GeowaveKeyIndexMethod extends GeowaveKeyIndexMethod {
+
+  implicit def spatialKeyIndexMethod(m: GeowaveKeyIndexMethod): KeyIndexMethod[SpatialKey] =
+    new KeyIndexMethod[SpatialKey] {
+      def createIndex(keyBounds: KeyBounds[SpatialKey]): KeyIndex[SpatialKey] = {
+        val xBits = resolution(keyBounds.maxKey.col, keyBounds.minKey.col)
+        val yBits = resolution(keyBounds.maxKey.row, keyBounds.minKey.row)
+        new GeowaveSpatialKeyIndex(keyBounds, xBits, yBits)
+      }
+    }
+
+}
diff --git a/spark/src/main/scala/geotrellis/spark/io/index/MergeQueue.scala b/spark/src/main/scala/geotrellis/spark/io/index/MergeQueue.scala
index 8634846b90..f03b6ec802 100644
--- a/spark/src/main/scala/geotrellis/spark/io/index/MergeQueue.scala
+++ b/spark/src/main/scala/geotrellis/spark/io/index/MergeQueue.scala
@@ -28,9 +28,9 @@ object MergeQueue{
 class MergeQueue(initialSize: Int = 1) {
   private var array = if(initialSize <= 1) { Array.ofDim[(Long, Long)](1) } else { Array.ofDim[(Long, Long)](initialSize) }
   private var _size = 0
-  
+
   def size = _size
-  
+
   private def removeElement(i: Int): Unit = {
     if(i < _size - 1) {
       val result = array.clone
@@ -39,7 +39,7 @@ class MergeQueue(initialSize: Int = 1) {
     }
     _size = _size - 1
   }
-  
+
   private def insertElement(range: (Long, Long), i: Int): Unit = {
     ensureSize(_size + 1)
     if(i == _size) {
@@ -53,8 +53,8 @@ class MergeQueue(initialSize: Int = 1) {
     }
     _size += 1
   }
-  
-  
+
+
   /** Ensure that the internal array has at least `n` cells. */
   protected def ensureSize(n: Int) {
     // Use a Long to prevent overflows
@@ -66,15 +66,15 @@ class MergeQueue(initialSize: Int = 1) {
       }
       // Clamp newSize to Int.MaxValue
       if (newSize > Int.MaxValue) newSize = Int.MaxValue
-      
+
       val newArray: Array[(Long, Long)] = new Array(newSize.toInt)
       scala.compat.Platform.arraycopy(array, 0, newArray, 0, _size)
       array = newArray
     }
   }
-  
+
   val ordering = implicitly[Ordering[(Long, Long)]]
-  
+
   /** Inserts a single range into the priority queue.
    *
    * @param range the element to insert.
@@ -86,17 +86,17 @@ class MergeQueue(initialSize: Int = 1) {
     val i = -(res + 1)
     var (thisStart, thisEnd) = range
     var removeLeft = false
-    
+
     var removeRight = false
    var rightRemainder: Option[(Long, Long)] = None
-    
+
    // Look at the left range
    if(i != 0) {
      val (prevStart, prevEnd) = array(i - 1)
      if(prevStart == thisStart) {
        removeLeft = true
      }
-      if (prevEnd + 1 >= thisStart) { 
+      if (prevEnd + 1 >= thisStart) {
        removeLeft = true
        thisStart = prevStart
        if(prevEnd > thisEnd) {
@@ -104,7 +104,7 @@ class MergeQueue(initialSize: Int = 1) {
        }
      }
    }
-    
+
    // Look at the right range
    if(i < _size && _size > 0) {
      val (nextStart, nextEnd) = array(i)
@@ -123,8 +123,8 @@ class MergeQueue(initialSize: Int = 1) {
        }
      }
    }
-    
-    if(removeRight) { 
+
+    if(removeRight) {
      if(!removeLeft) {
        array(i) = (thisStart, thisEnd)
      } else {
@@ -136,14 +136,14 @@ class MergeQueue(initialSize: Int = 1) {
      } else {
        insertElement(range, i)
      }
-      
+
      rightRemainder match {
        case Some(r) => this += r
        case None =>
      }
    }
  }
-  
+
  def toSeq: Seq[(Long, Long)] = {
    val result = Array.ofDim[(Long, Long)](size)
    System.arraycopy(array, 0, result, 0, size)
diff --git a/spark/src/main/scala/geotrellis/spark/io/index/geowave/GeowaveSpatialKeyIndex.scala b/spark/src/main/scala/geotrellis/spark/io/index/geowave/GeowaveSpatialKeyIndex.scala
new file mode 100644
index 0000000000..1e9edea2f2
--- /dev/null
+++ b/spark/src/main/scala/geotrellis/spark/io/index/geowave/GeowaveSpatialKeyIndex.scala
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2017 Azavea
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package geotrellis.spark.io.index.geowave
+
+import geotrellis.spark._
+import geotrellis.spark.io.index.KeyIndex
+
+import mil.nga.giat.geowave.core.index._
+import mil.nga.giat.geowave.core.index.dimension._
+import mil.nga.giat.geowave.core.index.sfc.data.{ BasicNumericDataset, NumericRange }
+import mil.nga.giat.geowave.core.index.sfc.SFCDimensionDefinition
+import mil.nga.giat.geowave.core.index.sfc.SFCFactory.SFCType
+import mil.nga.giat.geowave.core.index.sfc.tiered.{ TieredSFCIndexFactory, TieredSFCIndexStrategy }
+
+import scala.collection.JavaConverters._
+
+
+object GeowaveSpatialKeyIndex {
+  def apply(minKey: SpatialKey, maxKey: SpatialKey, spatialResolution: Int): GeowaveSpatialKeyIndex =
+    apply(new KeyBounds(minKey, maxKey), spatialResolution)
+
+  def apply(keyBounds: KeyBounds[SpatialKey], spatialResolution: Int): GeowaveSpatialKeyIndex =
+    apply(keyBounds, spatialResolution, spatialResolution)
+
+  def apply(keyBounds: KeyBounds[SpatialKey], xResolution: Int, yResolution: Int): GeowaveSpatialKeyIndex =
+    new GeowaveSpatialKeyIndex(keyBounds, xResolution, yResolution)
+}
+
+/**
+ * Class that provides spatial indexing using the GeoWave indexing
+ * machinery.
+ *
+ * @param keyBounds The bounds over which the index is valid
+ * @param xResolution The number of bits of resolution requested/required by the x-axis
+ * @param yResolution The number of bits of resolution requested/required by the y-axis
+ * @author James McClain
+ */
+class GeowaveSpatialKeyIndex(val keyBounds: KeyBounds[SpatialKey], val xResolution: Int, val yResolution: Int) extends KeyIndex[SpatialKey] {
+
+  val maxRangeDecomposition = 5000
+
+  val KeyBounds(SpatialKey(minCol, minRow), SpatialKey(maxCol, maxRow)) = keyBounds
+  @transient lazy val dim1 = new SFCDimensionDefinition(new BasicDimensionDefinition(minCol, maxCol), xResolution)
+  @transient lazy val dim2 = new SFCDimensionDefinition(new BasicDimensionDefinition(minRow, maxRow), yResolution)
+  @transient lazy val dimensions: Array[SFCDimensionDefinition] = Array(dim1, dim2)
+  @transient lazy val strategy: TieredSFCIndexStrategy = TieredSFCIndexFactory.createSingleTierStrategy(dimensions, SFCType.HILBERT)
+
+  // Id byte arrays appear to be big endian; mask each byte to avoid sign extension
+  private def idToLong(id: Array[Byte]): Long = {
+    id.take(8).foldLeft(0L)({ (accumulator, value) => (accumulator << 8) | (value & 0xFFL) })
+  }
+
+  // ASSUMED to be used for insertion
+  def toIndex(key: SpatialKey): Long = {
+
+    val SpatialKey(col, row) = key
+    val range1 = new NumericRange(col, col) // XXX
+    val range2 = new NumericRange(row, row) // XXX
+    val multiRange = new BasicNumericDataset(Array(range1, range2))
+    val insertionIds = strategy.getInsertionIds(multiRange)
+
+    assert(insertionIds.size() == 1)
+    idToLong(insertionIds.get(0).getBytes())
+  }
+
+  // ASSUMED to be used for queries
+  def indexRanges(keyRange: (SpatialKey, SpatialKey)): Seq[(Long, Long)] = {
+    val (SpatialKey(col1, row1), SpatialKey(col2, row2)) = keyRange
+    val minCol = math.min(col1, col2)
+    val maxCol = math.max(col1, col2)
+    val minRow = math.min(row1, row2)
+    val maxRow = math.max(row1, row2)
+
+    val range1 = new NumericRange(minCol, maxCol) // XXX
+    val range2 = new NumericRange(minRow, maxRow) // XXX
+    val multiRange = new BasicNumericDataset(Array(range1, range2))
+    val queryRanges = strategy.getQueryRanges(multiRange, maxRangeDecomposition)
+
+    queryRanges
+      .asScala
+      .map({ range: ByteArrayRange =>
+        val start = range.getStart()
+        val end = range.getEnd()
+        (idToLong(start.getBytes()), idToLong(end.getBytes()))
+      })
+  }
+}
diff --git a/spark/src/main/scala/geotrellis/spark/io/json/KeyIndexFormats.scala b/spark/src/main/scala/geotrellis/spark/io/json/KeyIndexFormats.scala
index e32eafd798..c19f9d5bb8 100644
--- a/spark/src/main/scala/geotrellis/spark/io/json/KeyIndexFormats.scala
+++ b/spark/src/main/scala/geotrellis/spark/io/json/KeyIndexFormats.scala
@@ -18,6 +18,7 @@ package geotrellis.spark.io.json
 
 import geotrellis.spark._
 import geotrellis.spark.io.index._
+import geotrellis.spark.io.index.geowave._
 import geotrellis.spark.io.index.hilbert._
 import geotrellis.spark.io.index.rowmajor._
 import geotrellis.spark.io.index.zcurve._
@@ -87,6 +88,7 @@ trait KeyIndexFormats {
     entryRegistry register KeyIndexFormatEntry[SpatialKey, HilbertSpatialKeyIndex](HilbertSpatialKeyIndexFormat.TYPE_NAME)
     entryRegistry register KeyIndexFormatEntry[SpatialKey, ZSpatialKeyIndex](ZSpatialKeyIndexFormat.TYPE_NAME)
     entryRegistry register KeyIndexFormatEntry[SpatialKey, RowMajorSpatialKeyIndex](RowMajorSpatialKeyIndexFormat.TYPE_NAME)
+    entryRegistry register KeyIndexFormatEntry[SpatialKey, GeowaveSpatialKeyIndex](GeowaveSpatialKeyIndexFormat.TYPE_NAME)
     entryRegistry register KeyIndexFormatEntry[SpaceTimeKey, HilbertSpaceTimeKeyIndex](HilbertSpaceTimeKeyIndexFormat.TYPE_NAME)
     entryRegistry register KeyIndexFormatEntry[SpaceTimeKey, ZSpaceTimeKeyIndex](ZSpaceTimeKeyIndexFormat.TYPE_NAME)
@@ -122,6 +124,42 @@ trait KeyIndexFormats {
   implicit def keyIndexJsonFormat[K: ClassTag]: RootJsonFormat[KeyIndex[K]] =
     KeyIndexJsonFormatFactory.getKeyIndexJsonFormat[K]
 
+  implicit object GeowaveSpatialKeyIndexFormat extends RootJsonFormat[GeowaveSpatialKeyIndex] {
+    final def TYPE_NAME = "geowave"
+
+    def write(obj: GeowaveSpatialKeyIndex): JsValue =
+      JsObject(
+        "type" -> JsString(TYPE_NAME),
+        "properties" -> JsObject(
+          "keyBounds" -> obj.keyBounds.toJson,
+          "xResolution" -> obj.xResolution.toJson,
+          "yResolution" -> obj.yResolution.toJson
+        )
+      )
+
+    def read(value: JsValue): GeowaveSpatialKeyIndex =
+      value.asJsObject.getFields("type", "properties") match {
+        case Seq(JsString(typeName), properties) => {
+          if (typeName != TYPE_NAME)
+            throw new DeserializationException(s"Wrong KeyIndex type: ${TYPE_NAME} expected.")
+          properties.convertTo[JsObject]
+            .getFields("keyBounds", "xResolution", "yResolution") match {
+              case Seq(kb, xr, yr) =>
+                GeowaveSpatialKeyIndex(
+                  kb.convertTo[KeyBounds[SpatialKey]],
+                  xr.convertTo[Int],
+                  yr.convertTo[Int]
+                )
+              case _ =>
+                throw new DeserializationException(
+                  "Wrong KeyIndex constructor arguments: GeowaveSpatialKeyIndex constructor arguments expected.")
+            }
+        }
+        case _ =>
+          throw new DeserializationException("Wrong KeyIndex type: GeowaveSpatialKeyIndex expected.")
+      }
+  }
+
   implicit object HilbertSpatialKeyIndexFormat extends RootJsonFormat[HilbertSpatialKeyIndex] {
     final def TYPE_NAME = "hilbert"
diff --git a/spark/src/main/scala/geotrellis/spark/io/kryo/KryoRegistrator.scala b/spark/src/main/scala/geotrellis/spark/io/kryo/KryoRegistrator.scala
index 3ba8b57c1b..436046694a 100644
--- a/spark/src/main/scala/geotrellis/spark/io/kryo/KryoRegistrator.scala
+++ b/spark/src/main/scala/geotrellis/spark/io/kryo/KryoRegistrator.scala
@@ -130,11 +130,12 @@ class KryoRegistrator extends SparkKryoRegistrator {
     kryo.register(classOf[geotrellis.spark.SpatialKey])
     kryo.register(classOf[geotrellis.spark.SpaceTimeKey])
 
+    kryo.register(classOf[geotrellis.spark.io.index.geowave.GeowaveSpatialKeyIndex])
+    kryo.register(classOf[geotrellis.spark.io.index.hilbert.HilbertSpaceTimeKeyIndex])
+    kryo.register(classOf[geotrellis.spark.io.index.hilbert.HilbertSpatialKeyIndex])
     kryo.register(classOf[geotrellis.spark.io.index.rowmajor.RowMajorSpatialKeyIndex])
-    kryo.register(classOf[geotrellis.spark.io.index.zcurve.ZSpatialKeyIndex])
     kryo.register(classOf[geotrellis.spark.io.index.zcurve.ZSpaceTimeKeyIndex])
-    kryo.register(classOf[geotrellis.spark.io.index.hilbert.HilbertSpatialKeyIndex])
-    kryo.register(classOf[geotrellis.spark.io.index.hilbert.HilbertSpaceTimeKeyIndex])
+    kryo.register(classOf[geotrellis.spark.io.index.zcurve.ZSpatialKeyIndex])
     kryo.register(classOf[geotrellis.vector.ProjectedExtent])
     kryo.register(classOf[geotrellis.vector.Extent])
     kryo.register(classOf[geotrellis.proj4.CRS])
diff --git a/vectortile/build.sbt b/vectortile/build.sbt
index 5c303e9186..dfc0ae80b4 100644
--- a/vectortile/build.sbt
+++ b/vectortile/build.sbt
@@ -4,5 +4,5 @@ name := "geotrellis-vectortile"
 
 libraryDependencies ++= Seq(
   scalatest % "test",
-  "com.trueaccord.scalapb" %% "scalapb-runtime" % "0.6.0-pre1"
+  "com.trueaccord.scalapb" %% "scalapb-runtime" % "0.6.0-pre4"
 )
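Usage sketch (not part of the patch): the snippet below shows how the new index might be exercised, both directly and through the implicit conversion on the companion object. The key bounds, resolutions, and the `GeowaveIndexSketch` object name are illustrative, and it assumes the patched geotrellis-spark build, with its sfcurve-geowave-index snapshot dependency resolvable, is on the classpath.

```scala
import geotrellis.spark._
import geotrellis.spark.io.index.{ GeowaveKeyIndexMethod, KeyIndex, KeyIndexMethod }
import geotrellis.spark.io.index.geowave.GeowaveSpatialKeyIndex

// Hypothetical driver exercising the new GeoWave-backed key index.
object GeowaveIndexSketch {
  def main(args: Array[String]): Unit = {
    // An illustrative 512x512 tile grid: 9 bits of resolution per axis.
    val keyBounds = KeyBounds(SpatialKey(0, 0), SpatialKey(511, 511))
    val index = GeowaveSpatialKeyIndex(keyBounds, spatialResolution = 9)

    // Map a single key onto the one-dimensional index.
    val i: Long = index.toIndex(SpatialKey(17, 42))

    // Decompose a rectangular query into contiguous index ranges,
    // bounded above by maxRangeDecomposition.
    val ranges: Seq[(Long, Long)] =
      index.indexRanges((SpatialKey(0, 0), SpatialKey(127, 127)))

    // The implicit conversion on the companion object lets it stand in
    // wherever a KeyIndexMethod[SpatialKey] is expected (e.g. in a
    // LayerWriter.write call); per-axis bit resolutions are then
    // derived from the key bounds.
    val method: KeyIndexMethod[SpatialKey] = GeowaveKeyIndexMethod
    val derived: KeyIndex[SpatialKey] = method.createIndex(keyBounds)

    println(s"toIndex(17, 42) = $i; ${ranges.size} query ranges; " +
      s"derived index class: ${derived.getClass.getSimpleName}")
  }
}
```

Because the format added to KeyIndexFormats serializes under the "geowave" type name and is registered alongside the existing entries, layers written with this index should round-trip through the attribute store just like the Hilbert, Z-curve, and row-major indices.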