diff --git a/.travis.yml b/.travis.yml index f719942..817e42f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: scala scala: - - 2.10.6 - - 2.11.7 + - 2.10.7 + - 2.11.11 jdk: - oraclejdk8 @@ -15,3 +15,6 @@ before_cache: # Tricks to avoid unnecessary cache updates - find $HOME/.ivy2 -name "ivydata-*.properties" -delete - find $HOME/.sbt -name "*.lock" -delete + +script: + - .travis/build.sh diff --git a/.travis/build.sh b/.travis/build.sh new file mode 100755 index 0000000..6ba9070 --- /dev/null +++ b/.travis/build.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +./sbt "project geowave" test || { exit 1; } +./sbt "project hilbert" test || { exit 1; } +./sbt "project zorder" test || { exit 1; } diff --git a/built.sbt b/build.sbt similarity index 94% rename from built.sbt rename to build.sbt index 43dddc9..8a9f4b8 100644 --- a/built.sbt +++ b/build.sbt @@ -69,8 +69,11 @@ lazy val hilbert: Project = .settings(commonSettings: _*) .dependsOn(api) +lazy val geowave: Project = + Project("geowave", file("geowave")) + .settings(commonSettings: _*) lazy val benchmarks: Project = Project("benchmarks", file("benchmarks")) .settings(commonSettings: _*) - .dependsOn(api, zorder, hilbert) + .dependsOn(api, zorder, hilbert, geowave) diff --git a/geowave/build.sbt b/geowave/build.sbt new file mode 100644 index 0000000..b1129e3 --- /dev/null +++ b/geowave/build.sbt @@ -0,0 +1,12 @@ +import Dependencies._ + +name := "sfcurve-geowave" +libraryDependencies ++= Seq( + jsonLib, + log4j12, + uzaygezen, + junit % "test", + junitIface % "test" +) + +testOptions += Tests.Argument(TestFrameworks.JUnit, "-v") diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TemporalBinningStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TemporalBinningStrategy.java new file mode 100644 index 0000000..2ca96e3 --- /dev/null +++ 
b/geowave/src/main/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TemporalBinningStrategy.java @@ -0,0 +1,511 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.geotime.index.dimension; + +import java.nio.ByteBuffer; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; +import java.util.TimeZone; + +import org.locationtech.sfcurve.geowave.index.FloatCompareUtils; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinValue; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinningStrategy; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * This class is useful for establishing a consistent binning strategy using a + * unit of time. Each bin will then be defined by the boundaries of that unit + * within the timezone given in the constructor. So if the unit is year and the + * data spreads across 2011-2013, the bins will be 2011, 2012, and 2013. The + * unit chosen should represent a much more significant range than the average + * query range (at least 20x larger) for efficiency purposes. 
So if the average + * query is for a 24 hour period, the unit should not be a day, but could be + * perhaps a month or a year (depending on the temporal extent of the dataset). + */ +public class TemporalBinningStrategy implements + BinningStrategy +{ + public static enum Unit { + MINUTE( + Calendar.MINUTE), + HOUR( + Calendar.HOUR_OF_DAY), + DAY( + Calendar.DAY_OF_MONTH), + WEEK( + Calendar.WEEK_OF_YEAR), + MONTH( + Calendar.MONTH), + YEAR( + Calendar.YEAR), + DECADE( + -1); + // java.util.Calendar does not define a field number for decade + // use -1 since that value is unused + + private final int calendarEnum; + + private Unit( + final int calendarEnum ) { + this.calendarEnum = calendarEnum; + } + + public int toCalendarEnum() { + return calendarEnum; + } + + public static Unit getUnit( + final int calendarEnum ) { + for (final Unit u : values()) { + if (u.calendarEnum == calendarEnum) { + return u; + } + } + throw new IllegalArgumentException( + "Calendar enum '" + calendarEnum + "' not found as a valid unit "); + } + + // case-insensitive lookup by constant name; returns null when nothing matches + public static Unit fromString( + final String code ) { + + for (final Unit output : Unit.values()) { + if (output.toString().equalsIgnoreCase( + code)) { + return output; + } + } + + return null; + } + } + + protected static final long MILLIS_PER_DAY = 86400000L; + private static final NumberFormat TWO_DIGIT_NUMBER = NumberFormat.getIntegerInstance(); + static { // configure the shared formatter once at class load instead of on every construction + TWO_DIGIT_NUMBER.setMinimumIntegerDigits(2); + TWO_DIGIT_NUMBER.setMaximumIntegerDigits(2); + } + + private Unit unit; + private String timezone; + + protected TemporalBinningStrategy() {} + + public TemporalBinningStrategy( + final Unit unit ) { + this( + unit, + "GMT"); + } + + public TemporalBinningStrategy( + final Unit unit, + final String timezone ) { + this.unit = unit; + this.timezone = timezone; + } + + @Override + public double getBinMin() { + return 0; + } + + @Override + public double getBinMax() { + return getBinSizeMillis() - 1; + } + 
/** + * Method used to bin a raw date in milliseconds to a binned value of the + * Binning Strategy. + */ + @Override + public BinValue getBinnedValue( + final double value ) { + // convert to a calendar and subtract the epoch for the bin + final Calendar epochCal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); + epochCal.setTimeInMillis((long) value); + setToEpoch(epochCal); + // the value supplies the bin ID, except for DECADE where getBinId only emits the year: + // there the decade's epoch is used so the ID agrees with the one from getNormalizedRanges + final Calendar valueCal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); + valueCal.setTimeInMillis((long) value); + + return new BinValue( + getBinId(unit == Unit.DECADE ? epochCal : valueCal), + valueCal.getTimeInMillis() - epochCal.getTimeInMillis()); + } + + private long getBinSizeMillis() { + long binSizeMillis = MILLIS_PER_DAY; + // use the max possible value for that unit as the bin size + switch (unit) { + case DECADE: + binSizeMillis *= 3653; + break; + case YEAR: + default: + binSizeMillis *= 366; + break; + case MONTH: + binSizeMillis *= 31; + break; + case WEEK: + binSizeMillis *= 7; + break; + case DAY: + break; + case HOUR: + binSizeMillis /= 24; + break; + case MINUTE: + binSizeMillis /= 1440; + break; + } + return binSizeMillis; + + } + + protected void setToEpoch( + final Calendar value ) { + // reset appropriate values to 0 based on the unit + switch (unit) { + case DECADE: + value.set( + Calendar.YEAR, + ((value.get(Calendar.YEAR) / 10) * 10)); + // don't break so that the other fields are also set to the + // minimum + case YEAR: + default: + value.set( + Calendar.MONTH, + value.getActualMinimum(Calendar.MONTH)); + // don't break so that the other fields are also set to the + // minimum + case MONTH: + value.set( + Calendar.DAY_OF_MONTH, + value.getActualMinimum(Calendar.DAY_OF_MONTH)); + // don't break so that the other fields are also set to the + // minimum + case DAY: + value.set( + Calendar.HOUR_OF_DAY, + value.getActualMinimum(Calendar.HOUR_OF_DAY)); + // don't break so that the other fields are also 
set to the + // minimum + case HOUR: + value.set( + Calendar.MINUTE, + value.getActualMinimum(Calendar.MINUTE)); + // don't break so that the other fields are also set to the + // minimum + case MINUTE: + value.set( + Calendar.SECOND, + value.getActualMinimum(Calendar.SECOND)); + value.set( + Calendar.MILLISECOND, + value.getActualMinimum(Calendar.MILLISECOND)); + break; // special handling for week + case WEEK: + value.set( + Calendar.DAY_OF_WEEK, + value.getActualMinimum(Calendar.DAY_OF_WEEK)); + value.set( + Calendar.HOUR_OF_DAY, + value.getActualMinimum(Calendar.HOUR_OF_DAY)); + value.set( + Calendar.MINUTE, + value.getActualMinimum(Calendar.MINUTE)); + value.set( + Calendar.SECOND, + value.getActualMinimum(Calendar.SECOND)); + value.set( + Calendar.MILLISECOND, + value.getActualMinimum(Calendar.MILLISECOND)); + } + } + + @Override + public int getFixedBinIdSize() { + switch (unit) { + case YEAR: + default: + return 4; + case MONTH: + return 7; + case WEEK: + return 7; + case DAY: + return 10; + case HOUR: + return 13; + case MINUTE: + return 16; + } + } + + private byte[] getBinId( + final Calendar value ) { + // this is assuming we want human-readable bin ID's but alternatively we + // could consider returning a more compressed representation + switch (unit) { + case YEAR: + default: + return StringUtils.stringToBinary(Integer.toString(value.get(Calendar.YEAR))); + case MONTH: + return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + TWO_DIGIT_NUMBER + .format(value.get(Calendar.MONTH)))); + case WEEK: + return StringUtils.stringToBinary(Integer.toString(value.get(Calendar.YEAR)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.WEEK_OF_YEAR))); + case DAY: + return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.MONTH)) + "_" + TWO_DIGIT_NUMBER.format(value + .get(Calendar.DAY_OF_MONTH)))); + case HOUR: + return 
StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.MONTH)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.DAY_OF_MONTH)) + "_" + TWO_DIGIT_NUMBER + .format(value.get(Calendar.HOUR_OF_DAY)))); + case MINUTE: + return StringUtils.stringToBinary((Integer.toString(value.get(Calendar.YEAR)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.MONTH)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.DAY_OF_MONTH)) + "_" + + TWO_DIGIT_NUMBER.format(value.get(Calendar.HOUR_OF_DAY)) + "_" + TWO_DIGIT_NUMBER + .format(value.get(Calendar.MINUTE)))); + } + } + + private Calendar getStartEpoch( + final byte[] binId ) { + final String str = StringUtils.stringFromBinary(binId); + final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); + switch (unit) { + case MINUTE: + final int minute = Integer.parseInt(str.substring( + 14, + 16)); + cal.set( + Calendar.MINUTE, + minute); + case HOUR: + final int hour = Integer.parseInt(str.substring( + 11, + 13)); + cal.set( + Calendar.HOUR_OF_DAY, + hour); + case DAY: + final int day = Integer.parseInt(str.substring( + 8, + 10)); + cal.set( + Calendar.DAY_OF_MONTH, + day); + case MONTH: + final int month = Integer.parseInt(str.substring( + 5, + 7)); + cal.set( + Calendar.MONTH, + month); + case YEAR: + default: + final int year = Integer.parseInt(str.substring( + 0, + 4)); + cal.set( + Calendar.YEAR, + year); + break; // do not automatically fall-through to decade parsing + case DECADE: + int decade = Integer.parseInt(str.substring( + 0, + 4)); + decade = (decade / 10) * 10; // int division will truncate ones + cal.set( + Calendar.YEAR, + decade); + break; // special handling for week + case WEEK: + final int yr = Integer.parseInt(str.substring( + 0, + 4)); + cal.set( + Calendar.YEAR, + yr); + final int weekOfYear = Integer.parseInt(str.substring( + 5, + 7)); + cal.set( + Calendar.WEEK_OF_YEAR, + weekOfYear); + break; + } + setToEpoch(cal); + 
return cal; + } + + @Override + public BinRange[] getNormalizedRanges( + final NumericData range ) { + if (range.getMax() < range.getMin()) { + return new BinRange[] {}; + } + final Calendar startEpoch = Calendar.getInstance(TimeZone.getTimeZone(timezone)); + final long binSizeMillis = getBinSizeMillis(); + // initialize the epoch to the range min and then reset appropriate + // values to 0 based on the units + startEpoch.setTimeInMillis((long) range.getMin()); + setToEpoch(startEpoch); + // now make sure all bin definitions between the start and end bins + // are covered + final long startEpochMillis = startEpoch.getTimeInMillis(); + long epochIterator = startEpochMillis; + final List bins = new ArrayList(); + // track this, so that we can easily declare a range to be the full + // extent and use the information to perform a more efficient scan + boolean firstBin = ((long) range.getMin() != startEpochMillis); + boolean lastBin = false; + do { + // because not every year has 366 days, and not every month has 31 + // days we need to reset next epoch to the actual epoch + final Calendar nextEpochCal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); + // set it to a value in the middle of the bin just to be sure (for + // example if the bin size does not get to the next epoch as is + // the case when units are days and the timezone accounts for + // daylight savings time) + nextEpochCal.setTimeInMillis(epochIterator + (long) (binSizeMillis * 1.5)); + setToEpoch(nextEpochCal); + final long nextEpoch = nextEpochCal.getTimeInMillis(); + final long maxOfBin = nextEpoch - 1; + final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(timezone)); + cal.setTimeInMillis(epochIterator); + long startMillis, endMillis; + boolean fullExtent; + if ((long) range.getMax() <= maxOfBin) { + lastBin = true; + endMillis = (long) range.getMax(); + // its questionable whether we use + fullExtent = FloatCompareUtils.checkDoublesEqual( + range.getMax(), + maxOfBin); + } + else { 
+ endMillis = maxOfBin; + fullExtent = !firstBin; + } + + if (firstBin) { + startMillis = (long) range.getMin(); + firstBin = false; + } + else { + startMillis = epochIterator; + } + // we have the millis for range, but to normalize for this bin we + // need to subtract the epoch of the bin + bins.add(new BinRange( + getBinId(cal), + startMillis - epochIterator, + endMillis - epochIterator, + fullExtent)); + epochIterator = nextEpoch; + // iterate until we reach our end epoch + } + while (!lastBin); + return bins.toArray(new BinRange[bins.size()]); + } + + @Override + public byte[] toBinary() { + final byte[] timeZone = StringUtils.stringToBinary(timezone); + final ByteBuffer binary = ByteBuffer.allocate(timeZone.length + 4); // 4-byte unit code + encoded zone bytes (byte count, not char count) + binary.putInt(unit.calendarEnum); + binary.put(timeZone); + return binary.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buffer = ByteBuffer.wrap(bytes); + final int unitCalendarEnum = buffer.getInt(); + final byte[] timeZoneName = new byte[bytes.length - 4]; + buffer.get(timeZoneName); + unit = Unit.getUnit(unitCalendarEnum); + timezone = StringUtils.stringFromBinary(timeZoneName); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + final String className = getClass().getName(); + result = (prime * result) + ((className == null) ? 0 : className.hashCode()); + result = (prime * result) + ((timezone == null) ? 0 : timezone.hashCode()); + result = (prime * result) + ((unit == null) ? 
0 : unit.calendarEnum); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final TemporalBinningStrategy other = (TemporalBinningStrategy) obj; + if (timezone == null) { + if (other.timezone != null) { + return false; + } + } + else if (!timezone.equals(other.timezone)) { + return false; + } + if (unit == null) { + if (other.unit != null) { + return false; + } + } + else if (unit != other.unit) { // enum identity; unlike other.unit.calendarEnum this is safe when other.unit is null + return false; + } + return true; + } + + @Override + public NumericRange getDenormalizedRanges( + final BinRange binnedRange ) { + final Calendar startofEpoch = getStartEpoch(binnedRange.getBinId()); + final long startOfEpochMillis = startofEpoch.getTimeInMillis(); + final long minMillis = startOfEpochMillis + (long) binnedRange.getNormalizedMin(); + final long maxMillis = startOfEpochMillis + (long) binnedRange.getNormalizedMax(); + return new NumericRange( + minMillis, + maxMillis); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TimeDefinition.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TimeDefinition.java new file mode 100644 index 0000000..01eaf01 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TimeDefinition.java @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.geotime.index.dimension; + +import org.locationtech.sfcurve.geowave.geotime.index.dimension.TemporalBinningStrategy.Unit; +import org.locationtech.sfcurve.geowave.index.dimension.UnboundedDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinningStrategy; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * The Time Definition class is a convenience class used to define a dimension + * which is associated with a time dimension. + * + */ +public class TimeDefinition extends + UnboundedDimensionDefinition +{ + protected TimeDefinition() { + super(); + } + + /** + * Constructor used to create a new Unbounded Binning Strategy based upon a + * temporal binning strategy of the unit parameter. The unit can be of DAY, + * MONTH, or YEAR. + * + * @param unit + * an enumeration of temporal units (DAY, MONTH, or YEAR) + */ + public TimeDefinition( + final Unit unit ) { + super( + new TemporalBinningStrategy( + unit)); + + } + + /** + * Constructor used to create a new Unbounded Binning Strategy based upon a + * generic binning strategy. 
+ * + * @param binningStrategy + * a object which defines the bins + */ + public TimeDefinition( + final BinningStrategy binningStrategy ) { + super( + binningStrategy); + } + + @Override + public NumericData getFullRange() { + return new NumericRange( + 0, + System.currentTimeMillis() + 1); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayId.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayId.java new file mode 100644 index 0000000..21d257a --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayId.java @@ -0,0 +1,137 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; +import java.util.Arrays; + + +/** + * This class is a wrapper around a byte array to ensure equals and hashcode + * operations use the values of the bytes rather than explicit object identity + */ +public class ByteArrayId implements + java.io.Serializable, + Comparable +{ + private static final long serialVersionUID = 1L; + private final byte[] id; + private transient String stringId; + + public ByteArrayId( + final byte[] id ) { + this.id = id; + } + + public ByteArrayId( + final String id ) { + this.id = StringUtils.stringToBinary(id); + stringId = id; + } + + public byte[] getBytes() { + return id; + } + + public String getString() { + if (stringId == null) { + stringId = 
StringUtils.stringFromBinary(id); + } + return stringId; + } + + public String getHexString() { + + StringBuffer str = new StringBuffer(); + for (byte b : id) { + str.append(String.format( + "%02X ", + b)); + } + return str.toString(); + } + + @Override + public String toString() { + return "ByteArrayId [getString()=" + getString() + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(id); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final ByteArrayId other = (ByteArrayId) obj; + return Arrays.equals( + id, + other.id); + } + + public static byte[] toBytes( + final ByteArrayId[] ids ) { + int len = 4; + for (ByteArrayId id : ids) { + len += (id.id.length + 4); + } + final ByteBuffer buffer = ByteBuffer.allocate(len); + buffer.putInt(ids.length); + for (ByteArrayId id : ids) { + buffer.putInt(id.id.length); + buffer.put(id.id); + } + return buffer.array(); + } + + public static ByteArrayId[] fromBytes( + byte[] idData ) { + final ByteBuffer buffer = ByteBuffer.wrap(idData); + final int len = buffer.getInt(); + final ByteArrayId[] result = new ByteArrayId[len]; + for (int i = 0; i < len; i++) { + final int idSize = buffer.getInt(); + final byte[] id = new byte[idSize]; + buffer.get(id); + result[i] = new ByteArrayId( + id); + } + return result; + } + + @Override + public int compareTo( + ByteArrayId o ) { + + for (int i = 0, j = 0; i < id.length && j < o.id.length; i++, j++) { + int a = (id[i] & 0xff); + int b = (o.id[j] & 0xff); + if (a != b) { + return a - b; + } + } + return id.length - o.id.length; + + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayRange.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayRange.java new file mode 100644 
index 0000000..1059b30 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayRange.java @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/*** + * Defines a unit interval on a number line + * + */ +public class ByteArrayRange implements + Comparable +{ + protected ByteArrayId start; + protected ByteArrayId end; + protected boolean singleValue; + + /*** + * + * @param start + * start of unit interval + * @param end + * end of unit interval + */ + public ByteArrayRange( + final ByteArrayId start, + final ByteArrayId end ) { + this( + start, + end, + false); + } + + /*** + * + * @param start + * start of unit interval + * @param end + * end of unit interval + */ + public ByteArrayRange( + final ByteArrayId start, + final ByteArrayId end, + boolean singleValue ) { + this.start = start; + this.end = end; + this.singleValue = singleValue; + } + + public ByteArrayId getStart() { + return start; + } + + public ByteArrayId getEnd() { + return end; + } + + public boolean isSingleValue() { + return singleValue; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((end == null) ? 0 : end.hashCode()); + result = prime * result + (singleValue ? 
1231 : 1237); + result = prime * result + ((start == null) ? 0 : start.hashCode()); + return result; + } + + @Override + public boolean equals( + Object obj ) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + ByteArrayRange other = (ByteArrayRange) obj; + if (end == null) { + if (other.end != null) return false; + } + else if (!end.equals(other.end)) return false; + if (singleValue != other.singleValue) return false; + if (start == null) { + if (other.start != null) return false; + } + else if (!start.equals(other.start)) return false; + return true; + } + + public boolean intersects( + ByteArrayRange other ) { + return ((getStart().compareTo(other.getEnd())) <= 0 && (getEnd().compareTo(other.getStart())) >= 0); + } + + public ByteArrayRange intersection( + ByteArrayRange other ) { + return new ByteArrayRange( + this.start.compareTo(other.start) <= 0 ? other.start : this.start, + this.end.compareTo(other.end) >= 0 ? other.end : this.end); + } + + public ByteArrayRange union( + ByteArrayRange other ) { + return new ByteArrayRange( + this.start.compareTo(other.start) <= 0 ? this.start : other.start, + this.end.compareTo(other.end) >= 0 ? this.end : other.end); + } + + @Override + public int compareTo( + ByteArrayRange other ) { + final int diff = getStart().compareTo( + other.getStart()); + return diff != 0 ? diff : getEnd().compareTo( + other.getEnd()); + } + + public static enum MergeOperation { + UNION, + INTERSECTION + } + + public static final List mergeIntersections( + List ranges, + MergeOperation op ) { + // sort order so the first range can consume following ranges + Collections. 
sort(ranges); + final List result = new ArrayList(); + for (int i = 0; i < ranges.size();) { + ByteArrayRange r1 = ranges.get(i); + int j = i + 1; + for (; j < ranges.size(); j++) { + final ByteArrayRange r2 = ranges.get(j); + if (r1.intersects(r2)) { + if (op.equals(MergeOperation.UNION)) { + r1 = r1.union(r2); + } + else { + r1 = r1.intersection(r2); + } + } + else { + break; + } + } + i = j; + result.add(r1); + } + return result; + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayUtils.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayUtils.java new file mode 100644 index 0000000..37f72e6 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/ByteArrayUtils.java @@ -0,0 +1,234 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; +import java.util.UUID; + +import org.apache.commons.lang3.tuple.Pair; + +import com.google.common.base.Preconditions; +import com.google.common.io.BaseEncoding; + +/** + * Convenience methods for converting binary data to and from strings. The + * encoding and decoding is done in base-64. These methods should be used for + * converting data that is binary in nature to a String representation for + * transport. Use StringUtils for serializing and deserializing text-based data. 
+ * + * Additionally, this class has methods for manipulating byte arrays, such as + * combining or incrementing them. + */ +public class ByteArrayUtils +{ + + private static byte[] internalCombineArrays( + final byte[] beginning, + final byte[] end ) { + final byte[] combined = new byte[beginning.length + end.length]; + System.arraycopy( + beginning, + 0, + combined, + 0, + beginning.length); + System.arraycopy( + end, + 0, + combined, + beginning.length, + end.length); + return combined; + } + + /** + * Convert binary data to a string for transport + * + * @param byteArray + * the binary data + * @return the base64url encoded string + */ + public static String byteArrayToString( + final byte[] byteArray ) { + return BaseEncoding.base64Url().encode( + byteArray); + } + + /** + * Convert a string representation of binary data back to a String + * + * @param str + * the string representation of binary data + * @return the base64url decoded binary data + */ + public static byte[] byteArrayFromString( + final String str ) { + return BaseEncoding.base64Url().decode( + str); + } + + /** + * Combine 2 arrays into one large array. 
If both are not null it will + * append id2 to id1 and the result will be of length id1.length + + * id2.length + * + * @param id1 + * the first byte array to use (the start of the result) + * @param id2 + * the second byte array to combine (appended to id1) + * @return the concatenated byte array + */ + public static byte[] combineArrays( + final byte[] id1, + final byte[] id2 ) { + byte[] combinedId; + if (id1 == null || id1.length == 0) { + combinedId = id2; + } + else if (id2 == null || id2.length == 0) { + combinedId = id1; + } + else { + // concatenate bin ID 2 to the end of bin ID 1 + combinedId = ByteArrayUtils.internalCombineArrays( + id1, + id2); + } + return combinedId; + } + + /** + * add 1 to the least significant bit in this byte array (the last byte in + * the array), carrying into earlier bytes; the array is modified in place + * + * @param value + * the array to increment + * @return true if the value did not overflow; false on overflow or for an empty array + */ + public static boolean increment( + final byte[] value ) { + for (int i = value.length - 1; i >= 0; i--) { + value[i]++; + if (value[i] != 0) { + return true; + } + } + return false; // every byte wrapped to 0 (or there were no bytes): overflow + } + + /** + * Converts a UUID to a byte array + * + * @param uuid + * the uuid + * @return the byte array representing that UUID + */ + public static byte[] uuidToByteArray( + final UUID uuid ) { + final ByteBuffer bb = ByteBuffer.wrap(new byte[16]); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + return bb.array(); + } + + /** + * Converts a long to a byte array + * + * @param l + * the long + * @return the byte array representing that long + */ + public static byte[] longToByteArray( + final long l ) { + ByteBuffer bb = ByteBuffer.allocate(Long.BYTES); + bb.putLong(l); + return bb.array(); + } + + /** + * Converts a byte array to a long + * + * @param bytes + * the byte array the long + * @return the long represented by the byte array + */ + public static long byteArrayToLong( + byte[] bytes ) { + ByteBuffer bb = 
ByteBuffer.allocate(Long.BYTES); + bb.put(bytes); + bb.flip(); + return bb.getLong(); + } + + /** + * Combines two variable length byte arrays into one large byte array and + * appends the length of each individual byte array in sequential order at + * the end of the combined byte array. + * + * Given byte_array_1 of length 8 + byte_array_2 of length 16, the result + * will be byte_array1 + byte_array_2 + 8 + 16. + * + * Lengths are put after the individual arrays so they don't impact sorting + * when used within the key of a sorted key-value data store. + * + * @param array1 + * the first byte array (non-null, at least one byte) + * @param array2 + * the second byte array (non-null, at least one byte) + * @return the combined byte array including the individual byte array + * lengths + */ + public static byte[] combineVariableLengthArrays( + final byte[] array1, + final byte[] array2 ) { + Preconditions.checkNotNull( + array1, + "First byte array cannot be null"); + Preconditions.checkNotNull( + array2, + "Second byte array cannot be null"); + Preconditions.checkArgument( + array1.length > 0, + "First byte array cannot have length 0"); + Preconditions.checkArgument( + array2.length > 0, + "Second byte array cannot have length 0"); + final byte[] combinedWithoutLengths = ByteArrayUtils.internalCombineArrays( + array1, + array2); + final ByteBuffer combinedWithLengthsAppended = ByteBuffer.allocate(combinedWithoutLengths.length + 8); // 8 + // for + // two + // integer + // lengths + combinedWithLengthsAppended.put(combinedWithoutLengths); + combinedWithLengthsAppended.putInt(array1.length); + combinedWithLengthsAppended.putInt(array2.length); + return combinedWithLengthsAppended.array(); + } + + public static Pair splitVariableLengthArrays( + final byte[] combinedArray ) { + final ByteBuffer combined = ByteBuffer.wrap(combinedArray); + final byte[] combinedArrays = new byte[combinedArray.length - 8]; + combined.get(combinedArrays); + final ByteBuffer bb = ByteBuffer.wrap(combinedArrays); + final int len1 = combined.getInt(); 
+ final int len2 = combined.getInt(); + final byte[] part1 = new byte[len1]; + final byte[] part2 = new byte[len2]; + bb.get(part1); + bb.get(part2); + return Pair.of( + part1, + part2); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/CompoundIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/CompoundIndexStrategy.java new file mode 100644 index 0000000..e38f8ce --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/CompoundIndexStrategy.java @@ -0,0 +1,721 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +import net.sf.json.JSONException; +import net.sf.json.JSONObject; + +import com.google.common.base.Function; +import com.google.common.collect.Lists; + +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; + +/** + * Class that implements a compound index strategy. 
It's a wrapper around two + * NumericIndexStrategy objects that can externally be treated as a + * multi-dimensional NumericIndexStrategy. + * + * Each of the 'wrapped' strategies cannot share the same dimension definition. + * + */ +public class CompoundIndexStrategy implements + NumericIndexStrategy +{ + + private NumericIndexStrategy subStrategy1; + private NumericIndexStrategy subStrategy2; + private NumericDimensionDefinition[] baseDefinitions; + private double[] highestPrecision; + private int[] strategy1Mappings; + private int[] strategy2Mappings; + private int defaultNumberOfRanges; + private int metaDataSplit = -1; + + public CompoundIndexStrategy( + final NumericIndexStrategy subStrategy1, + final NumericIndexStrategy subStrategy2 ) { + this.subStrategy1 = subStrategy1; + this.subStrategy2 = subStrategy2; + init(); + defaultNumberOfRanges = (int) Math.ceil(Math.pow( + 2, + getNumberOfDimensions())); + } + + protected CompoundIndexStrategy() {} + + public NumericIndexStrategy[] getSubStrategies() { + return new NumericIndexStrategy[] { + subStrategy1, + subStrategy2 + }; + } + + public NumericIndexStrategy getPrimarySubStrategy() { + return subStrategy1; + } + + public NumericIndexStrategy getSecondarySubStrategy() { + return subStrategy2; + } + + @Override + public byte[] toBinary() { + final byte[] delegateBinary1 = PersistenceUtils.toBinary(subStrategy1); + final byte[] delegateBinary2 = PersistenceUtils.toBinary(subStrategy2); + final ByteBuffer buf = ByteBuffer.allocate(4 + delegateBinary1.length + delegateBinary2.length); + buf.putInt(delegateBinary1.length); + buf.put(delegateBinary1); + buf.put(delegateBinary2); + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int delegateBinary1Length = buf.getInt(); + final byte[] delegateBinary1 = new byte[delegateBinary1Length]; + buf.get(delegateBinary1); + final byte[] delegateBinary2 = new byte[bytes.length - 
delegateBinary1Length - 4]; + buf.get(delegateBinary2); + subStrategy1 = PersistenceUtils.fromBinary( + delegateBinary1, + NumericIndexStrategy.class); + subStrategy2 = PersistenceUtils.fromBinary( + delegateBinary2, + NumericIndexStrategy.class); + init(); + defaultNumberOfRanges = (int) Math.ceil(Math.pow( + 2, + getNumberOfDimensions())); + } + + /** + * Get the number of dimensions of each sub-strategy + * + * @return an array with the number of dimensions for each sub-strategy + */ + public int[] getNumberOfDimensionsPerIndexStrategy() { + return new int[] { + subStrategy1.getOrderedDimensionDefinitions().length, + subStrategy2.getOrderedDimensionDefinitions().length + }; + } + + /** + * Get the total number of dimensions from all sub-strategies + * + * @return the number of dimensions + */ + public int getNumberOfDimensions() { + return baseDefinitions.length; + } + + /** + * Create a compound ByteArrayId + * + * @param id1 + * ByteArrayId for the first sub-strategy + * @param id2 + * ByteArrayId for the second sub-strategy + * @return the ByteArrayId for the compound strategy + */ + public ByteArrayId composeByteArrayId( + final ByteArrayId id1, + final ByteArrayId id2 ) { + final byte[] bytes = new byte[id1.getBytes().length + id2.getBytes().length + 4]; + final ByteBuffer buf = ByteBuffer.wrap(bytes); + buf.put(id1.getBytes()); + buf.put(id2.getBytes()); + buf.putInt(id1.getBytes().length); + return new ByteArrayId( + bytes); + } + + /** + * Get the ByteArrayId for each sub-strategy from the ByteArrayId for the + * compound index strategy + * + * @param id + * the compound ByteArrayId + * @return the ByteArrayId for each sub-strategy + */ + public ByteArrayId[] decomposeByteArrayId( + final ByteArrayId id ) { + final ByteBuffer buf = ByteBuffer.wrap(id.getBytes()); + final int id1Length = buf.getInt(id.getBytes().length - 4); + final byte[] bytes1 = new byte[id1Length]; + final byte[] bytes2 = new byte[id.getBytes().length - id1Length - 4]; + 
buf.get(bytes1); + buf.get(bytes2); + return new ByteArrayId[] { + new ByteArrayId( + bytes1), + new ByteArrayId( + bytes2) + }; + } + + private List composeByteArrayIds( + final List ids1, + final List ids2 ) { + final List ids = new ArrayList<>( + ids1.size() * ids2.size()); + for (final ByteArrayId id1 : ids1) { + for (final ByteArrayId id2 : ids2) { + ids.add(composeByteArrayId( + id1, + id2)); + } + } + return ids; + } + + private ByteArrayRange composeByteArrayRange( + final ByteArrayRange rangeOfStrategy1, + final ByteArrayRange rangeOfStrategy2 ) { + final ByteArrayId start = composeByteArrayId( + rangeOfStrategy1.getStart(), + rangeOfStrategy2.getStart()); + final ByteArrayId end = composeByteArrayId( + rangeOfStrategy1.getEnd(), + rangeOfStrategy2.getEnd()); + return new ByteArrayRange( + start, + end); + } + + private List getByteArrayRanges( + final List ranges1, + final List ranges2 ) { + final List ranges = new ArrayList<>( + ranges1.size() * ranges2.size()); + for (final ByteArrayRange range1 : ranges1) { + for (final ByteArrayRange range2 : ranges2) { + final ByteArrayRange range = composeByteArrayRange( + range1, + range2); + ranges.add(range); + } + } + return ranges; + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return getQueryRanges( + indexedRange, + -1, + hints); + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxEstimatedRangeDecomposition, + final IndexMetaData... 
hints ) { + final MultiDimensionalNumericData[] ranges = getRangesForIndexedRange(indexedRange); + final List rangeForStrategy1; + final List rangeForStrategy2; + if (maxEstimatedRangeDecomposition < 1) { + rangeForStrategy1 = subStrategy1.getQueryRanges( + ranges[0], + extractHints( + hints, + 0)); + rangeForStrategy2 = subStrategy2.getQueryRanges( + ranges[1], + extractHints( + hints, + 1)); + } + else { + // for partitioning it works alright to just use permute ranges from + // both sub-strategies but in general this could be too much + + // final int maxEstRangeDecompositionPerStrategy = (int) Math.ceil( + // Math.sqrt( + // maxEstimatedRangeDecomposition)); + rangeForStrategy1 = subStrategy1.getQueryRanges( + ranges[0], + maxEstimatedRangeDecomposition, + extractHints( + hints, + 0)); + // final int maxEstRangeDecompositionStrategy2 = + // maxEstimatedRangeDecomposition / rangeForStrategy1.size(); + rangeForStrategy2 = subStrategy2.getQueryRanges( + ranges[1], + maxEstimatedRangeDecomposition, + extractHints( + hints, + 1)); + } + final List range = getByteArrayRanges( + rangeForStrategy1, + rangeForStrategy2); + return range; + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + return getInsertionIds( + indexedData, + defaultNumberOfRanges); + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxEstimatedDuplicateIds ) { + final int maxEstDuplicatesPerStrategy = (int) Math.sqrt(maxEstimatedDuplicateIds); + final MultiDimensionalNumericData[] ranges = getRangesForIndexedRange(indexedData); + final List rangeForStrategy1 = subStrategy1.getInsertionIds( + ranges[0], + maxEstDuplicatesPerStrategy); + final int maxEstDuplicatesStrategy2 = maxEstimatedDuplicateIds / Math.max(1, rangeForStrategy1.size()); // avoid division by zero when sub-strategy 1 yields no ids + final List rangeForStrategy2 = subStrategy2.getInsertionIds( + ranges[1], + maxEstDuplicatesStrategy2); + final List range = composeByteArrayIds( + rangeForStrategy1, +
rangeForStrategy2); + return range; + } + + private MultiDimensionalNumericData[] getRangesForId( + final ByteArrayId insertionId ) { + final ByteArrayId[] insertionIds = decomposeByteArrayId(insertionId); + return new MultiDimensionalNumericData[] { + subStrategy1.getRangeForId(insertionIds[0]), + subStrategy2.getRangeForId(insertionIds[1]) + }; + } + + private MultiDimensionalNumericData[] getRangesForIndexedRange( + final MultiDimensionalNumericData indexedRange ) { + final NumericData[] datasets = indexedRange.getDataPerDimension(); + + final int[] numDimensionsPerStrategy = getNumberOfDimensionsPerIndexStrategy(); + + final NumericData[] datasetForStrategy1 = new NumericData[numDimensionsPerStrategy[0]]; + final NumericData[] datasetForStrategy2 = new NumericData[numDimensionsPerStrategy[1]]; + for (int i = 0; i < datasets.length; i++) { + if (strategy1Mappings[i] >= 0) { + datasetForStrategy1[strategy1Mappings[i]] = datasets[i]; + } + if (strategy2Mappings[i] >= 0) { + datasetForStrategy2[strategy2Mappings[i]] = datasets[i]; + } + } + return new MultiDimensionalNumericData[] { + new BasicNumericDataset( + datasetForStrategy1), + new BasicNumericDataset( + datasetForStrategy2) + }; + } + + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + final MultiDimensionalNumericData[] rangesForId = getRangesForId(insertionId); + final NumericData[] data1 = rangesForId[0].getDataPerDimension(); + final NumericData[] data2 = rangesForId[1].getDataPerDimension(); + final NumericData[] dataPerDimension = new NumericData[baseDefinitions.length]; + for (int i = 0; i < dataPerDimension.length; i++) { + if (strategy1Mappings[i] >= 0) { + dataPerDimension[i] = data1[strategy1Mappings[i]]; + } + if (strategy2Mappings[i] >= 0) { + dataPerDimension[i] = data2[strategy2Mappings[i]]; + } + } + return new BasicNumericDataset( + dataPerDimension); + } + + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + final 
ByteArrayId insertionId ) { + final ByteArrayId[] insertionIds = decomposeByteArrayId(insertionId); + final MultiDimensionalCoordinates coordinates1 = subStrategy1.getCoordinatesPerDimension(insertionIds[0]); + final MultiDimensionalCoordinates coordinates2 = subStrategy2.getCoordinatesPerDimension(insertionIds[1]); + final Coordinate[] coordinates = new Coordinate[baseDefinitions.length]; + for (int i = 0; i < baseDefinitions.length; i++) { + if (strategy1Mappings[i] >= 0) { + coordinates[i] = coordinates1.getCoordinate(strategy1Mappings[i]); + } + if (strategy2Mappings[i] >= 0) { + coordinates[i] = coordinates2.getCoordinate(strategy2Mappings[i]); + } + } + return new MultiDimensionalCoordinates( + ByteArrayUtils.combineArrays( + coordinates1.getMultiDimensionalId(), + coordinates2.getMultiDimensionalId()), + coordinates); + } + + private void init() { + final NumericDimensionDefinition[] strategy1Definitions = subStrategy1.getOrderedDimensionDefinitions(); + final NumericDimensionDefinition[] strategy2Definitions = subStrategy2.getOrderedDimensionDefinitions(); + final double[] strategy1HighestPrecision = subStrategy1.getHighestPrecisionIdRangePerDimension(); + final double[] strategy2HighestPrecision = subStrategy2.getHighestPrecisionIdRangePerDimension(); + + final List definitions = new ArrayList( + strategy1Definitions.length + strategy2Definitions.length); + final double precision[] = new double[strategy1Definitions.length + strategy2Definitions.length]; + strategy1Mappings = new int[precision.length]; + strategy2Mappings = new int[precision.length]; + + int dimsPosition = 0; + for (final NumericDimensionDefinition definition : strategy1Definitions) { + definitions.add(definition); + strategy1Mappings[dimsPosition] = dimsPosition; + strategy2Mappings[dimsPosition] = -1; + precision[dimsPosition] = strategy1HighestPrecision[dimsPosition]; + dimsPosition++; + } + int twosDefsPosition = 0; + for (final NumericDimensionDefinition definition : 
strategy2Definitions) { + final int pos = definitions.indexOf(definition); + if (pos >= 0) { + strategy2Mappings[pos] = twosDefsPosition; + precision[pos] = Math.max( + precision[pos], + strategy2HighestPrecision[twosDefsPosition]); + } + else { + strategy2Mappings[dimsPosition] = twosDefsPosition; + strategy1Mappings[dimsPosition] = -1; + definitions.add(definition); + precision[dimsPosition] = strategy2HighestPrecision[twosDefsPosition]; + dimsPosition++; + } + twosDefsPosition++; + } + + baseDefinitions = definitions.toArray(new NumericDimensionDefinition[definitions.size()]); + highestPrecision = Arrays.copyOfRange( + precision, + 0, + baseDefinitions.length); + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + return baseDefinitions; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(baseDefinitions); + result = (prime * result) + defaultNumberOfRanges; + result = (prime * result) + ((subStrategy1 == null) ? 0 : subStrategy1.hashCode()); + result = (prime * result) + ((subStrategy2 == null) ? 
0 : subStrategy2.hashCode()); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CompoundIndexStrategy other = (CompoundIndexStrategy) obj; + if (!Arrays.equals( + baseDefinitions, + other.baseDefinitions)) { + return false; + } + if (defaultNumberOfRanges != other.defaultNumberOfRanges) { + return false; + } + if (subStrategy1 == null) { + if (other.subStrategy1 != null) { + return false; + } + } + else if (!subStrategy1.equals(other.subStrategy1)) { + return false; + } + if (subStrategy2 == null) { + if (other.subStrategy2 != null) { + return false; + } + } + else if (!subStrategy2.equals(other.subStrategy2)) { + return false; + } + return true; + } + + @Override + public String getId() { + return StringUtils.intToString(hashCode()); + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + return highestPrecision; + } + + @Override + public Set getNaturalSplits() { + // because substrategy one is prefixing substrategy2, just use the + // splits associated with substrategy1 + return subStrategy1.getNaturalSplits(); + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + // TODO: this only makes sense if substrategy 1 contributes no + // dimensional index component + return subStrategy1.getByteOffsetFromDimensionalIndex() + subStrategy2.getByteOffsetFromDimensionalIndex(); + } + + @Override + public List createMetaData() { + final List result = new ArrayList(); + for (final IndexMetaData metaData : subStrategy1.createMetaData()) { + result.add(new CompoundIndexMetaDataWrapper( + metaData, + 0)); + } + metaDataSplit = result.size(); + for (final IndexMetaData metaData : subStrategy2.createMetaData()) { + result.add(new CompoundIndexMetaDataWrapper( + metaData, + 1)); + } + return result; + } + + private int getMetaDataSplit() { + if (metaDataSplit == 
-1) { + metaDataSplit = subStrategy1.createMetaData().size(); + } + return metaDataSplit; + } + + private IndexMetaData[] extractHints( + final IndexMetaData[] hints, + final int indexNo ) { + if ((hints == null) || (hints.length == 0)) { + return hints; + } + final int splitPoint = getMetaDataSplit(); + final int start = (indexNo == 0) ? 0 : splitPoint; + final int stop = (indexNo == 0) ? splitPoint : hints.length; + final IndexMetaData[] result = new IndexMetaData[stop - start]; + int p = 0; + for (int i = start; i < stop; i++) { + result[p++] = ((CompoundIndexMetaDataWrapper) hints[i]).metaData; + } + return result; + } + + /** + * Get the ByteArrayId of a single sub-strategy from the ByteArrayId for the + * compound index strategy + * + * @param id + * the compound ByteArrayId; index 0 selects the first sub-strategy's id, any other value selects the second + * @return the ByteArrayId of the selected sub-strategy + */ + public static ByteArrayId extractByteArrayId( + final ByteArrayId id, + final int index ) { + final ByteBuffer buf = ByteBuffer.wrap(id.getBytes()); + final int id1Length = buf.getInt(id.getBytes().length - 4); + + if (index == 0) { + final byte[] bytes1 = new byte[id1Length]; + buf.get(bytes1); + return new ByteArrayId( + bytes1); + } + + final byte[] bytes2 = new byte[id.getBytes().length - id1Length - 4]; + buf.position(id1Length); + buf.get(bytes2); + return new ByteArrayId( + bytes2); + + } + + /** + * + * Delegate Metadata item for an underlying index. For + * CompoundIndexStrategy, this delegate wraps the meta data for one of the + * two indices. The primary function of this class is to extract out the + * parts of the ByteArrayId that are specific to each index during an + * 'update' operation.
+ * + */ + private static class CompoundIndexMetaDataWrapper implements + IndexMetaData, + Persistable + { + + private IndexMetaData metaData; + private int index; + + public CompoundIndexMetaDataWrapper() {} + + public CompoundIndexMetaDataWrapper( + final IndexMetaData metaData, + final int index ) { + super(); + this.metaData = metaData; + this.index = index; + } + + @Override + public byte[] toBinary() { + final byte[] metaBytes = PersistenceUtils.toBinary(metaData); + final ByteBuffer buf = ByteBuffer.allocate(4 + metaBytes.length); + buf.put(metaBytes); + buf.putInt(index); + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final byte[] metaBytes = new byte[bytes.length - 4]; + buf.get(metaBytes); + metaData = PersistenceUtils.fromBinary( + metaBytes, + IndexMetaData.class); + index = buf.getInt(); + } + + @Override + public void merge( + final Mergeable merge ) { + if (merge instanceof CompoundIndexMetaDataWrapper) { + final CompoundIndexMetaDataWrapper compound = (CompoundIndexMetaDataWrapper) merge; + metaData.merge(compound.metaData); + } + } + + @Override + public void insertionIdsAdded( + final List ids ) { + metaData.insertionIdsAdded(Lists.transform( + ids, + new Function() { + @Override + public ByteArrayId apply( + final ByteArrayId input ) { + return extractByteArrayId( + input, + index); + } + })); + } + + @Override + public void insertionIdsRemoved( + final List ids ) { + metaData.insertionIdsRemoved(Lists.transform( + ids, + new Function() { + @Override + public ByteArrayId apply( + final ByteArrayId input ) { + return extractByteArrayId( + input, + index); + } + })); + } + + /** + * Build a JSON object describing this wrapper: its type name and the sub-strategy index it wraps (the wrapped metadata itself is not included) + */ + + @Override + public JSONObject toJSONObject() + throws JSONException { + JSONObject jo = new JSONObject(); + jo.put( + "type", + "CompoundIndexMetaDataWrapper"); + + jo.put( + "index", + index); + + return jo; +
} + + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + final MultiDimensionalNumericData dataRange, + final IndexMetaData... hints ) { + final MultiDimensionalCoordinateRanges[] ranges1 = subStrategy1.getCoordinateRangesPerDimension( + dataRange, + hints); + final MultiDimensionalCoordinateRanges[] ranges2 = subStrategy2.getCoordinateRangesPerDimension( + dataRange, + hints); + MultiDimensionalCoordinateRanges[] retVal = new MultiDimensionalCoordinateRanges[ranges1.length + * ranges2.length]; + int r = 0; + for (final MultiDimensionalCoordinateRanges range1 : ranges1) { + for (final MultiDimensionalCoordinateRanges range2 : ranges2) { + final CoordinateRange[][] coordinateRangesPerDimensions = new CoordinateRange[baseDefinitions.length][]; + for (int i = 0; i < baseDefinitions.length; i++) { + if (strategy1Mappings[i] >= 0) { + coordinateRangesPerDimensions[i] = range1.getRangeForDimension(strategy1Mappings[i]); + } + else if (strategy2Mappings[i] >= 0) { + coordinateRangesPerDimensions[i] = range2.getRangeForDimension(strategy2Mappings[i]); + } + } + retVal[r++] = new MultiDimensionalCoordinateRanges( + ByteArrayUtils.combineArrays( + range1.getMultiDimensionalId(), + range2.getMultiDimensionalId()), + coordinateRangesPerDimensions); + } + } + return retVal; + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Coordinate.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Coordinate.java new file mode 100644 index 0000000..b4b85fb --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Coordinate.java @@ -0,0 +1,76 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.Arrays; + +public class Coordinate +{ + private long coordinate; + private byte[] binId; + + protected Coordinate() {} + + public Coordinate( + final long coordinate, + final byte[] binId ) { + this.coordinate = coordinate; + this.binId = binId; + } + + public long getCoordinate() { + return coordinate; + } + + public void setCoordinate( + final long coordinate ) { + this.coordinate = coordinate; + } + + public byte[] getBinId() { + return binId; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(binId); + result = (prime * result) + (int) (coordinate ^ (coordinate >>> 32)); + result = (prime * result) + Arrays.hashCode(binId); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Coordinate other = (Coordinate) obj; + if (!Arrays.equals( + binId, + other.binId)) { + return false; + } + if (coordinate != other.coordinate) { + return false; + } + return true; + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/CoordinateRange.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/CoordinateRange.java new file mode 100644 index 0000000..56cd237 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/CoordinateRange.java @@ -0,0 +1,109 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse 
Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class CoordinateRange implements + Persistable +{ + private long minCoordinate; + private long maxCoordinate; + private byte[] binId; + + protected CoordinateRange() {} + + public CoordinateRange( + final long minCoordinate, + final long maxCoordinate, + final byte[] binId ) { + this.minCoordinate = minCoordinate; + this.maxCoordinate = maxCoordinate; + this.binId = binId; + } + + public long getMinCoordinate() { + return minCoordinate; + } + + public long getMaxCoordinate() { + return maxCoordinate; + } + + public byte[] getBinId() { + return binId; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(binId); + result = (prime * result) + (int) (maxCoordinate ^ (maxCoordinate >>> 32)); + result = (prime * result) + (int) (minCoordinate ^ (minCoordinate >>> 32)); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CoordinateRange other = (CoordinateRange) obj; + if (!Arrays.equals( + binId, + other.binId)) { + return false; + } + if (maxCoordinate != other.maxCoordinate) { + return false; + } + if (minCoordinate != other.minCoordinate) { + return false; + } + return true; + } + + @Override + public byte[] toBinary() { + final ByteBuffer buf = 
ByteBuffer.allocate(16 + (binId == null ? 0 : binId.length)); + buf.putLong(minCoordinate); + buf.putLong(maxCoordinate); + if (binId != null) { + buf.put(binId); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + minCoordinate = buf.getLong(); + maxCoordinate = buf.getLong(); + if (bytes.length > 16) { + binId = new byte[bytes.length - 16]; + buf.get(binId); + } + else { + binId = null; + } + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/FloatCompareUtils.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/FloatCompareUtils.java new file mode 100644 index 0000000..42b624b --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/FloatCompareUtils.java @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +/** + * Convenience methods for comparing floating point values. 
+ * + */ +public class FloatCompareUtils +{ + public static final double COMP_EPSILON = 2.22E-16; + + /** + * The == operator is not reliable for doubles, so we are using this method + * to check if two doubles are equal + * + * @param x + * @param y + * @return true if the double are equal, false if they are not + */ + public static boolean checkDoublesEqual( + double x, + double y ) { + boolean xNeg = false; + boolean yNeg = false; + double diff = (Math.abs(x) - Math.abs(y)); + + if (x < 0.0) { + xNeg = true; + } + if (y < 0.0) { + yNeg = true; + } + return (diff <= COMP_EPSILON && diff >= -COMP_EPSILON && xNeg == yNeg); + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/HierarchicalNumericIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/HierarchicalNumericIndexStrategy.java new file mode 100644 index 0000000..7179eb0 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/HierarchicalNumericIndexStrategy.java @@ -0,0 +1,71 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.Arrays; + +/** + * This interface defines a multi-tiered approach to indexing, in which a single + * strategy is reliant on a set of sub-strategies + * + */ +public interface HierarchicalNumericIndexStrategy extends + NumericIndexStrategy +{ + public SubStrategy[] getSubStrategies(); + + public static class SubStrategy + { + private final NumericIndexStrategy indexStrategy; + private final byte[] prefix; + + public SubStrategy( + final NumericIndexStrategy indexStrategy, + final byte[] prefix ) { + this.indexStrategy = indexStrategy; + this.prefix = prefix; + } + + public NumericIndexStrategy getIndexStrategy() { + return indexStrategy; + } + + public byte[] getPrefix() { + return prefix; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((indexStrategy == null) ? 
0 : indexStrategy.hashCode()); + result = prime * result + Arrays.hashCode(prefix); + return result; + } + + @Override + public boolean equals( + Object obj ) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + SubStrategy other = (SubStrategy) obj; + if (indexStrategy == null) { + if (other.indexStrategy != null) return false; + } + else if (!indexStrategy.equals(other.indexStrategy)) return false; + if (!Arrays.equals( + prefix, + other.prefix)) return false; + return true; + } + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexMetaData.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexMetaData.java new file mode 100644 index 0000000..03372cb --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexMetaData.java @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.List; + +import net.sf.json.JSONException; +import net.sf.json.JSONObject; + +public interface IndexMetaData extends + Persistable, + Mergeable +{ + /** + * Update the aggregation result using the new entry provided + * + * @param insertionIds + * the new indices to compute an updated aggregation result on + */ + public void insertionIdsAdded( + List insertionIds ); + + /** + * Update the aggregation result by removing the entries provided + * + * @param insertionIds + * the new indices to compute an updated aggregation result on + */ + public void insertionIdsRemoved( + List insertionIds ); + + /** + * Create a JSON object that shows all the metadata handled by this object + * + */ + public JSONObject toJSONObject() + throws JSONException; + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexStrategy.java new file mode 100644 index 0000000..95bfeec --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexStrategy.java @@ -0,0 +1,103 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.List; +import java.util.Set; + +/** + * Interface which defines an index strategy. + * + */ +public interface IndexStrategy extends + Persistable +{ + /** + * Returns a list of query ranges for an specified numeric range. + * + * @param indexedRange + * defines the numeric range for the query + * @return a List of query ranges + */ + public List getQueryRanges( + QueryRangeType indexedRange, + IndexMetaData... hints ); + + /** + * Returns a list of query ranges for an specified numeric range. + * + * @param indexedRange + * defines the numeric range for the query + * @param maxRangeDecomposition + * the maximum number of ranges provided by a single query + * decomposition, this is a best attempt and not a guarantee + * @return a List of query ranges + */ + public List getQueryRanges( + QueryRangeType indexedRange, + int maxEstimatedRangeDecomposition, + IndexMetaData... hints ); + + /** + * Returns a list of id's for insertion. The index strategy will use a + * reasonable default for the maximum duplication of insertion IDs + * + * @param indexedData + * defines the numeric data to be indexed + * @return a List of insertion ID's + */ + public List getInsertionIds( + EntryRangeType indexedData ); + + /** + * Returns a list of id's for insertion. 
+ * + * @param indexedData + * defines the numeric data to be indexed + * @param maxDuplicateInsertionIds + * defines the maximum number of insertion IDs that can be used, + * this is a best attempt and not a guarantee + * @return a List of insertion ID's + */ + public List getInsertionIds( + EntryRangeType indexedData, + int maxEstimatedDuplicateIds ); + + /** + * Returns the range that the given ID represents + * + * @param insertionId + * the insertion ID to determine a range for + * @return the range that the given insertion ID represents, inclusive on + * the start and exclusive on the end for the range + */ + public EntryRangeType getRangeForId( + ByteArrayId insertionId ); + + /** + * + * @return a unique ID associated with the index strategy + */ + public String getId(); + + /*** + * Get the set of natural splits for this index strategy. Not all index + * strategies need to have natural splits but in the cases that they exist, + * it can be used downstream to appropriately partition the data according + * to natural split points from the index strategy. + * + * @return the set of natural splits + */ + public Set getNaturalSplits(); + + public List createMetaData(); + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexUtils.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexUtils.java new file mode 100644 index 0000000..4db24f2 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/IndexUtils.java @@ -0,0 +1,236 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.List; + +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +public class IndexUtils +{ + public static MultiDimensionalNumericData getFullBounds( + final NumericIndexStrategy indexStrategy ) { + return getFullBounds(indexStrategy.getOrderedDimensionDefinitions()); + } + + /** + * Constraints that are empty indicate full table scan. A full table scan + * occurs if ANY one dimension is unbounded. 
+ * + * @param constraints + * @return true if any one dimension is unbounded + */ + public static final boolean isFullTableScan( + final List constraints ) { + for (final MultiDimensionalNumericData constraint : constraints) { + if (constraint.isEmpty()) { + return false; + } + } + return constraints.isEmpty(); + } + + public static MultiDimensionalNumericData getFullBounds( + final NumericDimensionDefinition[] dimensionDefinitions ) { + final NumericRange[] boundsPerDimension = new NumericRange[dimensionDefinitions.length]; + for (int d = 0; d < dimensionDefinitions.length; d++) { + boundsPerDimension[d] = dimensionDefinitions[d].getBounds(); + } + return new BasicNumericDataset( + boundsPerDimension); + } + + public static final double getDimensionalBitsUsed( + final NumericIndexStrategy indexStrategy, + final double[] dataRangePerDimension ) { + double result = Long.MAX_VALUE; + if (dataRangePerDimension.length == 0) { + return 0; + } + final double cellRangePerDimension[] = indexStrategy.getHighestPrecisionIdRangePerDimension(); + final double inflatedRangePerDimension[] = inflateRange( + cellRangePerDimension, + dataRangePerDimension); + final double bitsPerDimension[] = getBitsPerDimension( + indexStrategy, + cellRangePerDimension); + + final BinRange[][] binsPerDimension = getBinsPerDimension( + indexStrategy, + inflatedRangePerDimension); + final double[][] bitsFromTheRightPerDimension = getBitsFromTheRightPerDimension( + binsPerDimension, + cellRangePerDimension); + + // This ALWAYS chooses the index who dimension + // cells cover the widest range thus fewest cells. In temporal, YEAR is + // always chosen. + // However, this is not necessarily bad. A smaller bin size may result + // in more bins searched. + // When searching across multiple bins associated with a dimension, The + // first and last bin are + // partial searches. The inner bins are 'full' scans over the bin. + // Thus, smaller bin sizes could result more in more rows scanned. 
+ // On the flip, fewer larger less-granular bins can also have the same + // result. + // Bottom line: this is not straight forward + // Example: YEAR + // d[ 3600000.0] + // cellRangePerDimension[30157.470702171326] + // inflatedRangePerDimension[3618896.484260559] + // bitsFromTheRightPerDimension[6.906890595608519]] + // Example: DAY + // cellRangePerDimension[ 2554.3212881088257] + // inflatedRangePerDimension[ 3601593.016233444] + // bitsFromTheRightPerDimension[ 10.461479447286157]] + for (final double[] binnedBitsPerFromTheRightDimension : bitsFromTheRightPerDimension) { + for (int d = 0; d < binnedBitsPerFromTheRightDimension.length; d++) { + final double totalBitsUsed = (bitsPerDimension[d] - binnedBitsPerFromTheRightDimension[d]); + if (totalBitsUsed < 0) { + return 0; + } + result = Math.min( + totalBitsUsed, + result); + } + } + + // The least constraining dimension uses the least amount of bits of + // fixed bits from the left. + // For example, half of the world latitude is 1 bit, 1/4 of the world is + // 2 bits etc. + // Use the least constraining dimension, but multiply by the + // # of dimensions. 
+ return result * cellRangePerDimension.length; + } + + public static double[] inflateRange( + final double[] cellRangePerDimension, + final double[] dataRangePerDimension ) { + final double[] result = new double[cellRangePerDimension.length]; + for (int d = 0; d < result.length; d++) { + result[d] = Math.ceil(dataRangePerDimension[d] / cellRangePerDimension[d]) * cellRangePerDimension[d]; + } + return result; + } + + public static double[][] getBitsFromTheRightPerDimension( + final BinRange[][] binsPerDimension, + final double[] cellRangePerDimension ) { + int numBinnedQueries = 1; + for (int d = 0; d < binsPerDimension.length; d++) { + numBinnedQueries *= binsPerDimension[d].length; + } + // now we need to combine all permutations of bin ranges into + // BinnedQuery objects + final double[][] binnedQueries = new double[numBinnedQueries][]; + for (int d = 0; d < binsPerDimension.length; d++) { + for (int b = 0; b < binsPerDimension[d].length; b++) { + for (int i = b; i < numBinnedQueries; i += binsPerDimension[d].length) { + if (binnedQueries[i] == null) { + binnedQueries[i] = new double[binsPerDimension.length]; + } + if ((binsPerDimension[d][b].getNormalizedMax() - binsPerDimension[d][b].getNormalizedMin()) <= 0.000000001) { + binnedQueries[i][d] = 0; + } + else { + binnedQueries[i][d] = log2(Math + .ceil((binsPerDimension[d][b].getNormalizedMax() - binsPerDimension[d][b] + .getNormalizedMin()) / cellRangePerDimension[d])); + } + + } + } + } + return binnedQueries; + } + + public static int getBitPositionFromSubsamplingArray( + final NumericIndexStrategy indexStrategy, + final double[] maxResolutionSubsamplingPerDimension ) { + return (int) Math.round(getDimensionalBitsUsed( + indexStrategy, + maxResolutionSubsamplingPerDimension) + (8 * indexStrategy.getByteOffsetFromDimensionalIndex())); + } + + public static byte[] getNextRowForSkip( + final byte[] row, + final int bitPosition ) { + final int cardinality = bitPosition + 1; + final byte[] rowCopy = new 
byte[(int) Math.ceil(cardinality / 8.0)]; + + System.arraycopy( + row, + 0, + rowCopy, + 0, + rowCopy.length); + + // number of bits not used in the last byte + int remainder = (8 - (cardinality % 8)); + if (remainder == 8) { + remainder = 0; + } + + final int numIncrements = (int) Math.pow( + 2, + remainder); + + if (remainder > 0) { + for (int i = 0; i < remainder; i++) { + rowCopy[rowCopy.length - 1] |= (1 << (i)); + } + } + + for (int i = 0; i < numIncrements; i++) { + if (!ByteArrayUtils.increment(rowCopy)) { + return null; + } + } + + return rowCopy; + } + + private static final double[] getBitsPerDimension( + final NumericIndexStrategy indexStrategy, + final double[] rangePerDimension ) { + final NumericDimensionDefinition dim[] = indexStrategy.getOrderedDimensionDefinitions(); + final double result[] = new double[rangePerDimension.length]; + for (int d = 0; d < rangePerDimension.length; d++) { + result[d] += Math.ceil(log2((dim[d].getRange() / rangePerDimension[d]))); + } + return result; + } + + private static final BinRange[][] getBinsPerDimension( + final NumericIndexStrategy indexStrategy, + final double[] rangePerDimension ) { + + final NumericDimensionDefinition dim[] = indexStrategy.getOrderedDimensionDefinitions(); + final BinRange[][] result = new BinRange[rangePerDimension.length][]; + for (int d = 0; d < rangePerDimension.length; d++) { + final BinRange[] ranges = dim[d].getNormalizedRanges(new NumericRange( + 0, + rangePerDimension[d])); + result[d] = ranges; + } + return result; + } + + private static double log2( + final double v ) { + return Math.log(v) / Math.log(2); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Mergeable.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Mergeable.java new file mode 100644 index 0000000..29a676b --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Mergeable.java @@ -0,0 +1,18 @@ 
+/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +public interface Mergeable extends + Persistable +{ + public void merge( + Mergeable merge ); +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinateRanges.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinateRanges.java new file mode 100644 index 0000000..2bcee34 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinateRanges.java @@ -0,0 +1,106 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +public class MultiDimensionalCoordinateRanges implements + Persistable +{ + // this is a generic placeholder for "tiers" + private byte[] multiDimensionalId; + private CoordinateRange[][] coordinateRangesPerDimension; + + public MultiDimensionalCoordinateRanges() { + coordinateRangesPerDimension = new CoordinateRange[][] {}; + } + + public MultiDimensionalCoordinateRanges( + final byte[] multiDimensionalPrefix, + final CoordinateRange[][] coordinateRangesPerDimension ) { + multiDimensionalId = multiDimensionalPrefix; + this.coordinateRangesPerDimension = coordinateRangesPerDimension; + } + + public byte[] getMultiDimensionalId() { + return multiDimensionalId; + } + + public int getNumDimensions() { + return coordinateRangesPerDimension.length; + } + + public CoordinateRange[] getRangeForDimension( + final int dimension ) { + return coordinateRangesPerDimension[dimension]; + } + + @Override + public byte[] toBinary() { + final List serializedRanges = new ArrayList<>(); + final int idLength = (multiDimensionalId == null ? 
0 : multiDimensionalId.length); + + int byteLength = (4 * getNumDimensions()) + 8 + idLength; + final int[] numPerDimension = new int[getNumDimensions()]; + int d = 0; + for (final CoordinateRange[] dim : coordinateRangesPerDimension) { + numPerDimension[d++] = dim.length; + for (final CoordinateRange range : dim) { + final byte[] serializedRange = range.toBinary(); + byteLength += (serializedRange.length + 4); + serializedRanges.add(serializedRange); + } + } + final ByteBuffer buf = ByteBuffer.allocate(byteLength); + buf.putInt(idLength); + if (idLength > 0) { + buf.put(multiDimensionalId); + } + buf.putInt(coordinateRangesPerDimension.length); + for (final int num : numPerDimension) { + buf.putInt(num); + } + for (final byte[] serializedRange : serializedRanges) { + buf.putInt(serializedRange.length); + buf.put(serializedRange); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int idLength = buf.getInt(); + if (idLength > 0) { + multiDimensionalId = new byte[idLength]; + buf.get(multiDimensionalId); + } + else { + multiDimensionalId = null; + } + coordinateRangesPerDimension = new CoordinateRange[buf.getInt()][]; + for (int d = 0; d < coordinateRangesPerDimension.length; d++) { + coordinateRangesPerDimension[d] = new CoordinateRange[buf.getInt()]; + } + for (int d = 0; d < coordinateRangesPerDimension.length; d++) { + for (int i = 0; i < coordinateRangesPerDimension[d].length; i++) { + final byte[] serializedRange = new byte[buf.getInt()]; + buf.get(serializedRange); + + coordinateRangesPerDimension[d][i] = new CoordinateRange(); + coordinateRangesPerDimension[d][i].fromBinary(serializedRange); + } + } + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinateRangesArray.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinateRangesArray.java new file mode 100644 index 
0000000..76f1cd9 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinateRangesArray.java @@ -0,0 +1,110 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; + +public class MultiDimensionalCoordinateRangesArray implements + Persistable +{ + private MultiDimensionalCoordinateRanges[] rangesArray; + + public MultiDimensionalCoordinateRangesArray() {} + + public MultiDimensionalCoordinateRangesArray( + final MultiDimensionalCoordinateRanges[] rangesArray ) { + this.rangesArray = rangesArray; + } + + public MultiDimensionalCoordinateRanges[] getRangesArray() { + return rangesArray; + } + + @Override + public byte[] toBinary() { + final byte[][] rangesBinaries = new byte[rangesArray.length][]; + int binaryLength = 4; + for (int i = 0; i < rangesArray.length; i++) { + rangesBinaries[i] = rangesArray[i].toBinary(); + binaryLength += (4 + rangesBinaries[i].length); + } + final ByteBuffer buf = ByteBuffer.allocate(binaryLength); + buf.putInt(rangesBinaries.length); + for (final byte[] rangesBinary : rangesBinaries) { + buf.putInt(rangesBinary.length); + buf.put(rangesBinary); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + rangesArray = new MultiDimensionalCoordinateRanges[buf.getInt()]; + for (int i = 0; i 
< rangesArray.length; i++) { + final byte[] rangesBinary = new byte[buf.getInt()]; + buf.get(rangesBinary); + rangesArray[i] = new MultiDimensionalCoordinateRanges(); + rangesArray[i].fromBinary(rangesBinary); + } + } + + public static class ArrayOfArrays implements + Persistable + { + private MultiDimensionalCoordinateRangesArray[] coordinateArrays; + + public ArrayOfArrays() { + + } + + public ArrayOfArrays( + MultiDimensionalCoordinateRangesArray[] coordinateArrays ) { + this.coordinateArrays = coordinateArrays; + } + + public MultiDimensionalCoordinateRangesArray[] getCoordinateArrays() { + return coordinateArrays; + } + + @Override + public byte[] toBinary() { + final byte[][] rangesBinaries = new byte[coordinateArrays.length][]; + int binaryLength = 4; + for (int i = 0; i < coordinateArrays.length; i++) { + rangesBinaries[i] = coordinateArrays[i].toBinary(); + binaryLength += (4 + rangesBinaries[i].length); + } + final ByteBuffer buf = ByteBuffer.allocate(binaryLength); + buf.putInt(rangesBinaries.length); + for (final byte[] rangesBinary : rangesBinaries) { + buf.putInt(rangesBinary.length); + buf.put(rangesBinary); + } + return buf.array(); + } + + @Override + public void fromBinary( + byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + coordinateArrays = new MultiDimensionalCoordinateRangesArray[buf.getInt()]; + for (int i = 0; i < coordinateArrays.length; i++) { + final byte[] rangesBinary = new byte[buf.getInt()]; + buf.get(rangesBinary); + coordinateArrays[i] = new MultiDimensionalCoordinateRangesArray(); + coordinateArrays[i].fromBinary(rangesBinary); + } + } + + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinates.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinates.java new file mode 100644 index 0000000..9dd0001 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/MultiDimensionalCoordinates.java @@ -0,0 +1,81 
@@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.Arrays; + +public class MultiDimensionalCoordinates +{ + // this is a generic placeholder for tiers + private final byte[] multiDimensionalId; + private final Coordinate[] coordinatePerDimension; + + public MultiDimensionalCoordinates() { + multiDimensionalId = new byte[] {}; + coordinatePerDimension = new Coordinate[] {}; + } + + public MultiDimensionalCoordinates( + final byte[] multiDimensionalId, + final Coordinate[] coordinatePerDimension ) { + super(); + this.multiDimensionalId = multiDimensionalId; + this.coordinatePerDimension = coordinatePerDimension; + } + + public byte[] getMultiDimensionalId() { + return multiDimensionalId; + } + + public Coordinate getCoordinate( + final int dimension ) { + return coordinatePerDimension[dimension]; + } + + public int getNumDimensions() { + return coordinatePerDimension.length; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(coordinatePerDimension); + result = (prime * result) + Arrays.hashCode(multiDimensionalId); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final MultiDimensionalCoordinates other = 
(MultiDimensionalCoordinates) obj; + if (!Arrays.equals( + coordinatePerDimension, + other.coordinatePerDimension)) { + return false; + } + if (!Arrays.equals( + multiDimensionalId, + other.multiDimensionalId)) { + return false; + } + return true; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NullNumericIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NullNumericIndexStrategy.java new file mode 100644 index 0000000..1bda825 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NullNumericIndexStrategy.java @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +/** + * This is a completely empty numeric index strategy representing no dimensions, + * and always returning empty IDs and ranges. It can be used in cases when the + * data is "indexed" by another means, and not using multi-dimensional numeric + * data. 
+ * + */ +public class NullNumericIndexStrategy implements + NumericIndexStrategy +{ + private String id; + + protected NullNumericIndexStrategy() { + super(); + } + + public NullNumericIndexStrategy( + final String id ) { + this.id = id; + } + + @Override + public byte[] toBinary() { + return StringUtils.stringToBinary(id); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + id = StringUtils.stringFromBinary(bytes); + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return getQueryRanges( + indexedRange, + -1); + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxRangeDecomposition, + final IndexMetaData... hints ) { + // a null return here should be interpreted as negative to positive + // infinite + return null; + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + return getInsertionIds( + indexedData, + 1); + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + // there are no dimensions so return an empty array + return new NumericDimensionDefinition[] {}; + } + + @Override + public String getId() { + return id; + } + + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + // a null return here should be interpreted as negative to positive + // infinite + return null; + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + // there are no dimensions so return an empty array + return new double[] {}; + } + + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + final ByteArrayId insertionId ) { + // there are no dimensions so return an empty array + return new MultiDimensionalCoordinates(); + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxDuplicateInsertionIds 
) { + // return a single empty ID + final List retVal = new ArrayList(); + retVal.add(new ByteArrayId( + new byte[] {})); + return retVal; + } + + @Override + public Set getNaturalSplits() { + return null; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + return 0; + } + + @Override + public List createMetaData() { + return Collections.emptyList(); + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + MultiDimensionalNumericData dataRange, + IndexMetaData... hints ) { + return new MultiDimensionalCoordinateRanges[] { + new MultiDimensionalCoordinateRanges() + }; + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NumericIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NumericIndexStrategy.java new file mode 100644 index 0000000..36a7def --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NumericIndexStrategy.java @@ -0,0 +1,75 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +/** + * Interface which defines a numeric index strategy. 
+ * + */ +public interface NumericIndexStrategy extends + IndexStrategy +{ + + /** + * Return an integer coordinate in each dimension for the given insertion ID + * plus a bin ID if that dimension is continuous + * + * @param insertionId + * the insertion ID to determine the coordinates for + * @return the integer coordinate that the given insertion ID represents and + * associated bin ID if that dimension is continuous + */ + public MultiDimensionalCoordinates getCoordinatesPerDimension( + ByteArrayId insertionId ); + + /** + * Return an integer coordinate range in each dimension for the given data + * range plus a bin ID if that dimension is continuous + * + * @param dataRange + * the range to determine the coordinates for + * @return the integer coordinate ranges that the given data ID represents + * and associated bin IDs if a dimension is continuous + */ + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + MultiDimensionalNumericData dataRange, + IndexMetaData... hints ); + + /** + * Returns an array of dimension definitions that defines this index + * strategy, the array is in the order that is expected within + * multidimensional numeric data that is passed to this index strategy + * + * @return the ordered array of dimension definitions that represents this + * index strategy + */ + public NumericDimensionDefinition[] getOrderedDimensionDefinitions(); + + /*** + * Get the range/size of a single insertion ID for each dimension at the + * highest precision supported by this index strategy + * + * @return the range of a single insertion ID for each dimension + */ + public double[] getHighestPrecisionIdRangePerDimension(); + + /*** + * Get the offset in bytes before the dimensional index. 
This can accounts + * for tier IDs and bin IDs + * + * @return the byte offset prior to the dimensional index + */ + public int getByteOffsetFromDimensionalIndex(); + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NumericIndexStrategyWrapper.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NumericIndexStrategyWrapper.java new file mode 100644 index 0000000..ad7b8d9 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/NumericIndexStrategyWrapper.java @@ -0,0 +1,146 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +public class NumericIndexStrategyWrapper implements + NumericIndexStrategy +{ + private String id; + private NumericIndexStrategy indexStrategy; + + protected NumericIndexStrategyWrapper() {} + + public NumericIndexStrategyWrapper( + final String id, + final NumericIndexStrategy indexStrategy ) { + this.id = id; + this.indexStrategy = indexStrategy; + } + + @Override + public String getId() { + return id; + } + + @Override + public byte[] toBinary() { + final byte[] idBinary = StringUtils.stringToBinary(id); + final byte[] delegateBinary = 
PersistenceUtils.toBinary(indexStrategy); + final ByteBuffer buf = ByteBuffer.allocate(4 + idBinary.length + delegateBinary.length); + buf.putInt(idBinary.length); + buf.put(idBinary); + buf.put(delegateBinary); + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int idBinaryLength = buf.getInt(); + final byte[] idBinary = new byte[idBinaryLength]; + final byte[] delegateBinary = new byte[bytes.length - idBinaryLength - 4]; + buf.get(idBinary); + buf.get(delegateBinary); + id = StringUtils.stringFromBinary(idBinary); + indexStrategy = PersistenceUtils.fromBinary( + delegateBinary, + NumericIndexStrategy.class); + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return indexStrategy.getQueryRanges( + indexedRange, + hints); + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxRangeDecomposition, + final IndexMetaData... 
hints ) { + return indexStrategy.getQueryRanges( + indexedRange, + maxRangeDecomposition, + hints); + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + return indexStrategy.getInsertionIds(indexedData); + } + + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + return indexStrategy.getRangeForId(insertionId); + } + + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + final ByteArrayId insertionId ) { + return indexStrategy.getCoordinatesPerDimension(insertionId); + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + return indexStrategy.getOrderedDimensionDefinitions(); + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + return indexStrategy.getHighestPrecisionIdRangePerDimension(); + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxDuplicateInsertionIds ) { + return indexStrategy.getInsertionIds( + indexedData, + maxDuplicateInsertionIds); + } + + @Override + public Set getNaturalSplits() { + return indexStrategy.getNaturalSplits(); + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + return indexStrategy.getByteOffsetFromDimensionalIndex(); + } + + @Override + public List createMetaData() { + return indexStrategy.createMetaData(); + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + final MultiDimensionalNumericData dataRange, + final IndexMetaData... 
hints ) { + return indexStrategy.getCoordinateRangesPerDimension( + dataRange, + hints); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Persistable.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Persistable.java new file mode 100644 index 0000000..2c0081e --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/Persistable.java @@ -0,0 +1,36 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +/** + * + * A simple interface for persisting objects, PersistenceUtils provides + * convenience methods for serializing and de-serializing these objects + * + */ +public interface Persistable +{ + /** + * Convert fields and data within an object to binary form for transmission + * or storage. + * + * @return an array of bytes representing a binary stream representation of + * the object. + */ + public byte[] toBinary(); + + /** + * Convert a stream of binary bytes to fields and data within an object. 
+ * + */ + public void fromBinary( + byte[] bytes ); +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/PersistenceUtils.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/PersistenceUtils.java new file mode 100644 index 0000000..be48999 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/PersistenceUtils.java @@ -0,0 +1,157 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.lang.reflect.Constructor; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A set of convenience methods for serializing and deserializing persistable + * objects + * + */ +public class PersistenceUtils +{ + private final static Logger LOGGER = LoggerFactory.getLogger(PersistenceUtils.class); + + public static byte[] toBinary( + final Collection persistables ) { + if (persistables.isEmpty()) { + return new byte[] {}; + } + int byteCount = 4; + + final List persistableBinaries = new ArrayList(); + for (final Persistable persistable : persistables) { + final byte[] binary = toBinary(persistable); + byteCount += (4 + binary.length); + persistableBinaries.add(binary); + } + final ByteBuffer buf = ByteBuffer.allocate(byteCount); + buf.putInt(persistables.size()); + for (final byte[] binary : 
persistableBinaries) { + buf.putInt(binary.length); + buf.put(binary); + } + return buf.array(); + } + + public static byte[] toBinary( + final Persistable persistable ) { + if (persistable == null) { + return new byte[0]; + } + // preface the payload with the class name and a length of the class + // name + final byte[] className = StringUtils.stringToBinary(persistable.getClass().getName()); + final byte[] persistableBinary = persistable.toBinary(); + final int classNameLength = className.length; + final ByteBuffer buf = ByteBuffer.allocate(4 + classNameLength + persistableBinary.length); + buf.putInt(classNameLength); + buf.put(className); + buf.put(persistableBinary); + return buf.array(); + } + + public static List fromBinary( + final byte[] bytes ) { + final List persistables = new ArrayList(); + if ((bytes == null) || (bytes.length < 4)) { + // the original binary didn't even contain the size of the + // array, assume that nothing was persisted + return persistables; + } + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int size = buf.getInt(); + for (int i = 0; i < size; i++) { + final byte[] persistableBinary = new byte[buf.getInt()]; + buf.get(persistableBinary); + persistables.add(fromBinary( + persistableBinary, + Persistable.class)); + } + return persistables; + } + + public static T fromBinary( + final byte[] bytes, + final Class expectedType ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int classNameLength = buf.getInt(); + final byte[] classNameBinary = new byte[classNameLength]; + final byte[] persistableBinary = new byte[bytes.length - classNameLength - 4]; + buf.get(classNameBinary); + + final String className = StringUtils.stringFromBinary(classNameBinary); + + final T retVal = classFactory( + className, + expectedType); + if (retVal != null) { + buf.get(persistableBinary); + retVal.fromBinary(persistableBinary); + } + return retVal; + } + + @SuppressWarnings("unchecked") + public static T classFactory( + final String 
className, + final Class expectedType ) { + Class factoryType = null; + + try { + factoryType = Class.forName(className); + } + catch (final ClassNotFoundException e) { + LOGGER.warn( + "error creating class: could not find class ", + e); + } + + if (factoryType != null) { + Object factoryClassInst = null; + + try { + // use the no arg constructor and make sure its accessible + + // HP Fortify "Access Specifier Manipulation" + // This method is being modified by trusted code, + // in a way that is not influenced by user input + final Constructor noArgConstructor = factoryType.getDeclaredConstructor(); + noArgConstructor.setAccessible(true); + factoryClassInst = noArgConstructor.newInstance(); + } + catch (final Exception e) { + LOGGER.warn( + "error creating class: could not create class ", + e); + } + + if (factoryClassInst != null) { + if (!expectedType.isAssignableFrom(factoryClassInst.getClass())) { + LOGGER.warn("error creating class, does not implement expected type"); + } + else { + return ((T) factoryClassInst); + } + } + } + + return null; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/QueryConstraints.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/QueryConstraints.java new file mode 100644 index 0000000..bb6326f --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/QueryConstraints.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
/**
 * Minimal description of a set of query constraints: how many dimensions it
 * covers and whether it is unconstrained.
 */
public interface QueryConstraints {
    /**
     * @return the dimension count of these constraints
     */
    int getDimensionCount();

    /**
     * Unconstrained?
     *
     * @return whether these constraints are unconstrained on a dimension
     */
    boolean isEmpty();
}
The encoding and + * decoding of strings uses UTF-8, and these methods should be used for + * serializing and deserializing text-based data, not for converting binary data + * to a String representation. Use ByteArrayUtils for converting data that is + * binary in nature to a String for transport. + * + */ +public class StringUtils +{ + + private static final Logger LOGGER = LoggerFactory.getLogger(StringUtils.class); + public static final Charset GEOWAVE_CHAR_SET = Charset.forName("ISO-8859-1"); + public static final Charset UTF8_CHAR_SET = Charset.forName("UTF-8"); + + /** + * Utility to convert a String to bytes + * + * @param string + * incoming String to convert + * @return a byte array + */ + public static byte[] stringToBinary( + final String string ) { + return string.getBytes(GEOWAVE_CHAR_SET); + } + + /** + * Utility to convert a String to bytes + * + * @param string + * incoming String to convert + * @return a byte array + */ + public static byte[] stringsToBinary( + final String strings[] ) { + int len = 4; + final List strsBytes = new ArrayList(); + for (final String str : strings) { + final byte[] strByte = str.getBytes(GEOWAVE_CHAR_SET); + strsBytes.add(strByte); + len += (strByte.length + 4); + + } + final ByteBuffer buf = ByteBuffer.allocate(len); + buf.putInt(strings.length); + for (final byte[] str : strsBytes) { + buf.putInt(str.length); + buf.put(str); + } + return buf.array(); + } + + /** + * Utility to convert bytes to a String + * + * @param binary + * a byte array to convert to a String + * @return a String representation of the byte array + */ + public static String stringFromBinary( + final byte[] binary ) { + return new String( + binary, + GEOWAVE_CHAR_SET); + } + + /** + * Utility to convert bytes to a String + * + * @param binary + * a byte array to convert to a String + * @return a String representation of the byte array + */ + public static String[] stringsFromBinary( + final byte[] binary ) { + final ByteBuffer buf = 
ByteBuffer.wrap(binary); + final int count = buf.getInt(); + final String[] result = new String[count]; + for (int i = 0; i < count; i++) { + final int size = buf.getInt(); + final byte[] strBytes = new byte[size]; + buf.get(strBytes); + result[i] = new String( + strBytes, + GEOWAVE_CHAR_SET); + } + return result; + } + + /** + * Convert a number to a string. In this case we ensure that it is safe for + * Accumulo table names by replacing '-' with '_' + * + * @param number + * the number to convert + * @return the safe string representing that number + */ + public static String intToString( + final int number ) { + return org.apache.commons.lang3.StringUtils.replace( + Integer.toString(number), + "-", + "_"); + } + + public static Map parseParams( + final String params ) + throws NullPointerException { + final Map paramsMap = new HashMap(); + final String[] paramsSplit = params.split(";"); + for (final String param : paramsSplit) { + final String[] keyValue = param.split("="); + if (keyValue.length != 2) { + LOGGER.warn("Unable to parse param '" + param + "'"); + continue; + } + paramsMap.put( + keyValue[0].trim(), + keyValue[1].trim()); + } + return paramsMap; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/BasicDimensionDefinition.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/BasicDimensionDefinition.java new file mode 100644 index 0000000..fca2ab0 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/BasicDimensionDefinition.java @@ -0,0 +1,185 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.dimension; + +import java.nio.ByteBuffer; + +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * The Basic Dimension Definition class defines a Space Filling Curve dimension + * as a minimum and maximum range with values linearly interpolated within the + * range. Values outside of the range will be clamped within the range. + * + */ +public class BasicDimensionDefinition implements + NumericDimensionDefinition +{ + protected double min; + protected double max; + + protected BasicDimensionDefinition() {} + + /** + * Constructor which defines and enforces the bounds of a numeric dimension + * definition. 
+ * + * @param min + * the minimum bounds of the dimension + * @param max + * the maximum bounds of the dimension + */ + public BasicDimensionDefinition( + final double min, + final double max ) { + this.min = min; + this.max = max; + } + + @Override + public double normalize( + double value ) { + value = clamp(value); + + return ((value - min) / (max - min)); + } + + @Override + public BinRange[] getNormalizedRanges( + final NumericData range ) { + return new BinRange[] { + new BinRange( + // by default clamp to the min and max + clamp(range.getMin()), + clamp(range.getMax())) + }; + } + + @Override + public NumericData getFullRange() { + return new NumericRange( + min, + max); + } + + protected double clamp( + final double x ) { + return clamp( + x, + min, + max); + } + + protected static double clamp( + final double x, + final double min, + final double max ) { + if (x < min) { + return min; + } + if (x > max) { + return max; + } + return x; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + final String className = getClass().getName(); + result = (prime * result) + ((className == null) ? 
0 : className.hashCode()); + long temp; + temp = Double.doubleToLongBits(max); + result = (prime * result) + (int) (temp ^ (temp >>> 32)); + temp = Double.doubleToLongBits(min); + result = (prime * result) + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final BasicDimensionDefinition other = (BasicDimensionDefinition) obj; + if (Double.doubleToLongBits(max) != Double.doubleToLongBits(other.max)) { + return false; + } + if (Double.doubleToLongBits(min) != Double.doubleToLongBits(other.min)) { + return false; + } + return true; + } + + @Override + public byte[] toBinary() { + final ByteBuffer buf = ByteBuffer.allocate(16); + buf.putDouble(min); + buf.putDouble(max); + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + min = buf.getDouble(); + max = buf.getDouble(); + } + + @Override + public double denormalize( + double value ) { + if ((value < 0) || (value > 1)) { + value = clamp( + value, + 0, + 1); + } + + return (value * (max - min)) + min; + } + + @Override + public NumericRange getDenormalizedRange( + final BinRange range ) { + return new NumericRange( + range.getNormalizedMin(), + range.getNormalizedMax()); + } + + @Override + public int getFixedBinIdSize() { + return 0; + } + + @Override + public double getRange() { + return max - min; + } + + @Override + public NumericRange getBounds() { + return new NumericRange( + min, + max); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/NumericDimensionDefinition.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/NumericDimensionDefinition.java new file mode 100644 index 0000000..00ac8e2 --- /dev/null +++ 
b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/NumericDimensionDefinition.java @@ -0,0 +1,90 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.dimension; + +import org.locationtech.sfcurve.geowave.index.Persistable; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * The Numeric Dimension Definition interface defines the attributes and methods + * of a class which forms the Space Filling Curve dimension. 
+ * + */ +public interface NumericDimensionDefinition extends + Persistable +{ + public double getRange(); + + /** + * Used to normalize a value within the bounds of the range to a percentage + * of the range between 0 and 1 + * + * @return normalized value + */ + public double normalize( + double value ); + + /** + * Used to denormalize the numeric data set from a value between 0 and 1 + * scaled to fit within its native bounds + * + * @return the denormalized value + */ + public double denormalize( + double value ); + + /** + * Returns the set of normalized ranges + * + * @param range + * a numeric range of the data set + * @return an array of BinRange[] objects + */ + public BinRange[] getNormalizedRanges( + NumericData range ); + + /** + * Returns a range in the native bounds of the dimension definition, + * denormalized from a bin and separate range + * + * @param range + * a numeric range of the data set, with a bin + * @return a NumericRange representing the given bin and range + */ + public NumericRange getDenormalizedRange( + BinRange range ); + + /** + * If this numeric dimension definition uses bins, it is given a fixed + * length for the bin ID + * + * @return the fixed length for this dimensions bin ID + */ + public int getFixedBinIdSize(); + + /** + * Returns the native bounds of the dimension definition + * + * @return a range representing the minimum value and the maximum value for + * this dimension definition + */ + public NumericRange getBounds(); + + /** + * Provide the entire allowed range + * + * @return + */ + public NumericData getFullRange(); + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/UnboundedDimensionDefinition.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/UnboundedDimensionDefinition.java new file mode 100644 index 0000000..00125d8 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/UnboundedDimensionDefinition.java @@ 
-0,0 +1,123 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.dimension; + +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinningStrategy; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * Because space filling curves require an extent (minimum & maximum), the + * unbounded implementation relies on an external binning strategy to translate + * an unbounded variable into bounded bins + */ +public class UnboundedDimensionDefinition extends + BasicDimensionDefinition +{ + + protected BinningStrategy binningStrategy; + + protected UnboundedDimensionDefinition() { + super(); + } + + /** + * + * @param binningStrategy + * a bin strategy associated with the dimension + */ + public UnboundedDimensionDefinition( + final BinningStrategy binningStrategy ) { + super( + binningStrategy.getBinMin(), + binningStrategy.getBinMax()); + this.binningStrategy = binningStrategy; + + } + + /** + * @param index + * a numeric value to be normalized + */ + @Override + public BinRange[] getNormalizedRanges( + final NumericData index ) { + return binningStrategy.getNormalizedRanges(index); + } + + /** + * + * @return a bin strategy associated with the 
dimension + */ + public BinningStrategy getBinningStrategy() { + return binningStrategy; + } + + @Override + public NumericRange getDenormalizedRange( + BinRange range ) { + return binningStrategy.getDenormalizedRanges(range); + } + + @Override + public int getFixedBinIdSize() { + return binningStrategy.getFixedBinIdSize(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + ((binningStrategy == null) ? 0 : binningStrategy.hashCode()); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (!super.equals(obj)) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final UnboundedDimensionDefinition other = (UnboundedDimensionDefinition) obj; + if (binningStrategy == null) { + if (other.binningStrategy != null) { + return false; + } + } + else if (!binningStrategy.equals(other.binningStrategy)) { + return false; + } + return true; + } + + @Override + public byte[] toBinary() { + return PersistenceUtils.toBinary(binningStrategy); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + binningStrategy = PersistenceUtils.fromBinary( + bytes, + BinningStrategy.class); + min = binningStrategy.getBinMin(); + max = binningStrategy.getBinMax(); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinRange.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinRange.java new file mode 100644 index 0000000..e7d9ac7 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinRange.java @@ -0,0 +1,101 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
/**
 * The Bin Range class is used to define the specific bins or ranges of a
 * particular Binning Strategy. Instances hold a bin ID, a normalized minimum
 * and maximum, and a flag saying whether the range is the full bin; none of
 * these can be reassigned after construction.
 */
public class BinRange {
    private final byte[] binId;
    private final double normalizedMin;
    private final double normalizedMax;
    private final boolean fullExtent;

    /**
     * Creates a BinRange from only a minimum and maximum value. The bin ID is
     * null and the range is not flagged as the full extent.
     *
     * @param normalizedMin
     *            the lower bounds of the bin
     * @param normalizedMax
     *            the upper bounds of the bin
     */
    public BinRange(final double normalizedMin, final double normalizedMax) {
        this.binId = null;
        this.normalizedMin = normalizedMin;
        this.normalizedMax = normalizedMax;
        this.fullExtent = false;
    }

    /**
     * Creates a BinRange with a bin ID, a minimum and maximum value, and a
     * full-extent flag. The ID array is stored as given, without copying.
     *
     * @param binId
     *            a unique ID associated with the bin
     * @param normalizedMin
     *            the lower bounds of the bin
     * @param normalizedMax
     *            the upper bounds of the bin
     * @param fullExtent
     *            flag to indicate whether this is the full bin
     */
    public BinRange(
            final byte[] binId,
            final double normalizedMin,
            final double normalizedMax,
            final boolean fullExtent) {
        this.binId = binId;
        this.normalizedMin = normalizedMin;
        this.normalizedMax = normalizedMax;
        this.fullExtent = fullExtent;
    }

    /**
     * @return a unique ID associated with this particular bin (the same array
     *         that was passed to the constructor, or null)
     */
    public byte[] getBinId() {
        return binId;
    }

    /**
     * @return the lower bounds of this bin
     */
    public double getNormalizedMin() {
        return normalizedMin;
    }

    /**
     * @return the upper bounds of this bin
     */
    public double getNormalizedMax() {
        return normalizedMax;
    }

    /**
     * @return flag to indicate whether this is the entire bin extent
     */
    public boolean isFullExtent() {
        return fullExtent;
    }
}
/**
 * The Bin Value class is used to define the specific bins of a particular
 * Binning Strategy. Instances pair a bin ID with a normalized value; neither
 * can be reassigned after construction.
 */
public class BinValue {
    private final byte[] binId;
    private final double normalizedValue;

    /**
     * Creates a BinValue from a normalized value alone; the bin ID is null.
     *
     * @param normalizedValue
     *            the incoming value to be binned
     */
    public BinValue(final double normalizedValue) {
        this.binId = null;
        this.normalizedValue = normalizedValue;
    }

    /**
     * Creates a BinValue from a unique bin ID and a normalized value. The ID
     * array is stored as given, without copying.
     *
     * @param binId
     *            a unique ID to associate with this Bin Value
     * @param normalizedValue
     *            the incoming value to be binned
     */
    public BinValue(final byte[] binId, final double normalizedValue) {
        this.binId = binId;
        this.normalizedValue = normalizedValue;
    }

    /**
     * @return a unique ID associated with this Bin Value (the same array that
     *         was passed to the constructor, or null)
     */
    public byte[] getBinId() {
        return binId;
    }

    /**
     * @return the normalized value of this particular Bin Value
     */
    public double getNormalizedValue() {
        return normalizedValue;
    }
}
2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.dimension.bin;
+
+import org.locationtech.sfcurve.geowave.index.Persistable;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange;
+
+/**
+ * This interface enables a dimension to define a methodology for applying bins
+ * to a full set of values which can be used by a general purpose space filling
+ * curve implementation.
+ */
+public interface BinningStrategy extends
+        Persistable
+{
+    /**
+     *
+     * @return the minimum value to be used by the space filling curve
+     *         implementation
+     */
+    public double getBinMin();
+
+    /**
+     *
+     * @return the maximum value to be used by the space filling curve
+     *         implementation
+     */
+    public double getBinMax();
+
+    /**
+     * Returns a normalized value (confined to the normalized minimum and
+     * maximum of this binning strategy) and a bin from any value within the
+     * data set
+     *
+     * @param value
+     *            the value that needs to be normalized and binned
+     * @return the normalized value to be used by a space filling curve
+     *         implementation, and the bin
+     */
+    public BinValue getBinnedValue(
+            double value );
+
+    /**
+     * Return a set of normalized ranges (each of which is confined to the
+     * normalized min and max of this binning strategy) with a bin for each of
+     * the ranges. If the passed in query range crosses multiple bins, a
+     * BinRange for each bin that it intersects will be returned, but if it is
+     * wholly contained within a single bin then a single BinRange will be
+     * returned
+     *
+     * @param index
+     *            the data representing the query range that needs to be
+     *            normalized and binned
+     * @return the set of all corresponding bins and ranges that the passed in
+     *         query range intersects
+     */
+    public BinRange[] getNormalizedRanges(
+            NumericData index );
+
+    /**
+     * Given a single normalized range (confined to the normalized min and max
+     * of this binning strategy) together with its bin, this will calculate
+     * the original unbinned range.
+     *
+     * @param binnedRange
+     *            the normalized and binned range
+     * @return the original query range represented by the normalized and binned
+     *         range
+     */
+    public NumericRange getDenormalizedRanges(
+            BinRange binnedRange );
+
+    /**
+     * Return the fixed size for the bin ID used by this binning strategy
+     *
+     * @return the length of the bin ID
+     */
+    public int getFixedBinIdSize();
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/DoubleLexicoder.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/DoubleLexicoder.java
new file mode 100644
index 0000000..4a99b9a
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/DoubleLexicoder.java
@@ -0,0 +1,57 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.lexicoder;
+
+/**
+ * A lexicoder for preserving the native Java sort order of Double values.
+ *
+ * The raw IEEE 754 bit pattern is made order-preserving by setting the sign
+ * bit of non-negative values and inverting every bit of negative values, so
+ * that an unsigned, byte-wise comparison of the encoded arrays puts negative
+ * values before non-negative ones and orders each group by numeric value.
+ *
+ */
+public class DoubleLexicoder implements
+        NumberLexicoder<Double>
+{
+    /** Mask selecting only the sign bit of a 64-bit pattern. */
+    private static final long SIGN_BIT = 0x8000000000000000L;
+
+    /**
+     * Encodes a double into 8 bytes whose lexicographic order matches the
+     * numeric order of the encoded values.
+     *
+     * @param value
+     *            the double to encode
+     * @return the 8-byte order-preserving encoding
+     */
+    @Override
+    public byte[] toByteArray(
+            final Double value ) {
+        final long bits = Double.doubleToRawLongBits(value);
+        // negative doubles: invert all bits (clears the sign bit and reverses
+        // the magnitude order); non-negative doubles: set the sign bit
+        final long sortable = (bits < 0) ? ~bits : (bits ^ SIGN_BIT);
+        // Lexicoders.LONG flips the sign bit itself to order signed longs.
+        // The pattern above is already in unsigned order, so pre-flip the
+        // sign bit here to cancel that step; otherwise every non-negative
+        // double would sort before every negative one.
+        return Lexicoders.LONG.toByteArray(sortable ^ SIGN_BIT);
+    }
+
+    /**
+     * Decodes a byte array produced by {@link #toByteArray(Double)}.
+     *
+     * @param bytes
+     *            the encoded double
+     * @return the decoded double
+     */
+    @Override
+    public Double fromByteArray(
+            final byte[] bytes ) {
+        // undo the sign-bit flip applied by Lexicoders.LONG to get back the
+        // order-preserving pattern that was written
+        final long sortable = Lexicoders.LONG.fromByteArray(bytes) ^ SIGN_BIT;
+        // sign bit set: value was non-negative, clear the bit again;
+        // sign bit clear: value was negative, invert all bits back
+        final long bits = (sortable < 0) ? (sortable ^ SIGN_BIT) : ~sortable;
+        return Double.longBitsToDouble(bits);
+    }
+
+    /**
+     * @return the most negative finite double. Double.MIN_VALUE is not used
+     *         here because it is the smallest positive double, not the
+     *         minimum of the range.
+     */
+    @Override
+    public Double getMinimumValue() {
+        return -Double.MAX_VALUE;
+    }
+
+    /**
+     * @return the largest finite double
+     */
+    @Override
+    public Double getMaximumValue() {
+        return Double.MAX_VALUE;
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/IntegerLexicoder.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/IntegerLexicoder.java
new file mode 100644
index 0000000..7d518cd
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/IntegerLexicoder.java
@@ -0,0 +1,53 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.lexicoder;
+
+import com.google.common.primitives.Ints;
+
+/**
+ * A lexicoder for signed integers (in the range from Integer.MIN_VALUE to
+ * Integer.MAX_VALUE). Does an exclusive or on the most significant bit to
+ * invert the sign, so that lexicographic ordering of the byte arrays matches
+ * the natural order of the numbers.
+ *
+ * See Apache Accumulo
+ * (org.apache.accumulo.core.client.lexicoder.IntegerLexicoder)
+ */
+public class IntegerLexicoder implements
+        NumberLexicoder<Integer>
+{
+    /** Mask for the most significant (sign) bit of a 32-bit value. */
+    private static final int SIGN_BIT = 0x80000000;
+
+    protected IntegerLexicoder() {}
+
+    /**
+     * Encodes an integer as 4 bytes with its sign bit flipped.
+     *
+     * @param value
+     *            the integer to encode
+     * @return the 4-byte encoding
+     */
+    @Override
+    public byte[] toByteArray(
+            final Integer value ) {
+        return Ints.toByteArray(value ^ SIGN_BIT);
+    }
+
+    /**
+     * Decodes a byte array produced by {@link #toByteArray(Integer)} by
+     * flipping the sign bit back.
+     *
+     * @param bytes
+     *            the encoded integer
+     * @return the decoded integer
+     */
+    @Override
+    public Integer fromByteArray(
+            final byte[] bytes ) {
+        return Ints.fromByteArray(bytes) ^ SIGN_BIT;
+    }
+
+    @Override
+    public Integer getMinimumValue() {
+        return Integer.MIN_VALUE;
+    }
+
+    @Override
+    public Integer getMaximumValue() {
+        return Integer.MAX_VALUE;
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/Lexicoders.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/Lexicoders.java
new file mode 100644
index 0000000..8e582bb
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/Lexicoders.java
@@ -0,0 +1,23 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.lexicoder;
+
+/**
+ * A class containing shared instances of the lexicoders in this package, one
+ * per supported number type.
+ *
+ */
+public class Lexicoders
+{
+    /** Shared lexicoder for Short values. */
+    public static final ShortLexicoder SHORT = new ShortLexicoder();
+    /** Shared lexicoder for Integer values. */
+    public static final IntegerLexicoder INT = new IntegerLexicoder();
+    /** Shared lexicoder for Long values. */
+    public static final LongLexicoder LONG = new LongLexicoder();
+    /** Shared lexicoder for Double values. */
+    public static final DoubleLexicoder DOUBLE = new DoubleLexicoder();
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/LongLexicoder.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/LongLexicoder.java
new file mode 100644
index 0000000..5e9e18e
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/LongLexicoder.java
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.lexicoder;
+
+import com.google.common.primitives.Longs;
+
+/**
+ * A lexicoder for signed integers (in the range from Long.MIN_VALUE to
+ * Long.MAX_VALUE). 
Does an exclusive or on the most significant bit to invert
+ * the sign, so that lexicographic ordering of the byte arrays matches the
+ * natural order of the numbers.
+ *
+ * See Apache Accumulo (org.apache.accumulo.core.client.lexicoder.LongLexicoder)
+ */
+public class LongLexicoder implements
+        NumberLexicoder<Long>
+{
+    /** Mask for the most significant (sign) bit of a 64-bit value. */
+    private static final long SIGN_BIT = 0x8000000000000000L;
+
+    protected LongLexicoder() {}
+
+    /**
+     * Encodes a long as 8 bytes with its sign bit flipped.
+     *
+     * @param value
+     *            the long to encode
+     * @return the 8-byte encoding
+     */
+    @Override
+    public byte[] toByteArray(
+            final Long value ) {
+        return Longs.toByteArray(value ^ SIGN_BIT);
+    }
+
+    /**
+     * Decodes a byte array produced by {@link #toByteArray(Long)} by flipping
+     * the sign bit back.
+     *
+     * @param bytes
+     *            the encoded long
+     * @return the decoded long
+     */
+    @Override
+    public Long fromByteArray(
+            final byte[] bytes ) {
+        return Longs.fromByteArray(bytes) ^ SIGN_BIT;
+    }
+
+    @Override
+    public Long getMinimumValue() {
+        return Long.MIN_VALUE;
+    }
+
+    @Override
+    public Long getMaximumValue() {
+        return Long.MAX_VALUE;
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/NumberLexicoder.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/NumberLexicoder.java
new file mode 100644
index 0000000..6545ce9
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/NumberLexicoder.java
@@ -0,0 +1,58 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.lexicoder;
+
+/**
+ * A lexicoder for a number type. Converts back and forth between a number and a
+ * byte array. 
A lexicographical sorting of the byte arrays will yield the
+ * natural order of the numbers that they represent.
+ *
+ * @param <T>
+ *            a number type
+ */
+public interface NumberLexicoder<T extends Number>
+{
+    /**
+     * Get a byte[] that represents the number value.
+     *
+     * @param value
+     *            a number
+     * @return the byte array representing the number
+     */
+    public byte[] toByteArray(
+            T value );
+
+    /**
+     * Get the value of a byte array
+     *
+     * @param bytes
+     *            a byte array representing a number
+     * @return the number
+     */
+    public T fromByteArray(
+            byte[] bytes );
+
+    /**
+     * Get the minimum value of the range of numbers that this lexicoder can
+     * encode and decode (i.e. the number represented by all 0 bits).
+     *
+     * @return the minimum value in the lexicoder's range
+     */
+    public T getMinimumValue();
+
+    /**
+     * Get the maximum value of the range of numbers that this lexicoder can
+     * encode and decode (i.e. the number represented by all 1 bits).
+     *
+     * @return the maximum value in the lexicoder's range
+     */
+    public T getMaximumValue();
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/ShortLexicoder.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/ShortLexicoder.java
new file mode 100644
index 0000000..47d27af
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/lexicoder/ShortLexicoder.java
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.lexicoder;
+
+import com.google.common.primitives.Shorts;
+
+/**
+ * A lexicoder for signed integers (in the range from Short.MIN_VALUE to
+ * Short.MAX_VALUE). Does an exclusive or on the most significant bit to invert
+ * the sign, so that lexicographic ordering of the byte arrays matches the
+ * natural order of the numbers.
+ *
+ */
+public class ShortLexicoder implements
+        NumberLexicoder<Short>
+{
+    /** Mask for the most significant (sign) bit of a 16-bit value. */
+    private static final int SIGN_BIT = 0x8000;
+
+    protected ShortLexicoder() {}
+
+    /**
+     * Encodes a short as 2 bytes with its sign bit flipped.
+     *
+     * @param value
+     *            the short to encode
+     * @return the 2-byte encoding
+     */
+    @Override
+    public byte[] toByteArray(
+            final Short value ) {
+        // the xor is evaluated as int, so narrow back to short before encoding
+        return Shorts.toByteArray((short) (value ^ SIGN_BIT));
+    }
+
+    /**
+     * Decodes a byte array produced by {@link #toByteArray(Short)} by flipping
+     * the sign bit back.
+     *
+     * @param bytes
+     *            the encoded short
+     * @return the decoded short
+     */
+    @Override
+    public Short fromByteArray(
+            final byte[] bytes ) {
+        final short encoded = Shorts.fromByteArray(bytes);
+        return (short) (encoded ^ SIGN_BIT);
+    }
+
+    @Override
+    public Short getMinimumValue() {
+        return Short.MIN_VALUE;
+    }
+
+    @Override
+    public Short getMaximumValue() {
+        return Short.MAX_VALUE;
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/RangeDecomposition.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/RangeDecomposition.java
new file mode 100644
index 0000000..96e1d6e
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/RangeDecomposition.java
@@ -0,0 +1,42 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.sfc;
+
+import org.locationtech.sfcurve.geowave.index.ByteArrayRange;
+
+/***
+ * This class encapsulates a set of ranges returned from a space filling curve
+ * decomposition. The array is held by reference: it is neither copied on
+ * construction nor on access.
+ *
+ */
+public class RangeDecomposition
+{
+    private final ByteArrayRange[] ranges;
+
+    /**
+     * Constructor used to create a new Range Decomposition object.
+     *
+     * @param ranges
+     *            ranges for the space filling curve; stored as given, without
+     *            copying
+     */
+    public RangeDecomposition(
+            final ByteArrayRange[] ranges ) {
+        this.ranges = ranges;
+    }
+
+    /**
+     *
+     * @return the ranges associated with this Range Decomposition (the same
+     *         array instance that was passed to the constructor)
+     */
+    public ByteArrayRange[] getRanges() {
+        return ranges;
+    }
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SFCDimensionDefinition.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SFCDimensionDefinition.java
new file mode 100644
index 0000000..9796890
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SFCDimensionDefinition.java
@@ -0,0 +1,167 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.sfc;
+
+import java.nio.ByteBuffer;
+
+import org.locationtech.sfcurve.geowave.index.PersistenceUtils;
+import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition;
+import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange;
+
+/**
+ * Decorates a dimension definition with the number of bits of precision
+ * (cardinality) it is given on a space filling curve. Every dimension
+ * operation is forwarded unchanged to the wrapped definition.
+ *
+ */
+public class SFCDimensionDefinition implements
+        NumericDimensionDefinition
+{
+    private int bitsOfPrecision;
+    private NumericDimensionDefinition dimensionDefinition;
+
+    /**
+     * No-argument constructor; both fields are left unset until
+     * {@link #fromBinary(byte[])} populates them.
+     */
+    protected SFCDimensionDefinition() {}
+
+    /**
+     * @param dimensionDefinition
+     *            the dimension definition to wrap
+     * @param bitsOfPrecision
+     *            the number of bits assigned to that dimension
+     */
+    public SFCDimensionDefinition(
+            final NumericDimensionDefinition dimensionDefinition,
+            final int bitsOfPrecision ) {
+        this.dimensionDefinition = dimensionDefinition;
+        this.bitsOfPrecision = bitsOfPrecision;
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public NumericData getFullRange() {
+        return dimensionDefinition.getFullRange();
+    }
+
+    /**
+     *
+     * @return the bits of precision assigned to the wrapped dimension
+     */
+    public int getBitsOfPrecision() {
+        return bitsOfPrecision;
+    }
+
+    /**
+     * Forwards to the wrapped dimension definition.
+     *
+     * @param range
+     *            numeric data to be normalized
+     * @return the bin ranges the wrapped definition produces for the data
+     */
+    @Override
+    public BinRange[] getNormalizedRanges(
+            final NumericData range ) {
+        return dimensionDefinition.getNormalizedRanges(range);
+    }
+
+    /**
+     *
+     * @return the wrapped dimension definition
+     */
+    public NumericDimensionDefinition getDimensionDefinition() {
+        return dimensionDefinition;
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public double normalize(
+            final double value ) {
+        return dimensionDefinition.normalize(value);
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public double denormalize(
+            final double value ) {
+        return dimensionDefinition.denormalize(value);
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public NumericRange getDenormalizedRange(
+            final BinRange range ) {
+        return dimensionDefinition.getDenormalizedRange(range);
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public int getFixedBinIdSize() {
+        return dimensionDefinition.getFixedBinIdSize();
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public double getRange() {
+        return dimensionDefinition.getRange();
+    }
+
+    /** Forwards to the wrapped dimension definition. */
+    @Override
+    public NumericRange getBounds() {
+        return dimensionDefinition.getBounds();
+    }
+
+    /**
+     * Serializes this object as a 4-byte bits-of-precision value followed by
+     * the binary form of the wrapped dimension definition.
+     */
+    @Override
+    public byte[] toBinary() {
+        final byte[] wrappedBinary = PersistenceUtils.toBinary(dimensionDefinition);
+        return ByteBuffer.allocate(
+                wrappedBinary.length + 4).putInt(
+                bitsOfPrecision).put(
+                wrappedBinary).array();
+    }
+
+    /**
+     * Restores the state written by {@link #toBinary()}: the leading int is
+     * the bits of precision and the remaining bytes are the wrapped
+     * dimension definition.
+     */
+    @Override
+    public void fromBinary(
+            final byte[] bytes ) {
+        final ByteBuffer reader = ByteBuffer.wrap(bytes);
+        // everything after the 4-byte int belongs to the wrapped definition
+        final byte[] wrappedBinary = new byte[bytes.length - 4];
+        bitsOfPrecision = reader.getInt();
+        reader.get(wrappedBinary);
+        dimensionDefinition = PersistenceUtils.fromBinary(
+                wrappedBinary,
+                NumericDimensionDefinition.class);
+    }
+
+    @Override
+    public int hashCode() {
+        final int wrappedHash = (dimensionDefinition == null) ? 0 : dimensionDefinition.hashCode();
+        return (31 * (31 + bitsOfPrecision)) + wrappedHash;
+    }
+
+    @Override
+    public boolean equals(
+            final Object obj ) {
+        if (this == obj) {
+            return true;
+        }
+        if ((obj == null) || (getClass() != obj.getClass())) {
+            return false;
+        }
+        final SFCDimensionDefinition that = (SFCDimensionDefinition) obj;
+        if (bitsOfPrecision != that.bitsOfPrecision) {
+            return false;
+        }
+        return (dimensionDefinition == null) ? (that.dimensionDefinition == null) : dimensionDefinition
+                .equals(that.dimensionDefinition);
+    }
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SFCFactory.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SFCFactory.java
new file mode 100644
index 0000000..b0b6a80
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SFCFactory.java
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.sfc;
+
+import org.locationtech.sfcurve.geowave.index.sfc.hilbert.HilbertSFC;
+import org.locationtech.sfcurve.geowave.index.sfc.xz.XZOrderSFC;
+import org.locationtech.sfcurve.geowave.index.sfc.zorder.ZOrderSFC;
+
+/***
+ * Creates instances of the space filling curve implementations known to this
+ * module.
+ *
+ */
+public class SFCFactory
+{
+    /***
+     * Builds the space filling curve implementation that matches the
+     * requested type, handing it the given dimension definitions.
+     *
+     * @param dimensionDefs
+     *            the dimension definitions passed to the curve's constructor
+     * @param sfc
+     *            the type (Hilbert, ZOrder, XZOrder) of space filling curve
+     *            to create
+     * @return the new curve, or null if the type matches none of the cases
+     */
+    public static SpaceFillingCurve createSpaceFillingCurve(
+            final SFCDimensionDefinition[] dimensionDefs,
+            final SFCType sfc ) {
+        final SpaceFillingCurve curve;
+        switch (sfc) {
+            case HILBERT:
+                curve = new HilbertSFC(
+                        dimensionDefs);
+                break;
+            case ZORDER:
+                curve = new ZOrderSFC(
+                        dimensionDefs);
+                break;
+            case XZORDER:
+                curve = new XZOrderSFC(
+                        dimensionDefs);
+                break;
+            default:
+                curve = null;
+                break;
+        }
+        return curve;
+    }
+
+    /***
+     * The space filling curve types this factory can create
+     *
+     */
+    public static enum SFCType {
+        HILBERT,
+        ZORDER,
+        XZORDER
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SpaceFillingCurve.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SpaceFillingCurve.java
new file mode 100644
index 0000000..6ed815d
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/SpaceFillingCurve.java
@@ -0,0 +1,139 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.sfc;
+
+import java.math.BigInteger;
+
+import org.locationtech.sfcurve.geowave.index.Persistable;
+import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData;
+
+/***
+ * Base interface which defines common methods for any space filling curve.
+ * Hosts standard access methods shared between implementations. A space
+ * filling curve is expected to provide a reversible n-dimensional <->
+ * 1-dimensional mapping.
+ *
+ */
+public interface SpaceFillingCurve extends
+        Persistable
+{
+    /***
+     * Maps an n-dimensional value to a single dimension, i.e. [12,33] -> 0033423
+     *
+     * @param values
+     *            n-dimensional value to be encoded in the SFC. The size of
+     *            value corresponds to the number of dimensions
+     * @return value derived from the SFC transform. The value is left
+     *         padded based on the number of bits in the SFC dimension
+     */
+    public byte[] getId(
+            double[] values );
+
+    /***
+     * Gets n-dimensional ranges from a single dimension, i.e. 0033423 ->
+     * [12,33]
+     *
+     * @param id
+     *            the SFC ID to calculate the ranges of values represented.
+     * @return the valid ranges per dimension of a single SFC ID derived from
+     *         the SFC transform.
+     */
+    public MultiDimensionalNumericData getRanges(
+            byte[] id );
+
+    /***
+     * Gets n-dimensional coordinates from a single dimension
+     *
+     * @param id
+     *            the SFC ID to calculate the coordinates for each dimension.
+     * @return the coordinate in each dimension for the given ID
+     */
+    public long[] getCoordinates(
+            byte[] id );
+
+    /***
+     * Returns a collection of ranges on the 1-d space filling curve that
+     * correspond to the n-dimensional range described in the query parameter.
+     *
+     * This method will decompose the range all the way down to the unit
+     * interval of 1.
+     *
+     * @param query
+     *            describes the n-dimensional query window that will be
+     *            decomposed
+     * @return an object containing the ranges on the SFC that overlap the
+     *         parameters supplied in the query object
+     *
+     */
+    public RangeDecomposition decomposeRangeFully(
+            MultiDimensionalNumericData query );
+
+    /***
+     * Returns a collection of ranges on the 1-d space filling curve that
+     * correspond to the n-dimensional range described in the query parameter.
+     *
+     * This method will roll up the ranges based on the maxRanges parameter.
+     * Ranges will be "connected" based on the minimization of distance between
+     * the end of one range and the start of the next.
+     *
+     * @param query
+     *            describes the n-dimensional query window that will be
+     *            decomposed
+     * @param overInclusiveOnEdge
+     *            NOTE(review): the semantics are not defined in this
+     *            interface; presumably controls whether the returned ranges
+     *            may over-include cells on the edge of the query window -
+     *            confirm against the implementations
+     * @param maxRanges
+     *            the limit used when rolling up the ranges, as described
+     *            above
+     * @return an object containing the ranges on the SFC that overlap the
+     *         parameters supplied in the query object
+     *
+     */
+    public RangeDecomposition decomposeRange(
+            MultiDimensionalNumericData query,
+            boolean overInclusiveOnEdge,
+            int maxRanges );
+
+    /***
+     * Determines the estimated number of rows a multi-dimensional range will
+     * span within this space filling curve
+     *
+     * @param data
+     *            describes the n-dimensional range to estimate the row count
+     *            for
+     * @return an estimate of the row count for the ranges given within this
+     *         space filling curve
+     *
+     */
+    public BigInteger getEstimatedIdCount(
+            MultiDimensionalNumericData data );
+
+    /***
+     * Determines the coordinates within this space filling curve for a
+     * dimension given a range
+     *
+     * @param minValue
+     *            describes the minimum of a range in a single dimension used to
+     *            determine the SFC coordinate range
+     * @param maxValue
+     *            describes the maximum of a range in a single dimension used to
+     *            determine the SFC coordinate range
+     * @param dimension
+     *            the dimension
+     * @return the range of coordinates as an array where the first element is
+     *         the min and the second element is the max
+     *
+     */
+    public long[] normalizeRange(
+            double minValue,
+            double maxValue,
+            int dimension );
+
+    /***
+     * Get the range/size of a single insertion ID for each dimension
+     *
+     * @return the range of a single insertion ID for each dimension
+     */
+    public double[] getInsertionIdRangePerDimension();
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/binned/BinnedSFCUtils.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/binned/BinnedSFCUtils.java
new file mode 100644
index 0000000..bbf02f4
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/binned/BinnedSFCUtils.java
@@ -0,0 +1,233 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.sfc.binned;
+
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.locationtech.sfcurve.geowave.index.ByteArrayId;
+import org.locationtech.sfcurve.geowave.index.ByteArrayRange;
+import org.locationtech.sfcurve.geowave.index.ByteArrayUtils;
+import org.locationtech.sfcurve.geowave.index.Coordinate;
+import org.locationtech.sfcurve.geowave.index.CoordinateRange;
+import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges;
+import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition;
+import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange;
+import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition;
+import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve;
+import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset;
+import org.locationtech.sfcurve.geowave.index.sfc.data.BinnedNumericDataset;
+import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange;
+
+/**
+ * Static helpers for building and taking apart row IDs that consist of a
+ * one-byte tier, the fixed-size bin IDs of the binned dimensions, and a space
+ * filling curve ID, in that order.
+ */
+public class BinnedSFCUtils
+{
+
+    /**
+     * Decomposes every binned query on the given curve and prefixes each
+     * resulting range with the tier byte and the query's bin ID.
+     *
+     * @param binnedQueries
+     *            the queries, one per bin combination
+     * @param sfc
+     *            the curve used to decompose each query
+     * @param maxRanges
+     *            the overall range budget; when it is greater than 1 and
+     *            there is more than one binned query, it is divided evenly
+     *            (rounded up) between the queries
+     * @param tier
+     *            the tier byte placed at the start of every range boundary
+     * @return the prefixed ranges of all binned queries
+     */
+    public static List<ByteArrayRange> getQueryRanges(
+            final BinnedNumericDataset[] binnedQueries,
+            final SpaceFillingCurve sfc,
+            final int maxRanges,
+            final byte tier ) {
+        final List<ByteArrayRange> queryRanges = new ArrayList<ByteArrayRange>();
+
+        int maxRangeDecompositionPerBin = maxRanges;
+        if ((maxRanges > 1) && (binnedQueries.length > 1)) {
+            maxRangeDecompositionPerBin = (int) Math.ceil((double) maxRanges / (double) binnedQueries.length);
+        }
+        for (final BinnedNumericDataset binnedQuery : binnedQueries) {
+            final RangeDecomposition rangeDecomp = sfc.decomposeRange(
+                    binnedQuery,
+                    true,
+                    maxRangeDecompositionPerBin);
+            final byte[] tierAndBinId = ByteArrayUtils.combineArrays(
+                    new byte[] {
+                        tier
+                    // we're assuming tiers only go to 127 (the max byte
+                    // value)
+                    },
+                    binnedQuery.getBinId());
+            for (final ByteArrayRange range : rangeDecomp.getRanges()) {
+                queryRanges.add(new ByteArrayRange(
+                        new ByteArrayId(
+                                ByteArrayUtils.combineArrays(
+                                        tierAndBinId,
+                                        range.getStart().getBytes())),
+                        new ByteArrayId(
+                                ByteArrayUtils.combineArrays(
+                                        tierAndBinId,
+                                        range.getEnd().getBytes()))));
+            }
+        }
+        return queryRanges;
+    }
+
+    /**
+     * Converts the bin ranges of every dimension into coordinate ranges on
+     * the given curve.
+     *
+     * @param binRangesPerDimension
+     *            the bin ranges, indexed by dimension and then by range
+     * @param sfc
+     *            the curve used to normalize each range into coordinates
+     * @param numDimensions
+     *            the number of dimensions to convert
+     * @param tier
+     *            the tier byte attached to the result
+     * @return the coordinate ranges of all dimensions together with the tier
+     */
+    public static MultiDimensionalCoordinateRanges getCoordinateRanges(
+            final BinRange[][] binRangesPerDimension,
+            final SpaceFillingCurve sfc,
+            final int numDimensions,
+            final byte tier ) {
+        final CoordinateRange[][] coordinateRangesPerDimension = new CoordinateRange[numDimensions][];
+        for (int d = 0; d < coordinateRangesPerDimension.length; d++) {
+            coordinateRangesPerDimension[d] = new CoordinateRange[binRangesPerDimension[d].length];
+            for (int i = 0; i < binRangesPerDimension[d].length; i++) {
+                final long[] range = sfc.normalizeRange(
+                        binRangesPerDimension[d][i].getNormalizedMin(),
+                        binRangesPerDimension[d][i].getNormalizedMax(),
+                        d);
+                coordinateRangesPerDimension[d][i] = new CoordinateRange(
+                        range[0],
+                        range[1],
+                        binRangesPerDimension[d][i].getBinId());
+            }
+        }
+        return new MultiDimensionalCoordinateRanges(
+                new byte[] {
+                    tier
+                },
+                coordinateRangesPerDimension);
+    }
+
+    /**
+     * Returns the single row ID for a binned dataset, if it maps to exactly
+     * one.
+     *
+     * @param rowCount
+     *            the number of rows the dataset is expected to span; anything
+     *            other than one yields null
+     * @param multiDimensionalId
+     *            the leading byte of the row ID
+     * @param index
+     *            the binned dataset
+     * @param sfc
+     *            the curve used to compute the ID
+     * @return the row ID (leading byte, bin ID, curve ID), or null when
+     *         rowCount is not one or when the minimum and maximum values map
+     *         to different curve IDs
+     */
+    public static ByteArrayId getSingleBinnedRowId(
+            final BigInteger rowCount,
+            final byte multiDimensionalId,
+            final BinnedNumericDataset index,
+            final SpaceFillingCurve sfc ) {
+        if (rowCount.equals(BigInteger.ONE)) {
+            final byte[] tierAndBinId = ByteArrayUtils.combineArrays(
+                    new byte[] {
+                        multiDimensionalId
+                    },
+                    index.getBinId());
+            final double[] minValues = index.getMinValuesPerDimension();
+            final double[] maxValues = index.getMaxValuesPerDimension();
+            byte[] singleId = null;
+            if (Arrays.equals(
+                    maxValues,
+                    minValues)) {
+                singleId = sfc.getId(minValues);
+            }
+            else {
+                // the values differ, but they may still fall in the same cell
+                final byte[] minId = sfc.getId(minValues);
+                final byte[] maxId = sfc.getId(maxValues);
+
+                if (Arrays.equals(
+                        minId,
+                        maxId)) {
+                    singleId = minId;
+                }
+            }
+            if (singleId != null) {
+                return new ByteArrayId(
+                        ByteArrayUtils.combineArrays(
+                                tierAndBinId,
+                                singleId));
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Splits a row ID into its parts and returns one coordinate per
+     * dimension.
+     *
+     * @param rowId
+     *            the row ID (tier byte, bin IDs, curve ID)
+     * @param baseDefinitions
+     *            the dimension definitions, used to find the size of each bin
+     *            ID
+     * @param sfc
+     *            the curve used to compute the coordinates
+     * @return the coordinate of each dimension, with its bin ID or null for a
+     *         dimension that has no bin
+     */
+    public static Coordinate[] getCoordinatesForId(
+            final byte[] rowId,
+            final NumericDimensionDefinition[] baseDefinitions,
+            final SpaceFillingCurve sfc ) {
+        final SFCIdAndBinInfo sfcIdAndBinInfo = getSFCIdAndBinInfo(
+                rowId,
+                baseDefinitions);
+        final long[] coordinateValues = sfc.getCoordinates(sfcIdAndBinInfo.sfcId);
+        final Coordinate[] retVal = new Coordinate[coordinateValues.length];
+        for (int i = 0; i < coordinateValues.length; i++) {
+            final byte[] bin = sfcIdAndBinInfo.binIds.get(i);
+            retVal[i] = new Coordinate(
+                    coordinateValues[i],
+                    bin);
+        }
+        return retVal;
+    }
+
+    /**
+     * Splits a row ID into its parts and returns the range it covers in each
+     * dimension, denormalizing the binned dimensions.
+     *
+     * @param rowId
+     *            the row ID (tier byte, bin IDs, curve ID)
+     * @param baseDefinitions
+     *            the dimension definitions, used to find the size of each bin
+     *            ID and to denormalize the binned ranges
+     * @param sfc
+     *            the curve used to compute the ranges
+     * @return the range covered by the row ID in each dimension
+     */
+    public static MultiDimensionalNumericData getRangeForId(
+            final byte[] rowId,
+            final NumericDimensionDefinition[] baseDefinitions,
+            final SpaceFillingCurve sfc ) {
+        final SFCIdAndBinInfo sfcIdAndBinInfo = getSFCIdAndBinInfo(
+                rowId,
+                baseDefinitions);
+        final MultiDimensionalNumericData numericData = sfc.getRanges(sfcIdAndBinInfo.sfcId);
+        // now we need to unapply the bins to the data, denormalizing the
+        // ranges to the native bounds
+        if (sfcIdAndBinInfo.rowIdOffset > 1) {
+            // an offset beyond the tier byte means at least one bin ID exists
+            final NumericData[] data = numericData.getDataPerDimension();
+            for (final Entry<Integer, byte[]> entry : sfcIdAndBinInfo.binIds.entrySet()) {
+                final int dimension = entry.getKey();
+                final NumericRange range = baseDefinitions[dimension].getDenormalizedRange(new BinRange(
+                        entry.getValue(),
+                        data[dimension].getMin(),
+                        data[dimension].getMax(),
+                        false));
+                data[dimension] = range;
+            }
+            return new BasicNumericDataset(
+                    data);
+        }
+        return numericData;
+    }
+
+    /**
+     * Splits a row ID into the bin ID of each binned dimension and the
+     * trailing curve ID.
+     *
+     * @param rowId
+     *            the row ID (tier byte, bin IDs, curve ID)
+     * @param baseDefinitions
+     *            the dimension definitions; a dimension contributes a bin ID
+     *            only when its fixed bin ID size is greater than zero
+     * @return the curve ID, the bin IDs keyed by dimension index, and the
+     *         offset at which the curve ID starts
+     */
+    private static SFCIdAndBinInfo getSFCIdAndBinInfo(
+            final byte[] rowId,
+            final NumericDimensionDefinition[] baseDefinitions ) {
+
+        final Map<Integer, byte[]> binIds = new HashMap<Integer, byte[]>();
+        // one for the tier
+        int rowIdOffset = 1;
+        for (int dimensionIdx = 0; dimensionIdx < baseDefinitions.length; dimensionIdx++) {
+            final int binSize = baseDefinitions[dimensionIdx].getFixedBinIdSize();
+            if (binSize > 0) {
+                binIds.put(
+                        dimensionIdx,
+                        Arrays.copyOfRange(
+                                rowId,
+                                rowIdOffset,
+                                rowIdOffset + binSize));
+                rowIdOffset += binSize;
+            }
+        }
+        final byte[] sfcId = Arrays.copyOfRange(
+                rowId,
+                rowIdOffset,
+                rowId.length);
+        return new SFCIdAndBinInfo(
+                sfcId,
+                binIds,
+                rowIdOffset);
+    }
+
+    /**
+     * The parts of a row ID: the curve ID, the bin IDs keyed by dimension
+     * index, and the offset of the curve ID within the row ID.
+     */
+    private static class SFCIdAndBinInfo
+    {
+        private final byte[] sfcId;
+        private final Map<Integer, byte[]> binIds;
+        private final int rowIdOffset;
+
+        public SFCIdAndBinInfo(
+                final byte[] sfcId,
+                final Map<Integer, byte[]> binIds,
+                final int rowIdOffset ) {
+            super();
+            this.sfcId = sfcId;
+            this.binIds = binIds;
+            this.rowIdOffset = rowIdOffset;
+        }
+    }
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/BasicNumericDataset.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/BasicNumericDataset.java
new file mode 100644
index 0000000..99cb62d
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/BasicNumericDataset.java
@@ -0,0 +1,172 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE 
file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; + +/** + * The Basic Index Result class creates an object associated with a generic + * query. This class can be used when the dimensions and/or axis are generic. + * + */ +public class BasicNumericDataset implements + MultiDimensionalNumericData +{ + + private NumericData[] dataPerDimension; + + /** + * Open ended/unconstrained + */ + public BasicNumericDataset() { + dataPerDimension = new NumericData[0]; + } + + /** + * Constructor used to create a new Basic Numeric Dataset object. 
+ * + * @param dataPerDimension + * an array of numeric data objects + */ + public BasicNumericDataset( + final NumericData[] dataPerDimension ) { + this.dataPerDimension = dataPerDimension; + } + + /** + * @return all of the maximum values (for each dimension) + */ + @Override + public double[] getMaxValuesPerDimension() { + final NumericData[] ranges = getDataPerDimension(); + final double[] maxPerDimension = new double[ranges.length]; + for (int d = 0; d < ranges.length; d++) { + maxPerDimension[d] = ranges[d].getMax(); + } + return maxPerDimension; + } + + /** + * @return all of the minimum values (for each dimension) + */ + @Override + public double[] getMinValuesPerDimension() { + final NumericData[] ranges = getDataPerDimension(); + final double[] minPerDimension = new double[ranges.length]; + for (int d = 0; d < ranges.length; d++) { + minPerDimension[d] = ranges[d].getMin(); + } + return minPerDimension; + } + + /** + * @return all of the centroid values (for each dimension) + */ + @Override + public double[] getCentroidPerDimension() { + final NumericData[] ranges = getDataPerDimension(); + final double[] centroid = new double[ranges.length]; + for (int d = 0; d < ranges.length; d++) { + centroid[d] = ranges[d].getCentroid(); + } + return centroid; + } + + /** + * + * @return an array of NumericData objects + */ + @Override + public NumericData[] getDataPerDimension() { + return dataPerDimension; + } + + /** + * @return the number of dimensions associated with this data set + */ + @Override + public int getDimensionCount() { + return dataPerDimension.length; + } + + @Override + public boolean isEmpty() { + return (dataPerDimension == null) || (dataPerDimension.length == 0); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(dataPerDimension); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == 
null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final BasicNumericDataset other = (BasicNumericDataset) obj; + if (!Arrays.equals( + dataPerDimension, + other.dataPerDimension)) { + return false; + } + return true; + } + + @Override + public byte[] toBinary() { + int totalBytes = 4; + final List serializedData = new ArrayList(); + for (final NumericData data : dataPerDimension) { + final byte[] binary = PersistenceUtils.toBinary(data); + totalBytes += (binary.length + 4); + serializedData.add(binary); + } + final ByteBuffer buf = ByteBuffer.allocate(totalBytes); + buf.putInt(dataPerDimension.length); + for (final byte[] binary : serializedData) { + buf.putInt(binary.length); + buf.put(binary); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int numDimensions = buf.getInt(); + dataPerDimension = new NumericData[numDimensions]; + for (int d = 0; d < numDimensions; d++) { + final byte[] binary = new byte[buf.getInt()]; + buf.get(binary); + dataPerDimension[d] = PersistenceUtils.fromBinary( + binary, + NumericData.class); + } + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/BinnedNumericDataset.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/BinnedNumericDataset.java new file mode 100644 index 0000000..14e19df --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/BinnedNumericDataset.java @@ -0,0 +1,213 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import java.nio.ByteBuffer; + +import org.locationtech.sfcurve.geowave.index.ByteArrayUtils; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; + +/** + * The Binned Numeric Dataset class creates an object that associates a + * multi-dimensional index range to a particular bin ID. + * + */ +public class BinnedNumericDataset implements + MultiDimensionalNumericData +{ + private byte[] binId; + private MultiDimensionalNumericData indexRanges; + private boolean fullExtent; + + protected BinnedNumericDataset() {} + + /** + * + * @param binId + * a unique ID associated with the BinnedQuery object + * @param indexRanges + * multi-dimensional range data + */ + public BinnedNumericDataset( + final byte[] binId, + final MultiDimensionalNumericData indexRanges, + final boolean fullExtent ) { + this.binId = binId; + this.indexRanges = indexRanges; + this.fullExtent = fullExtent; + } + + public boolean isFullExtent() { + return fullExtent; + } + + /** + * @return an array of NumericData objects associated with this object. 
+ */ + @Override + public NumericData[] getDataPerDimension() { + return indexRanges.getDataPerDimension(); + } + + /** + * @return an array of max values associated with each dimension + */ + @Override + public double[] getMaxValuesPerDimension() { + return indexRanges.getMaxValuesPerDimension(); + } + + /** + * @return an array of min values associated with each dimension + */ + @Override + public double[] getMinValuesPerDimension() { + return indexRanges.getMinValuesPerDimension(); + } + + /** + * @return an array of centroid values associated with each dimension + */ + @Override + public double[] getCentroidPerDimension() { + return indexRanges.getCentroidPerDimension(); + } + + /** + * @return the number of total dimensions + */ + @Override + public int getDimensionCount() { + return indexRanges.getDimensionCount(); + } + + /** + * @return a unique ID associated with this object + */ + public byte[] getBinId() { + return binId; + } + + /** + * This method is responsible for translating a query into appropriate + * normalized and binned (if necessary) queries that can be used by the + * underlying index implementation. For example, for unbounded dimensions + * such as time, an incoming query of July 2012 to July 2013 may get + * translated into 2 binned queries representing the 2012 portion of the + * query and the 2013 portion, each normalized to millis from the beginning + * of the year. 
+ * + * @param numericData + * the incoming query into the index implementation, to be + * translated into normalized, binned queries + * @param dimensionDefinitions + * the definition for the dimensions + * @return normalized indexes + */ + public static BinnedNumericDataset[] applyBins( + final MultiDimensionalNumericData numericData, + final NumericDimensionDefinition[] dimensionDefinitions ) { + if (dimensionDefinitions.length == 0) { + return new BinnedNumericDataset[0]; + } + final BinRange[][] binRangesPerDimension = getBinnedRangesPerDimension( + numericData, + dimensionDefinitions); + int numBinnedQueries = 1; + for (int d = 0; d < dimensionDefinitions.length; d++) { + numBinnedQueries *= binRangesPerDimension[d].length; + } + // now we need to combine all permutations of bin ranges into + // BinnedQuery objects + final BinnedNumericDataset[] binnedQueries = new BinnedNumericDataset[numBinnedQueries]; + for (int d = 0; d < dimensionDefinitions.length; d++) { + for (int b = 0; b < binRangesPerDimension[d].length; b++) { + for (int i = b; i < numBinnedQueries; i += binRangesPerDimension[d].length) { + final NumericData[] rangePerDimension; + if (binnedQueries[i] == null) { + rangePerDimension = new NumericRange[dimensionDefinitions.length]; + binnedQueries[i] = new BinnedNumericDataset( + binRangesPerDimension[d][b].getBinId(), + new BasicNumericDataset( + rangePerDimension), + binRangesPerDimension[d][b].isFullExtent()); + } + else { + // because binned queries were intended to be immutable, + // re-instantiate the object + rangePerDimension = binnedQueries[i].getDataPerDimension(); + + final byte[] combinedBinId = ByteArrayUtils.combineArrays( + binnedQueries[i].getBinId(), + binRangesPerDimension[d][b].getBinId()); + binnedQueries[i] = new BinnedNumericDataset( + combinedBinId, + new BasicNumericDataset( + rangePerDimension), + binnedQueries[i].fullExtent |= binRangesPerDimension[d][b].isFullExtent()); + } + + rangePerDimension[d] = new NumericRange( + 
binRangesPerDimension[d][b].getNormalizedMin(), + binRangesPerDimension[d][b].getNormalizedMax()); + } + } + } + return binnedQueries; + } + + public static BinRange[][] getBinnedRangesPerDimension( + final MultiDimensionalNumericData numericData, + final NumericDimensionDefinition[] dimensionDefinitions ) { + if (dimensionDefinitions.length == 0) { + return new BinRange[0][]; + } + final BinRange[][] binRangesPerDimension = new BinRange[dimensionDefinitions.length][]; + for (int d = 0; d < dimensionDefinitions.length; d++) { + binRangesPerDimension[d] = dimensionDefinitions[d] + .getNormalizedRanges(numericData.getDataPerDimension()[d]); + } + return binRangesPerDimension; + } + + @Override + public boolean isEmpty() { + return indexRanges.isEmpty(); + } + + @Override + public byte[] toBinary() { + final byte[] indexRangesBinary = PersistenceUtils.toBinary(indexRanges); + final ByteBuffer buf = ByteBuffer.allocate(5 + indexRangesBinary.length + binId.length); + buf.put((byte) (fullExtent ? 
1 : 0)); + buf.putInt(binId.length); + buf.put(binId); + buf.put(indexRangesBinary); + return null; + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + fullExtent = (buf.get() == 1); + binId = new byte[buf.getInt()]; + buf.get(binId); + + final byte[] indexRangesBinary = new byte[bytes.length - 5 - binId.length]; + buf.get(indexRangesBinary); + indexRanges = PersistenceUtils.fromBinary( + indexRangesBinary, + MultiDimensionalNumericData.class); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/MultiDimensionalNumericData.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/MultiDimensionalNumericData.java new file mode 100644 index 0000000..69c7121 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/MultiDimensionalNumericData.java @@ -0,0 +1,35 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import org.locationtech.sfcurve.geowave.index.Persistable; +import org.locationtech.sfcurve.geowave.index.QueryConstraints; + +/** + * Interface which defines the methods associated with a multi-dimensional + * numeric data range. 
+ * + */ +public interface MultiDimensionalNumericData extends + QueryConstraints, + Persistable +{ + /** + * @return an array of object QueryRange + */ + public NumericData[] getDataPerDimension(); + + public double[] getMaxValuesPerDimension(); + + public double[] getMinValuesPerDimension(); + + public double[] getCentroidPerDimension(); +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericData.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericData.java new file mode 100644 index 0000000..81be07a --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericData.java @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import org.locationtech.sfcurve.geowave.index.Persistable; + +/** + * Interface used to define numeric data associated with a space filling curve. 
+ * + */ +public interface NumericData extends + java.io.Serializable, + Persistable +{ + public double getMin(); + + public double getMax(); + + public double getCentroid(); + + public boolean isRange(); +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericRange.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericRange.java new file mode 100644 index 0000000..13981be --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericRange.java @@ -0,0 +1,131 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import java.nio.ByteBuffer; + +/** + * Concrete implementation defining a numeric range associated with a space + * filling curve. 
+ * + */ +public class NumericRange implements + NumericData +{ + /** + * + */ + private static final long serialVersionUID = 1L; + private double min; + private double max; + + protected NumericRange() {} + + /** + * Constructor used to create a IndexRange object + * + * @param min + * the minimum bounds of a unique index range + * @param max + * the maximum bounds of a unique index range + */ + public NumericRange( + final double min, + final double max ) { + this.min = min; + this.max = max; + } + + /** + * + * @return min the minimum bounds of a index range object + */ + @Override + public double getMin() { + return min; + } + + /** + * + * @return max the maximum bounds of a index range object + */ + @Override + public double getMax() { + return max; + } + + /** + * + * @return centroid the center of a unique index range object + */ + @Override + public double getCentroid() { + return (min + max) / 2; + } + + /** + * Flag to determine if the object is a range + */ + @Override + public boolean isRange() { + return true; + } + + @Override + public String toString() { + return "NumericRange [min=" + min + ", max=" + max + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + long temp; + temp = Double.doubleToLongBits(max); + result = (prime * result) + (int) (temp ^ (temp >>> 32)); + temp = Double.doubleToLongBits(min); + result = (prime * result) + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + // changing this check will fail some unit tests. 
+ if (!NumericRange.class.isAssignableFrom(obj.getClass())) { + return false; + } + final NumericRange other = (NumericRange) obj; + return (Math.abs(max - other.max) < NumericValue.EPSILON) && (Math.abs(min - other.min) < NumericValue.EPSILON); + } + + @Override + public byte[] toBinary() { + final ByteBuffer buf = ByteBuffer.allocate(16); + buf.putDouble(min); + buf.putDouble(max); + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + min = buf.getDouble(); + max = buf.getDouble(); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericValue.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericValue.java new file mode 100644 index 0000000..ee0de68 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericValue.java @@ -0,0 +1,123 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import java.nio.ByteBuffer; + +/** + * Concrete implementation defining a single numeric value associated with a + * space filling curve. 
+ * + */
public class NumericValue implements
		NumericData
{
	private static final long serialVersionUID = 1L;

	/**
	 * Tolerance used by {@link #equals(Object)} (and by NumericRange) when
	 * comparing doubles.
	 */
	protected static final double EPSILON = 1E-10;

	private double value;

	/**
	 * Leaves the value at its default; {@link #fromBinary(byte[])} sets it.
	 */
	protected NumericValue() {}

	/**
	 * Constructor used to create a new NumericValue object
	 *
	 * @param value
	 *            the particular numeric value
	 */
	public NumericValue(
			final double value ) {
		this.value = value;
	}

	/**
	 * @return the value itself (a single value is its own minimum)
	 */
	@Override
	public double getMin() {
		return value;
	}

	/**
	 * @return the value itself (a single value is its own maximum)
	 */
	@Override
	public double getMax() {
		return value;
	}

	/**
	 * @return the value itself (a single value is its own centroid)
	 */
	@Override
	public double getCentroid() {
		return value;
	}

	/**
	 * @return always false: this implementation represents a single value
	 */
	@Override
	public boolean isRange() {
		return false;
	}

	@Override
	public String toString() {
		// label with this class's name; it previously read "NumericRange"
		return "NumericValue [value=" + value + "]";
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		long temp;
		temp = Double.doubleToLongBits(value);
		result = (prime * result) + (int) (temp ^ (temp >>> 32));
		return result;
	}

	/**
	 * Two values are equal when they are of exactly the same class and
	 * differ by less than {@link #EPSILON}.
	 */
	@Override
	public boolean equals(
			final Object obj ) {
		if (this == obj) {
			return true;
		}
		if (obj == null) {
			return false;
		}
		if (getClass() != obj.getClass()) {
			return false;
		}
		final NumericValue other = (NumericValue) obj;
		return (Math.abs(value - other.value) < EPSILON);
	}

	/**
	 * @return 8 bytes holding the value
	 */
	@Override
	public byte[] toBinary() {
		final ByteBuffer buf = ByteBuffer.allocate(8);
		buf.putDouble(value);
		return buf.array();
	}

	/**
	 * Reads the value written by {@link #toBinary()}.
	 */
	@Override
	public void fromBinary(
			final byte[] bytes ) {
		final ByteBuffer buf = ByteBuffer.wrap(bytes);
		value = buf.getDouble();
	}
}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/HilbertSFC.java 
b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/HilbertSFC.java new file mode 100644 index 0000000..8070eef --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/HilbertSFC.java @@ -0,0 +1,398 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.hilbert; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import com.google.uzaygezen.core.CompactHilbertCurve; +import com.google.uzaygezen.core.MultiDimensionalSpec; + +import org.locationtech.sfcurve.geowave.index.ByteArrayUtils; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData;

/***
 * Implementation of a Compact Hilbert space filling curve
 *
 */
public class HilbertSFC implements
		SpaceFillingCurve
{
	/**
	 * Cache key made of everything decomposeRange passes on for a query:
	 * per-dimension bounds, the edge flag and the range limit.
	 */
	private static class QueryCacheKey
	{
		private final double[] minsPerDimension;
		private final double[] maxesPerDimension;
		private final boolean overInclusiveOnEdge;
		private final int maxFilteredIndexedRanges;

		public QueryCacheKey(
				final double[] minsPerDimension,
				final double[] maxesPerDimension,
				final boolean overInclusiveOnEdge,
				final int maxFilteredIndexedRanges ) {
			this.minsPerDimension = minsPerDimension;
			this.maxesPerDimension = maxesPerDimension;
			this.overInclusiveOnEdge = overInclusiveOnEdge;
			this.maxFilteredIndexedRanges = maxFilteredIndexedRanges;
		}

		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = (prime * result) + maxFilteredIndexedRanges;
			result = (prime * result) + Arrays.hashCode(maxesPerDimension);
			result = (prime * result) + Arrays.hashCode(minsPerDimension);
			result = (prime * result) + (overInclusiveOnEdge ? 1231 : 1237);
			return result;
		}

		@Override
		public boolean equals(
				final Object obj ) {
			if (this == obj) {
				return true;
			}
			if (obj == null) {
				return false;
			}
			if (getClass() != obj.getClass()) {
				return false;
			}
			final QueryCacheKey other = (QueryCacheKey) obj;
			if (maxFilteredIndexedRanges != other.maxFilteredIndexedRanges) {
				return false;
			}
			if (!Arrays.equals(
					maxesPerDimension,
					other.maxesPerDimension)) {
				return false;
			}
			if (!Arrays.equals(
					minsPerDimension,
					other.minsPerDimension)) {
				return false;
			}
			if (overInclusiveOnEdge != other.overInclusiveOnEdge) {
				return false;
			}
			return true;
		}
	}

	private static final int MAX_CACHED_QUERIES = 500;

	// Access-ordered map that drops its eldest entry once it holds more than
	// MAX_CACHED_QUERIES decompositions.
	// NOTE(review): LinkedHashMap is not synchronized and, in access order,
	// even get() reorders entries - confirm instances are not shared across
	// threads.
	private final Map<QueryCacheKey, RangeDecomposition> queryDecompositionCache = new LinkedHashMap<QueryCacheKey, RangeDecomposition>(
			MAX_CACHED_QUERIES + 1,
			.75F,
			true) {
		private static final long serialVersionUID = 1L;

		@Override
		public boolean removeEldestEntry(
				final Map.Entry<QueryCacheKey, RangeDecomposition> eldest ) {
			return size() > MAX_CACHED_QUERIES;
		}
	};
	protected CompactHilbertCurve compactHilbertCurve;
	protected SFCDimensionDefinition[] dimensionDefinitions;
	protected int totalPrecision;

	/** Tunables **/
	private final static boolean REMOVE_VACUUM = true;
	protected HilbertSFCOperations getIdOperations;
	protected HilbertSFCOperations decomposeQueryOperations;

	protected HilbertSFC() {}

	/***
	 * Use the SFCFactory.createSpaceFillingCurve method - don't call this
	 * constructor directly
	 *
	 */
	public HilbertSFC(
			final SFCDimensionDefinition[] dimensionDefs ) {
		init(dimensionDefs);
	}

	/**
	 * Builds the compact Hilbert curve from the bits of precision of every
	 * dimension, records the summed precision and selects the operations
	 * implementations.
	 *
	 * @param dimensionDefs
	 *            the dimensions of the curve
	 */
	protected void init(
			final SFCDimensionDefinition[] dimensionDefs ) {

		final List<Integer> bitsPerDimension = new ArrayList<Integer>();
		totalPrecision = 0;
		for (final SFCDimensionDefinition dimension : dimensionDefs) {
			bitsPerDimension.add(dimension.getBitsOfPrecision());
			totalPrecision += dimension.getBitsOfPrecision();
		}

		compactHilbertCurve = new CompactHilbertCurve(
				new MultiDimensionalSpec(
						bitsPerDimension));

		dimensionDefinitions = dimensionDefs;
		setOptimalOperations(
				totalPrecision,
				bitsPerDimension,
				dimensionDefs);
	}

	/**
	 * Chooses between the primitive and the unbounded operations: id
	 * generation uses the primitive ones unless some dimension has more
	 * than 48 bits, query decomposition uses them when the total precision
	 * is at most 62 bits. One instance serves both roles when both choices
	 * agree.
	 *
	 * @param totalPrecision
	 *            the summed bits of precision over all dimensions
	 * @param bitsPerDimension
	 *            the bits of precision of every dimension
	 * @param dimensionDefs
	 *            the dimensions used to initialize the operations
	 */
	protected void setOptimalOperations(
			final int totalPrecision,
			final List<Integer> bitsPerDimension,
			final SFCDimensionDefinition[] dimensionDefs ) {
		boolean primitiveForGetId = true;
		final boolean primitiveForQueryDecomposition = totalPrecision <= 62L;
		for (final Integer bits : bitsPerDimension) {
			if (bits > 48) {
				// if in any one dimension, more than 48 bits are used, we need
				// to use bigdecimals
				primitiveForGetId = false;
				break;
			}
		}
		if (primitiveForGetId) {
			final PrimitiveHilbertSFCOperations primitiveOps = new PrimitiveHilbertSFCOperations();
			primitiveOps.init(dimensionDefs);
			getIdOperations = primitiveOps;
			if (primitiveForQueryDecomposition) {
				decomposeQueryOperations = primitiveOps;
			}
			else {
				final UnboundedHilbertSFCOperations unboundedOps = new UnboundedHilbertSFCOperations();
				unboundedOps.init(dimensionDefs);
				decomposeQueryOperations = unboundedOps;
			}
		}
		else {
			final UnboundedHilbertSFCOperations unboundedOps = new UnboundedHilbertSFCOperations();
			unboundedOps.init(dimensionDefs);
			getIdOperations = unboundedOps;
			if (primitiveForQueryDecomposition) {
				final PrimitiveHilbertSFCOperations primitiveOps = new PrimitiveHilbertSFCOperations();
				primitiveOps.init(dimensionDefs);
				decomposeQueryOperations = primitiveOps;
			}
			else {
				decomposeQueryOperations = unboundedOps;
			}
		}
	}

	/***
	 * {@inheritDoc}
	 */
	@Override
	public byte[] getId(
			final double[] values ) {
		return getIdOperations.convertToHilbert(
				values,
				compactHilbertCurve,
				dimensionDefinitions);
	}

	/***
	 * {@inheritDoc}
	 */
	@Override
	public RangeDecomposition decomposeRangeFully(
			final MultiDimensionalNumericData query ) {
		// -1 lifts the limit on the number of ranges (see decomposeRange)
		return decomposeRange(
				query,
				true,
				-1);
	}

	// TODO: improve this method - min/max not being calculated optimally
	/***
	 * {@inheritDoc}
	 */
	@Override
	public RangeDecomposition decomposeRange(
			final MultiDimensionalNumericData query,
			final boolean overInclusiveOnEdge,
			int maxFilteredIndexedRanges ) {
		if (maxFilteredIndexedRanges == -1) {
			maxFilteredIndexedRanges = Integer.MAX_VALUE;
		}
		final QueryCacheKey key = new QueryCacheKey(
				query.getMinValuesPerDimension(),
				query.getMaxValuesPerDimension(),
				overInclusiveOnEdge,
				maxFilteredIndexedRanges);
		RangeDecomposition rangeDecomp = queryDecompositionCache.get(key);
		if (rangeDecomp == null) {
			// not cached yet: decompose and remember the result
			rangeDecomp = decomposeQueryOperations.decomposeRange(
					query.getDataPerDimension(),
					compactHilbertCurve,
					dimensionDefinitions,
					totalPrecision,
					maxFilteredIndexedRanges,
					REMOVE_VACUUM,
					overInclusiveOnEdge);
			queryDecompositionCache.put(
					key,
					rangeDecomp);
		}
		return rangeDecomp;
	}

+ protected static byte[] fitExpectedByteCount( + final int expectedByteCount, + final byte[] bytes ) { + final int leftPadding = expectedByteCount - bytes.length; + if (leftPadding > 0) { + final byte[] zeroes = new byte[leftPadding]; + Arrays.fill( + zeroes, + (byte) 0); + return ByteArrayUtils.combineArrays( + zeroes, + bytes); + } + else if (leftPadding < 0) { + final byte[] truncatedBytes = new byte[expectedByteCount]; + + if 
(bytes[0] != 0) { + Arrays.fill( + truncatedBytes, + (byte) 255); + } + else { + System.arraycopy( + bytes, + -leftPadding, + truncatedBytes, + 0, + expectedByteCount); + } + return truncatedBytes; + } + return bytes; + } + + @Override + public byte[] toBinary() { + final List dimensionDefBinaries = new ArrayList( + dimensionDefinitions.length); + int bufferLength = 4; + for (final SFCDimensionDefinition sfcDimension : dimensionDefinitions) { + final byte[] sfcDimensionBinary = PersistenceUtils.toBinary(sfcDimension); + bufferLength += (sfcDimensionBinary.length + 4); + dimensionDefBinaries.add(sfcDimensionBinary); + } + final ByteBuffer buf = ByteBuffer.allocate(bufferLength); + buf.putInt(dimensionDefinitions.length); + for (final byte[] dimensionDefBinary : dimensionDefBinaries) { + buf.putInt(dimensionDefBinary.length); + buf.put(dimensionDefBinary); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int numDimensions = buf.getInt(); + dimensionDefinitions = new SFCDimensionDefinition[numDimensions]; + for (int i = 0; i < numDimensions; i++) { + final byte[] dim = new byte[buf.getInt()]; + buf.get(dim); + dimensionDefinitions[i] = PersistenceUtils.fromBinary( + dim, + SFCDimensionDefinition.class); + } + init(dimensionDefinitions); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + final String className = getClass().getName(); + result = (prime * result) + ((className == null) ? 
0 : className.hashCode()); + result = (prime * result) + Arrays.hashCode(dimensionDefinitions); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final HilbertSFC other = (HilbertSFC) obj; + + if (!Arrays.equals( + dimensionDefinitions, + other.dimensionDefinitions)) { + return false; + } + return true; + } + + @Override + public BigInteger getEstimatedIdCount( + final MultiDimensionalNumericData data ) { + return getIdOperations.getEstimatedIdCount( + data, + dimensionDefinitions); + } + + @Override + public MultiDimensionalNumericData getRanges( + final byte[] id ) { + return getIdOperations.convertFromHilbert( + id, + compactHilbertCurve, + dimensionDefinitions); + } + + @Override + public long[] normalizeRange( + final double minValue, + final double maxValue, + final int dimension ) { + return getIdOperations.normalizeRange( + minValue, + maxValue, + dimension, + dimensionDefinitions[dimension]); + } + + @Override + public long[] getCoordinates( + final byte[] id ) { + return getIdOperations.indicesFromHilbert( + id, + compactHilbertCurve, + dimensionDefinitions); + } + + @Override + public double[] getInsertionIdRangePerDimension() { + return getIdOperations.getInsertionIdRangePerDimension(dimensionDefinitions); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/HilbertSFCOperations.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/HilbertSFCOperations.java new file mode 100644 index 0000000..f0acaf9 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/HilbertSFCOperations.java @@ -0,0 +1,182 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file 
distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.hilbert; + +import java.math.BigInteger; + +import com.google.uzaygezen.core.CompactHilbertCurve; + +import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; + +/** + * This interface is used to abstract the details of operations used by the + * hilbert space filling curve, in particular to enable both primitive-based + * operations for performance (in cases where the bits of precision can be + * adequately represented by primitives) and non-primitive based operations for + * unbounded bits of precision. 
+ * + */ +public interface HilbertSFCOperations +{ + /** + * initialize this set of operations with the given dimension definitions + * + * @param dimensionDefinitions + * the dimension definitions to use + */ + public void init( + SFCDimensionDefinition[] dimensionDefinitions ); + + /** + * Convert the raw values (ordered per dimension) to a single SFC value + * + * @param values + * a raw value per dimension in order + * @param compactHilbertCurve + * the compact Hilbert curve to use for the conversion + * @param dimensionDefinitions + * a set of dimension definitions to use to normalize the raw + * values + * @return the Hilbert SFC value + */ + public byte[] convertToHilbert( + double[] values, + CompactHilbertCurve compactHilbertCurve, + SFCDimensionDefinition[] dimensionDefinitions ); + + /** + * Convert the single SFC value to the ranges of raw values that it + * represents + * + * @param hilbertValue + * the computed hilbert value to invert back to native + * coordinates + * @param compactHilbertCurve + * the compact Hilbert curve to use for the conversion + * @param dimensionDefinitions + * a set of dimension definitions to use to normalize the raw + * values + * @return the ranges of values that the hilbert represents, inclusive on + * start and exclusive on end for each range + */ + public MultiDimensionalNumericData convertFromHilbert( + byte[] hilbertValue, + CompactHilbertCurve compactHilbertCurve, + SFCDimensionDefinition[] dimensionDefinitions ); + + /** + * Convert the single SFC value to the per dimension SFC coordinates that it + * represents + * + * @param hilbertValue + * the computed hilbert value to invert back to integer + * coordinates per dimension + * @param compactHilbertCurve + * the compact Hilbert curve to use for the conversion + * @param dimensionDefinitions + * a set of dimension definitions to use to determine the bits of + * precision per dimension that is expected in the compact + * hilbert curve + * + * @return the integer 
coordinate value per dimension that the given hilbert + * value represents + */ + public long[] indicesFromHilbert( + byte[] hilbertValue, + CompactHilbertCurve compactHilbertCurve, + SFCDimensionDefinition[] dimensionDefinitions ); + + /** + * Decompose the raw range per dimension values into an optimal set of + * compact Hilbert SFC ranges + * + * @param rangePerDimension + * the raw range per dimension + * @param compactHilbertCurve + * the compact Hilbert curve to use for the conversion + * @param dimensionDefinitions + * a set of dimension definitions to use to normalize the raw + * values + * @param totalPrecision + * the total precision of the dimension definitions, for + * convenience + * @param maxFilteredIndexedRanges + * the maximum number of ranges, if < 0 it will be unlimited + * @param removeVacuum + * a flag to pass to the compact hilbert curve range + * decomposition + * @return the optimal SFC range decomposition for the raw-valued ranges + */ + public RangeDecomposition decomposeRange( + NumericData[] rangePerDimension, + CompactHilbertCurve compactHilbertCurve, + SFCDimensionDefinition[] dimensionDefinitions, + int totalPrecision, + int maxFilteredIndexedRanges, + boolean removeVacuum, + boolean overInclusiveOnEdge ); + + /** + * Get a quick (minimal complexity calculation) estimate of the total row + * IDs a particular data would require to fully cover with SFC values + * + * @param data + * the dataset + * @param dimensionDefinitions + * a set of dimension definitions to use to normalize the raw + * values + * @return the total estimated row IDs the data would require to fully cover + * with SFC values + */ + public BigInteger getEstimatedIdCount( + MultiDimensionalNumericData data, + SFCDimensionDefinition[] dimensionDefinitions ); + + /** + * Determines the coordinates per dimension of rows given a + * multi-dimensional range will span within this space filling curve + * + * @param minValue + * the minimum value + * + * @param maxValue + * 
the maximum value + * @param dimension + * the ordinal of the dimension on this space filling curve + * @param dimensionDefinitions + * a set of dimension definitions to use to normalize the raw + * values + * @return the range of coordinates in each dimension (ie. [0][0] would be + * the min coordinate of the first dimension and [0][1] would be the + * max coordinate of the first dimension) + * + */ + public long[] normalizeRange( + double minValue, + double maxValue, + int dimension, + final SFCDimensionDefinition boundedDimensionDefinition ) + throws IllegalArgumentException; + + /*** + * Get the range/size of a single insertion ID for each dimension + * + * @param dimensionDefinitions + * a set of dimension definitions to use to calculate the range + * of each insertion ID + * @return the range of a single insertion ID for each dimension + */ + public double[] getInsertionIdRangePerDimension( + SFCDimensionDefinition[] dimensionDefinitions ); +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/PrimitiveHilbertSFCOperations.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/PrimitiveHilbertSFCOperations.java new file mode 100644 index 0000000..60ab8b7 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/PrimitiveHilbertSFCOperations.java @@ -0,0 +1,562 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.hilbert; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Functions; +import com.google.common.collect.ImmutableList; +import com.google.uzaygezen.core.BacktrackingQueryBuilder; +import com.google.uzaygezen.core.BitVector; +import com.google.uzaygezen.core.BitVectorFactories; +import com.google.uzaygezen.core.CompactHilbertCurve; +import com.google.uzaygezen.core.FilteredIndexRange; +import com.google.uzaygezen.core.LongContent; +import com.google.uzaygezen.core.PlainFilterCombiner; +import com.google.uzaygezen.core.QueryBuilder; +import com.google.uzaygezen.core.RegionInspector; +import com.google.uzaygezen.core.SimpleRegionInspector; +import com.google.uzaygezen.core.ZoomingSpaceVisitorAdapter; +import com.google.uzaygezen.core.ranges.LongRange; +import com.google.uzaygezen.core.ranges.LongRangeHome; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * This supports Compact Hilbert SFC operations using a primitive long + * internally to represent intermediate results. 
This can be significantly + * faster than using unbounded representations such as BigInteger, but can only + * support up to certain levels of precision. For getID() operations it is + * currently used if no single dimension is more than 48 bits of precision, and + * for query decomposition it is currently used if the total precision is <= 62 + * bits. + * + * + */ +public class PrimitiveHilbertSFCOperations implements + HilbertSFCOperations +{ + protected final static long UNIT_CELL_SIZE = (long) Math.pow( + 2, + 19); + protected long[] binsPerDimension; + + protected long minHilbertValue; + protected long maxHilbertValue; + + @Override + public void init( + final SFCDimensionDefinition[] dimensionDefs ) { + binsPerDimension = new long[dimensionDefs.length]; + int totalPrecision = 0; + for (int d = 0; d < dimensionDefs.length; d++) { + final SFCDimensionDefinition dimension = dimensionDefs[d]; + binsPerDimension[d] = (long) Math.pow( + 2, + dimension.getBitsOfPrecision()); + totalPrecision += dimension.getBitsOfPrecision(); + } + minHilbertValue = 0; + maxHilbertValue = (long) (Math.pow( + 2, + totalPrecision) - 1); + } + + @Override + public byte[] convertToHilbert( + final double[] values, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + + final List dimensionValues = new ArrayList(); + + // Compare the number of dimensions to the number of values sent in + if (dimensionDefinitions.length != values.length) { + throw new ArrayIndexOutOfBoundsException( + "Number of dimensions supplied (" + values.length + ") is different than initialized (" + + dimensionDefinitions.length + ")."); + } + + // Loop through each value, then normalize the value based on the + // dimension definition + for (int i = 0; i < dimensionDefinitions.length; i++) { + dimensionValues.add(normalizeDimension( + dimensionDefinitions[i], + values[i], + binsPerDimension[i], + false, + false)); + } + + // Convert the normalized values to a 
BitVector + final BitVector hilbertBitVector = convertToHilbert( + dimensionValues, + compactHilbertCurve, + dimensionDefinitions); + + return hilbertBitVector.toBigEndianByteArray(); + } + + /*** + * Converts the incoming values (one per dimension) into a BitVector using + * the Compact Hilbert instance. BitVector is a wrapper to allow values + * longer than 64 bits. + * + * @param values + * n-dimensional point to transoform to a point on the hilbert + * SFC + * @return point on hilbert SFC + */ + private BitVector convertToHilbert( + final List values, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final BitVector[] bitVectors = new BitVector[values.size()]; + + final BitVector hilbertBitVector = BitVectorFactories.OPTIMAL.apply(compactHilbertCurve + .getSpec() + .sumBitsPerDimension()); + + for (int i = 0; i < values.size(); i++) { + bitVectors[i] = BitVectorFactories.OPTIMAL.apply(dimensionDefinitions[i].getBitsOfPrecision()); + bitVectors[i].copyFrom(values.get(i)); + } + synchronized (compactHilbertCurve) { + compactHilbertCurve.index( + bitVectors, + 0, + hilbertBitVector); + } + return hilbertBitVector; + } + + @Override + public long[] indicesFromHilbert( + final byte[] hilbertValue, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + // because it returns an array of longs right now, just use a static + // method that the unbounded operations can use as well + return internalIndicesFromHilbert( + hilbertValue, + compactHilbertCurve, + dimensionDefinitions); + } + + protected static long[] internalIndicesFromHilbert( + final byte[] hilbertValue, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final BitVector[] perDimensionBitVectors = indexInverse( + hilbertValue, + compactHilbertCurve, + dimensionDefinitions); + final long[] retVal = new long[dimensionDefinitions.length]; + for 
(int i = 0; i < retVal.length; i++) { + retVal[i] = perDimensionBitVectors[i].toExactLong(); + } + return retVal; + } + + @Override + public MultiDimensionalNumericData convertFromHilbert( + final byte[] hilbertValue, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final BitVector[] perDimensionBitVectors = indexInverse( + hilbertValue, + compactHilbertCurve, + dimensionDefinitions); + final NumericRange[] retVal = new NumericRange[dimensionDefinitions.length]; + for (int i = 0; i < retVal.length; i++) { + retVal[i] = denormalizeDimension( + dimensionDefinitions[i], + perDimensionBitVectors[i].toExactLong(), + binsPerDimension[i]); + } + return new BasicNumericDataset( + retVal); + } + + protected static BitVector[] indexInverse( + final byte[] hilbertValue, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final BitVector[] perDimensionBitVectors = new BitVector[dimensionDefinitions.length]; + + final BitVector hilbertBitVector = BitVectorFactories.OPTIMAL.apply(compactHilbertCurve + .getSpec() + .sumBitsPerDimension()); + hilbertBitVector.copyFromBigEndian(hilbertValue); + for (int i = 0; i < dimensionDefinitions.length; i++) { + perDimensionBitVectors[i] = BitVectorFactories.OPTIMAL.apply(dimensionDefinitions[i].getBitsOfPrecision()); + } + + synchronized (compactHilbertCurve) { + compactHilbertCurve.indexInverse( + hilbertBitVector, + perDimensionBitVectors); + } + return perDimensionBitVectors; + } + + /*** + * Used to normalize the value based on the dimension definition, which + * includes the dimensional bounds and the bits of precision. This ensures + * the maximum amount of fidelity for represented values. 
+ * + * @param boundedDimensionDefinition + * describes the min, max, and cardinality of a dimension + * @param value + * value to be normalized + * @param bins + * precomputed number of bins in this dimension the number of + * bins expected based on the cardinality of the definition + * @param isMin + * flag indicating if this value is a minimum of a range in which + * case it needs to be inclusive on a boundary, otherwise it is + * exclusive + * @return value after normalization + * @throws IllegalArgumentException + * thrown when the value passed doesn't fit with in the + * dimension definition provided + */ + public long normalizeDimension( + final SFCDimensionDefinition boundedDimensionDefinition, + final double value, + final long bins, + final boolean isMin, + final boolean overInclusiveOnEdge ) + throws IllegalArgumentException { + final double normalizedValue = boundedDimensionDefinition.normalize(value); + if ((normalizedValue < 0) || (normalizedValue > 1)) { + throw new IllegalArgumentException( + "Value (" + value + ") is not within dimension bounds. The normalized value (" + normalizedValue + + ") must be within (0,1)"); + } + // scale it to a value within the bits of precision, + // because max is handled as exclusive and min is inclusive, we need to + // handle the edge differently + if ((isMin && !overInclusiveOnEdge) || (!isMin && overInclusiveOnEdge)) { + // this will round up on the edge + return (long) Math.min( + Math.floor(normalizedValue * bins), + bins - 1); + } + else { + // this will round down on the edge + return (long) Math.max( + Math.ceil(normalizedValue * bins) - 1L, + 0); + + } + + } + + /*** + * Used to normalize the value based on the dimension definition, which + * includes the dimensional bounds and the bits of precision. This ensures + * the maximum amount of fidelity for represented values. 
+ * + * @param boundedDimensionDefinition + * describes the min, max, and cardinality of a dimension + * @param value + * hilbert value to be denormalized + * @param bins + * precomputed number of bins in this dimension the number of + * bins expected based on the cardinality of the definition + * @return range of values representing this hilbert value (exlusive on the + * end) + * @throws IllegalArgumentException + * thrown when the value passed doesn't fit with in the hilbert + * SFC for the dimension definition provided + */ + private NumericRange denormalizeDimension( + final SFCDimensionDefinition boundedDimensionDefinition, + final long value, + final long bins ) + throws IllegalArgumentException { + final double min = (double) (value) / (double) bins; + final double max = (double) (value + 1) / (double) bins; + if ((min < 0) || (min > 1)) { + throw new IllegalArgumentException( + "Value (" + value + ") is not within bounds. The normalized value (" + min + + ") must be within (0,1)"); + } + if ((max < 0) || (max > 1)) { + throw new IllegalArgumentException( + "Value (" + value + ") is not within bounds. 
The normalized value (" + max + + ") must be within (0,1)"); + } + // scale it to a value within the dimension definition range + return new NumericRange( + boundedDimensionDefinition.denormalize(min), + boundedDimensionDefinition.denormalize(max)); + + } + + @Override + public RangeDecomposition decomposeRange( + final NumericData[] rangePerDimension, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions, + final int totalPrecision, + final int maxFilteredIndexedRanges, + final boolean removeVacuum, + final boolean overInclusiveOnEdge ) {// List of query range minimum + // and + // maximum + // values + final List minRangeList = new ArrayList(); + final List maxRangeList = new ArrayList(); + + final LongContent zero = new LongContent( + 0L); + final List region = new ArrayList( + dimensionDefinitions.length); + for (int d = 0; d < dimensionDefinitions.length; d++) { + + final long normalizedMin = normalizeDimension( + dimensionDefinitions[d], + rangePerDimension[d].getMin(), + binsPerDimension[d], + true, + overInclusiveOnEdge); + long normalizedMax = normalizeDimension( + dimensionDefinitions[d], + rangePerDimension[d].getMax(), + binsPerDimension[d], + false, + overInclusiveOnEdge); + if (normalizedMin > normalizedMax) { + // if they're both equal, which is possible because we treat max + // as exclusive, set bin max to bin min (ie. treat it as + // inclusive in this case) + normalizedMax = normalizedMin; + } + minRangeList.add(normalizedMin); + maxRangeList.add(normalizedMax); + region.add(LongRange.of( + normalizedMin, + normalizedMax + 1L)); + + } + + final long minQuadSize = getMinimumQuadSize( + minRangeList, + maxRangeList); + + final RegionInspector regionInspector = SimpleRegionInspector.create( + ImmutableList.of(region), + new LongContent( + minQuadSize), + Functions. 
identity(), + LongRangeHome.INSTANCE, + zero); + + final PlainFilterCombiner intervalCombiner = new PlainFilterCombiner( + LongRange.of( + 0, + 1)); + + final QueryBuilder queryBuilder = BacktrackingQueryBuilder.create( + regionInspector, + intervalCombiner, + maxFilteredIndexedRanges, + removeVacuum, + LongRangeHome.INSTANCE, + zero); + synchronized (compactHilbertCurve) { + compactHilbertCurve.accept(new ZoomingSpaceVisitorAdapter( + compactHilbertCurve, + queryBuilder)); + } + final List> hilbertRanges = queryBuilder + .get() + .getFilteredIndexRanges(); + + final ByteArrayRange[] sfcRanges = new ByteArrayRange[hilbertRanges.size()]; + final int expectedByteCount = (int) Math.ceil(totalPrecision / 8.0); + if (expectedByteCount <= 0) { + // special case for no precision + return new RangeDecomposition( + new ByteArrayRange[] { + new ByteArrayRange( + new ByteArrayId( + new byte[] {}), + new ByteArrayId( + new byte[] {})) + }); + } + for (int i = 0; i < hilbertRanges.size(); i++) { + final FilteredIndexRange range = hilbertRanges.get(i); + // sanity check that values fit within the expected range + // it seems that uzaygezen can produce a value at 2^totalPrecision + // rather than 2^totalPrecision - 1 + final long startValue = clamp( + minHilbertValue, + maxHilbertValue, + range.getIndexRange().getStart()); + final long endValue = clamp( + minHilbertValue, + maxHilbertValue, + range.getIndexRange().getEnd() - 1); + // make sure its padded if necessary + final byte[] start = HilbertSFC.fitExpectedByteCount( + expectedByteCount, + ByteBuffer.allocate( + 8).putLong( + startValue).array()); + + // make sure its padded if necessary + final byte[] end = HilbertSFC.fitExpectedByteCount( + expectedByteCount, + ByteBuffer.allocate( + 8).putLong( + endValue).array()); + sfcRanges[i] = new ByteArrayRange( + new ByteArrayId( + start), + new ByteArrayId( + end)); + } + + final RangeDecomposition rangeDecomposition = new RangeDecomposition( + sfcRanges); + + return 
rangeDecomposition; + } + + private static long clamp( + final long min, + final long max, + final long value ) { + return Math.max( + Math.min( + value, + max), + 0); + } + + /*** + * Returns the smallest range that will be fully decomposed (i.e. + * decomposition stops when the range is equal or smaller than this value). + * Values is based on the _maximumRangeDecompsed and _minRangeDecompsed + * instance members. + * + * @param minRangeList + * minimum values for each dimension (ordered) + * @param maxRangeList + * maximum values for each dimension (ordered) + * @return largest range that will be fully decomposed + */ + private long getMinimumQuadSize( + final List minRangeList, + final List maxRangeList ) { + long maxRange = 1; + final int dimensionality = Math.min( + minRangeList.size(), + maxRangeList.size()); + for (int d = 0; d < dimensionality; d++) { + maxRange = Math.max( + maxRange, + (Math.abs(maxRangeList.get(d) - minRangeList.get(d)) + 1)); + } + final long maxRangeDecomposed = (long) Math.pow( + maxRange, + dimensionality); + if (maxRangeDecomposed <= UNIT_CELL_SIZE) { + return 1L; + } + + return maxRangeDecomposed / UNIT_CELL_SIZE; + + } + + /** + * The estimated ID count is the cross product of normalized range of all + * dimensions per the bits of precision provided by the dimension + * definitions. 
+ */ + @Override + public BigInteger getEstimatedIdCount( + final MultiDimensionalNumericData data, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final double[] mins = data.getMinValuesPerDimension(); + final double[] maxes = data.getMaxValuesPerDimension(); + long estimatedIdCount = 1L; + for (int d = 0; d < data.getDimensionCount(); d++) { + final long binMin = normalizeDimension( + dimensionDefinitions[d], + mins[d], + binsPerDimension[d], + true, + false); + long binMax = normalizeDimension( + dimensionDefinitions[d], + maxes[d], + binsPerDimension[d], + false, + false); + if (binMin > binMax) { + // if they're both equal, which is possible because we treat max + // as exclusive, set bin max to bin min (ie. treat it as + // inclusive in this case) + binMax = binMin; + } + estimatedIdCount *= (Math.abs(binMax - binMin) + 1); + } + return BigInteger.valueOf(estimatedIdCount); + } + + @Override + public double[] getInsertionIdRangePerDimension( + final SFCDimensionDefinition[] dimensionDefinitions ) { + final double[] retVal = new double[dimensionDefinitions.length]; + for (int i = 0; i < dimensionDefinitions.length; i++) { + retVal[i] = dimensionDefinitions[i].getRange() / binsPerDimension[i]; + } + return retVal; + } + + @Override + public long[] normalizeRange( + final double minValue, + final double maxValue, + final int dimension, + final SFCDimensionDefinition boundedDimensionDefinition ) + throws IllegalArgumentException { + return new long[] { + normalizeDimension( + boundedDimensionDefinition, + minValue, + binsPerDimension[dimension], + true, + true), + normalizeDimension( + boundedDimensionDefinition, + maxValue, + binsPerDimension[dimension], + false, + true) + }; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/UnboundedHilbertSFCOperations.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/UnboundedHilbertSFCOperations.java new file mode 100644 index 
0000000..ce0174e --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/hilbert/UnboundedHilbertSFCOperations.java @@ -0,0 +1,535 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.hilbert; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.RoundingMode; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Functions; +import com.google.common.collect.ImmutableList; +import com.google.uzaygezen.core.BacktrackingQueryBuilder; +import com.google.uzaygezen.core.BigIntegerContent; +import com.google.uzaygezen.core.BitVector; +import com.google.uzaygezen.core.BitVectorFactories; +import com.google.uzaygezen.core.CompactHilbertCurve; +import com.google.uzaygezen.core.FilteredIndexRange; +import com.google.uzaygezen.core.PlainFilterCombiner; +import com.google.uzaygezen.core.QueryBuilder; +import com.google.uzaygezen.core.RegionInspector; +import com.google.uzaygezen.core.SimpleRegionInspector; +import com.google.uzaygezen.core.ZoomingSpaceVisitorAdapter; +import com.google.uzaygezen.core.ranges.BigIntegerRange; +import com.google.uzaygezen.core.ranges.BigIntegerRangeHome; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition; +import 
org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * This supports Compact Hilbert SFC operations using a BigInteger internally to + * represent intermediate results. This can be significantly slower than using a + * primitive long for intermediate results but can support arbitrarily many bits + * of precision. + * + * + */ +public class UnboundedHilbertSFCOperations implements + HilbertSFCOperations +{ + private static final BigDecimal TWO = BigDecimal.valueOf(2); + protected final static BigInteger UNIT_CELL_SIZE = BigDecimal.valueOf( + Math.pow( + 2, + 19)).toBigInteger(); + protected BigDecimal[] binsPerDimension; + protected BigInteger minHilbertValue; + protected BigInteger maxHilbertValue; + + @Override + public void init( + final SFCDimensionDefinition[] dimensionDefs ) { + binsPerDimension = new BigDecimal[dimensionDefs.length]; + int totalPrecision = 0; + for (int d = 0; d < dimensionDefs.length; d++) { + final SFCDimensionDefinition dimension = dimensionDefs[d]; + binsPerDimension[d] = TWO.pow(dimension.getBitsOfPrecision()); + totalPrecision += dimension.getBitsOfPrecision(); + } + minHilbertValue = BigInteger.ZERO; + maxHilbertValue = TWO.pow( + totalPrecision).subtract( + BigDecimal.ONE).toBigInteger(); + } + + @Override + public byte[] convertToHilbert( + final double[] values, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + + final List dimensionValues = new ArrayList(); + + // Compare the number of dimensions to the number of values sent in + if (dimensionDefinitions.length != values.length) { + throw new ArrayIndexOutOfBoundsException( + "Number of dimensions supplied 
(" + values.length + ") is different than initialized (" + + dimensionDefinitions.length + ")."); + } + + // Loop through each value, then normalize the value based on the + // dimension definition + for (int i = 0; i < dimensionDefinitions.length; i++) { + dimensionValues.add(normalizeDimension( + dimensionDefinitions[i], + values[i], + binsPerDimension[i], + false, + false)); + } + + // Convert the normalized values to a BitVector + final BitVector hilbertBitVector = convertToHilbert( + dimensionValues, + compactHilbertCurve, + dimensionDefinitions); + + return hilbertBitVector.toBigEndianByteArray(); + } + + /*** + * Converts the incoming values (one per dimension) into a BitVector using + * the Compact Hilbert instance. BitVector is a wrapper to allow values + * longer than 64 bits. + * + * @param values + * n-dimensional point to transoform to a point on the hilbert + * SFC + * @return point on hilbert SFC + */ + private BitVector convertToHilbert( + final List values, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final BitVector[] bitVectors = new BitVector[values.size()]; + + final BitVector hilbertBitVector = BitVectorFactories.OPTIMAL.apply(compactHilbertCurve + .getSpec() + .sumBitsPerDimension()); + + for (int i = 0; i < values.size(); i++) { + bitVectors[i] = BitVectorFactories.OPTIMAL.apply(dimensionDefinitions[i].getBitsOfPrecision()); + bitVectors[i].copyFrom(values.get(i)); + } + + compactHilbertCurve.index( + bitVectors, + 0, + hilbertBitVector); + + return hilbertBitVector; + + } + + /*** + * Used to normalize the value based on the dimension definition, which + * includes the dimensional bounds and the bits of precision. This ensures + * the maximum amount of fidelity for represented values. 
+ * + * @param boundedDimensionDefinition + * describes the min, max, and cardinality of a dimension + * @param value + * value to be normalized + * @param bins + * precomputed number of bins in this dimension, i.e. the number of + * bins expected based on the cardinality of the definition + * @param isMin + * flag indicating if this value is a minimum of a range in which + * case it needs to be inclusive on a boundary, otherwise it is + * exclusive + * @return value after normalization + * @throws IllegalArgumentException + * thrown when the value passed doesn't fit within the + * dimension definition provided + */ + private BigInteger normalizeDimension( + final SFCDimensionDefinition boundedDimensionDefinition, + final double value, + final BigDecimal bins, + final boolean isMin, + final boolean overInclusiveOnEdge ) + throws IllegalArgumentException { + final double normalizedValue = boundedDimensionDefinition.normalize(value); + if ((normalizedValue < 0) || (normalizedValue > 1)) { + throw new IllegalArgumentException( + "Value (" + value + ") is not within dimension bounds.
The normalized value (" + normalizedValue + + ") must be within (0,1)"); + } + final BigDecimal val = BigDecimal.valueOf(normalizedValue); + // scale it to a value within the bits of precision + final BigDecimal valueScaledWithinPrecision = val.multiply(bins); + if ((isMin && !overInclusiveOnEdge) || (!isMin && overInclusiveOnEdge)) { + // round it down, and make sure it isn't above bins - 1 (exactly 1 + // for the normalized value could produce a bit shifted value equal + // to bins without this check) + return valueScaledWithinPrecision.setScale( + 0, + RoundingMode.FLOOR).min( + bins.subtract(BigDecimal.ONE)).toBigInteger(); + } + else { + // round it up, subtract one to set the range between [0, + // 2^cardinality-1) + // and make sure it isn't below 0 (exactly 0 for the normalized + // value + // could produce a bit shifted value of -1 without this check) + return valueScaledWithinPrecision.setScale( + 0, + RoundingMode.CEILING).subtract( + BigDecimal.ONE).max( + BigDecimal.ZERO).toBigInteger(); + } + + } + + @Override + public long[] indicesFromHilbert( + final byte[] hilbertValue, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + // warning: this very much won't be unbounded because it returns an + // array of longs right now + // but we may as well re-use the calculation from the primitive + // operations + return PrimitiveHilbertSFCOperations.internalIndicesFromHilbert( + hilbertValue, + compactHilbertCurve, + dimensionDefinitions); + } + + @Override + public MultiDimensionalNumericData convertFromHilbert( + final byte[] hilbertValue, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final BitVector[] perDimensionBitVectors = PrimitiveHilbertSFCOperations.indexInverse( + hilbertValue, + compactHilbertCurve, + dimensionDefinitions); + final NumericRange[] retVal = new NumericRange[dimensionDefinitions.length]; + for (int i = 0; i < 
retVal.length; i++) { + retVal[i] = denormalizeDimension( + dimensionDefinitions[i], + perDimensionBitVectors[i].toBigInteger(), + binsPerDimension[i]); + } + return new BasicNumericDataset( + retVal); + } + + /*** + * Used to denormalize the value based on the dimension definition, which + * includes the dimensional bounds and the bits of precision. This ensures + * the maximum amount of fidelity for represented values. + * + * @param boundedDimensionDefinition + * describes the min, max, and cardinality of a dimension + * @param value + * hilbert value to be denormalized + * @param bins + * precomputed number of bins in this dimension, i.e. the number of + * bins expected based on the cardinality of the definition + * @return range of values representing this hilbert value (exclusive on the + * end) + * @throws IllegalArgumentException + * thrown when the value passed doesn't fit within the hilbert + * SFC for the dimension definition provided + */ + private NumericRange denormalizeDimension( + final SFCDimensionDefinition boundedDimensionDefinition, + final BigInteger value, + final BigDecimal bins ) + throws IllegalArgumentException { + final double min = new BigDecimal( + value).divide( + bins).doubleValue(); + final double max = new BigDecimal( + value).add( + BigDecimal.ONE).divide( + bins).doubleValue(); + + if ((min < 0) || (min > 1)) { + throw new IllegalArgumentException( + "Value (" + value + ") is not within bounds. The normalized value (" + min + + ") must be within (0,1)"); + } + if ((max < 0) || (max > 1)) { + throw new IllegalArgumentException( + "Value (" + value + ") is not within bounds.
The normalized value (" + max + + ") must be within (0,1)"); + } + // scale it to a value within the dimension definition range + return new NumericRange( + boundedDimensionDefinition.denormalize(min), + boundedDimensionDefinition.denormalize(max)); + + } + + @Override + public RangeDecomposition decomposeRange( + final NumericData[] rangePerDimension, + final CompactHilbertCurve compactHilbertCurve, + final SFCDimensionDefinition[] dimensionDefinitions, + final int totalPrecision, + final int maxFilteredIndexedRanges, + final boolean removeVacuum, + final boolean overInclusiveOnEdge ) {// List of query range minimum + // and + // maximum + // values + final List minRangeList = new ArrayList(); + final List maxRangeList = new ArrayList(); + + final BigIntegerContent zero = new BigIntegerContent( + BigInteger.valueOf(0L)); + final List region = new ArrayList( + dimensionDefinitions.length); + for (int d = 0; d < dimensionDefinitions.length; d++) { + + final BigInteger normalizedMin = normalizeDimension( + dimensionDefinitions[d], + rangePerDimension[d].getMin(), + binsPerDimension[d], + true, + overInclusiveOnEdge); + BigInteger normalizedMax = normalizeDimension( + dimensionDefinitions[d], + rangePerDimension[d].getMax(), + binsPerDimension[d], + false, + overInclusiveOnEdge); + if (normalizedMin.compareTo(normalizedMax) > 0) { + // if they're both equal, which is possible because we treat max + // as exclusive, set bin max to bin min (ie. treat it as + // inclusive in this case) + normalizedMax = normalizedMin; + } + minRangeList.add(normalizedMin); + maxRangeList.add(normalizedMax); + region.add(BigIntegerRange.of( + normalizedMin, + normalizedMax.add(BigInteger.ONE))); + + } + + final BigInteger minQuadSize = getMinimumQuadSize( + minRangeList, + maxRangeList); + + final RegionInspector regionInspector = SimpleRegionInspector.create( + ImmutableList.of(region), + new BigIntegerContent( + minQuadSize), + Functions. 
identity(), + BigIntegerRangeHome.INSTANCE, + zero); + + final PlainFilterCombiner intervalCombiner = new PlainFilterCombiner( + BigIntegerRange.of( + 0, + 1)); + + final QueryBuilder queryBuilder = BacktrackingQueryBuilder.create( + regionInspector, + intervalCombiner, + maxFilteredIndexedRanges, + removeVacuum, + BigIntegerRangeHome.INSTANCE, + zero); + + compactHilbertCurve.accept(new ZoomingSpaceVisitorAdapter( + compactHilbertCurve, + queryBuilder)); + + // com.google.uzaygezen.core.Query hilbertQuery = + // queryBuilder.get(); + + final List> hilbertRanges = queryBuilder + .get() + .getFilteredIndexRanges(); + + final ByteArrayRange[] sfcRanges = new ByteArrayRange[hilbertRanges.size()]; + final int expectedByteCount = (int) Math.ceil(totalPrecision / 8.0); + if (expectedByteCount <= 0) { + // special case for no precision + return new RangeDecomposition( + new ByteArrayRange[] { + new ByteArrayRange( + new ByteArrayId( + new byte[] {}), + new ByteArrayId( + new byte[] {})) + }); + } + for (int i = 0; i < hilbertRanges.size(); i++) { + final FilteredIndexRange range = hilbertRanges.get(i); + // sanity check that values fit within the expected range + // it seems that uzaygezen can produce a value at 2^totalPrecision + // rather than 2^totalPrecision - 1 + final BigInteger startValue = clamp( + minHilbertValue, + maxHilbertValue, + range.getIndexRange().getStart()); + final BigInteger endValue = clamp( + minHilbertValue, + maxHilbertValue, + range.getIndexRange().getEnd().subtract( + BigInteger.ONE)); + // make sure its padded if necessary + final byte[] start = HilbertSFC.fitExpectedByteCount( + expectedByteCount, + startValue.toByteArray()); + + // make sure its padded if necessary + final byte[] end = HilbertSFC.fitExpectedByteCount( + expectedByteCount, + endValue.toByteArray()); + sfcRanges[i] = new ByteArrayRange( + new ByteArrayId( + start), + new ByteArrayId( + end)); + } + + final RangeDecomposition rangeDecomposition = new RangeDecomposition( + 
sfcRanges); + + return rangeDecomposition; + } + + private static BigInteger clamp( + final BigInteger minValue, + final BigInteger maxValue, + final BigInteger value ) { + return value.max( + minValue).min( + maxValue); + } + + /*** + * Returns the smallest range that will be fully decomposed (i.e. + * decomposition stops when the range is equal or smaller than this value). + * The value is derived from the largest per-dimension extent of the given + * ranges and UNIT_CELL_SIZE. + * + * @param minRangeList + * minimum values for each dimension (ordered) + * @param maxRangeList + * maximum values for each dimension (ordered) + * @return smallest range that will be fully decomposed + */ + private BigInteger getMinimumQuadSize( + final List minRangeList, + final List maxRangeList ) { + BigInteger maxRange = BigInteger.valueOf(1); + final int dimensionality = Math.min( + minRangeList.size(), + maxRangeList.size()); + for (int d = 0; d < dimensionality; d++) { + maxRange = maxRange.max(maxRangeList.get( + d).subtract( + minRangeList.get(d)).abs().add( + BigInteger.ONE)); + } + final BigInteger maxRangeDecomposed = maxRange.pow(dimensionality); + if (maxRangeDecomposed.compareTo(UNIT_CELL_SIZE) <= 0) { + return BigInteger.ONE; + } + + return maxRangeDecomposed.divide(UNIT_CELL_SIZE); + + } + + @Override + public BigInteger getEstimatedIdCount( + final MultiDimensionalNumericData data, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final double[] mins = data.getMinValuesPerDimension(); + final double[] maxes = data.getMaxValuesPerDimension(); + BigInteger estimatedIdCount = BigInteger.valueOf(1); + for (int d = 0; d < data.getDimensionCount(); d++) { + final BigInteger binMin = normalizeDimension( + dimensionDefinitions[d], + mins[d], + binsPerDimension[d], + true, + false); + BigInteger binMax = normalizeDimension( + dimensionDefinitions[d], + maxes[d], + binsPerDimension[d], + false, + false); + if (binMin.compareTo(binMax) > 0) { + // if they're both equal, which is
possible because we treat max + // as exclusive, set bin max to bin min (ie. treat it as + // inclusive in this case) + binMax = binMin; + } + estimatedIdCount = estimatedIdCount.multiply(binMax.subtract( + binMin).abs().add( + BigInteger.ONE)); + } + return estimatedIdCount; + } + + @Override + public double[] getInsertionIdRangePerDimension( + final SFCDimensionDefinition[] dimensionDefinitions ) { + final double[] retVal = new double[dimensionDefinitions.length]; + for (int i = 0; i < dimensionDefinitions.length; i++) { + retVal[i] = new BigDecimal( + dimensionDefinitions[i].getRange()).divide( + binsPerDimension[i]).doubleValue(); + } + return retVal; + } + + @Override + public long[] normalizeRange( + final double minValue, + final double maxValue, + final int dimension, + final SFCDimensionDefinition boundedDimensionDefinition ) + throws IllegalArgumentException { + return new long[] { + normalizeDimension( + boundedDimensionDefinition, + minValue, + binsPerDimension[dimension], + true, + true).longValue(), + normalizeDimension( + boundedDimensionDefinition, + maxValue, + binsPerDimension[dimension], + false, + true).longValue() + }; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/SingleTierSubStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/SingleTierSubStrategy.java new file mode 100644 index 0000000..961dabf --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/SingleTierSubStrategy.java @@ -0,0 +1,280 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.tiered; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.IndexMetaData; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.NumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve; +import org.locationtech.sfcurve.geowave.index.sfc.binned.BinnedSFCUtils; +import org.locationtech.sfcurve.geowave.index.sfc.data.BinnedNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +/** + * This class wraps a single SpaceFillingCurve implementation with a tiered + * approach to indexing (an SFC with a tier ID). This can be utilized by an + * overall HierarchicalNumericIndexStrategy as an encapsulated sub-strategy. 
+ * + */ +public class SingleTierSubStrategy implements + NumericIndexStrategy +{ + private SpaceFillingCurve sfc; + private NumericDimensionDefinition[] baseDefinitions; + public byte tier; + + protected SingleTierSubStrategy() {} + + public SingleTierSubStrategy( + final SpaceFillingCurve sfc, + final NumericDimensionDefinition[] baseDefinitions, + final byte tier ) { + this.sfc = sfc; + this.baseDefinitions = baseDefinitions; + this.tier = tier; + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return getQueryRanges( + indexedRange, + TieredSFCIndexStrategy.DEFAULT_MAX_RANGES); + } + + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxRangeDecomposition, + final IndexMetaData... hints ) { + final BinnedNumericDataset[] binnedQueries = BinnedNumericDataset.applyBins( + indexedRange, + baseDefinitions); + return BinnedSFCUtils.getQueryRanges( + binnedQueries, + sfc, + maxRangeDecomposition, + tier); + } + + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + final byte[] rowId = insertionId.getBytes(); + return BinnedSFCUtils.getRangeForId( + rowId, + baseDefinitions, + sfc); + } + + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + final ByteArrayId insertionId ) { + final byte[] rowId = insertionId.getBytes(); + return new MultiDimensionalCoordinates( + new byte[] { + tier + }, + BinnedSFCUtils.getCoordinatesForId( + rowId, + baseDefinitions, + sfc)); + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + return getInsertionIds( + indexedData, + 1); + } + + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxDuplicateInsertionIds ) { + // we need to duplicate per bin so we can't adhere to max duplication + // anyways + final BinnedNumericDataset[] 
ranges = BinnedNumericDataset.applyBins( + indexedData, + baseDefinitions); + // place each of these indices into a single row ID at a tier that will + // fit its min and max + final List rowIds = new ArrayList(); + for (final BinnedNumericDataset range : ranges) { + final List binRowIds = TieredSFCIndexStrategy.getRowIdsAtTier( + range, + tier, + sfc, + null, + tier); + if (binRowIds != null) { + rowIds.addAll(binRowIds); + } + } + return rowIds; + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + return baseDefinitions; + } + + @Override + public String getId() { + return StringUtils.intToString(hashCode()); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(baseDefinitions); + result = (prime * result) + ((sfc == null) ? 0 : sfc.hashCode()); + result = (prime * result) + tier; + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final SingleTierSubStrategy other = (SingleTierSubStrategy) obj; + if (!Arrays.equals( + baseDefinitions, + other.baseDefinitions)) { + return false; + } + if (sfc == null) { + if (other.sfc != null) { + return false; + } + } + else if (!sfc.equals(other.sfc)) { + return false; + } + if (tier != other.tier) { + return false; + } + return true; + } + + @Override + public byte[] toBinary() { + int byteBufferLength = 5; + final List dimensionBinaries = new ArrayList( + baseDefinitions.length); + final byte[] sfcBinary = PersistenceUtils.toBinary(sfc); + byteBufferLength += (4 + sfcBinary.length); + for (final NumericDimensionDefinition dimension : baseDefinitions) { + final byte[] dimensionBinary = PersistenceUtils.toBinary(dimension); + byteBufferLength += (4 + dimensionBinary.length); + dimensionBinaries.add(dimensionBinary); + } + final ByteBuffer 
buf = ByteBuffer.allocate(byteBufferLength); + buf.put(tier); + buf.putInt(baseDefinitions.length); + buf.putInt(sfcBinary.length); + buf.put(sfcBinary); + for (final byte[] dimensionBinary : dimensionBinaries) { + buf.putInt(dimensionBinary.length); + buf.put(dimensionBinary); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + tier = buf.get(); + final int numDimensions = buf.getInt(); + baseDefinitions = new NumericDimensionDefinition[numDimensions]; + final byte[] sfcBinary = new byte[buf.getInt()]; + buf.get(sfcBinary); + sfc = PersistenceUtils.fromBinary( + sfcBinary, + SpaceFillingCurve.class); + for (int i = 0; i < numDimensions; i++) { + final byte[] dim = new byte[buf.getInt()]; + buf.get(dim); + baseDefinitions[i] = PersistenceUtils.fromBinary( + dim, + NumericDimensionDefinition.class); + } + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + return sfc.getInsertionIdRangePerDimension(); + } + + @Override + public Set getNaturalSplits() { + return null; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + int rowIdOffset = 1; + for (int dimensionIdx = 0; dimensionIdx < baseDefinitions.length; dimensionIdx++) { + final int binSize = baseDefinitions[dimensionIdx].getFixedBinIdSize(); + if (binSize > 0) { + rowIdOffset += binSize; + } + } + return rowIdOffset; + } + + @Override + public List createMetaData() { + return Collections. emptyList(); + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + final MultiDimensionalNumericData dataRange, + final IndexMetaData... 
hints ) { + final BinRange[][] binRangesPerDimension = BinnedNumericDataset.getBinnedRangesPerDimension( + dataRange, + baseDefinitions); + return new MultiDimensionalCoordinateRanges[] { + BinnedSFCUtils.getCoordinateRanges( + binRangesPerDimension, + sfc, + baseDefinitions.length, + tier) + }; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/TieredSFCIndexFactory.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/TieredSFCIndexFactory.java new file mode 100644 index 0000000..e013fda --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/TieredSFCIndexFactory.java @@ -0,0 +1,317 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.tiered; + +import java.util.Arrays; + +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory; +import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory.SFCType; + +import com.google.common.collect.ImmutableBiMap; + +/** + * A factory for creating TieredSFCIndexStrategy using various approaches for + * breaking down the bits of precision per tier + * + */ +public class TieredSFCIndexFactory +{ + private 
static int DEFAULT_NUM_TIERS = 11; + + /** + * Used to create a Single Tier Index Strategy. For example, this would be + * used to generate a strategy that has Point type spatial data. + * + * @param dimensionDefs + * an array of SFC Dimension Definition objects + * @param sfc + * the type of space filling curve (e.g. Hilbert) + * @return an Index Strategy object with a single tier + */ + static public TieredSFCIndexStrategy createSingleTierStrategy( + final SFCDimensionDefinition[] dimensionDefs, + final SFCType sfc ) { + final SpaceFillingCurve[] orderedSfcs = new SpaceFillingCurve[] { + SFCFactory.createSpaceFillingCurve( + dimensionDefs, + sfc) + }; + // unwrap SFC dimension definitions + final NumericDimensionDefinition[] baseDefinitions = new NumericDimensionDefinition[dimensionDefs.length]; + int maxBitsOfPrecision = Integer.MIN_VALUE; + for (int d = 0; d < baseDefinitions.length; d++) { + baseDefinitions[d] = dimensionDefs[d].getDimensionDefinition(); + maxBitsOfPrecision = Math.max( + dimensionDefs[d].getBitsOfPrecision(), + maxBitsOfPrecision); + } + return new TieredSFCIndexStrategy( + baseDefinitions, + orderedSfcs, + ImmutableBiMap.of( + 0, + (byte) maxBitsOfPrecision)); + } + + /** + * Used to create a Single Tier Index Strategy. For example, this would be + * used to generate a strategy that has Point type spatial data. + * + * @param dimensionDefs + * an array of SFC Dimension Definition objects + * @param sfc + * the type of space filling curve (e.g. 
Hilbert) + * @return an Index Strategy object with a single tier + */ + static public TieredSFCIndexStrategy createSingleTierStrategy( + final NumericDimensionDefinition[] baseDefinitions, + final int[] maxBitsPerDimension, + final SFCType sfc ) { + final SFCDimensionDefinition[] sfcDimensions = new SFCDimensionDefinition[baseDefinitions.length]; + int maxBitsOfPrecision = Integer.MIN_VALUE; + for (int d = 0; d < baseDefinitions.length; d++) { + sfcDimensions[d] = new SFCDimensionDefinition( + baseDefinitions[d], + maxBitsPerDimension[d]); + maxBitsOfPrecision = Math.max( + maxBitsPerDimension[d], + maxBitsOfPrecision); + } + + final SpaceFillingCurve[] orderedSfcs = new SpaceFillingCurve[] { + SFCFactory.createSpaceFillingCurve( + sfcDimensions, + sfc) + }; + + return new TieredSFCIndexStrategy( + baseDefinitions, + orderedSfcs, + ImmutableBiMap.of( + 0, + (byte) maxBitsOfPrecision)); + } + + static public TieredSFCIndexStrategy createFullIncrementalTieredStrategy( + final NumericDimensionDefinition[] baseDefinitions, + final int[] maxBitsPerDimension, + final SFCType sfcType ) { + return createFullIncrementalTieredStrategy( + baseDefinitions, + maxBitsPerDimension, + sfcType, + null); + } + + /** + * + * @param baseDefinitions + * an array of Numeric Dimension Definitions + * @param maxBitsPerDimension + * the max cardinality for the Index Strategy + * @param sfcType + * the type of space filling curve (e.g. 
Hilbert) + * @param maxEstimatedDuplicatedIds + * the max number of duplicate SFC IDs + * @return an Index Strategy object with a tier for every incremental + * cardinality between the lowest max bits of precision and 0 + */ + static public TieredSFCIndexStrategy createFullIncrementalTieredStrategy( + final NumericDimensionDefinition[] baseDefinitions, + final int[] maxBitsPerDimension, + final SFCType sfcType, + Long maxEstimatedDuplicatedIds ) { + if (maxBitsPerDimension.length == 0) { + final ImmutableBiMap emptyMap = ImmutableBiMap.of(); + return new TieredSFCIndexStrategy( + baseDefinitions, + new SpaceFillingCurve[] {}, + emptyMap); + } + int numIndices = Integer.MAX_VALUE; + for (final int element : maxBitsPerDimension) { + numIndices = Math.min( + numIndices, + element + 1); + } + final SpaceFillingCurve[] spaceFillingCurves = new SpaceFillingCurve[numIndices]; + final ImmutableBiMap.Builder sfcIndexToTier = ImmutableBiMap.builder(); + for (int sfcIndex = 0; sfcIndex < numIndices; sfcIndex++) { + final SFCDimensionDefinition[] sfcDimensions = new SFCDimensionDefinition[baseDefinitions.length]; + int maxBitsOfPrecision = Integer.MIN_VALUE; + for (int d = 0; d < baseDefinitions.length; d++) { + final int bitsOfPrecision = maxBitsPerDimension[d] - (numIndices - sfcIndex - 1); + maxBitsOfPrecision = Math.max( + bitsOfPrecision, + maxBitsOfPrecision); + sfcDimensions[d] = new SFCDimensionDefinition( + baseDefinitions[d], + bitsOfPrecision); + } + sfcIndexToTier.put( + sfcIndex, + (byte) maxBitsOfPrecision); + + spaceFillingCurves[sfcIndex] = SFCFactory.createSpaceFillingCurve( + sfcDimensions, + sfcType); + + } + if (maxEstimatedDuplicatedIds != null && maxEstimatedDuplicatedIds > 0) { + return new TieredSFCIndexStrategy( + baseDefinitions, + spaceFillingCurves, + sfcIndexToTier.build(), + maxEstimatedDuplicatedIds); + } + return new TieredSFCIndexStrategy( + baseDefinitions, + spaceFillingCurves, + sfcIndexToTier.build()); + } + + /** + * + * @param 
baseDefinitions + * an array of Numeric Dimension Definitions + * @param maxBitsPerDimension + * the max cardinality for the Index Strategy + * @param sfcType + * the type of space filling curve (e.g. Hilbert) + * @return an Index Strategy object with a equal interval tiers + */ + static public TieredSFCIndexStrategy createEqualIntervalPrecisionTieredStrategy( + final NumericDimensionDefinition[] baseDefinitions, + final int[] maxBitsPerDimension, + final SFCType sfcType ) { + return createEqualIntervalPrecisionTieredStrategy( + baseDefinitions, + maxBitsPerDimension, + sfcType, + DEFAULT_NUM_TIERS); + } + + /** + * + * @param baseDefinitions + * an array of Numeric Dimension Definitions + * @param maxBitsPerDimension + * the max cardinality for the Index Strategy + * @param sfcType + * the type of space filling curve (e.g. Hilbert) + * @param numTiers + * the number of tiers of the Index Strategy + * @return an Index Strategy object with a specified number of tiers + */ + static public TieredSFCIndexStrategy createEqualIntervalPrecisionTieredStrategy( + final NumericDimensionDefinition[] baseDefinitions, + final int[] maxBitsPerDimension, + final SFCType sfcType, + final int numIndices ) { + // Subtracting one from the number tiers prevents an extra tier. If + // we decide to create a catch-all, then we can ignore the subtraction. 
+ final SpaceFillingCurve[] spaceFillingCurves = new SpaceFillingCurve[numIndices]; + final ImmutableBiMap.Builder sfcIndexToTier = ImmutableBiMap.builder(); + for (int sfcIndex = 0; sfcIndex < numIndices; sfcIndex++) { + final SFCDimensionDefinition[] sfcDimensions = new SFCDimensionDefinition[baseDefinitions.length]; + int maxBitsOfPrecision = Integer.MIN_VALUE; + for (int d = 0; d < baseDefinitions.length; d++) { + int bitsOfPrecision; + if (numIndices == 1) { + bitsOfPrecision = maxBitsPerDimension[d]; + } + else { + final double bitPrecisionIncrement = ((double) maxBitsPerDimension[d] / (numIndices - 1)); + bitsOfPrecision = (int) (bitPrecisionIncrement * sfcIndex); + } + maxBitsOfPrecision = Math.max( + bitsOfPrecision, + maxBitsOfPrecision); + sfcDimensions[d] = new SFCDimensionDefinition( + baseDefinitions[d], + bitsOfPrecision); + } + sfcIndexToTier.put( + sfcIndex, + (byte) maxBitsOfPrecision); + spaceFillingCurves[sfcIndex] = SFCFactory.createSpaceFillingCurve( + sfcDimensions, + sfcType); + + } + + return new TieredSFCIndexStrategy( + baseDefinitions, + spaceFillingCurves, + sfcIndexToTier.build()); + } + + /** + * + * @param orderedDimensionDefinitions + * an array of Numeric Dimension Definitions + * @param bitsPerDimensionPerLevel + * @param sfcType + * the type of space filling curve (e.g. 
Hilbert) + * @return an Index Strategy object with a specified number of tiers + */ + static public TieredSFCIndexStrategy createDefinedPrecisionTieredStrategy( + final NumericDimensionDefinition[] orderedDimensionDefinitions, + final int[][] bitsPerDimensionPerLevel, + final SFCType sfcType ) { + Integer numLevels = null; + for (final int[] element : bitsPerDimensionPerLevel) { + if (numLevels == null) { + numLevels = element.length; + } + else { + numLevels = Math.min( + numLevels, + element.length); + } + + Arrays.sort(element); + } + if (numLevels == null) { + numLevels = 0; + } + + final SpaceFillingCurve[] orderedSFCTiers = new SpaceFillingCurve[numLevels]; + final int numDimensions = orderedDimensionDefinitions.length; + final ImmutableBiMap.Builder sfcIndexToTier = ImmutableBiMap.builder(); + for (int l = 0; l < numLevels; l++) { + final SFCDimensionDefinition[] sfcDimensions = new SFCDimensionDefinition[numDimensions]; + int maxBitsOfPrecision = Integer.MIN_VALUE; + for (int d = 0; d < numDimensions; d++) { + sfcDimensions[d] = new SFCDimensionDefinition( + orderedDimensionDefinitions[d], + bitsPerDimensionPerLevel[d][l]); + maxBitsOfPrecision = Math.max( + bitsPerDimensionPerLevel[d][l], + maxBitsOfPrecision); + } + sfcIndexToTier.put( + l, + (byte) maxBitsOfPrecision); + orderedSFCTiers[l] = SFCFactory.createSpaceFillingCurve( + sfcDimensions, + sfcType); + } + return new TieredSFCIndexStrategy( + orderedDimensionDefinitions, + orderedSFCTiers, + sfcIndexToTier.build()); + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/TieredSFCIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/TieredSFCIndexStrategy.java new file mode 100644 index 0000000..6ff7cea --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/tiered/TieredSFCIndexStrategy.java @@ -0,0 +1,715 @@ +/******************************************************************************* + * 
Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.tiered; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import net.sf.json.JSONException; +import net.sf.json.JSONObject; + +import com.google.common.collect.ImmutableBiMap; +import com.google.common.collect.ImmutableBiMap.Builder; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.ByteArrayUtils; +import org.locationtech.sfcurve.geowave.index.CoordinateRange; +import org.locationtech.sfcurve.geowave.index.FloatCompareUtils; +import org.locationtech.sfcurve.geowave.index.HierarchicalNumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.IndexMetaData; +import org.locationtech.sfcurve.geowave.index.Mergeable; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import 
org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition;
import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange;
import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition;
import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve;
import org.locationtech.sfcurve.geowave.index.sfc.binned.BinnedSFCUtils;
import org.locationtech.sfcurve.geowave.index.sfc.data.BinnedNumericDataset;
import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData;

/**
 * This class uses multiple SpaceFillingCurve objects, one per tier, to
 * represent a single cohesive index strategy with multiple precisions. Every
 * row ID produced here starts with a one-byte tier ID.
 *
 */
public class TieredSFCIndexStrategy implements
        HierarchicalNumericIndexStrategy
{
    private final static Logger LOGGER = LoggerFactory.getLogger(TieredSFCIndexStrategy.class);
    private final static int DEFAULT_MAX_ESTIMATED_DUPLICATE_IDS_PER_DIMENSION = 2;
    protected static final int DEFAULT_MAX_RANGES = -1;
    private SpaceFillingCurve[] orderedSfcs;
    private ImmutableBiMap<Integer, Byte> orderedSfcIndexToTierId;
    private NumericDimensionDefinition[] baseDefinitions;
    private long maxEstimatedDuplicateIdsPerDimension;
    // key: number of dimensions in which an entry spans a range (min != max);
    // value: maxEstimatedDuplicateIdsPerDimension raised to that power
    private final Map<Integer, BigInteger> maxEstimatedDuplicatesPerDimensionalExtent = new HashMap<>();

    protected TieredSFCIndexStrategy() {}

    /**
     * Constructor used to create a Tiered Index Strategy with the default
     * duplicate-ID limit per dimension.
     *
     * @param baseDefinitions
     *            the dimension definitions of the space filling curve
     * @param orderedSfcs
     *            the space filling curves, one per tier
     * @param orderedSfcIndexToTierId
     *            maps an index into {@code orderedSfcs} to the tier ID byte
     */
    public TieredSFCIndexStrategy(
            final NumericDimensionDefinition[] baseDefinitions,
            final SpaceFillingCurve[] orderedSfcs,
            final ImmutableBiMap<Integer, Byte> orderedSfcIndexToTierId ) {
        this(
                baseDefinitions,
                orderedSfcs,
                orderedSfcIndexToTierId,
                DEFAULT_MAX_ESTIMATED_DUPLICATE_IDS_PER_DIMENSION);
    }

    /**
     * Constructor used to create a Tiered Index Strategy.
     *
     * @param baseDefinitions
     *            the dimension definitions of the space filling curve
     * @param orderedSfcs
     *            the space filling curves, one per tier
     * @param orderedSfcIndexToTierId
     *            maps an index into {@code orderedSfcs} to the tier ID byte
     * @param maxEstimatedDuplicateIdsPerDimension
     *            per-dimension base of the duplicate-ID limit
     */
    public TieredSFCIndexStrategy(
            final NumericDimensionDefinition[] baseDefinitions,
            final SpaceFillingCurve[] orderedSfcs,
            final ImmutableBiMap<Integer, Byte> orderedSfcIndexToTierId,
            final long maxEstimatedDuplicateIdsPerDimension ) {
        this.orderedSfcs = orderedSfcs;
        this.baseDefinitions = baseDefinitions;
        this.orderedSfcIndexToTierId = orderedSfcIndexToTierId;
        this.maxEstimatedDuplicateIdsPerDimension = maxEstimatedDuplicateIdsPerDimension;
        initDuplicateIdLookup();

    }

    /**
     * Rebuilds the lookup of duplicate-ID limits for 0..N ranged dimensions,
     * where N is the number of base definitions.
     */
    private void initDuplicateIdLookup() {
        // drop entries left over from a previous configuration
        maxEstimatedDuplicatesPerDimensionalExtent.clear();
        // exact integer power; going through double would lose precision for
        // large limits
        final BigInteger perDimension = BigInteger.valueOf(maxEstimatedDuplicateIdsPerDimension);
        for (int i = 0; i <= baseDefinitions.length; i++) {
            maxEstimatedDuplicatesPerDimensionalExtent.put(
                    i,
                    perDimension.pow(i));
        }
    }

    /**
     * Picks the tier metadata out of the optional hints: the first hint is
     * used when it is a TierIndexMetaData, otherwise there is none.
     */
    private static TierIndexMetaData findTierMetaData(
            final IndexMetaData... hints ) {
        if ((hints != null) && (hints.length > 0) && (hints[0] instanceof TierIndexMetaData)) {
            return (TierIndexMetaData) hints[0];
        }
        return null;
    }

    /**
     * True only when the metadata positively records a zero count for the
     * tier; missing or shorter metadata never excludes a tier.
     */
    private static boolean isKnownEmptyTier(
            final TierIndexMetaData metaData,
            final int sfcIndex ) {
        return (metaData != null) && (metaData.tierCounts != null) && (sfcIndex < metaData.tierCounts.length)
                && (metaData.tierCounts[sfcIndex] == 0);
    }

    /**
     * Returns the query ranges of every tier, from the last SFC to the first,
     * skipping tiers the hinted metadata records as empty.
     *
     * @param indexedRange
     *            defines the numeric range for the query
     * @param maxRangeDecomposition
     *            passed unchanged to each tier's decomposition
     * @param hints
     *            optional metadata; only a leading TierIndexMetaData is used
     * @return a List of query ranges
     */
    @Override
    public List<ByteArrayRange> getQueryRanges(
            final MultiDimensionalNumericData indexedRange,
            final int maxRangeDecomposition,
            final IndexMetaData... hints ) {
        // TODO don't just pass max ranges along to the SFC, take tiering and
        // binning into account to limit the number of ranges correctly

        final List<ByteArrayRange> queryRanges = new ArrayList<>();
        final BinnedNumericDataset[] binnedQueries = BinnedNumericDataset.applyBins(
                indexedRange,
                baseDefinitions);
        final TierIndexMetaData metaData = findTierMetaData(hints);

        for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) {
            if (isKnownEmptyTier(
                    metaData,
                    sfcIndex)) {
                continue;
            }
            final SpaceFillingCurve sfc = orderedSfcs[sfcIndex];
            final Byte tier = orderedSfcIndexToTierId.get(sfcIndex);
            queryRanges.addAll(BinnedSFCUtils.getQueryRanges(
                    binnedQueries,
                    sfc,
                    maxRangeDecomposition, // for now we're doing this
                                            // per SFC/tier rather than
                                            // dividing by the tiers
                    tier));
        }
        return queryRanges;
    }

    /**
     * Returns a list of query ranges for an specified numeric range.
     *
     * @param indexedRange
     *            defines the numeric range for the query
     * @return a List of query ranges
     */
    @Override
    public List<ByteArrayRange> getQueryRanges(
            final MultiDimensionalNumericData indexedRange,
            final IndexMetaData... hints ) {
        return getQueryRanges(
                indexedRange,
                DEFAULT_MAX_RANGES,
                hints);
    }

    /**
     * Returns a list of id's for insertion.
     *
     * @param indexedData
     *            defines the numeric data to be indexed
     * @return a List of insertion ID's
     */
    @Override
    public List<ByteArrayId> getInsertionIds(
            final MultiDimensionalNumericData indexedData ) {
        return internalGetInsertionIds(
                indexedData,
                maxEstimatedDuplicatesPerDimensionalExtent.get(getRanges(indexedData)));
    }

    /**
     * Counts the dimensions in which the data's min and max differ.
     */
    private static int getRanges(
            final MultiDimensionalNumericData indexedData ) {
        final double[] mins = indexedData.getMinValuesPerDimension();
        final double[] maxes = indexedData.getMaxValuesPerDimension();
        int ranges = 0;
        for (int d = 0; d < mins.length; d++) {
            if (!FloatCompareUtils.checkDoublesEqual(
                    mins[d],
                    maxes[d])) {
                ranges++;
            }
        }
        return ranges;
    }

    @Override
    public List<ByteArrayId> getInsertionIds(
            final MultiDimensionalNumericData indexedData,
            final int maxDuplicateInsertionIdsPerDimension ) {
        return internalGetInsertionIds(
                indexedData,
                BigInteger.valueOf(maxDuplicateInsertionIdsPerDimension));
    }

    private List<ByteArrayId> internalGetInsertionIds(
            final MultiDimensionalNumericData indexedData,
            final BigInteger maxDuplicateInsertionIds ) {
        final BinnedNumericDataset[] ranges = BinnedNumericDataset.applyBins(
                indexedData,
                baseDefinitions);
        // place each of these indices into a single row ID at a tier that will
        // fit its min and max
        final List<ByteArrayId> rowIds = new ArrayList<>(
                ranges.length);
        for (final BinnedNumericDataset range : ranges) {
            rowIds.addAll(getRowIds(
                    range,
                    maxDuplicateInsertionIds));
        }
        return rowIds;
    }

    /**
     * @param insertionId
     *            a row ID whose first byte is the tier ID
     * @return the coordinates for the ID, or null (with a warning) when the ID
     *         is empty or its tier byte is not a tier of this strategy
     */
    @Override
    public MultiDimensionalCoordinates getCoordinatesPerDimension(
            final ByteArrayId insertionId ) {
        final byte[] rowId = insertionId.getBytes();
        if (rowId.length > 0) {
            final Integer orderedSfcIndex = orderedSfcIndexToTierId.inverse().get(
                    rowId[0]);
            if (orderedSfcIndex == null) {
                // unknown tier byte: report it instead of failing on unboxing
                LOGGER.warn("Row's tier ID does not match any tier of this index strategy");
                return null;
            }
            return new MultiDimensionalCoordinates(
                    new byte[] {
                        rowId[0]
                    },
                    BinnedSFCUtils.getCoordinatesForId(
                            rowId,
                            baseDefinitions,
                            orderedSfcs[orderedSfcIndex]));
        }
        else {
            LOGGER.warn("Row must at least contain a byte for tier");
        }
        return null;
    }

    /**
     * @param insertionId
     *            a row ID whose first byte is the tier ID
     * @return the numeric range for the ID, or null (with a warning) when the
     *         ID is empty or its tier byte is not a tier of this strategy
     */
    @Override
    public MultiDimensionalNumericData getRangeForId(
            final ByteArrayId insertionId ) {
        final byte[] rowId = insertionId.getBytes();
        if (rowId.length > 0) {
            final Integer orderedSfcIndex = orderedSfcIndexToTierId.inverse().get(
                    rowId[0]);
            if (orderedSfcIndex == null) {
                // unknown tier byte: report it instead of failing on unboxing
                LOGGER.warn("Row's tier ID does not match any tier of this index strategy");
                return null;
            }
            return BinnedSFCUtils.getRangeForId(
                    rowId,
                    baseDefinitions,
                    orderedSfcs[orderedSfcIndex]);
        }
        else {
            LOGGER.warn("Row must at least contain a byte for tier");
        }
        return null;
    }

    /**
     * Returns one coordinate-range set per tier, from the last SFC to the
     * first, skipping tiers the hinted metadata records as empty.
     */
    @Override
    public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension(
            final MultiDimensionalNumericData dataRange,
            final IndexMetaData... hints ) {
        final List<MultiDimensionalCoordinateRanges> coordRanges = new ArrayList<>();
        final BinRange[][] binRangesPerDimension = BinnedNumericDataset.getBinnedRangesPerDimension(
                dataRange,
                baseDefinitions);
        final TierIndexMetaData metaData = findTierMetaData(hints);

        for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) {
            if (isKnownEmptyTier(
                    metaData,
                    sfcIndex)) {
                continue;
            }
            final SpaceFillingCurve sfc = orderedSfcs[sfcIndex];
            final Byte tier = orderedSfcIndexToTierId.get(sfcIndex);
            coordRanges.add(BinnedSFCUtils.getCoordinateRanges(
                    binRangesPerDimension,
                    sfc,
                    baseDefinitions.length,
                    tier));
        }
        return coordRanges.toArray(new MultiDimensionalCoordinateRanges[] {});
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = (prime * result) + Arrays.hashCode(baseDefinitions);
        result = (prime * result)
                + (int) (maxEstimatedDuplicateIdsPerDimension ^ (maxEstimatedDuplicateIdsPerDimension >>> 32));
        result = (prime * result) + ((orderedSfcIndexToTierId == null) ? 0 : orderedSfcIndexToTierId.hashCode());
        result = (prime * result) + Arrays.hashCode(orderedSfcs);
        return result;
    }

    @Override
    public boolean equals(
            final Object obj ) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final TieredSFCIndexStrategy other = (TieredSFCIndexStrategy) obj;
        if (!Arrays.equals(
                baseDefinitions,
                other.baseDefinitions)) {
            return false;
        }
        if (maxEstimatedDuplicateIdsPerDimension != other.maxEstimatedDuplicateIdsPerDimension) {
            return false;
        }
        if (orderedSfcIndexToTierId == null) {
            if (other.orderedSfcIndexToTierId != null) {
                return false;
            }
        }
        else if (!orderedSfcIndexToTierId.equals(other.orderedSfcIndexToTierId)) {
            return false;
        }
        if (!Arrays.equals(
                orderedSfcs,
                other.orderedSfcs)) {
            return false;
        }
        return true;
    }

    /**
     * @return the string form of {@link #hashCode()}
     */
    @Override
    public String getId() {
        return StringUtils.intToString(hashCode());
    }

    @Override
    public NumericDimensionDefinition[] getOrderedDimensionDefinitions() {
        return baseDefinitions;
    }

    /**
     * @param tierId
     *            the tier ID byte to look up
     * @return true if one of this strategy's SFCs is registered under the ID
     */
    public boolean tierExists(
            Byte tierId ) {
        return orderedSfcIndexToTierId.containsValue(tierId);
    }

    /**
     * Walks the tiers from the last SFC to the first and returns the row IDs
     * of the first tier that accepts the entry.
     */
    synchronized private List<ByteArrayId> getRowIds(
            final BinnedNumericDataset index,
            final BigInteger maxEstimatedDuplicateIds ) {
        // most times this should be a single row ID, but if the lowest
        // precision tier does not have a single SFC value for this data, it
        // will be multiple row IDs

        // what tier does this entry belong in?
        for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) {
            final SpaceFillingCurve sfc = orderedSfcs[sfcIndex];
            // loop through space filling curves and stop when both the min and
            // max of the ranges fit the same row ID
            final byte tierId = orderedSfcIndexToTierId.get(sfcIndex);
            final List<ByteArrayId> rowIdsAtTier = getRowIdsAtTier(
                    index,
                    tierId,
                    sfc,
                    maxEstimatedDuplicateIds,
                    sfcIndex);
            if (rowIdsAtTier != null) {
                return rowIdsAtTier;
            }
        }

        // this should never happen because of the check for tier 0
        return new ArrayList<>();
    }

    /**
     * Tries to place an entry at one tier.
     *
     * @return a single row ID when the helper finds one; otherwise the
     *         decomposed row IDs when no limit is given, the estimated count
     *         is within the limit, or this is SFC index 0; otherwise null
     */
    protected static List<ByteArrayId> getRowIdsAtTier(
            final BinnedNumericDataset index,
            final byte tierId,
            final SpaceFillingCurve sfc,
            final BigInteger maxEstimatedDuplicateIds,
            final int sfcIndex ) {

        final BigInteger rowCount = sfc.getEstimatedIdCount(index);

        final ByteArrayId singleId = BinnedSFCUtils.getSingleBinnedRowId(
                rowCount,
                tierId,
                index,
                sfc);
        if (singleId != null) {
            return Collections.singletonList(singleId);
        }

        if ((maxEstimatedDuplicateIds == null) || (rowCount.compareTo(maxEstimatedDuplicateIds) <= 0)
                || (sfcIndex == 0)) {
            return decomposeRangesForEntry(
                    index,
                    tierId,
                    sfc);
        }
        return null;
    }

    /**
     * Decomposes the entry into SFC ranges and emits one row ID (tier ID + bin
     * ID + SFC value) for every value from each range's start to its end.
     */
    protected static List<ByteArrayId> decomposeRangesForEntry(
            final BinnedNumericDataset index,
            final byte tierId,
            final SpaceFillingCurve sfc ) {
        final List<ByteArrayId> retVal = new ArrayList<>();
        final byte[] tierAndBinId = ByteArrayUtils.combineArrays(
                new byte[] {
                    tierId
                },
                index.getBinId());
        final RangeDecomposition rangeDecomp = sfc.decomposeRange(
                index,
                false,
                DEFAULT_MAX_RANGES);
        // this range does not fit into a single row ID at the lowest
        // tier, decompose it
        for (final ByteArrayRange range : rangeDecomp.getRanges()) {
            final byte[] startRowId = range.getStart().getBytes();
            // work on a copy; the increment below modifies the array
            final byte[] currentRowId = Arrays.copyOf(
                    startRowId,
                    startRowId.length);
            retVal.add(new ByteArrayId(
                    ByteArrayUtils.combineArrays(
                            tierAndBinId,
                            currentRowId)));
            while (!Arrays.equals(
                    currentRowId,
                    range.getEnd().getBytes())) {
                // increment until we reach the end row ID
                final boolean overflow = !ByteArrayUtils.increment(currentRowId);
                if (overflow) {
                    // the increment caused an overflow which shouldn't
                    // ever happen assuming the start row ID is less
                    // than the end row ID
                    LOGGER
                            .warn("Row IDs overflowed when ingesting data; start of range decomposition must be less than or equal to end of range. This may be because the start of the decomposed range is higher than the end of the range.");
                    break;
                }
                retVal.add(new ByteArrayId(
                        ByteArrayUtils.combineArrays(
                                tierAndBinId,
                                currentRowId)));
            }
        }
        return retVal;
    }

    /**
     * Layout: three ints (SFC count, dimension count, mapping size), one long
     * (duplicate-ID limit per dimension), the length-prefixed SFCs, the
     * length-prefixed dimensions, then one (index byte, tier byte) pair per
     * mapping entry.
     */
    @Override
    public byte[] toBinary() {
        // 3 ints + 1 long of header, 2 bytes per mapping entry
        int byteBufferLength = 20 + (2 * orderedSfcIndexToTierId.size());
        final List<byte[]> orderedSfcBinaries = new ArrayList<>(
                orderedSfcs.length);
        final List<byte[]> dimensionBinaries = new ArrayList<>(
                baseDefinitions.length);
        for (final SpaceFillingCurve sfc : orderedSfcs) {
            final byte[] sfcBinary = PersistenceUtils.toBinary(sfc);
            byteBufferLength += (4 + sfcBinary.length);
            orderedSfcBinaries.add(sfcBinary);
        }
        for (final NumericDimensionDefinition dimension : baseDefinitions) {
            final byte[] dimensionBinary = PersistenceUtils.toBinary(dimension);
            byteBufferLength += (4 + dimensionBinary.length);
            dimensionBinaries.add(dimensionBinary);
        }
        final ByteBuffer buf = ByteBuffer.allocate(byteBufferLength);
        buf.putInt(orderedSfcs.length);
        buf.putInt(baseDefinitions.length);
        buf.putInt(orderedSfcIndexToTierId.size());
        buf.putLong(maxEstimatedDuplicateIdsPerDimension);
        for (final byte[] sfcBinary : orderedSfcBinaries) {
            buf.putInt(sfcBinary.length);
            buf.put(sfcBinary);
        }
        for (final byte[] dimensionBinary : dimensionBinaries) {
            buf.putInt(dimensionBinary.length);
            buf.put(dimensionBinary);
        }
        for (final Entry<Integer, Byte> entry : orderedSfcIndexToTierId.entrySet()) {
            buf.put(entry.getKey().byteValue());
            buf.put(entry.getValue());
        }

        return buf.array();
    }

    /**
     * Restores the state written by {@link #toBinary()} and rebuilds the
     * duplicate-ID lookup.
     */
    @Override
    public void fromBinary(
            final byte[] bytes ) {
        final ByteBuffer buf = ByteBuffer.wrap(bytes);
        final int numSfcs = buf.getInt();
        final int numDimensions = buf.getInt();
        final int mappingSize = buf.getInt();
        maxEstimatedDuplicateIdsPerDimension = buf.getLong();
        orderedSfcs = new SpaceFillingCurve[numSfcs];
        baseDefinitions = new NumericDimensionDefinition[numDimensions];
        for (int i = 0; i < numSfcs; i++) {
            final byte[] sfc = new byte[buf.getInt()];
            buf.get(sfc);
            orderedSfcs[i] = PersistenceUtils.fromBinary(
                    sfc,
                    SpaceFillingCurve.class);
        }
        for (int i = 0; i < numDimensions; i++) {
            final byte[] dim = new byte[buf.getInt()];
            buf.get(dim);
            baseDefinitions[i] = PersistenceUtils.fromBinary(
                    dim,
                    NumericDimensionDefinition.class);
        }
        final Builder<Integer, Byte> bimapBuilder = ImmutableBiMap.builder();
        for (int i = 0; i < mappingSize; i++) {
            bimapBuilder.put(
                    Byte.valueOf(
                            buf.get()).intValue(),
                    buf.get());
        }
        orderedSfcIndexToTierId = bimapBuilder.build();

        initDuplicateIdLookup();
    }

    /**
     * @return one sub-strategy per SFC, each paired with its one-byte tier ID
     */
    @Override
    public SubStrategy[] getSubStrategies() {
        final SubStrategy[] subStrategies = new SubStrategy[orderedSfcs.length];
        for (int sfcIndex = 0; sfcIndex < orderedSfcs.length; sfcIndex++) {
            final byte tierId = orderedSfcIndexToTierId.get(sfcIndex);
            subStrategies[sfcIndex] = new SubStrategy(
                    new SingleTierSubStrategy(
                            orderedSfcs[sfcIndex],
                            baseDefinitions,
                            tierId),
                    new byte[] {
                        tierId
                    });
        }
        return subStrategies;
    }

    @Override
    public double[] getHighestPrecisionIdRangePerDimension() {
        // delegate this to the highest precision tier SFC
        return orderedSfcs[orderedSfcs.length - 1].getInsertionIdRangePerDimension();
    }

    /**
     * Changes the per-dimension duplicate-ID limit and rebuilds the lookup
     * derived from it.
     */
    public void setMaxEstimatedDuplicateIdsPerDimension(
            final int maxEstimatedDuplicateIdsPerDimension ) {
        this.maxEstimatedDuplicateIdsPerDimension = maxEstimatedDuplicateIdsPerDimension;

        initDuplicateIdLookup();
    }

    /**
     * @return one single-byte ID per tier
     */
    @Override
    public Set<ByteArrayId> getNaturalSplits() {
        final Set<ByteArrayId> retVal = new HashSet<>(
                orderedSfcIndexToTierId.size());
        for (final Byte tier : orderedSfcIndexToTierId.values()) {
            retVal.add(new ByteArrayId(
                    new byte[] {
                        tier
                    }));
        }
        return retVal;
    }

    /**
     * @return 1 (the tier byte) plus every positive fixed bin ID size of the
     *         base definitions
     */
    @Override
    public int getByteOffsetFromDimensionalIndex() {
        int rowIdOffset = 1;
        for (int dimensionIdx = 0; dimensionIdx < baseDefinitions.length; dimensionIdx++) {
            final int binSize = baseDefinitions[dimensionIdx].getFixedBinIdSize();
            if (binSize > 0) {
                rowIdOffset += binSize;
            }
        }
        return rowIdOffset;
    }

    @Override
    public List<IndexMetaData> createMetaData() {
        return Collections.<IndexMetaData> singletonList(new TierIndexMetaData(
                orderedSfcIndexToTierId.inverse()));
    }

    /**
     * Per-tier counts of insertion IDs, indexed by SFC index. Only the counts
     * are serialized; an instance restored with {@link #fromBinary(byte[])}
     * has no tier-ID mapping.
     */
    public static class TierIndexMetaData implements
            IndexMetaData
    {

        private int[] tierCounts = null;
        private ImmutableBiMap<Byte, Integer> orderedTierIdToSfcIndex = null;

        public TierIndexMetaData() {}

        public TierIndexMetaData(
                final ImmutableBiMap<Byte, Integer> orderedTierIdToSfcIndex ) {
            super();
            tierCounts = new int[orderedTierIdToSfcIndex.size()];
            this.orderedTierIdToSfcIndex = orderedTierIdToSfcIndex;
        }

        @Override
        public byte[] toBinary() {
            final ByteBuffer buffer = ByteBuffer.allocate(4 + (tierCounts.length * 4));
            buffer.putInt(tierCounts.length);
            for (final int count : tierCounts) {
                buffer.putInt(count);
            }
            // do not use orderedTierIdToSfcIndex on query, so the mapping is
            // deliberately not written
            return buffer.array();
        }

        @Override
        public void fromBinary(
                final byte[] bytes ) {
            final ByteBuffer buffer = ByteBuffer.wrap(bytes);
            tierCounts = new int[buffer.getInt()];
            for (int i = 0; i < tierCounts.length; i++) {
                tierCounts[i] = buffer.getInt();
            }
            // do not use orderedTierIdToSfcIndex on query, so the mapping is
            // deliberately not restored and stays null
        }

        /**
         * Adds the other metadata's counts to this one, position by position.
         * Anything that is not a TierIndexMetaData is ignored.
         */
        @Override
        public void merge(
                final Mergeable merge ) {
            if (merge instanceof TierIndexMetaData) {
                final int[] otherCounts = ((TierIndexMetaData) merge).tierCounts;
                if (otherCounts == null) {
                    return;
                }
                if (tierCounts == null) {
                    tierCounts = new int[otherCounts.length];
                }
                else if (tierCounts.length < otherCounts.length) {
                    // grow rather than overrun when the other side has more tiers
                    tierCounts = Arrays.copyOf(
                            tierCounts,
                            otherCounts.length);
                }
                for (int pos = 0; pos < otherCounts.length; pos++) {
                    tierCounts[pos] += otherCounts[pos];
                }
            }

        }

        @Override
        public void insertionIdsAdded(
                final List<ByteArrayId> ids ) {
            adjustTierCounts(
                    ids,
                    1);
        }

        @Override
        public void insertionIdsRemoved(
                final List<ByteArrayId> ids ) {
            adjustTierCounts(
                    ids,
                    -1);
        }

        /**
         * Adds delta to the count of the tier named by each ID's first byte.
         * IDs that are empty or name an unknown tier are skipped, and nothing
         * is counted when this instance has no tier-ID mapping.
         */
        private void adjustTierCounts(
                final List<ByteArrayId> ids,
                final int delta ) {
            if (orderedTierIdToSfcIndex == null) {
                return;
            }
            for (final ByteArrayId id : ids) {
                final byte[] idBytes = id.getBytes();
                if (idBytes.length == 0) {
                    continue;
                }
                final Integer sfcIndex = orderedTierIdToSfcIndex.get(idBytes[0]);
                if (sfcIndex != null) {
                    tierCounts[sfcIndex.intValue()] += delta;
                }
            }
        }

        /**
         * Convert Tiered Index Metadata statistics to a JSON object
         */

        @Override
        public JSONObject toJSONObject()
                throws JSONException {
            final JSONObject jo = new JSONObject();
            jo.put(
                    "type",
                    "TieredSFCIndexStrategy");

            jo.put(
                    "TierCountsSize",
                    (tierCounts == null) ? 0 : tierCounts.length);

            if (null == orderedTierIdToSfcIndex) {
                jo.put(
                        "orderedTierIdToSfcIndex",
                        "null");
            }
            else {
                jo.put(
                        "orderedTierIdToSfcIndexSize",
                        orderedTierIdToSfcIndex.size());
            }

            return jo;
        }

    }
}
diff --git
a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZHierarchicalIndexFactory.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZHierarchicalIndexFactory.java new file mode 100644 index 0000000..ab53147 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZHierarchicalIndexFactory.java @@ -0,0 +1,69 @@
/*******************************************************************************
 * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Apache License,
 * Version 2.0 which accompanies this distribution and is available at
 * http://www.apache.org/licenses/LICENSE-2.0.txt
 ******************************************************************************/
package org.locationtech.sfcurve.geowave.index.sfc.xz;

import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition;
import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory.SFCType;
import org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexFactory;
import org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexStrategy;

/**
 * A factory for creating a Hierarchical XZ Index strategy with a
 * TieredSFCIndexStrategy substrategy using various approaches for breaking down
 * the bits of precision per tier
 *
 */
public class XZHierarchicalIndexFactory
{

    /**
     * Delegates to the four-argument overload with no duplicate-ID limit
     * ({@code null}).
     *
     * @param baseDefinitions
     *            an array of Numeric Dimension Definitions
     * @param maxBitsPerDimension
     *            the max cardinality for the Index Strategy
     * @param sfcType
     *            the type of space filling curve (e.g. Hilbert)
     * @return the XZ hierarchical Index Strategy built by the delegate
     */
    static public XZHierarchicalIndexStrategy createFullIncrementalTieredStrategy(
            final NumericDimensionDefinition[] baseDefinitions,
            final int[] maxBitsPerDimension,
            final SFCType sfcType ) {
        final Long noDuplicateLimit = null;
        return createFullIncrementalTieredStrategy(
                baseDefinitions,
                maxBitsPerDimension,
                sfcType,
                noDuplicateLimit);
    }

    /**
     * Builds a full incremental tiered strategy through TieredSFCIndexFactory
     * and hands it, with the same definitions and bit counts, to a new
     * XZHierarchicalIndexStrategy.
     *
     * @param baseDefinitions
     *            an array of Numeric Dimension Definitions
     * @param maxBitsPerDimension
     *            the max cardinality for the Index Strategy
     * @param sfcType
     *            the type of space filling curve (e.g. Hilbert)
     * @param maxEstimatedDuplicatedIds
     *            the max number of duplicate SFC IDs
     * @return an Index Strategy object with a tier for every incremental
     *         cardinality between the lowest max bits of precision and 0
     */
    static public XZHierarchicalIndexStrategy createFullIncrementalTieredStrategy(
            final NumericDimensionDefinition[] baseDefinitions,
            final int[] maxBitsPerDimension,
            final SFCType sfcType,
            Long maxEstimatedDuplicatedIds ) {
        // the tiered strategy is created inline, as the second constructor argument
        return new XZHierarchicalIndexStrategy(
                baseDefinitions,
                TieredSFCIndexFactory.createFullIncrementalTieredStrategy(
                        baseDefinitions,
                        maxBitsPerDimension,
                        sfcType,
                        maxEstimatedDuplicatedIds),
                maxBitsPerDimension);
    }

}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZHierarchicalIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZHierarchicalIndexStrategy.java new file mode 100644 index 0000000..9bbc0b5 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZHierarchicalIndexStrategy.java @@ -0,0 +1,578 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved.
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.xz; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.ByteArrayUtils; +import org.locationtech.sfcurve.geowave.index.Coordinate; +import org.locationtech.sfcurve.geowave.index.HierarchicalNumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.IndexMetaData; +import org.locationtech.sfcurve.geowave.index.Mergeable; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory.SFCType; +import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve; +import org.locationtech.sfcurve.geowave.index.sfc.binned.BinnedSFCUtils; +import org.locationtech.sfcurve.geowave.index.sfc.data.BinnedNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexStrategy; +import 
org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexStrategy.TierIndexMetaData;
+import net.sf.json.JSONException;
+import net.sf.json.JSONObject;
+
+/**
+ * Hierarchical index strategy that combines three sub-indexes, told apart by
+ * the first byte of a row id: the wrapped {@link TieredSFCIndexStrategy}
+ * (tier ids), a Hilbert "point" curve and an XZ-order curve. The two curve
+ * ids are chosen in {@link #init} from byte values the tiered strategy does
+ * not use.
+ */
+public class XZHierarchicalIndexStrategy implements
+        HierarchicalNumericIndexStrategy
+{
+    private final static Logger LOGGER = LoggerFactory.getLogger(XZHierarchicalIndexStrategy.class);
+
+    protected static final int DEFAULT_MAX_RANGES = -1;
+
+    // first-byte ids of the two curves; null until init() has found free values
+    private Byte pointCurveMultiDimensionalId = null;
+    private Byte xzCurveMultiDimensionalId = null;
+
+    private SpaceFillingCurve pointCurve;
+    private SpaceFillingCurve xzCurve;
+    private TieredSFCIndexStrategy rasterStrategy;
+
+    private NumericDimensionDefinition[] baseDefinitions;
+    private int[] maxBitsPerDimension;
+
+    private int byteOffsetFromDimensionIndex;
+
+    /** No-arg constructor; state is filled in by {@link #fromBinary(byte[])}. */
+    protected XZHierarchicalIndexStrategy() {}
+
+    /**
+     * Constructor used to create a XZ Hierarchical Index Strategy.
+     *
+     * @param baseDefinitions
+     *            the dimension definitions shared by both curves
+     * @param rasterStrategy
+     *            the tiered strategy that non-curve row ids are delegated to
+     * @param maxBitsPerDimension
+     *            bits of precision for each entry of {@code baseDefinitions},
+     *            in the same order
+     */
+    public XZHierarchicalIndexStrategy(
+            NumericDimensionDefinition[] baseDefinitions,
+            TieredSFCIndexStrategy rasterStrategy,
+            int[] maxBitsPerDimension ) {
+        this.rasterStrategy = rasterStrategy;
+        this.maxBitsPerDimension = maxBitsPerDimension;
+        init(baseDefinitions);
+    }
+
+    /**
+     * Builds the derived state: stores the dimension definitions, reserves
+     * two first-byte ids that the raster strategy does not use as tier ids,
+     * and creates the Hilbert point curve and the XZ-order curve.
+     */
+    private void init(
+            final NumericDimensionDefinition[] baseDefinitions ) {
+
+        this.baseDefinitions = baseDefinitions;
+
+        byteOffsetFromDimensionIndex = rasterStrategy.getByteOffsetFromDimensionalIndex();
+
+        // init dimensionalIds with values not used by rasterStrategy.
+        // The counter is an int on purpose: a byte counter can never exceed
+        // Byte.MAX_VALUE, so a "byte i <= Byte.MAX_VALUE" condition is always
+        // true and the loop would wrap around forever whenever it cannot
+        // break out early.
+        for (int candidate = Byte.MIN_VALUE; candidate <= Byte.MAX_VALUE; candidate++) {
+            final byte id = (byte) candidate;
+            if (rasterStrategy.tierExists(id)) {
+                continue;
+            }
+            if (pointCurveMultiDimensionalId == null) {
+                pointCurveMultiDimensionalId = id;
+            }
+            else if (xzCurveMultiDimensionalId == null) {
+                xzCurveMultiDimensionalId = id;
+            }
+            if (pointCurveMultiDimensionalId != null && xzCurveMultiDimensionalId != null) {
+                break;
+            }
+        }
+        if (pointCurveMultiDimensionalId == null || xzCurveMultiDimensionalId == null) {
+            // NOTE(review): the strategy stays unusable in this state (the ids
+            // are unboxed later); only logged here to keep the constructor
+            // contract unchanged.
+            LOGGER.error("No available byte values for xz and point sfc multiDimensionalIds.");
+        }
+
+        final SFCDimensionDefinition[] sfcDimensions = new SFCDimensionDefinition[baseDefinitions.length];
+        for (int i = 0; i < baseDefinitions.length; i++) {
+            sfcDimensions[i] = new SFCDimensionDefinition(
+                    baseDefinitions[i],
+                    maxBitsPerDimension[i]);
+        }
+
+        pointCurve = SFCFactory.createSpaceFillingCurve(
+                sfcDimensions,
+                SFCType.HILBERT);
+        xzCurve = SFCFactory.createSpaceFillingCurve(
+                sfcDimensions,
+                SFCType.XZORDER);
+    }
+
+    /**
+     * Same as the three-argument overload with
+     * {@link #DEFAULT_MAX_RANGES} as the range limit.
+     */
+    @Override
+    public List<ByteArrayRange> getQueryRanges(
+            MultiDimensionalNumericData indexedRange,
+            IndexMetaData... hints ) {
+        return getQueryRanges(
+                indexedRange,
+                DEFAULT_MAX_RANGES,
+                hints);
+    }
+
+    /**
+     * Collects query ranges from the raster strategy and, unless the XZ
+     * metadata hint reports a zero count for it, from the point curve and the
+     * XZ curve.
+     *
+     * @param indexedRange
+     *            the query window
+     * @param maxEstimatedRangeDecomposition
+     *            range limit, passed unchanged to each sub-index
+     * @param hints
+     *            optional metadata; the first {@link TierIndexMetaData} and the
+     *            first {@code XZHierarchicalIndexMetaData} found are used,
+     *            everything else is ignored
+     * @return the ranges of all consulted sub-indexes
+     */
+    @Override
+    public List<ByteArrayRange> getQueryRanges(
+            MultiDimensionalNumericData indexedRange,
+            int maxEstimatedRangeDecomposition,
+            IndexMetaData... hints ) {
+
+        // TODO don't just pass max ranges along to the SFC, take tiering and
+        // binning into account to limit the number of ranges correctly
+
+        // Pick the hints by type instead of by position: indexing hints[1]
+        // after only checking "length > 0" fails when a single hint is given.
+        TierIndexMetaData tieredHints = null;
+        XZHierarchicalIndexMetaData xzHints = null;
+        if (hints != null) {
+            for (final IndexMetaData hint : hints) {
+                if (tieredHints == null && hint instanceof TierIndexMetaData) {
+                    tieredHints = (TierIndexMetaData) hint;
+                }
+                else if (xzHints == null && hint instanceof XZHierarchicalIndexMetaData) {
+                    xzHints = (XZHierarchicalIndexMetaData) hint;
+                }
+            }
+        }
+
+        final List<ByteArrayRange> queryRanges = rasterStrategy.getQueryRanges(
+                indexedRange,
+                maxEstimatedRangeDecomposition,
+                tieredHints);
+
+        final BinnedNumericDataset[] binnedQueries = BinnedNumericDataset.applyBins(
+                indexedRange,
+                baseDefinitions);
+
+        // for now the range limit is applied per SFC rather than divided
+        // between the SFCs
+        if (xzHints == null || xzHints.pointCurveCount > 0) {
+            queryRanges.addAll(BinnedSFCUtils.getQueryRanges(
+                    binnedQueries,
+                    pointCurve,
+                    maxEstimatedRangeDecomposition,
+                    pointCurveMultiDimensionalId));
+        }
+
+        if (xzHints == null || xzHints.xzCurveCount > 0) {
+            queryRanges.addAll(BinnedSFCUtils.getQueryRanges(
+                    binnedQueries,
+                    xzCurve,
+                    maxEstimatedRangeDecomposition,
+                    xzCurveMultiDimensionalId));
+        }
+
+        return queryRanges;
+    }
+
+    /**
+     * Produces one row id per bin of the data: the point-curve id when
+     * {@code BinnedSFCUtils.getSingleBinnedRowId} yields one, otherwise an id
+     * made of the XZ curve id byte, the bin id and the XZ sequence code of
+     * the bin's bounding box.
+     */
+    @Override
+    public List<ByteArrayId> getInsertionIds(
+            MultiDimensionalNumericData indexedData ) {
+
+        final BinnedNumericDataset[] ranges = BinnedNumericDataset.applyBins(
+                indexedData,
+                baseDefinitions);
+        final List<ByteArrayId> rowIds = new ArrayList<ByteArrayId>(
+                ranges.length);
+        for (final BinnedNumericDataset range : ranges) {
+            final BigInteger pointIds = pointCurve.getEstimatedIdCount(range);
+            final ByteArrayId pointCurveId = BinnedSFCUtils.getSingleBinnedRowId(
+                    pointIds,
+                    pointCurveMultiDimensionalId,
+                    range,
+                    pointCurve);
+            if (pointCurveId != null) {
+                rowIds.add(pointCurveId);
+                continue;
+            }
+
+            final double[] mins = range.getMinValuesPerDimension();
+            final double[] maxes = range.getMaxValuesPerDimension();
+
+            // interleave as (min0, max0, min1, max1, ...)
+            final double[] values = new double[mins.length + maxes.length];
+            final int pairs = values.length / 2;
+            for (int d = 0; d < pairs; d++) {
+                values[2 * d] = mins[d];
+                values[(2 * d) + 1] = maxes[d];
+            }
+
+            final byte[] xzId = xzCurve.getId(values);
+
+            final byte[] prefixedId = ByteArrayUtils.combineArrays(
+                    ByteArrayUtils.combineArrays(
+                            new byte[] {
+                                xzCurveMultiDimensionalId
+                            },
+                            range.getBinId()),
+                    xzId);
+            rowIds.add(new ByteArrayId(
+                    prefixedId));
+        }
+
+        return rowIds;
+    }
+
+    /** Ignores {@code maxEstimatedDuplicateIds} and delegates to the one-argument overload. */
+    @Override
+    public List<ByteArrayId> getInsertionIds(
+            MultiDimensionalNumericData indexedData,
+            int maxEstimatedDuplicateIds ) {
+        return getInsertionIds(indexedData);
+    }
+
+    /** Resolves the range through whichever sub-index owns the id's first byte. */
+    @Override
+    public MultiDimensionalNumericData getRangeForId(
+            ByteArrayId insertionId ) {
+        // select curve based on first byte
+        final byte first = insertionId.getBytes()[0];
+        if (first == pointCurveMultiDimensionalId) {
+            return pointCurve.getRanges(insertionId.getBytes());
+        }
+        else if (first == xzCurveMultiDimensionalId) {
+            return xzCurve.getRanges(insertionId.getBytes());
+        }
+        else {
+            return rasterStrategy.getRangeForId(insertionId);
+        }
+    }
+
+    @Override
+    public int hashCode() {
+        // internal tiered raster strategy already contains all the details that
+        // provide uniqueness and comparability to the hierarchical strategy
+        return rasterStrategy.hashCode();
+    }
+
+    @Override
+    public boolean equals(
+            final Object obj ) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        final XZHierarchicalIndexStrategy other = (XZHierarchicalIndexStrategy) obj;
+        // internal tiered raster strategy already contains all the details that
+        // provide uniqueness and comparability to the hierarchical strategy
+        return rasterStrategy.equals(other.rasterStrategy);
+    }
+
+    @Override
+    public String getId() {
+        return StringUtils.intToString(hashCode());
+    }
+
+    /** Returns the raster strategy's splits plus the two one-byte curve ids. */
+    @Override
+    public Set<ByteArrayId> getNaturalSplits() {
+        // return the multidimensionalIds of the curves and tiers
+        // NOTE(review): adds to the set handed back by the raster strategy;
+        // assumes that set is mutable and not shared - confirm.
+        final Set<ByteArrayId> splits = rasterStrategy.getNaturalSplits();
+
+        splits.add(new ByteArrayId(
+                new byte[] {
+                    pointCurveMultiDimensionalId
+                }));
+        splits.add(new ByteArrayId(
+                new byte[] {
+                    xzCurveMultiDimensionalId
+                }));
+
+        return splits;
+    }
+
+    /**
+     * Serialized layout (all counts and lengths are 4-byte ints): dimension
+     * count, then per dimension its length and bytes; raster strategy length
+     * and bytes; bit-count array length and its entries.
+     */
+    @Override
+    public byte[] toBinary() {
+
+        final List<byte[]> dimensionDefBinaries = new ArrayList<byte[]>(
+                baseDefinitions.length);
+        int bufferLength = 4;
+        for (final NumericDimensionDefinition dimension : baseDefinitions) {
+            final byte[] sfcDimensionBinary = PersistenceUtils.toBinary(dimension);
+            bufferLength += (sfcDimensionBinary.length + 4);
+            dimensionDefBinaries.add(sfcDimensionBinary);
+        }
+
+        bufferLength += 4;
+        final byte[] rasterStrategyBinary = PersistenceUtils.toBinary(rasterStrategy);
+        bufferLength += rasterStrategyBinary.length;
+
+        bufferLength += 4;
+        bufferLength += maxBitsPerDimension.length * 4;
+
+        final ByteBuffer buf = ByteBuffer.allocate(bufferLength);
+
+        buf.putInt(baseDefinitions.length);
+        for (final byte[] dimensionDefBinary : dimensionDefBinaries) {
+            buf.putInt(dimensionDefBinary.length);
+            buf.put(dimensionDefBinary);
+        }
+
+        buf.putInt(rasterStrategyBinary.length);
+        buf.put(rasterStrategyBinary);
+
+        buf.putInt(maxBitsPerDimension.length);
+        for (final int dimBits : maxBitsPerDimension) {
+            buf.putInt(dimBits);
+        }
+
+        return buf.array();
+    }
+
+    /** Reads the layout written by {@link #toBinary()} and re-runs {@link #init}. */
+    @Override
+    public void fromBinary(
+            byte[] bytes ) {
+
+        final ByteBuffer buf = ByteBuffer.wrap(bytes);
+
+        final int numDimensions = buf.getInt();
+
+        baseDefinitions = new NumericDimensionDefinition[numDimensions];
+        for (int i = 0; i < numDimensions; i++) {
+            final byte[] dim = new byte[buf.getInt()];
+            buf.get(dim);
+            baseDefinitions[i] = PersistenceUtils.fromBinary(
+                    dim,
+                    NumericDimensionDefinition.class);
+        }
+
+        final int rasterStrategySize = buf.getInt();
+        final byte[] rasterStrategyBinary = new byte[rasterStrategySize];
+        buf.get(rasterStrategyBinary);
+        rasterStrategy = PersistenceUtils.fromBinary(
+                rasterStrategyBinary,
+                TieredSFCIndexStrategy.class);
+
+        final int bitsPerDimensionLength = buf.getInt();
+        maxBitsPerDimension = new int[bitsPerDimensionLength];
+        for (int i = 0; i < bitsPerDimensionLength; i++) {
+            maxBitsPerDimension[i] = buf.getInt();
+        }
+
+        init(baseDefinitions);
+    }
+
+    /**
+     * Returns the coordinates of an id, computed on the curve selected by the
+     * id's first byte; ids of neither curve are delegated to the raster
+     * strategy.
+     */
+    @Override
+    public MultiDimensionalCoordinates getCoordinatesPerDimension(
+            ByteArrayId insertionId ) {
+
+        // select curve based on first byte
+        final byte first = insertionId.getBytes()[0];
+        final SpaceFillingCurve curve;
+
+        if (first == pointCurveMultiDimensionalId) {
+            curve = pointCurve;
+        }
+        else if (first == xzCurveMultiDimensionalId) {
+            curve = xzCurve;
+        }
+        else {
+            return rasterStrategy.getCoordinatesPerDimension(insertionId);
+        }
+
+        final Coordinate[] coordinates = BinnedSFCUtils.getCoordinatesForId(
+                insertionId.getBytes(),
+                baseDefinitions,
+                curve);
+
+        return new MultiDimensionalCoordinates(
+                new byte[] {
+                    first
+                },
+                coordinates);
+    }
+
+    /**
+     * Returns the raster strategy's coordinate ranges unchanged; ranges on the
+     * point and XZ curves are not included.
+     */
+    @Override
+    public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension(
+            MultiDimensionalNumericData dataRange,
+            IndexMetaData... hints ) {
+        // just pass through raster strategy results since this is only used by
+        // raster data for now
+        return rasterStrategy.getCoordinateRangesPerDimension(
+                dataRange,
+                hints);
+    }
+
+    @Override
+    public NumericDimensionDefinition[] getOrderedDimensionDefinitions() {
+        return baseDefinitions;
+    }
+
+    @Override
+    public double[] getHighestPrecisionIdRangePerDimension() {
+        return pointCurve.getInsertionIdRangePerDimension();
+    }
+
+    @Override
+    public int getByteOffsetFromDimensionalIndex() {
+        return byteOffsetFromDimensionIndex;
+    }
+
+    @Override
+    public SubStrategy[] getSubStrategies() {
+        return rasterStrategy.getSubStrategies();
+    }
+
+    /**
+     * Returns the raster strategy's metadata followed by one
+     * {@code XZHierarchicalIndexMetaData} for the two curves.
+     */
+    @Override
+    public List<IndexMetaData> createMetaData() {
+        final List<IndexMetaData> metaData = new ArrayList<IndexMetaData>();
+        metaData.addAll(rasterStrategy.createMetaData());
+        metaData.add(new XZHierarchicalIndexMetaData(
+                pointCurveMultiDimensionalId,
+                xzCurveMultiDimensionalId));
+        return metaData;
+    }
+
+    /**
+     * Counts how many insertion ids start with the point-curve id byte and how
+     * many with the XZ-curve id byte.
+     */
+    private static class XZHierarchicalIndexMetaData implements
+            IndexMetaData
+    {
+
+        private int pointCurveCount = 0;
+        private int xzCurveCount = 0;
+
+        private byte pointCurveMultiDimensionalId;
+        private byte xzCurveMultiDimensionalId;
+
+        /** No-arg constructor; state is filled in by {@link #fromBinary(byte[])}. */
+        public XZHierarchicalIndexMetaData() {}
+
+        public XZHierarchicalIndexMetaData(
+                final byte pointCurveMultiDimensionalId,
+                final byte xzCurveMultiDimensionalId ) {
+            super();
+            this.pointCurveMultiDimensionalId = pointCurveMultiDimensionalId;
+            this.xzCurveMultiDimensionalId = xzCurveMultiDimensionalId;
+        }
+
+        /** Layout: point id byte, xz id byte, point count int, xz count int. */
+        @Override
+        public byte[] toBinary() {
+            final ByteBuffer buffer = ByteBuffer.allocate(2 + (4 * 2));
+            buffer.put(pointCurveMultiDimensionalId);
+            buffer.put(xzCurveMultiDimensionalId);
+            buffer.putInt(pointCurveCount);
+            buffer.putInt(xzCurveCount);
+            return buffer.array();
+        }
+
+        @Override
+        public void fromBinary(
+                byte[] bytes ) {
+            final ByteBuffer buffer = ByteBuffer.wrap(bytes);
+            pointCurveMultiDimensionalId = buffer.get();
+            xzCurveMultiDimensionalId = buffer.get();
+            pointCurveCount = buffer.getInt();
+            xzCurveCount = buffer.getInt();
+        }
+
+        /** Adds the other instance's counts; other {@link Mergeable} types are ignored. */
+        @Override
+        public void merge(
+                Mergeable merge ) {
+            if (merge instanceof XZHierarchicalIndexMetaData) {
+                final XZHierarchicalIndexMetaData other = (XZHierarchicalIndexMetaData) merge;
+                pointCurveCount += other.pointCurveCount;
+                xzCurveCount += other.xzCurveCount;
+            }
+        }
+
+        @Override
+        public void insertionIdsAdded(
+                List<ByteArrayId> insertionIds ) {
+            for (final ByteArrayId id : insertionIds) {
+                final byte first = id.getBytes()[0];
+                if (first == pointCurveMultiDimensionalId) {
+                    pointCurveCount++;
+                }
+                else if (first == xzCurveMultiDimensionalId) {
+                    xzCurveCount++;
+                }
+            }
+        }
+
+        @Override
+        public void insertionIdsRemoved(
+                List<ByteArrayId> insertionIds ) {
+            for (final ByteArrayId id : insertionIds) {
+                final byte first = id.getBytes()[0];
+                if (first == pointCurveMultiDimensionalId) {
+                    pointCurveCount--;
+                }
+                else if (first == xzCurveMultiDimensionalId) {
+                    xzCurveCount--;
+                }
+            }
+        }
+
+        /**
+         * Convert XZHierarchical Index Metadata statistics to a JSON object
+         */
+        @Override
+        public JSONObject toJSONObject()
+                throws JSONException {
+            final JSONObject jo = new JSONObject();
+            jo.put(
+                    "type",
+                    "XZHierarchicalIndexStrategy");
+
+            jo.put(
+                    "pointCurveMultiDimensionalId",
+                    pointCurveMultiDimensionalId);
+            jo.put(
+                    "xzCurveMultiDimensionalId",
+                    xzCurveMultiDimensionalId);
+            jo.put(
+                    "pointCurveCount",
+                    pointCurveCount);
+            jo.put(
+                    "xzCurveCount",
+                    xzCurveCount);
+
+            return jo;
+        }
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZOrderSFC.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZOrderSFC.java
new file mode 100644
index 0000000..154794f
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZOrderSFC.java
@@ -0,0 +1,612 @@
+/*******************************************************************************
+ * This is a derivative of GeoMesa's XZ3SFC.scala
+ * Copyright 2013-2106 Commonwealth Computer Research, Inc.
+ *
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved.
This program and the accompanying materials
+ * are made available under the terms of the Apache License,
+ * Version 2.0 which accompanies this distribution and is available at
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ ******************************************************************************/
+package org.locationtech.sfcurve.geowave.index.sfc.xz;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.locationtech.sfcurve.geowave.index.ByteArrayId;
+import org.locationtech.sfcurve.geowave.index.ByteArrayRange;
+import org.locationtech.sfcurve.geowave.index.ByteArrayRange.MergeOperation;
+import org.locationtech.sfcurve.geowave.index.ByteArrayUtils;
+import org.locationtech.sfcurve.geowave.index.PersistenceUtils;
+import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition;
+import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition;
+import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve;
+import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset;
+import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData;
+import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData;
+
+/**
+ * XZ-order space filling curve: maps a point or an axis-aligned bounding box
+ * in n dimensions to a single {@code long} sequence code, and decomposes a
+ * query window into ranges of such codes by walking a 2^n-ary tree of
+ * extended elements.
+ */
+public class XZOrderSFC implements
+        SpaceFillingCurve
+{
+    private final static Logger LOGGER = LoggerFactory.getLogger(XZOrderSFC.class);
+    private static final double LOG_POINT_FIVE = Math.log(0.5);
+
+    // the initial level of 2^dim tree
+    private XElement[] LevelOneElements;
+
+    // indicator that we have searched a full level of the 2^dim tree
+    private XElement LevelTerminator;
+
+    // upper bound for the sequence-code length and for the tree depth walked
+    // by decomposeRange
+    // TODO magic number; have to determine most appropriate value?
+    private static final int g = 12;
+
+    private SFCDimensionDefinition[] dimensionDefs;
+    private int dimensionCount;
+    private int nthPowerOfTwo;
+
+    /** No-arg constructor; state is filled in by {@link #fromBinary(byte[])}. */
+    public XZOrderSFC() {}
+
+    /**
+     * @param dimensionDefs
+     *            one definition per indexed dimension; used to normalize and
+     *            denormalize values
+     */
+    public XZOrderSFC(
+            SFCDimensionDefinition[] dimensionDefs ) {
+        this.dimensionDefs = dimensionDefs;
+        init();
+    }
+
+    /**
+     * Derives the dimension count and 2^dimensionCount, and builds the first
+     * tree level (children of the unit cube) and the level-terminator marker.
+     */
+    private void init() {
+        dimensionCount = dimensionDefs.length;
+        nthPowerOfTwo = (int) Math.pow(
+                2,
+                dimensionCount);
+
+        final double[] mins = new double[dimensionCount];
+        Arrays.fill(
+                mins,
+                0.0);
+        final double[] maxes = new double[dimensionCount];
+        Arrays.fill(
+                maxes,
+                1.0);
+        final double[] negativeOnes = new double[dimensionCount];
+        Arrays.fill(
+                negativeOnes,
+                -1.0);
+        LevelOneElements = new XElement(
+                mins,
+                maxes,
+                1.0).children();
+        LevelTerminator = new XElement(
+                negativeOnes,
+                negativeOnes,
+                0.0);
+    }
+
+    /**
+     * Computes the sequence code of a point or bounding box.
+     *
+     * @param values
+     *            either one value per dimension (a point) or twice that many
+     *            values interleaved as (min0, max0, min1, max1, ...); the
+     *            array is not modified
+     * @return the sequence code as bytes, or {@code null} (after logging an
+     *         error) when the number of values matches neither form
+     */
+    @Override
+    public byte[] getId(
+            double[] values ) {
+
+        // Work on a private copy: normalize() writes into the array, and the
+        // caller's values must not be overwritten.
+        final double[] box;
+        if (values.length == dimensionCount) {
+            // We have a point, not a bounding box
+            box = new double[dimensionCount * 2];
+            for (int i = 0; i < dimensionCount; i++) {
+                box[2 * i] = values[i];
+                box[(2 * i) + 1] = values[i];
+            }
+        }
+        else {
+            box = Arrays.copyOf(
+                    values,
+                    values.length);
+        }
+
+        if (box.length != dimensionCount * 2) {
+            LOGGER.error("Point or bounding box value count does not match number of indexed dimensions.");
+            return null;
+        }
+        normalize(box);
+
+        // calculate the length of the sequence code (section 4.1 of XZ-Ordering
+        // paper)
+        double maxDim = 0.0;
+        for (int i = 0; (i + 1) < box.length; i += 2) {
+            maxDim = Math.max(
+                    maxDim,
+                    Math.abs(box[i] - box[i + 1]));
+        }
+
+        // l1 (el-one) is a bit confusing to read, but corresponds with the
+        // paper's definitions
+        final int l1 = (int) Math.floor(Math.log(maxDim) / LOG_POINT_FIVE);
+
+        // the length will either be (l1) or (l1 + 1)
+        int length = g;
+
+        if (l1 < g) {
+            // width of an element at resolution l2 (l1 + 1)
+            final double w2 = Math.pow(
+                    0.5,
+                    l1 + 1);
+
+            length = l1 + 1;
+            for (int i = 0; (i + 1) < box.length; i += 2) {
+                if (!predicate(
+                        box[i],
+                        box[i + 1],
+                        w2)) {
+                    length = l1;
+                    break;
+                }
+            }
+        }
+
+        final double[] minValues = new double[box.length / 2];
+        for (int i = 0; (i + 1) < box.length; i += 2) {
+            minValues[i / 2] = box[i];
+        }
+
+        return sequenceCode(
+                minValues,
+                length);
+    }
+
+    // predicate for checking how many axis the polygon intersects
+    // math.floor(min / w2) * w2 == start of cell containing min
+    private boolean predicate(
+            double min,
+            double max,
+            double w2 ) {
+        return max <= (Math.floor(min / w2) * w2) + (2 * w2);
+    }
+
+    /**
+     * Normalize user space values to [0,1], in place. Entries 2d and 2d+1 are
+     * normalized with the definition of dimension d.
+     */
+    private void normalize(
+            double[] values ) {
+        for (int i = 0; i < values.length; i++) {
+            values[i] = dimensionDefs[i / 2].normalize(values[i]);
+        }
+    }
+
+    /**
+     * Computes the sequence code for a normalized lower corner by descending
+     * {@code length} levels of the 2^n-ary tree.
+     *
+     * @param minValues
+     *            normalized min value per dimension
+     * @param length
+     *            number of levels to descend
+     * @return the sequence code converted with
+     *         {@code ByteArrayUtils.longToByteArray}
+     */
+    private byte[] sequenceCode(
+            double[] minValues,
+            int length ) {
+
+        final double[] minsPerDimension = new double[dimensionCount];
+        Arrays.fill(
+                minsPerDimension,
+                0.0);
+
+        final double[] maxesPerDimension = new double[dimensionCount];
+        Arrays.fill(
+                maxesPerDimension,
+                1.0);
+
+        long cs = 0L;
+
+        for (int i = 0; i < length; i++) {
+
+            final double[] centers = new double[dimensionCount];
+            for (int j = 0; j < dimensionCount; j++) {
+                centers[j] = (minsPerDimension[j] + maxesPerDimension[j]) / 2.0;
+            }
+
+            // bit j is set when the value lies in the upper half of dimension j
+            final BitSet bits = new BitSet(
+                    dimensionCount);
+            for (int j = dimensionCount - 1; j >= 0; j--) {
+                if (minValues[j] >= centers[j]) {
+                    bits.set(j);
+                }
+            }
+            long bTerm = 0L;
+            final long[] longs = bits.toLongArray();
+            if (longs.length > 0) {
+                bTerm = longs[0];
+            }
+
+            cs += 1L + bTerm * (((long) (Math.pow(
+                    nthPowerOfTwo,
+                    g - i))) - 1L) / ((long) nthPowerOfTwo - 1);
+
+            // shrink the current cell to the half that contains the value
+            for (int j = 0; j < dimensionCount; j++) {
+                if (minValues[j] < centers[j]) {
+                    maxesPerDimension[j] = centers[j];
+                }
+                else {
+                    minsPerDimension[j] = centers[j];
+                }
+            }
+        }
+
+        return ByteArrayUtils.longToByteArray(cs);
+    }
+
+    /**
+     * An extended Z curve element. Bounds refer to the non-extended z element
+     * for simplicity of calculation.
+     *
+     * An extended Z element refers to a normal Z curve element that has its
+     * upper bounds expanded by double its dimensions. By convention, an element
+     * is always an n-cube.
+     */
+    private static class XElement
+    {
+
+        private final double[] minsPerDimension;
+        private final double[] maxesPerDimension;
+        private double length;
+
+        private final Double[] extendedBounds;
+        private XElement[] children;
+
+        private final int dimensionCount;
+        private final int nthPowerOfTwo;
+
+        public XElement(
+                double[] minsPerDimension,
+                double[] maxesPerDimension,
+                double length ) {
+            this.minsPerDimension = minsPerDimension;
+            this.maxesPerDimension = maxesPerDimension;
+            this.length = length;
+            dimensionCount = minsPerDimension.length;
+            nthPowerOfTwo = (int) Math.pow(
+                    2,
+                    dimensionCount);
+            extendedBounds = new Double[dimensionCount];
+        }
+
+        /** Copy constructor; the bound arrays are copied, not shared. */
+        public XElement(
+                XElement xElement ) {
+            this(
+                    Arrays.copyOf(
+                            xElement.minsPerDimension,
+                            xElement.minsPerDimension.length),
+                    Arrays.copyOf(
+                            xElement.maxesPerDimension,
+                            xElement.maxesPerDimension.length),
+                    xElement.length);
+        }
+
+        // lazy-evaluated extended bounds
+        public double getExtendedBound(
+                int dimension ) {
+            if (extendedBounds[dimension] == null) {
+                extendedBounds[dimension] = maxesPerDimension[dimension] + length;
+            }
+            return extendedBounds[dimension];
+        }
+
+        /** True when the window covers this element's extended bounds in every dimension. */
+        public boolean isContained(
+                final double[] windowMins,
+                final double[] windowMaxes ) {
+            for (int i = 0; i < dimensionCount; i++) {
+                if (windowMins[i] > minsPerDimension[i] || windowMaxes[i] < getExtendedBound(i)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /** True when the window intersects this element's extended bounds in every dimension. */
+        public boolean overlaps(
+                final double[] windowMins,
+                final double[] windowMaxes ) {
+            for (int i = 0; i < dimensionCount; i++) {
+                if (windowMaxes[i] < minsPerDimension[i] || windowMins[i] > getExtendedBound(i)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /**
+         * Returns the 2^n half-length sub-elements, created on first use and
+         * cached. In child {@code i}, bit {@code d} of {@code i} selects the
+         * upper half of dimension {@code d}.
+         */
+        public XElement[] children() {
+            if (children == null) {
+                final double[] centers = new double[dimensionCount];
+                for (int i = 0; i < dimensionCount; i++) {
+                    centers[i] = (minsPerDimension[i] + maxesPerDimension[i]) / 2.0;
+                }
+
+                final double len = length / 2.0;
+
+                children = new XElement[nthPowerOfTwo];
+                for (int i = 0; i < children.length; i++) {
+                    final XElement child = new XElement(
+                            this);
+
+                    child.length = len;
+
+                    for (int d = 0; d < dimensionCount; d++) {
+                        if (((i >>> d) & 1) == 1) {
+                            child.minsPerDimension[d] = centers[d];
+                        }
+                        else {
+                            child.maxesPerDimension[d] = centers[d];
+                        }
+                    }
+
+                    children[i] = child;
+                }
+            }
+
+            return children;
+        }
+    }
+
+    /** Decomposes with over-inclusive edges and no limit on the number of ranges. */
+    @Override
+    public RangeDecomposition decomposeRangeFully(
+            MultiDimensionalNumericData query ) {
+        return decomposeRange(
+                query,
+                true,
+                -1);
+    }
+
+    /**
+     * Decomposes a query window into merged ranges of sequence codes.
+     *
+     * @param query
+     *            the window, in user space
+     * @param overInclusiveOnEdge
+     *            not used by this implementation
+     * @param maxRanges
+     *            once this many ranges were collected the tree walk stops
+     *            refining and the still-queued elements are added as whole
+     *            ranges; values below 1 disable the limit
+     * @return the ranges after {@code ByteArrayRange.mergeIntersections} with
+     *         {@code MergeOperation.UNION}
+     */
+    @Override
+    public RangeDecomposition decomposeRange(
+            MultiDimensionalNumericData query,
+            boolean overInclusiveOnEdge,
+            int maxRanges ) {
+
+        // normalize query values into fresh arrays so the arrays handed out by
+        // the query object are never written to
+        final double[] rawMins = query.getMinValuesPerDimension();
+        final double[] rawMaxes = query.getMaxValuesPerDimension();
+        final double[] queryMins = new double[dimensionCount];
+        final double[] queryMaxes = new double[dimensionCount];
+        for (int i = 0; i < dimensionCount; i++) {
+            queryMins[i] = dimensionDefs[i].normalize(rawMins[i]);
+            queryMaxes[i] = dimensionDefs[i].normalize(rawMaxes[i]);
+        }
+
+        // stores our results - initial size of 100 in general saves us some
+        // re-allocation
+        final ArrayList<ByteArrayRange> ranges = new ArrayList<ByteArrayRange>(
+                100);
+
+        // values remaining to process - initial size of 100 in general saves us
+        // some re-allocation
+        final ArrayDeque<XElement> remaining = new ArrayDeque<XElement>(
+                100);
+
+        // initial level
+        for (final XElement levelOneEl : LevelOneElements) {
+            remaining.add(levelOneEl);
+        }
+        remaining.add(LevelTerminator);
+
+        // level of recursion
+        short level = 1;
+
+        while (level < g && !remaining.isEmpty() && (maxRanges < 1 || ranges.size() < maxRanges)) {
+            final XElement next = remaining.poll();
+            // the terminator is a unique marker instance, so identity is the
+            // intended comparison
+            if (next == LevelTerminator) {
+                // we've fully processed a level, increment our state
+                if (!remaining.isEmpty()) {
+                    level = (short) (level + 1);
+                    remaining.add(LevelTerminator);
+                }
+            }
+            else {
+                checkValue(
+                        next,
+                        level,
+                        queryMins,
+                        queryMaxes,
+                        ranges,
+                        remaining);
+            }
+        }
+
+        // bottom out and get all the ranges that partially overlapped but we
+        // didn't fully process
+        while (!remaining.isEmpty()) {
+            final XElement next = remaining.poll();
+            if (next == LevelTerminator) {
+                level = (short) (level + 1);
+            }
+            else {
+                ranges.add(sequenceInterval(
+                        next.minsPerDimension,
+                        level,
+                        false));
+            }
+        }
+
+        // we've got all our ranges - now reduce them down by merging
+        // overlapping values
+        // note: we don't bother reducing the ranges as in the XZ paper, as
+        // accumulo handles lots of ranges fairly well
+        // The merged collection is copied instead of being downcast to
+        // ArrayList, so this does not depend on the concrete type returned.
+        final List<ByteArrayRange> result = new ArrayList<ByteArrayRange>(
+                ByteArrayRange.mergeIntersections(
+                        ranges,
+                        MergeOperation.UNION));
+
+        return new RangeDecomposition(
+                result.toArray(new ByteArrayRange[result.size()]));
+    }
+
+    // checks a single value and either:
+    // eliminates it as out of bounds
+    // adds it to our results as fully matching, or
+    // adds it to our results as partial matching and queues up its children
+    // for further processing
+    private void checkValue(
+            XElement value,
+            short level,
+            double[] queryMins,
+            double[] queryMaxes,
+            List<ByteArrayRange> ranges,
+            ArrayDeque<XElement> remaining ) {
+        if (value.isContained(
+                queryMins,
+                queryMaxes)) {
+            // whole range matches, happy day
+            ranges.add(sequenceInterval(
+                    value.minsPerDimension,
+                    level,
+                    false));
+        }
+        else if (value.overlaps(
+                queryMins,
+                queryMaxes)) {
+            // some portion of this range is excluded
+            // add the partial match and queue up each sub-range for processing
+            ranges.add(sequenceInterval(
+                    value.minsPerDimension,
+                    level,
+                    true));
+            for (final XElement child : value.children()) {
+                remaining.add(child);
+            }
+        }
+    }
+
+    /**
+     * Computes an interval of sequence codes for a given point - for polygons
+     * this is the lower-left corner.
+     *
+     * @param minsPerDimension
+     *            normalized min values [0,1] per dimension
+     * @param length
+     *            length of the sequence code that will be used as the basis
+     *            for this interval
+     * @param partial
+     *            true if the element partially intersects the query window,
+     *            false if it is fully contained
+     * @return a range from the element's sequence code to itself (partial) or
+     *         to the last code that starts with it (full)
+     */
+    private ByteArrayRange sequenceInterval(
+            double[] minsPerDimension,
+            short length,
+            boolean partial ) {
+        final byte[] min = sequenceCode(
+                minsPerDimension,
+                length);
+        // if a partial match, we just use the single sequence code as an
+        // interval
+        // if a full match, we have to match all sequence codes starting with
+        // the single sequence code
+        final byte[] max;
+        if (partial) {
+            max = min;
+        }
+        else {
+            // from lemma 3 in the XZ-Ordering paper
+            max = ByteArrayUtils.longToByteArray(ByteArrayUtils.byteArrayToLong(min) + (((long) (Math.pow(
+                    nthPowerOfTwo,
+                    g - length + 1))) - 1L) / ((long) (nthPowerOfTwo - 1)));
+        }
+        return new ByteArrayRange(
+                new ByteArrayId(
+                        min),
+                new ByteArrayId(
+                        max));
+    }
+
+    /**
+     * Serialized layout: dimension count as a 4-byte int, then per dimension
+     * a 4-byte length followed by the definition's bytes.
+     */
+    @Override
+    public byte[] toBinary() {
+        final List<byte[]> dimensionDefBinaries = new ArrayList<byte[]>(
+                dimensionDefs.length);
+        int bufferLength = 4;
+        for (final SFCDimensionDefinition sfcDimension : dimensionDefs) {
+            final byte[] sfcDimensionBinary = PersistenceUtils.toBinary(sfcDimension);
+            bufferLength += (sfcDimensionBinary.length + 4);
+            dimensionDefBinaries.add(sfcDimensionBinary);
+        }
+        final ByteBuffer buf = ByteBuffer.allocate(bufferLength);
+        buf.putInt(dimensionDefs.length);
+        for (final byte[] dimensionDefBinary : dimensionDefBinaries) {
+            buf.putInt(dimensionDefBinary.length);
+            buf.put(dimensionDefBinary);
+        }
+        return buf.array();
+    }
+
+    /** Reads the layout written by {@link #toBinary()} and re-runs {@link #init()}. */
+    @Override
+    public void fromBinary(
+            final byte[] bytes ) {
+        final ByteBuffer buf = ByteBuffer.wrap(bytes);
+        final int numDimensions = buf.getInt();
+        dimensionDefs = new SFCDimensionDefinition[numDimensions];
+        for (int i = 0; i < numDimensions; i++) {
+            final byte[] dim = new byte[buf.getInt()];
+            buf.get(dim);
+            dimensionDefs[i] = PersistenceUtils.fromBinary(
+                    dim,
+                    SFCDimensionDefinition.class);
+        }
+
+        init();
+    }
+
+    /** Returns, per dimension, the denormalized value of 0.5^g. */
+    @Override
+    public double[] getInsertionIdRangePerDimension() {
+        final double normalizedSize = Math.pow(
+                0.5,
+                g);
+
+        final double[] rangesPerDimension = new double[dimensionCount];
+        for (int i = 0; i < dimensionCount; i++) {
+            rangesPerDimension[i] = dimensionDefs[i].denormalize(normalizedSize);
+        }
+        return rangesPerDimension;
+    }
+
+    /** Always returns {@link BigInteger#ONE}; the data is not inspected. */
+    @Override
+    public BigInteger getEstimatedIdCount(
+            MultiDimensionalNumericData data ) {
+        // TODO Replace hard-coded value with real implementation?
+        return BigInteger.ONE;
+    }
+
+    // TODO Backwards (sfc-space to user-space) conversion??
+    /** Returns the full range of every dimension, regardless of {@code id}. */
+    @Override
+    public MultiDimensionalNumericData getRanges(
+            byte[] id ) {
+        // use max range per dimension for now
+        // to avoid false negatives
+        final NumericData[] dataPerDimension = new NumericData[dimensionCount];
+        int i = 0;
+        for (final SFCDimensionDefinition dim : dimensionDefs) {
+            dataPerDimension[i++] = dim.getFullRange();
+        }
+        return new BasicNumericDataset(
+                dataPerDimension);
+    }
+
+    /** Not implemented; always returns {@code null}. */
+    @Override
+    public long[] getCoordinates(
+            byte[] id ) {
+        return null;
+    }
+
+    /** Not implemented; always returns {@code null}. */
+    @Override
+    public long[] normalizeRange(
+            double minValue,
+            double maxValue,
+            int dimension ) {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+}
diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderSFC.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderSFC.java
new file mode 100644
index 0000000..6e8bc1e
--- /dev/null
+++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderSFC.java
@@ -0,0 +1,253 @@
+/*******************************************************************************
+ * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation
+ *
+ * See the NOTICE file distributed with this work for additional
+ * information regarding copyright ownership.
+ * All rights reserved.
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.zorder; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.sfc.RangeDecomposition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SpaceFillingCurve; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +/*** + * Implementation of a ZOrder Space Filling Curve. Also called Morton, GeoHash, + * etc. 
+ * + */ +public class ZOrderSFC implements + SpaceFillingCurve +{ + private SFCDimensionDefinition[] dimensionDefs; + private int cardinalityPerDimension; + private double binsPerDimension; + + protected ZOrderSFC() { + super(); + } + + /*** + * Use the SFCFactory.createSpaceFillingCurve method - don't call this + * constructor directly + * + */ + public ZOrderSFC( + final SFCDimensionDefinition[] dimensionDefs ) { + init(dimensionDefs); + } + + private void init( + final SFCDimensionDefinition[] dimensionDefs ) { + this.dimensionDefs = dimensionDefs; + cardinalityPerDimension = 1; + for (final SFCDimensionDefinition dimensionDef : dimensionDefs) { + if (dimensionDef.getBitsOfPrecision() > cardinalityPerDimension) { + cardinalityPerDimension = dimensionDef.getBitsOfPrecision(); + } + } + binsPerDimension = Math.pow( + 2, + cardinalityPerDimension); + } + + /*** + * {@inheritDoc} + */ + @Override + public byte[] getId( + final double[] values ) { + final double[] normalizedValues = new double[values.length]; + for (int d = 0; d < values.length; d++) { + normalizedValues[d] = dimensionDefs[d].normalize(values[d]); + } + return ZOrderUtils.encode( + normalizedValues, + cardinalityPerDimension, + values.length); + } + + @Override + public MultiDimensionalNumericData getRanges( + final byte[] id ) { + return new BasicNumericDataset( + ZOrderUtils.decodeRanges( + id, + cardinalityPerDimension, + dimensionDefs)); + } + + @Override + public long[] getCoordinates( + final byte[] id ) { + return ZOrderUtils.decodeIndices( + id, + cardinalityPerDimension, + dimensionDefs.length); + } + + @Override + public double[] getInsertionIdRangePerDimension() { + final double[] retVal = new double[dimensionDefs.length]; + for (int i = 0; i < dimensionDefs.length; i++) { + retVal[i] = dimensionDefs[i].getRange() / binsPerDimension; + } + return retVal; + } + + @Override + public BigInteger getEstimatedIdCount( + final MultiDimensionalNumericData data ) { + final double[] mins = 
data.getMinValuesPerDimension(); + final double[] maxes = data.getMaxValuesPerDimension(); + BigInteger estimatedIdCount = BigInteger.valueOf(1); + for (int d = 0; d < data.getDimensionCount(); d++) { + final double binMin = dimensionDefs[d].normalize(mins[d]) * binsPerDimension; + final double binMax = dimensionDefs[d].normalize(maxes[d]) * binsPerDimension; + estimatedIdCount = estimatedIdCount.multiply(BigInteger.valueOf((long) (Math.abs(binMax - binMin) + 1))); + } + return estimatedIdCount; + } + + /*** + * {@inheritDoc} + */ + @Override + public RangeDecomposition decomposeRange( + final MultiDimensionalNumericData query, + final boolean overInclusiveOnEdge, + final int maxFilteredIndexedRanges ) { + // TODO: Because the research and benchmarking show Hilbert to + // outperform Z-Order + // the optimization of full query decomposition is not implemented at + // the moment for Z-Order + final double[] queryMins = query.getMinValuesPerDimension(); + final double[] queryMaxes = query.getMaxValuesPerDimension(); + final double[] normalizedMins = new double[query.getDimensionCount()]; + final double[] normalizedMaxes = new double[query.getDimensionCount()]; + for (int d = 0; d < query.getDimensionCount(); d++) { + normalizedMins[d] = dimensionDefs[d].normalize(queryMins[d]); + normalizedMaxes[d] = dimensionDefs[d].normalize(queryMaxes[d]); + } + final byte[] minZorder = ZOrderUtils.encode( + normalizedMins, + cardinalityPerDimension, + query.getDimensionCount()); + final byte[] maxZorder = ZOrderUtils.encode( + normalizedMaxes, + cardinalityPerDimension, + query.getDimensionCount()); + return new RangeDecomposition( + new ByteArrayRange[] { + new ByteArrayRange( + new ByteArrayId( + minZorder), + new ByteArrayId( + maxZorder)) + }); + } + + /*** + * {@inheritDoc} + */ + @Override + public RangeDecomposition decomposeRangeFully( + final MultiDimensionalNumericData query ) { + return decomposeRange( + query, + true, + -1); + } + + @Override + public byte[] 
toBinary() { + final List dimensionDefBinaries = new ArrayList( + dimensionDefs.length); + int bufferLength = 4; + for (final SFCDimensionDefinition sfcDimension : dimensionDefs) { + final byte[] sfcDimensionBinary = PersistenceUtils.toBinary(sfcDimension); + bufferLength += (sfcDimensionBinary.length + 4); + dimensionDefBinaries.add(sfcDimensionBinary); + } + final ByteBuffer buf = ByteBuffer.allocate(bufferLength); + buf.putInt(dimensionDefs.length); + for (final byte[] dimensionDefBinary : dimensionDefBinaries) { + buf.putInt(dimensionDefBinary.length); + buf.put(dimensionDefBinary); + } + return buf.array(); + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + final int numDimensions = buf.getInt(); + dimensionDefs = new SFCDimensionDefinition[numDimensions]; + for (int i = 0; i < numDimensions; i++) { + final byte[] dim = new byte[buf.getInt()]; + buf.get(dim); + dimensionDefs[i] = PersistenceUtils.fromBinary( + dim, + SFCDimensionDefinition.class); + } + init(dimensionDefs); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + final String className = getClass().getName(); + result = (prime * result) + ((className == null) ? 
0 : className.hashCode()); + result = (prime * result) + Arrays.hashCode(dimensionDefs); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final ZOrderSFC other = (ZOrderSFC) obj; + + if (!Arrays.equals( + dimensionDefs, + other.dimensionDefs)) { + return false; + } + return true; + } + + @Override + public long[] normalizeRange( + final double minValue, + final double maxValue, + final int d ) { + return new long[] { + (long) (dimensionDefs[d].normalize(minValue) * binsPerDimension), + (long) (dimensionDefs[d].normalize(maxValue) * binsPerDimension) + }; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderUtils.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderUtils.java new file mode 100644 index 0000000..7835fee --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderUtils.java @@ -0,0 +1,198 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.zorder; + +import java.util.Arrays; +import java.util.BitSet; + +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +/** + * Convenience methods used to decode/encode Z-Order space filling curve values + * (using a simple bit-interleaving approach). + * + */ +public class ZOrderUtils +{ + public static NumericRange[] decodeRanges( + final byte[] bytes, + final int bitsPerDimension, + final SFCDimensionDefinition[] dimensionDefinitions ) { + final byte[] littleEndianBytes = swapEndianFormat(bytes); + final BitSet bitSet = BitSet.valueOf(littleEndianBytes); + final NumericRange[] normalizedValues = new NumericRange[dimensionDefinitions.length]; + for (int d = 0; d < dimensionDefinitions.length; d++) { + final BitSet dimensionSet = new BitSet(); + int j = 0; + for (int i = d; i < (bitsPerDimension * dimensionDefinitions.length); i += dimensionDefinitions.length) { + dimensionSet.set( + j++, + bitSet.get(i)); + } + + normalizedValues[d] = decode( + dimensionSet, + 0, + 1, + dimensionDefinitions[d]); + } + + return normalizedValues; + } + + public static long[] decodeIndices( + final byte[] bytes, + final int bitsPerDimension, + final int numDimensions ) { + final byte[] littleEndianBytes = swapEndianFormat(bytes); + final BitSet bitSet = BitSet.valueOf(littleEndianBytes); + final long[] coordinates = new long[numDimensions]; + final long rangePerDimension = (long) Math.pow( + 2, + bitsPerDimension); + for (int d = 0; d < numDimensions; d++) { + final BitSet dimensionSet = new BitSet(); + int j = 0; + for (int i = d; i < 
(bitsPerDimension * numDimensions); i += numDimensions) { + dimensionSet.set( + j++, + bitSet.get(i)); + } + + coordinates[d] = decodeIndex( + dimensionSet, + rangePerDimension); + } + + return coordinates; + } + + private static long decodeIndex( + final BitSet bs, + final long rangePerDimension ) { + long floor = 0; + long ceiling = rangePerDimension; + long mid = 0; + for (int i = 0; i < bs.length(); i++) { + mid = (floor + ceiling) / 2; + if (bs.get(i)) { + floor = mid; + } + else { + ceiling = mid; + } + } + return mid; + } + + private static NumericRange decode( + final BitSet bs, + double floor, + double ceiling, + final SFCDimensionDefinition dimensionDefinition ) { + double mid = 0; + for (int i = 0; i < bs.length(); i++) { + mid = (floor + ceiling) / 2; + if (bs.get(i)) { + floor = mid; + } + else { + ceiling = mid; + } + } + return new NumericRange( + dimensionDefinition.denormalize(floor), + dimensionDefinition.denormalize(ceiling)); + } + + public static byte[] encode( + final double[] normalizedValues, + final int bitsPerDimension, + final int numDimensions ) { + final BitSet[] bitSets = new BitSet[numDimensions]; + + for (int d = 0; d < numDimensions; d++) { + bitSets[d] = getBits( + normalizedValues[d], + 0, + 1, + bitsPerDimension); + } + int usedBits = bitsPerDimension * numDimensions; + int usedBytes = (int) Math.ceil(usedBits / 8.0); + int bitsetLength = (usedBytes * 8); + int bitOffset = bitsetLength - usedBits; + // round up to a bitset divisible by 8 + final BitSet combinedBitSet = new BitSet( + bitsetLength); + int j = bitOffset; + for (int i = 0; i < bitsPerDimension; i++) { + for (int d = 0; d < numDimensions; d++) { + combinedBitSet.set( + j++, + bitSets[d].get(i)); + } + } + final byte[] littleEndianBytes = combinedBitSet.toByteArray(); + byte[] retVal = swapEndianFormat(littleEndianBytes); + if (retVal.length < usedBytes) { + return Arrays.copyOf( + retVal, + usedBytes); + } + return retVal; + } + + public static byte[] 
swapEndianFormat( + final byte[] b ) { + final byte[] endianSwappedBytes = new byte[b.length]; + for (int i = 0; i < b.length; i++) { + endianSwappedBytes[i] = swapEndianFormat(b[i]); + } + return endianSwappedBytes; + } + + private static byte swapEndianFormat( + final byte b ) { + int converted = 0x00; + converted ^= (b & 0b1000_0000) >> 7; + converted ^= (b & 0b0100_0000) >> 5; + converted ^= (b & 0b0010_0000) >> 3; + converted ^= (b & 0b0001_0000) >> 1; + converted ^= (b & 0b0000_1000) << 1; + converted ^= (b & 0b0000_0100) << 3; + converted ^= (b & 0b0000_0010) << 5; + converted ^= (b & 0b0000_0001) << 7; + return (byte) (converted & 0xFF); + } + + private static BitSet getBits( + final double value, + double floor, + double ceiling, + final int bitsPerDimension ) { + final BitSet buffer = new BitSet( + bitsPerDimension); + for (int i = 0; i < bitsPerDimension; i++) { + final double mid = (floor + ceiling) / 2; + if (value >= mid) { + buffer.set(i); + floor = mid; + } + else { + ceiling = mid; + } + } + return buffer; + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/HashKeyIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/HashKeyIndexStrategy.java new file mode 100644 index 0000000..d795eaa --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/HashKeyIndexStrategy.java @@ -0,0 +1,247 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.IndexMetaData; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.NumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +/** + * Used to create determined, uniform row id prefix as one possible approach to + * prevent hot spotting. + * + * Before using this class, one should consider balancing options for the + * specific data store. Can one pre-split using a component of another index + * strategy (e.g. bin identifier)? How about ingest first and then do major + * compaction? + * + * Consider that Accumulo 1.7 supports two balancers + * org.apache.accumulo.server.master.balancer.RegexGroupBalancer and + * org.apache.accumulo.server.master.balancer.GroupBalancer. + * + * This class should be used with a CompoundIndexStrategy. In addition, tablets + * should be pre-split on the number of prefix IDs. 
Without splits, the splits + * are at the mercy of the Big Table servers default. For example, Accumulo + * fills up one tablet before splitting, regardless of the partitioning. + * + * The key set size does not need to be large. For example, using two times the + * number of tablet servers (for growth) and presplitting, two keys per server. + * The default is 3. + * + * There is a cost to using this approach: queries must span all prefixes. The + * number of prefixes should initially be at least the number of tablet servers. + * + * + * + */ +public class HashKeyIndexStrategy implements + NumericIndexStrategy +{ + + private final List keySet = new ArrayList(); + + public HashKeyIndexStrategy() { + this( + 3); + } + + public HashKeyIndexStrategy( + final int size ) { + init(size); + } + + private void init( + final int size ) { + keySet.clear(); + if (size > 256) { + final ByteBuffer buf = ByteBuffer.allocate(4); + for (int i = 0; i < size; i++) { + buf.putInt(i); + final ByteArrayId id = new ByteArrayId( + Arrays.copyOf( + buf.array(), + 4)); + keySet.add(new ByteArrayRange( + id, + id)); + buf.rewind(); + } + } + else { + for (int i = 0; i < size; i++) { + final ByteArrayId id = new ByteArrayId( + new byte[] { + (byte) i + }); + keySet.add(new ByteArrayRange( + id, + id)); + } + } + } + + /** + * Always returns all possible ranges + * + * {@inheritDoc} + */ + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return keySet; + } + + /** + * Always returns all possible ranges + */ + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxEstimatedRangeDecomposition, + final IndexMetaData... 
hints ) { + return keySet; + } + + /** + * Returns an insertion id selected round-robin from a predefined pool + * + */ + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + final long hashCode = Math.abs(hashCode( + indexedData.getMaxValuesPerDimension(), + hashCode( + indexedData.getMinValuesPerDimension(), + 1))); + final int position = (int) (hashCode % keySet.size()); + + return Collections.singletonList(keySet.get( + position).getStart()); + } + + /** + * Returns all of the insertion ids for the range. Since this index strategy + * doensn't use binning, it will return the ByteArrayId of every value in + * the range (i.e. if you are storing a range using this index strategy, + * your data will be replicated for every integer value in the range). + * + * {@inheritDoc} + */ + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxEstimatedDuplicateIds ) { + return getInsertionIds(indexedData); + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + return new NumericDimensionDefinition[0]; + } + + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + return new BasicNumericDataset(); + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + return new double[0]; + } + + @Override + public String getId() { + return StringUtils.intToString(hashCode()); + } + + @Override + public byte[] toBinary() { + final ByteBuffer buf = ByteBuffer.allocate(4); + buf.putInt(keySet.size()); + return buf.array(); + + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + init(buf.getInt()); + } + + @Override + public Set getNaturalSplits() { + final Set naturalSplits = new HashSet(); + for (final ByteArrayRange range : keySet) { + naturalSplits.add(range.getStart()); + } + return naturalSplits; + } + + private static long 
hashCode( + final double a1[], + final long start ) { + + long result = start; + for (final double element : a1) { + final long bits = Double.doubleToLongBits(element); + result = (31 * result) + (bits ^ (bits >>> 32)); + } + return result; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + if ((keySet != null) && !keySet.isEmpty()) { + return keySet.get( + 0).getStart().getBytes().length; + } + return 0; + } + + @Override + public List createMetaData() { + return Collections.emptyList(); + } + + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + final ByteArrayId insertionId ) { + return new MultiDimensionalCoordinates(); + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + final MultiDimensionalNumericData dataRange, + final IndexMetaData... hints ) { + return new MultiDimensionalCoordinateRanges[] { + new MultiDimensionalCoordinateRanges() + }; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/RoundRobinKeyIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/RoundRobinKeyIndexStrategy.java new file mode 100644 index 0000000..76e09ff --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/RoundRobinKeyIndexStrategy.java @@ -0,0 +1,232 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.Coordinate; +import org.locationtech.sfcurve.geowave.index.IndexMetaData; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.NumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; + +/** + * Used to create determined, uniform row id prefix as one possible approach to + * prevent hot spotting. + * + * Before using this class, one should consider balancing options for the + * specific data store. Can one pre-split using a component of another index + * strategy (e.g. bin identifier)? How about ingest first and then do major + * compaction? + * + * Consider that Accumulo 1.7 supports two balancers + * org.apache.accumulo.server.master.balancer.RegexGroupBalancer and + * org.apache.accumulo.server.master.balancer.GroupBalancer. + * + * This class should be used with a CompoundIndexStrategy. 
In addition, tablets + * should be pre-split on the number of prefix IDs. Without splits, the splits + * are at the mercy of the Big Table servers default. For example, Accumulo + * fills up one tablet before splitting, regardless of the partitioning. + * + * The key set size does not need to be large. For example, using two times the + * number of tablet servers (for growth) and presplitting, two keys per server. + * The default is 3. + * + * There is a cost to using this approach: queries must span all prefixes. The + * number of prefixes should initially be at least the number of tablet servers. + * + * + * + */ +public class RoundRobinKeyIndexStrategy implements + NumericIndexStrategy +{ + + private final List keySet = new ArrayList(); + public int position = 0; + + /** + * Default initial key set size is 3. + */ + public RoundRobinKeyIndexStrategy() { + init(3); + } + + public RoundRobinKeyIndexStrategy( + final int size ) { + init(size); + } + + private void init( + final int size ) { + keySet.clear(); + if (size > 256) { + final ByteBuffer buf = ByteBuffer.allocate(4); + for (int i = 0; i < size; i++) { + buf.putInt(i); + final ByteArrayId id = new ByteArrayId( + Arrays.copyOf( + buf.array(), + 4)); + keySet.add(new ByteArrayRange( + id, + id)); + buf.rewind(); + } + } + else { + for (int i = 0; i < size; i++) { + final ByteArrayId id = new ByteArrayId( + new byte[] { + (byte) i + }); + keySet.add(new ByteArrayRange( + id, + id)); + } + } + } + + /** + * Always returns all possible ranges + * + * {@inheritDoc} + */ + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return keySet; + } + + /** + * Always returns all possible ranges + * + * {@inheritDoc} + */ + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxEstimatedRangeDecomposition, + final IndexMetaData... 
hints ) { + return keySet; + } + + /** + * Returns an insertion id selected round-robin from a predefined pool + */ + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + position = (position + 1) % keySet.size(); + return Collections.singletonList(keySet.get( + position).getStart()); + } + + /** + * Returns an insertion id selected round-robin from a predefined pool + * + */ + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxEstimatedDuplicateIds ) { + position = (position + 1) % keySet.size(); + return Collections.singletonList(keySet.get( + position).getStart()); + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + return new NumericDimensionDefinition[0]; + } + + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + return new BasicNumericDataset(); + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + return new double[0]; + } + + @Override + public String getId() { + return StringUtils.intToString(hashCode()); + } + + @Override + public byte[] toBinary() { + final ByteBuffer buf = ByteBuffer.allocate(4); + buf.putInt(keySet.size()); + return buf.array(); + + } + + @Override + public void fromBinary( + final byte[] bytes ) { + final ByteBuffer buf = ByteBuffer.wrap(bytes); + init(buf.getInt()); + } + + @Override + public Set getNaturalSplits() { + final Set naturalSplits = new HashSet(); + for (final ByteArrayRange range : keySet) { + naturalSplits.add(range.getStart()); + } + return naturalSplits; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + if ((keySet != null) && !keySet.isEmpty()) { + return keySet.get( + 0).getStart().getBytes().length; + } + return 0; + } + + @Override + public List createMetaData() { + return Collections.emptyList(); + } + + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + 
final ByteArrayId insertionId ) { + return new MultiDimensionalCoordinates(); + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + final MultiDimensionalNumericData dataRange, + final IndexMetaData... hints ) { + return new MultiDimensionalCoordinateRanges[] { + new MultiDimensionalCoordinateRanges() + }; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleIntegerIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleIntegerIndexStrategy.java new file mode 100644 index 0000000..4d7bfdf --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleIntegerIndexStrategy.java @@ -0,0 +1,50 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import org.locationtech.sfcurve.geowave.index.lexicoder.Lexicoders; + +/** + * A simple 1-dimensional NumericIndexStrategy that represents an index of + * signed integer values. The strategy doesn't use any binning. The ids are + * simply the byte arrays of the value. This index strategy will not perform + * well for inserting ranges because there will be too much replication of data. 
+ * + */ +public class SimpleIntegerIndexStrategy extends + SimpleNumericIndexStrategy +{ + + public SimpleIntegerIndexStrategy() { + super( + Lexicoders.INT); + } + + @Override + public byte[] toBinary() { + return new byte[] {}; + } + + @Override + public void fromBinary( + final byte[] bytes ) {} + + @Override + protected Integer cast( + final double value ) { + return (int) value; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + return 0; + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleLongIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleLongIndexStrategy.java new file mode 100644 index 0000000..b70e242 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleLongIndexStrategy.java @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import org.locationtech.sfcurve.geowave.index.lexicoder.Lexicoders; + +/** + * A simple 1-dimensional NumericIndexStrategy that represents an index of + * signed long values. The strategy doesn't use any binning. The ids are simply + * the byte arrays of the value. This index strategy will not perform well for + * inserting ranges because there will be too much replication of data. 
+ * + */ +public class SimpleLongIndexStrategy extends + SimpleNumericIndexStrategy +{ + + public SimpleLongIndexStrategy() { + super( + Lexicoders.LONG); + } + + @Override + public byte[] toBinary() { + return new byte[] {}; + } + + @Override + public void fromBinary( + final byte[] bytes ) {} + + @Override + protected Long cast( + final double value ) { + return (long) value; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + return 0; + } + +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleNumericIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleNumericIndexStrategy.java new file mode 100644 index 0000000..ba0545b --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleNumericIndexStrategy.java @@ -0,0 +1,250 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.Coordinate; +import org.locationtech.sfcurve.geowave.index.IndexMetaData; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinateRanges; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.NumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.StringUtils; +import org.locationtech.sfcurve.geowave.index.dimension.BasicDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.lexicoder.NumberLexicoder; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericValue; + +/** + * A simple 1-dimensional NumericIndexStrategy that represents an index of + * signed integer values (currently supports 16 bit, 32 bit, and 64 bit + * integers). The strategy doesn't use any binning. The ids are simply the byte + * arrays of the value. This index strategy will not perform well for inserting + * ranges because there will be too much replication of data. 
+ * + */ +public abstract class SimpleNumericIndexStrategy implements + NumericIndexStrategy +{ + + private final NumberLexicoder lexicoder; + private final NumericDimensionDefinition[] definitions; + + protected SimpleNumericIndexStrategy( + final NumberLexicoder lexicoder ) { + this.lexicoder = lexicoder; + this.definitions = new NumericDimensionDefinition[] { + new BasicDimensionDefinition( + lexicoder.getMinimumValue().doubleValue(), + lexicoder.getMaximumValue().doubleValue()) + }; + } + + protected NumberLexicoder getLexicoder() { + return lexicoder; + } + + /** + * Cast a double into the type T + * + * @param value + * a double value + * @return the value represented as a T + */ + protected abstract T cast( + double value ); + + /** + * Always returns a single range since this is a 1-dimensional index. The + * sort-order of the bytes is the same as the sort order of values, so an + * indexedRange can be represented by a single contiguous ByteArrayRange. + * {@inheritDoc} + */ + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final IndexMetaData... hints ) { + return getQueryRanges( + indexedRange, + -1, + hints); + } + + /** + * Always returns a single range since this is a 1-dimensional index. The + * sort-order of the bytes is the same as the sort order of values, so an + * indexedRange can be represented by a single contiguous ByteArrayRange. + * {@inheritDoc} + */ + @Override + public List getQueryRanges( + final MultiDimensionalNumericData indexedRange, + final int maxEstimatedRangeDecomposition, + final IndexMetaData... 
hints ) { + final T min = cast(indexedRange.getMinValuesPerDimension()[0]); + final ByteArrayId start = new ByteArrayId( + lexicoder.toByteArray(min)); + final T max = cast(Math.ceil(indexedRange.getMaxValuesPerDimension()[0])); + final ByteArrayId end = new ByteArrayId( + lexicoder.toByteArray(max)); + final ByteArrayRange range = new ByteArrayRange( + start, + end); + return Collections.singletonList(range); + } + + /** + * Returns all of the insertion ids for the range. Since this index strategy + * doesn't use binning, it will return the ByteArrayId of every value in + * the range (i.e. if you are storing a range using this index strategy, + * your data will be replicated for every integer value in the range). + * + * {@inheritDoc} + */ + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData ) { + return getInsertionIds( + indexedData, + -1); + } + + /** + * Returns all of the insertion ids for the range. Since this index strategy + * doesn't use binning, it will return the ByteArrayId of every value in + * the range (i.e. if you are storing a range using this index strategy, + * your data will be replicated for every integer value in the range).
+ * + * {@inheritDoc} + */ + @Override + public List getInsertionIds( + final MultiDimensionalNumericData indexedData, + final int maxEstimatedDuplicateIds ) { + final long min = (long) indexedData.getMinValuesPerDimension()[0]; + final long max = (long) Math.ceil(indexedData.getMaxValuesPerDimension()[0]); + /* + * The initial capacity is only a sizing hint, so clamp it: an inverted + * range (max < min) or a span that does not fit in an int must not + * turn into a negative capacity, which ArrayList rejects with an + * IllegalArgumentException. + */ + final int initialCapacity = (int) Math.max( + 0L, + Math.min( + (max - min) + 1, + 1L << 16)); + final List insertionIds = new ArrayList<>( + initialCapacity); + for (long i = min; i <= max; i++) { + insertionIds.add(new ByteArrayId( + lexicoder.toByteArray(cast(i)))); + } + return insertionIds; + } + + @Override + public NumericDimensionDefinition[] getOrderedDimensionDefinitions() { + return definitions; + } + + /** + * Decodes the insertion id back into the single value it was created from. + * The decoded number is widened with longValue() instead of being cast to + * Long: a lexicoder for a narrower type (e.g. Lexicoders.INT or + * Lexicoders.SHORT) is expected to decode to an Integer or a Short, for + * which Long.class.cast would throw a ClassCastException. + */ + @Override + public MultiDimensionalNumericData getRangeForId( + final ByteArrayId insertionId ) { + final long value = lexicoder.fromByteArray(insertionId.getBytes()).longValue(); + final NumericData[] dataPerDimension = new NumericData[] { + new NumericValue( + value) + }; + return new BasicNumericDataset( + dataPerDimension); + } + + /** + * Returns the decoded value as the only coordinate; no bin id is used. The + * value is widened with longValue() for the same reason as in + * getRangeForId. + */ + @Override + public MultiDimensionalCoordinates getCoordinatesPerDimension( + final ByteArrayId insertionId ) { + return new MultiDimensionalCoordinates( + null, + new Coordinate[] { + new Coordinate( + lexicoder.fromByteArray(insertionId.getBytes()).longValue(), + null) + }); + } + + @Override + public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension( + final MultiDimensionalNumericData dataRange, + final IndexMetaData... hints ) { + // TODO: not sure what to do here + return new MultiDimensionalCoordinateRanges[] { + new MultiDimensionalCoordinateRanges() + }; + } + + @Override + public double[] getHighestPrecisionIdRangePerDimension() { + return new double[] { + 1d + }; + } + + @Override + public String getId() { + return StringUtils.intToString(hashCode()); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = (prime * result) + Arrays.hashCode(definitions); + result = (prime * result) + ((lexicoder == null) ?
0 : lexicoder.hashCode()); + return result; + } + + @Override + public boolean equals( + final Object obj ) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final SimpleNumericIndexStrategy other = (SimpleNumericIndexStrategy) obj; + if (!Arrays.equals( + definitions, + other.definitions)) { + return false; + } + if (lexicoder == null) { + if (other.lexicoder != null) { + return false; + } + } + else if (!lexicoder.equals(other.lexicoder)) { + return false; + } + return true; + } + + @Override + public Set getNaturalSplits() { + return null; + } + + @Override + public List createMetaData() { + return Collections.emptyList(); + } +} diff --git a/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleShortIndexStrategy.java b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleShortIndexStrategy.java new file mode 100644 index 0000000..e9a97c0 --- /dev/null +++ b/geowave/src/main/java/org/locationtech/sfcurve/geowave/index/simple/SimpleShortIndexStrategy.java @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import org.locationtech.sfcurve.geowave.index.lexicoder.Lexicoders; + +/** + * A simple 1-dimensional NumericIndexStrategy that represents an index of + * signed short values. The strategy doesn't use any binning. 
The ids are simply + * the byte arrays of the value. This index strategy will not perform well for + * inserting ranges because there will be too much replication of data. + * + */ +public class SimpleShortIndexStrategy extends + SimpleNumericIndexStrategy +{ + + public SimpleShortIndexStrategy() { + super( + Lexicoders.SHORT); + } + + @Override + public byte[] toBinary() { + return new byte[] {}; + } + + @Override + public void fromBinary( + final byte[] bytes ) {} + + @Override + protected Short cast( + final double value ) { + return (short) value; + } + + @Override + public int getByteOffsetFromDimensionalIndex() { + return 0; + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TemporalBinningStrategyTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TemporalBinningStrategyTest.java new file mode 100644 index 0000000..5eedfa7 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TemporalBinningStrategyTest.java @@ -0,0 +1,421 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.geotime.index.dimension; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.locationtech.sfcurve.geowave.geotime.index.dimension.TemporalBinningStrategy.Unit; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TemporalBinningStrategyTest +{ + @Before + public void setTimezoneToGMT() { + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + } + + @Test + public void testLargeNumberOfDayBins() { + internalTestBinsMatchExpectedCount( + 250000, + Unit.DAY, + 123456789876L); + } + + @Test + public void testLargeNumberOfMonthBins() { + internalTestBinsMatchExpectedCount( + 250000, + Unit.MONTH, + 9876543210L); + } + + @Test + public void testLargeNumberOfYearBins() { + // for years, use 250,000 to keep milli time values less than max long + internalTestBinsMatchExpectedCount( + 250000, + Unit.YEAR, + 0L); + } + + @Test + public void testLargeNumberOfHourBins() { + internalTestBinsMatchExpectedCount( + 250000, + Unit.HOUR, + 0L); + } + + @Test + public void testLargeNumberOfMinuteBins() { + internalTestBinsMatchExpectedCount( + 250000, + Unit.MINUTE, + 0L); + } + + private void internalTestBinsMatchExpectedCount( + final int binCount, + final Unit unit, + final long arbitraryTime ) { + final BinRange[] ranges = getBinRangesUsingFullExtents( + binCount, + unit, + arbitraryTime); + Assert.assertEquals( + binCount, + ranges.length); + } + + private BinRange[] getBinRangesUsingFullExtents( + final int binCount, + final Unit unit, 
+ final long arbitraryTime ) { + final Calendar startCal = Calendar.getInstance(); + final long time = arbitraryTime; // hopefully these approaches work for + // any arbitrary time, but allow a + // caller to set the specific time + // so tests are all entirely + // reproducible + startCal.setTimeInMillis(time); + final Calendar endCal = Calendar.getInstance(); + endCal.setTimeInMillis(time); + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + unit); + binStrategy.setToEpoch(startCal); + binStrategy.setToEpoch(endCal); + endCal.add( + unit.toCalendarEnum(), + binCount); + return binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + (double) endCal.getTimeInMillis() - 1)); + } + + @Test + public void testFullExtentOnSingleBin() { + final BinRange[] ranges = getBinRangesUsingFullExtents( + 1, + Unit.MONTH, + 543210987654321L); + + Assert.assertEquals( + 1, + ranges.length); + Assert.assertTrue(ranges[0].isFullExtent()); + } + + @Test + public void testFullExtentOnMultipleBins() { + final Calendar startCal = Calendar.getInstance(); + final long time = 3456789012345L; + startCal.setTimeInMillis(time); + final Calendar endCal = Calendar.getInstance(); + // theoretically should get 3 bins back the first and last not having + // full extent and the middle one being full extent + endCal.setTimeInMillis(time + (TemporalBinningStrategy.MILLIS_PER_DAY * 2)); + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.DAY); + + BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + endCal.getTimeInMillis())); + Assert.assertEquals( + 3, + ranges.length); + + Assert.assertTrue(!ranges[0].isFullExtent()); + + Assert.assertTrue(ranges[1].isFullExtent()); + + Assert.assertTrue(!ranges[2].isFullExtent()); + + final Calendar startCalOnEpoch = Calendar.getInstance(); + + startCalOnEpoch.setTimeInMillis(time); + binStrategy.setToEpoch(startCalOnEpoch); + + ranges = 
binStrategy.getNormalizedRanges(new NumericRange( + startCalOnEpoch.getTimeInMillis(), + endCal.getTimeInMillis())); + Assert.assertEquals( + 3, + ranges.length); + // now the first element should be full extent + Assert.assertTrue(ranges[0].isFullExtent()); + + Assert.assertTrue(ranges[1].isFullExtent()); + + Assert.assertTrue(!ranges[2].isFullExtent()); + + final Calendar endCalOnMax = Calendar.getInstance(); + // should still get 3 bins back; with the end moved to the last + // millisecond of its day, the last bin is full extent like the middle one + endCalOnMax.setTimeInMillis(time + (TemporalBinningStrategy.MILLIS_PER_DAY * 3)); + binStrategy.setToEpoch(endCalOnMax); + endCalOnMax.add( + Calendar.MILLISECOND, + -1); + ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + endCalOnMax.getTimeInMillis())); + Assert.assertEquals( + 3, + ranges.length); + + Assert.assertTrue(!ranges[0].isFullExtent()); + + Assert.assertTrue(ranges[1].isFullExtent()); + + // now the last element should be full extent + Assert.assertTrue(ranges[2].isFullExtent()); + } + + @Test + public void testStartOnEpochMinusOneAndEndOnEpoch() { + final Calendar startCal = Calendar.getInstance(); + // final long time = 675849302912837456L; //this value would cause it to + // fail because we lose precision in converting to a double (a double has + // a 53-bit significand, so the range of longs it can represent exactly + // ends at 2^53; beyond that the ulp of the double becomes + // greater than 1) + final long time = 6758493029128L; + startCal.setTimeInMillis(time); + startCal.set( + Calendar.MONTH, + 0);// make sure its a month after one with 31 days + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.MONTH); + binStrategy.setToEpoch(startCal); + final Calendar endCal = Calendar.getInstance(); + endCal.setTimeInMillis(time); + endCal.set( + Calendar.MONTH, + 0);// make sure its a month after one with 31 days +
binStrategy.setToEpoch(endCal); + final BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis() - 1, + endCal.getTimeInMillis())); + + Assert.assertEquals( + 2, + ranges.length); + + // the first range should be the max possible value and both the min and + // max of the range should be equal + Assert.assertTrue(ranges[0].getNormalizedMax() == binStrategy.getBinMax()); + Assert.assertTrue(ranges[0].getNormalizedMin() == ranges[0].getNormalizedMax()); + + // the second range should be the min possible value and both the min + // and max of the range should be equal + Assert.assertTrue(ranges[1].getNormalizedMin() == binStrategy.getBinMin()); + Assert.assertTrue(ranges[1].getNormalizedMax() == ranges[1].getNormalizedMin()); + } + + @Test + public void testStartAndEndEqual() { + final long time = 123987564019283L; + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.YEAR); + final BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + time, + time)); + + Assert.assertEquals( + 1, + ranges.length); + // both the min and max of the range should be equal + Assert.assertTrue(ranges[0].getNormalizedMin() == ranges[0].getNormalizedMax()); + } + + @Test + public void testEndLessThanStart() { + final long time = 123987564019283L; + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.YEAR); + final BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + time, + time - 1)); + + Assert.assertEquals( + 0, + ranges.length); + } + + @Test + public void testFeb28ToMarch1NonLeapYear() { + final long time = 47920164930285667L; + final Calendar startCal = Calendar.getInstance(); + startCal.setTimeInMillis(time); + startCal.set( + Calendar.MONTH, + 1); + startCal.set( + Calendar.YEAR, + 2015); + startCal.set( + Calendar.DAY_OF_MONTH, + 28); + + final Calendar endCal = Calendar.getInstance(); + endCal.setTimeInMillis(time); + endCal.set( + Calendar.MONTH, +
2); + endCal.set( + Calendar.YEAR, + 2015); + endCal.set( + Calendar.DAY_OF_MONTH, + 1); + // test the day boundaries first - going from feb28 to march 1 should + // give 2 bins + TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.DAY); + BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + endCal.getTimeInMillis())); + + Assert.assertEquals( + 2, + ranges.length); + + // now test the month boundaries - adding a day to feb28 for the end + // time should give 2 bins + binStrategy = new TemporalBinningStrategy( + Unit.MONTH); + ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + startCal.getTimeInMillis() + (TemporalBinningStrategy.MILLIS_PER_DAY))); + + Assert.assertEquals( + 2, + ranges.length); + } + + @Test + public void testFeb28ToMarch1LeapYear() { + final long time = 29374659120374656L; + final Calendar startCal = Calendar.getInstance(); + startCal.setTimeInMillis(time); + startCal.set( + Calendar.MONTH, + 1); + startCal.set( + Calendar.YEAR, + 2016); + startCal.set( + Calendar.DAY_OF_MONTH, + 28); + + final Calendar endCal = Calendar.getInstance(); + endCal.setTimeInMillis(time); + endCal.set( + Calendar.MONTH, + 2); + endCal.set( + Calendar.YEAR, + 2016); + endCal.set( + Calendar.DAY_OF_MONTH, + 1); + // test the day boundaries first - going from feb28 to march 1 should + // give 3 bins + TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.DAY); + BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + endCal.getTimeInMillis())); + + Assert.assertEquals( + 3, + ranges.length); + + // now test the month boundaries - adding a day to feb28 for the end + // time should give 1 bin, adding 2 days should give 2 bins + binStrategy = new TemporalBinningStrategy( + Unit.MONTH); + ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + 
startCal.getTimeInMillis() + (TemporalBinningStrategy.MILLIS_PER_DAY))); + + Assert.assertEquals( + 1, + ranges.length); + // add 2 days and now we should end up with 2 bins + ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + startCal.getTimeInMillis() + (TemporalBinningStrategy.MILLIS_PER_DAY * 2))); + + Assert.assertEquals( + 2, + ranges.length); + } + + @Test + public void testNonLeapYear() { + final long time = 75470203439504394L; + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.YEAR); + final Calendar startCal = Calendar.getInstance(); + startCal.setTimeInMillis(time); + startCal.set( + Calendar.YEAR, + 2015); + binStrategy.setToEpoch(startCal); + // if we add 365 days to this we should get 2 year bins + final BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + startCal.getTimeInMillis() + (TemporalBinningStrategy.MILLIS_PER_DAY * 365))); + Assert.assertEquals( + 2, + ranges.length); + } + + @Test + public void testLeapYear() { + final long time = 94823024856598633L; + final TemporalBinningStrategy binStrategy = new TemporalBinningStrategy( + Unit.YEAR); + final Calendar startCal = Calendar.getInstance(); + startCal.setTimeInMillis(time); + startCal.set( + Calendar.YEAR, + 2016); + binStrategy.setToEpoch(startCal); + // if we add 365 days to this we should get 1 year bin + BinRange[] ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + startCal.getTimeInMillis() + (TemporalBinningStrategy.MILLIS_PER_DAY * 365))); + Assert.assertEquals( + 1, + ranges.length); + // if we add 366 days to this we should get 2 year bins, and the second + // bin should be the epoch + ranges = binStrategy.getNormalizedRanges(new NumericRange( + startCal.getTimeInMillis(), + startCal.getTimeInMillis() + (TemporalBinningStrategy.MILLIS_PER_DAY * 366))); + Assert.assertEquals( + 2, + ranges.length); + + // the second bin should 
just contain the epoch + Assert.assertTrue(ranges[1].getNormalizedMin() == ranges[1].getNormalizedMax()); + Assert.assertTrue(ranges[1].getNormalizedMin() == binStrategy.getBinMin()); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TimeDefinitionTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TimeDefinitionTest.java new file mode 100644 index 0000000..23c4bc0 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/geotime/index/dimension/TimeDefinitionTest.java @@ -0,0 +1,399 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.geotime.index.dimension; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.locationtech.sfcurve.geowave.geotime.index.dimension.TemporalBinningStrategy.Unit; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinningStrategy; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TimeDefinitionTest +{ + + private double DELTA = 1e-15; + + @Before + public void setTimezoneToGMT() { + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + } + + @Test + public void testTimeDefinitionMaxBinByDay() { + + final double expectedMin = 0.0; + final double expectedMax = 86399999; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.HOUR_OF_DAY, + 23); + 
calendar.set( + Calendar.MINUTE, + 59); + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 999); + + BinningStrategy bin = getStrategyByUnit(Unit.DAY); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + + } + + @Test + public void testTimeDefinitionMaxBinByMonth() { + + final double expectedMin = 0.0; + final double expectedMax = 2678399999.0; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.MONTH, + 6); + calendar.set( + Calendar.DATE, + 31); + calendar.set( + Calendar.HOUR_OF_DAY, + 23); + calendar.set( + Calendar.MINUTE, + 59); + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 999); + + BinningStrategy bin = getStrategyByUnit(Unit.MONTH); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + + } + + @Test + public void testTimeDefinitionMinBinByMonth() { + + final double expectedMin = 0.0; + final double expectedMax = 2678399999.0; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.MONTH, + 6); + calendar.set( + Calendar.DATE, + 1); + calendar.set( + Calendar.HOUR_OF_DAY, + 0); + calendar.set( + Calendar.MINUTE, + 0); + calendar.set( + Calendar.SECOND, + 0); + calendar.set( + Calendar.MILLISECOND, + 0); + + BinningStrategy bin = getStrategyByUnit(Unit.MONTH); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMin(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); 
+ + } + + @Test + public void testTimeDefinitionMaxBinByYEAR() { + + final double expectedMin = 0.0; + final double expectedMax = 31622399999.0; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.YEAR, + 2012); + calendar.set( + Calendar.MONTH, + 11); + calendar.set( + Calendar.DATE, + 31); + calendar.set( + Calendar.HOUR_OF_DAY, + 23); + calendar.set( + Calendar.MINUTE, + 59); + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 999); + + BinningStrategy bin = getStrategyByUnit(Unit.YEAR); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + } + + @Test + public void testTimeDefinitionBinByHour() { + + final double expectedMin = 0.0; + final double expectedMax = 3599999.0; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.MINUTE, + 59); + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 999); + + BinningStrategy bin = getStrategyByUnit(Unit.HOUR); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + + calendar.set( + Calendar.MINUTE, + 0); + calendar.set( + Calendar.SECOND, + 0); + calendar.set( + Calendar.MILLISECOND, + 0); + + Assert.assertEquals( + bin.getBinMin(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + } + + @Test + public void testTimeDefinitionBinByMinute() { + + final double expectedMin = 0.0; + final double expectedMax = 59999.0; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 
999); + + BinningStrategy bin = getStrategyByUnit(Unit.MINUTE); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + + calendar.set( + Calendar.SECOND, + 0); + calendar.set( + Calendar.MILLISECOND, + 0); + + Assert.assertEquals( + bin.getBinMin(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + } + + @Test + public void testTimeDefinitionMaxBinByDecade() { + + final double expectedMin = 0.0; + final double expectedMax = 315619199999.0; + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.YEAR, + 2009); + calendar.set( + Calendar.MONTH, + 11); + calendar.set( + Calendar.DATE, + 31); + calendar.set( + Calendar.HOUR_OF_DAY, + 23); + calendar.set( + Calendar.MINUTE, + 59); + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 999); + + BinningStrategy bin = getStrategyByUnit(Unit.DECADE); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + } + + @Test + public void testTimeDefinitionMaxBinByWeek() { + + final double expectedMin = 0.0; + final double expectedMax = 604799999.0; + + BinningStrategy bin = getStrategyByUnit(Unit.WEEK); + + final Calendar calendar = Calendar.getInstance(); + + calendar.set( + Calendar.DAY_OF_WEEK, + calendar.getActualMaximum(Calendar.DAY_OF_WEEK)); + calendar.set( + Calendar.HOUR_OF_DAY, + 23); + calendar.set( + Calendar.MINUTE, + 59); + calendar.set( + Calendar.SECOND, + 59); + calendar.set( + Calendar.MILLISECOND, + 999); + + Assert.assertEquals( + expectedMin, + bin.getBinMin(), + DELTA); + Assert.assertEquals( + expectedMax, + 
bin.getBinMax(), + DELTA); + Assert.assertEquals( + bin.getBinMax(), + bin.getBinnedValue( + calendar.getTimeInMillis()).getNormalizedValue(), + DELTA); + } + + private BinningStrategy getStrategyByUnit( + Unit unit ) { + return new TimeDefinition( + unit).getBinningStrategy(); + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/ByteArrayRangeTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/ByteArrayRangeTest.java new file mode 100644 index 0000000..83d48ad --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/ByteArrayRangeTest.java @@ -0,0 +1,207 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.UUID; + +import org.locationtech.sfcurve.geowave.index.ByteArrayRange.MergeOperation; + +import org.junit.Test; + +public class ByteArrayRangeTest +{ + + @Test + public void testUnion() { + ByteArrayRange bar1 = new ByteArrayRange( + new ByteArrayId( + "232"), + new ByteArrayId( + "332")); + ByteArrayRange bar2 = new ByteArrayRange( + new ByteArrayId( + "282"), + new ByteArrayId( + "300")); + ByteArrayRange bar3 = new ByteArrayRange( + new ByteArrayId( + "272"), + new ByteArrayId( + "340")); + ByteArrayRange bar4 = new 
ByteArrayRange( + new ByteArrayId( + "392"), + new ByteArrayId( + "410")); + + List l1 = new ArrayList( + Arrays.asList( + bar4, + bar3, + bar1, + bar2)); + l1 = ByteArrayRange.mergeIntersections( + l1, + MergeOperation.UNION); + + List l2 = new ArrayList( + Arrays.asList( + bar1, + bar4, + bar2, + bar3)); + l2 = ByteArrayRange.mergeIntersections( + l2, + MergeOperation.UNION); + + assertEquals( + 2, + l1.size()); + + assertEquals( + l1, + l2); + + assertEquals( + new ByteArrayRange( + new ByteArrayId( + "232"), + new ByteArrayId( + "340")), + l1.get(0)); + assertEquals( + new ByteArrayRange( + new ByteArrayId( + "392"), + new ByteArrayId( + "410")), + l1.get(1)); + + } + + @Test + public void testIntersection() { + ByteArrayRange bar1 = new ByteArrayRange( + new ByteArrayId( + "232"), + new ByteArrayId( + "332")); + ByteArrayRange bar2 = new ByteArrayRange( + new ByteArrayId( + "282"), + new ByteArrayId( + "300")); + ByteArrayRange bar3 = new ByteArrayRange( + new ByteArrayId( + "272"), + new ByteArrayId( + "340")); + ByteArrayRange bar4 = new ByteArrayRange( + new ByteArrayId( + "392"), + new ByteArrayId( + "410")); + + List l1 = new ArrayList( + Arrays.asList( + bar4, + bar3, + bar1, + bar2)); + l1 = ByteArrayRange.mergeIntersections( + l1, + MergeOperation.INTERSECTION); + + List l2 = new ArrayList( + Arrays.asList( + bar1, + bar4, + bar2, + bar3)); + l2 = ByteArrayRange.mergeIntersections( + l2, + MergeOperation.INTERSECTION); + + assertEquals( + 2, + l1.size()); + + assertEquals( + l1, + l2); + + assertEquals( + new ByteArrayRange( + new ByteArrayId( + "282"), + new ByteArrayId( + "300")), + l1.get(0)); + assertEquals( + new ByteArrayRange( + new ByteArrayId( + "392"), + new ByteArrayId( + "410")), + l1.get(1)); + + } + + final Random random = new Random(); + + public String increment( + String id ) { + int v = (int) (Math.abs(random.nextDouble()) * 10000); + StringBuffer buf = new StringBuffer(); + int pos = id.length() - 1; + int r = 0; + while (v > 0) { + 
int m = (v - ((v >> 8) << 8)); + int c = id.charAt(pos); + int n = c + m + r; + buf.append((char) (n % 255)); + r = n / 255; + v >>= 8; + pos--; + } + while (pos >= 0) { + buf.append(id.charAt(pos--)); + } + return buf.reverse().toString(); + } + + @Test + public void bigTest() { + List l2 = new ArrayList(); + for (int i = 0; i < 3000; i++) { + String seed = UUID.randomUUID().toString(); + for (int j = 0; j < 500; j++) { + l2.add(new ByteArrayRange( + new ByteArrayId( + seed), + new ByteArrayId( + increment(seed)))); + seed = increment(seed); + } + } + + ByteArrayRange.mergeIntersections( + l2, + MergeOperation.INTERSECTION); + + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/ByteArrayUtilsTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/ByteArrayUtilsTest.java new file mode 100644 index 0000000..99ae9b8 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/ByteArrayUtilsTest.java @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import org.apache.commons.lang3.tuple.Pair; +import org.junit.Assert; +import org.junit.Test; + +public class ByteArrayUtilsTest +{ + + @Test + public void testSplit() { + final ByteArrayId first = new ByteArrayId( + "first"); + final ByteArrayId second = new ByteArrayId( + "second"); + final byte[] combined = ByteArrayUtils.combineVariableLengthArrays( + first.getBytes(), + second.getBytes()); + final Pair split = ByteArrayUtils.splitVariableLengthArrays(combined); + Assert.assertArrayEquals( + first.getBytes(), + split.getLeft()); + Assert.assertArrayEquals( + second.getBytes(), + split.getRight()); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/CompoundIndexStrategyTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/CompoundIndexStrategyTest.java new file mode 100644 index 0000000..88d1a44 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/CompoundIndexStrategyTest.java @@ -0,0 +1,382 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.Assert; +import org.junit.Test; + +import org.locationtech.sfcurve.geowave.index.dimension.BasicDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory.SFCType; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; +import org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexFactory; + +public class CompoundIndexStrategyTest +{ + + private static final NumericDimensionDefinition[] DIMENSIONS = new NumericDimensionDefinition[] { + new BasicDimensionDefinition( + 0, + 1000) + }; + + private static final NumericIndexStrategy simpleIndexStrategy = TieredSFCIndexFactory.createSingleTierStrategy( + DIMENSIONS, + new int[] { + 16 + }, + SFCType.HILBERT); + + private static final NumericDimensionDefinition[] SPATIAL_DIMENSIONS = new NumericDimensionDefinition[] { + new BasicDimensionDefinition( + -180, + 180), + new BasicDimensionDefinition( + -90, + 90) + }; + + private static final NumericIndexStrategy sfcIndexStrategy = TieredSFCIndexFactory.createSingleTierStrategy( + SPATIAL_DIMENSIONS, + new int[] { + 16, + 16 + }, + SFCType.HILBERT); + + private static final CompoundIndexStrategy 
compoundIndexStrategy = new CompoundIndexStrategy( + simpleIndexStrategy, + sfcIndexStrategy); + + private static final NumericRange dimension1Range = new NumericRange( + 2, + 4); + private static final NumericRange dimension2Range = new NumericRange( + 50.0, + 50.025); + private static final NumericRange dimension3Range = new NumericRange( + -20.5, + -20.455); + private static final MultiDimensionalNumericData simpleIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension1Range, + }); + private static final MultiDimensionalNumericData sfcIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension2Range, + dimension3Range + }); + private static final MultiDimensionalNumericData compoundIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension1Range, + dimension2Range, + dimension3Range + }); + + @Test + public void testBinaryEncoding() { + final byte[] bytes = PersistenceUtils.toBinary(compoundIndexStrategy); + final CompoundIndexStrategy deserializedStrategy = PersistenceUtils.fromBinary( + bytes, + CompoundIndexStrategy.class); + final byte[] bytes2 = PersistenceUtils.toBinary(deserializedStrategy); + Assert.assertArrayEquals( + bytes, + bytes2); + } + + @Test + public void testNumberOfDimensionsPerIndexStrategy() { + final int[] numDimensionsPerStrategy = compoundIndexStrategy.getNumberOfDimensionsPerIndexStrategy(); + Assert.assertEquals( + 1, + numDimensionsPerStrategy[0]); + Assert.assertEquals( + 2, + numDimensionsPerStrategy[1]); + } + + @Test + public void testGetNumberOfDimensions() { + final int numDimensions = compoundIndexStrategy.getNumberOfDimensions(); + Assert.assertEquals( + 3, + numDimensions); + } + + @Test + public void testCompositionOfByteArrayId() { + final ByteArrayId id1 = new ByteArrayId( + "hello"); + final ByteArrayId id2 = new ByteArrayId( + "world!!"); + final ByteArrayId compoundId = compoundIndexStrategy.composeByteArrayId( + id1, + id2); + final ByteArrayId[] decomposedId = 
compoundIndexStrategy.decomposeByteArrayId(compoundId); + Assert.assertArrayEquals( + id1.getBytes(), + decomposedId[0].getBytes()); + Assert.assertArrayEquals( + id2.getBytes(), + decomposedId[1].getBytes()); + } + + @Test + public void testGetQueryRangesWithMaximumNumberOfRanges() { + final List simpleIndexRanges = simpleIndexStrategy.getQueryRanges(simpleIndexedRange); + final List sfcIndexRanges = sfcIndexStrategy.getQueryRanges(sfcIndexedRange); + final List ranges = new ArrayList<>(); + for (final ByteArrayRange r1 : simpleIndexRanges) { + for (final ByteArrayRange r2 : sfcIndexRanges) { + final ByteArrayId start = compoundIndexStrategy.composeByteArrayId( + r1.getStart(), + r2.getStart()); + final ByteArrayId end = compoundIndexStrategy.composeByteArrayId( + r1.getEnd(), + r2.getEnd()); + ranges.add(new ByteArrayRange( + start, + end)); + } + } + final Set testRanges = new HashSet<>( + ranges); + final Set compoundIndexRanges = new HashSet<>( + compoundIndexStrategy.getQueryRanges(compoundIndexedRange)); + Assert.assertTrue(testRanges.containsAll(compoundIndexRanges)); + Assert.assertTrue(compoundIndexRanges.containsAll(testRanges)); + } + + @Test + public void testGetQueryRanges() { + final List simpleIndexRanges = simpleIndexStrategy.getQueryRanges( + simpleIndexedRange, + 3); + final int maxRangesStrategy2 = 8 / simpleIndexRanges.size(); + final List sfcIndexRanges = sfcIndexStrategy.getQueryRanges( + sfcIndexedRange, + maxRangesStrategy2); + final List ranges = new ArrayList<>( + simpleIndexRanges.size() * sfcIndexRanges.size()); + for (final ByteArrayRange r1 : simpleIndexRanges) { + for (final ByteArrayRange r2 : sfcIndexRanges) { + final ByteArrayId start = compoundIndexStrategy.composeByteArrayId( + r1.getStart(), + r2.getStart()); + final ByteArrayId end = compoundIndexStrategy.composeByteArrayId( + r1.getEnd(), + r2.getEnd()); + ranges.add(new ByteArrayRange( + start, + end)); + } + } + final Set testRanges = new HashSet<>( + ranges); + final Set 
compoundIndexRanges = new HashSet<>( + compoundIndexStrategy.getQueryRanges( + compoundIndexedRange, + 8)); + Assert.assertTrue(testRanges.containsAll(compoundIndexRanges)); + Assert.assertTrue(compoundIndexRanges.containsAll(testRanges)); + } + + @Test + public void testGetInsertionIds() { + final List ids = new ArrayList<>(); + final List ids1 = simpleIndexStrategy.getInsertionIds( + simpleIndexedRange, + 3); + final int maxEstDuplicatesStrategy2 = 8 / ids1.size(); + final List ids2 = sfcIndexStrategy.getInsertionIds( + sfcIndexedRange, + maxEstDuplicatesStrategy2); + for (final ByteArrayId id1 : ids1) { + for (final ByteArrayId id2 : ids2) { + ids.add(compoundIndexStrategy.composeByteArrayId( + id1, + id2)); + } + } + final Set testIds = new HashSet<>( + ids); + final Set compoundIndexIds = new HashSet<>( + compoundIndexStrategy.getInsertionIds( + compoundIndexedRange, + 8)); + Assert.assertTrue(testIds.containsAll(compoundIndexIds)); + Assert.assertTrue(compoundIndexIds.containsAll(testIds)); + } + + @Test + public void testGetCoordinatesPerDimension() { + final ByteArrayId compoundIndexInsertionId = new ByteArrayId( + new byte[] { + 16, + 0, + -125, + 16, + -46, + -93, + -110, + -31, + 0, + 0, + 0, + 3 + }); + final ByteArrayId[] insertionIds = compoundIndexStrategy.decomposeByteArrayId(compoundIndexInsertionId); + final MultiDimensionalCoordinates simpleIndexCoordinatesPerDim = simpleIndexStrategy + .getCoordinatesPerDimension(insertionIds[0]); + final MultiDimensionalCoordinates sfcIndexCoordinatesPerDim = sfcIndexStrategy + .getCoordinatesPerDimension(insertionIds[1]); + final MultiDimensionalCoordinates coordinatesPerDim = compoundIndexStrategy + .getCoordinatesPerDimension(compoundIndexInsertionId); + Assert.assertTrue(Long.compare( + simpleIndexCoordinatesPerDim.getCoordinate( + 0).getCoordinate(), + coordinatesPerDim.getCoordinate( + 0).getCoordinate()) == 0); + Assert.assertTrue(Long.compare( + sfcIndexCoordinatesPerDim.getCoordinate( + 
0).getCoordinate(), + coordinatesPerDim.getCoordinate( + 1).getCoordinate()) == 0); + Assert.assertTrue(Long.compare( + sfcIndexCoordinatesPerDim.getCoordinate( + 1).getCoordinate(), + coordinatesPerDim.getCoordinate( + 2).getCoordinate()) == 0); + } + + @Test + public void testGetRangeForId() { + final ByteArrayId compoundIndexInsertionId = new ByteArrayId( + new byte[] { + 16, + 0, + -125, + 16, + -46, + -93, + -110, + -31, + 0, + 0, + 0, + 3 + }); + final ByteArrayId[] insertionIds = compoundIndexStrategy.decomposeByteArrayId(compoundIndexInsertionId); + final MultiDimensionalNumericData simpleIndexRange = simpleIndexStrategy.getRangeForId(insertionIds[0]); + final MultiDimensionalNumericData sfcIndexRange = sfcIndexStrategy.getRangeForId(insertionIds[1]); + final MultiDimensionalNumericData range = compoundIndexStrategy.getRangeForId(compoundIndexInsertionId); + Assert.assertEquals( + simpleIndexRange.getDimensionCount(), + 1); + Assert.assertEquals( + sfcIndexRange.getDimensionCount(), + 2); + Assert.assertEquals( + range.getDimensionCount(), + 3); + Assert.assertTrue(Double.compare( + simpleIndexRange.getMinValuesPerDimension()[0], + range.getMinValuesPerDimension()[0]) == 0); + Assert.assertTrue(Double.compare( + sfcIndexRange.getMinValuesPerDimension()[0], + range.getMinValuesPerDimension()[1]) == 0); + Assert.assertTrue(Double.compare( + sfcIndexRange.getMinValuesPerDimension()[1], + range.getMinValuesPerDimension()[2]) == 0); + Assert.assertTrue(Double.compare( + simpleIndexRange.getMaxValuesPerDimension()[0], + range.getMaxValuesPerDimension()[0]) == 0); + Assert.assertTrue(Double.compare( + sfcIndexRange.getMaxValuesPerDimension()[0], + range.getMaxValuesPerDimension()[1]) == 0); + Assert.assertTrue(Double.compare( + sfcIndexRange.getMaxValuesPerDimension()[1], + range.getMaxValuesPerDimension()[2]) == 0); + } + + @Test + public void testGetHighestPrecisionIdRangePerDimension() { + final double[] simpleIndexPrecision = 
simpleIndexStrategy.getHighestPrecisionIdRangePerDimension(); + final double[] sfcIndexPrecision = sfcIndexStrategy.getHighestPrecisionIdRangePerDimension(); + final double[] precisionPerDim = compoundIndexStrategy.getHighestPrecisionIdRangePerDimension(); + Assert.assertTrue(Double.compare( + precisionPerDim[0], + simpleIndexPrecision[0]) == 0); + Assert.assertTrue(Double.compare( + precisionPerDim[1], + sfcIndexPrecision[0]) == 0); + Assert.assertTrue(Double.compare( + precisionPerDim[2], + sfcIndexPrecision[1]) == 0); + } + + @Test + public void testHints() { + final List ids = compoundIndexStrategy.getInsertionIds( + compoundIndexedRange, + 8); + + List metaData = compoundIndexStrategy.createMetaData(); + for (IndexMetaData imd : metaData) { + imd.insertionIdsAdded(ids); + } + + final List simpleIndexRanges = simpleIndexStrategy.getQueryRanges(simpleIndexedRange); + final List sfcIndexRanges = sfcIndexStrategy.getQueryRanges(sfcIndexedRange); + final List ranges = new ArrayList<>(); + for (final ByteArrayRange r1 : simpleIndexRanges) { + for (final ByteArrayRange r2 : sfcIndexRanges) { + final ByteArrayId start = compoundIndexStrategy.composeByteArrayId( + r1.getStart(), + r2.getStart()); + final ByteArrayId end = compoundIndexStrategy.composeByteArrayId( + r1.getEnd(), + r2.getEnd()); + ranges.add(new ByteArrayRange( + start, + end)); + } + } + + final Set compoundIndexRangesWithoutHints = new HashSet<>( + compoundIndexStrategy.getQueryRanges(compoundIndexedRange)); + final Set compoundIndexRangesWithHints = new HashSet<>( + compoundIndexStrategy.getQueryRanges( + compoundIndexedRange, + metaData.toArray(new IndexMetaData[metaData.size()]))); + Assert.assertTrue(compoundIndexRangesWithoutHints.containsAll(compoundIndexRangesWithHints)); + Assert.assertTrue(compoundIndexRangesWithHints.containsAll(compoundIndexRangesWithoutHints)); + + List newMetaData = PersistenceUtils.fromBinary(PersistenceUtils.toBinary(metaData)); + final Set compoundIndexRangesWithHints2 
= new HashSet<>( + compoundIndexStrategy.getQueryRanges( + compoundIndexedRange, + newMetaData.toArray(new IndexMetaData[newMetaData.size()]))); /* query with the round-tripped hints, not the originals, so the PersistenceUtils round trip is actually exercised */ + Assert.assertTrue(compoundIndexRangesWithoutHints.containsAll(compoundIndexRangesWithHints2)); + Assert.assertTrue(compoundIndexRangesWithHints2.containsAll(compoundIndexRangesWithoutHints)); + + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/PersistenceUtilsTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/PersistenceUtilsTest.java new file mode 100644 index 0000000..0e964d1 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/PersistenceUtilsTest.java @@ -0,0 +1,65 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; + +import org.junit.Assert; +import org.junit.Test; + +public class PersistenceUtilsTest +{ + + public static class APersistable implements + Persistable + { + + @Override + public byte[] toBinary() { + return new byte[] { + 1, + 2, + 3 + }; + } + + @Override + public void fromBinary( + byte[] bytes ) { + Assert.assertTrue(Arrays.equals( + bytes, + new byte[] { + 1, + 2, + 3 + })); + + } + + } + + @Test + public void test() { + APersistable persistable = new APersistable(); + Assert.assertTrue(PersistenceUtils.fromBinary( + 
PersistenceUtils.toBinary(new ArrayList())).isEmpty()); + Assert.assertTrue(PersistenceUtils.fromBinary( + PersistenceUtils.toBinary(Collections. singleton(persistable))).size() == 1); + + Assert.assertTrue(PersistenceUtils.fromBinary( + PersistenceUtils.toBinary(Arrays. asList(new Persistable[] { + persistable, + persistable + }))).size() == 2); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/StringUtilsTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/StringUtilsTest.java new file mode 100644 index 0000000..46ac029 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/StringUtilsTest.java @@ -0,0 +1,43 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class StringUtilsTest +{ + @Test + public void testFull() { + String[] result = StringUtils.stringsFromBinary(StringUtils.stringsToBinary(new String[] { + "12", + "34" + })); + assertEquals( + 2, + result.length); + assertEquals( + "12", + result[0]); + assertEquals( + "34", + result[1]); + } + + @Test + public void testEmpty() { + String[] result = StringUtils.stringsFromBinary(StringUtils.stringsToBinary(new String[] {})); + assertEquals( + 0, + result.length); + } +} diff --git 
a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/BasicDimensionDefinitionTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/BasicDimensionDefinitionTest.java new file mode 100644 index 0000000..e813dd4 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/BasicDimensionDefinitionTest.java @@ -0,0 +1,150 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.dimension; + +import org.locationtech.sfcurve.geowave.index.dimension.BasicDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +import org.junit.Assert; +import org.junit.Test; + +public class BasicDimensionDefinitionTest +{ + + private double MINIMUM = 20; + private double MAXIMUM = 100; + private double DELTA = 1e-15; + + @Test + public void testNormalizeMidValue() { + + final double midValue = 60; + final double normalizedValue = 0.5; + + Assert.assertEquals( + normalizedValue, + getNormalizedValueUsingBounds( + MINIMUM, + MAXIMUM, + midValue), + DELTA); + + } + + @Test + public void testNormalizeLowerValue() { /* the dimension's lower bound must normalize to 0.0 */ + + final double lowerValue = 20; + final double normalizedValue = 0.0; + + Assert.assertEquals( + normalizedValue, + getNormalizedValueUsingBounds( + MINIMUM, + MAXIMUM, + lowerValue), + DELTA); + + } + + @Test + 
public void testNormalizeUpperValue() { /* the dimension's upper bound must normalize to 1.0 */ + + final double upperValue = 100; + final double normalizedValue = 1.0; + + Assert.assertEquals( + normalizedValue, + getNormalizedValueUsingBounds( + MINIMUM, + MAXIMUM, + upperValue), + DELTA); + + } + + @Test + public void testNormalizeClampOutOfBoundsValue() { + + final double value = 1; + final double normalizedValue = 0.0; + + Assert.assertEquals( + normalizedValue, + getNormalizedValueUsingBounds( + MINIMUM, + MAXIMUM, + value), + DELTA); + + } + + @Test + public void testNormalizeRangesBinRangeCount() { + + final double minRange = 40; + final double maxRange = 50; + final int binCount = 1; + + BinRange[] binRange = getNormalizedRangesUsingBounds( + minRange, + maxRange); + + Assert.assertEquals( + binCount, + binRange.length); + + } + + @Test + public void testNormalizeClampOutOfBoundsRanges() { + + final double minRange = 1; + final double maxRange = 150; + + BinRange[] binRange = getNormalizedRangesUsingBounds( + minRange, + maxRange); + + Assert.assertEquals( + MINIMUM, + binRange[0].getNormalizedMin(), + DELTA); + Assert.assertEquals( + MAXIMUM, + binRange[0].getNormalizedMax(), + DELTA); + + } + + private double getNormalizedValueUsingBounds( + final double min, + final double max, + final double value ) { + return new BasicDimensionDefinition( + min, + max).normalize(value); + } + + private BinRange[] getNormalizedRangesUsingBounds( + final double minRange, + final double maxRange ) { + + return new BasicDimensionDefinition( + MINIMUM, + MAXIMUM).getNormalizedRanges(new NumericRange( + minRange, + maxRange)); + + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinRangeTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinRangeTest.java new file mode 100644 index 0000000..2352f79 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinRangeTest.java @@ -0,0 +1,75 @@ 
+/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.dimension.bin; + +import java.nio.ByteBuffer; + +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinRange; + +import org.junit.Assert; +import org.junit.Test; + +public class BinRangeTest +{ + + private final double MINIMUM_RANGE = 20; + private final double MAXIMUM_RANGE = 100; + private double DELTA = 1e-15; + + @Test + public void testBinRangeValues() { + + BinRange binRange = new BinRange( + MINIMUM_RANGE, + MAXIMUM_RANGE); + + Assert.assertEquals( + MINIMUM_RANGE, + binRange.getNormalizedMin(), + DELTA); + Assert.assertEquals( + MAXIMUM_RANGE, + binRange.getNormalizedMax(), + DELTA); + + Assert.assertFalse(binRange.isFullExtent()); + + } + + @Test + public void testBinRangeFullExtent() { + + final int binIdValue = 120; + final byte[] binID = ByteBuffer.allocate( + 4).putInt( + binIdValue).array(); + final boolean fullExtent = true; + + BinRange binRange = new BinRange( + binID, + MINIMUM_RANGE, + MAXIMUM_RANGE, + fullExtent); + + Assert.assertEquals( + MINIMUM_RANGE, + binRange.getNormalizedMin(), + DELTA); + Assert.assertEquals( + MAXIMUM_RANGE, + binRange.getNormalizedMax(), + DELTA); + + Assert.assertTrue(binRange.isFullExtent()); + + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinValueTest.java 
b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinValueTest.java new file mode 100644 index 0000000..e76b98b --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/dimension/bin/BinValueTest.java @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.dimension.bin; + +import java.nio.ByteBuffer; + +import org.locationtech.sfcurve.geowave.index.dimension.bin.BinValue; + +import org.junit.Assert; +import org.junit.Test; + +public class BinValueTest +{ + + final double BIN_VALUE = 100; + private double DELTA = 1e-15; + + @Test + public void testBinValue() { + + final int binIdValue = 2; + final byte[] binID = ByteBuffer.allocate( + 4).putInt( + binIdValue).array(); + + BinValue binValue = new BinValue( + binID, + BIN_VALUE); + + Assert.assertEquals( + BIN_VALUE, + binValue.getNormalizedValue(), + DELTA); + + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/AbstractLexicoderTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/AbstractLexicoderTest.java new file mode 100644 index 0000000..9a90806 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/AbstractLexicoderTest.java @@ -0,0 +1,73 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 
Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.lexicoder; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +import org.junit.Assert; +import org.junit.Test; + +public abstract class AbstractLexicoderTest> +{ + private NumberLexicoder lexicoder; + private T expectedMin; + private T expectedMax; + private T[] unsortedVals; + private Comparator comparator; + + public AbstractLexicoderTest( + final NumberLexicoder lexicoder, + final T expectedMin, + final T expectedMax, + final T[] unsortedVals, + final Comparator comparator ) { + super(); + this.lexicoder = lexicoder; + this.expectedMin = expectedMin; + this.expectedMax = expectedMax; + this.unsortedVals = unsortedVals; + this.comparator = comparator; + } + + @Test + public void testRanges() { + Assert.assertTrue(lexicoder.getMinimumValue().equals( + expectedMin)); + Assert.assertTrue(lexicoder.getMaximumValue().equals( + expectedMax)); + } + + @Test + public void testSortOrder() { + final List list = Arrays.asList(unsortedVals); + final Map sortedByteArrayToRawTypeMappings = new TreeMap<>( + comparator); + for (final T d : list) { + sortedByteArrayToRawTypeMappings.put( + lexicoder.toByteArray(d), + d); + } + Collections.sort(list); + int idx = 0; + final Set sortedByteArrays = sortedByteArrayToRawTypeMappings.keySet(); + for (final byte[] byteArray : sortedByteArrays) { + final T value = 
sortedByteArrayToRawTypeMappings.get(byteArray); + Assert.assertTrue(value.equals(list.get(idx++))); + } + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/DoubleLexicoderTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/DoubleLexicoderTest.java new file mode 100644 index 0000000..8098ff1 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/DoubleLexicoderTest.java @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.lexicoder; + +import com.google.common.primitives.SignedBytes; + +public class DoubleLexicoderTest extends + AbstractLexicoderTest +{ + public DoubleLexicoderTest() { + super( + Lexicoders.DOUBLE, + Double.MIN_VALUE, + Double.MAX_VALUE, + new Double[] { + -10d, + Double.MIN_VALUE, + 11d, + -14.2, + 14.2, + -100.002, + 100.002, + -11d, + Double.MAX_VALUE, + 0d + }, + SignedBytes.lexicographicalComparator()); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/IntegerLexicoderTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/IntegerLexicoderTest.java new file mode 100644 index 0000000..0e72b72 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/IntegerLexicoderTest.java @@ -0,0 +1,32 @@ 
+/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.lexicoder; + +import com.google.common.primitives.UnsignedBytes; + +public class IntegerLexicoderTest extends + AbstractLexicoderTest +{ + public IntegerLexicoderTest() { + super( + Lexicoders.INT, + Integer.MIN_VALUE, + Integer.MAX_VALUE, + new Integer[] { + -10, + Integer.MIN_VALUE, + 2678, + Integer.MAX_VALUE, + 0 + }, + UnsignedBytes.lexicographicalComparator()); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/LongLexicoderTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/LongLexicoderTest.java new file mode 100644 index 0000000..3a5c182 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/LongLexicoderTest.java @@ -0,0 +1,32 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.lexicoder; + +import com.google.common.primitives.UnsignedBytes; + +public class LongLexicoderTest extends + AbstractLexicoderTest +{ + public LongLexicoderTest() { + super( + Lexicoders.LONG, + Long.MIN_VALUE, + Long.MAX_VALUE, + new Long[] { + -10l, + Long.MIN_VALUE, + 2678l, + Long.MAX_VALUE, + 0l + }, + UnsignedBytes.lexicographicalComparator()); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/ShortLexicoderTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/ShortLexicoderTest.java new file mode 100644 index 0000000..2edd696 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/lexicoder/ShortLexicoderTest.java @@ -0,0 +1,32 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.lexicoder; + +import com.google.common.primitives.UnsignedBytes; + +public class ShortLexicoderTest extends + AbstractLexicoderTest +{ + public ShortLexicoderTest() { + super( + Lexicoders.SHORT, + Short.MIN_VALUE, + Short.MAX_VALUE, + new Short[] { + (short) -10, + Short.MIN_VALUE, + (short) 2678, + Short.MAX_VALUE, + (short) 0 + }, + UnsignedBytes.lexicographicalComparator()); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/BasicNumericDatasetTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/BasicNumericDatasetTest.java new file mode 100644 index 0000000..8186f92 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/BasicNumericDatasetTest.java @@ -0,0 +1,156 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericValue; + +import org.junit.Assert; +import org.junit.Test; + +public class BasicNumericDatasetTest +{ + + private double DELTA = 1e-15; + + private BasicNumericDataset basicNumericDatasetRanges = new BasicNumericDataset( + new NumericData[] { + new NumericRange( + 10, + 50), + new NumericRange( + 25, + 95), + new NumericRange( + -50, + 50) + }); + + private BasicNumericDataset basicNumericDatasetValues = new BasicNumericDataset( + new NumericData[] { + new NumericValue( + 25), + new NumericValue( + 60), + new NumericValue( + 0) + }); + + @Test + public void testNumericRangesMinValues() { + + int expectedCount = 3; + double[] expectedMinValues = new double[] { + 10, + 25, + -50 + }; + double[] mins = basicNumericDatasetRanges.getMinValuesPerDimension(); + + Assert.assertEquals( + expectedCount, + basicNumericDatasetRanges.getDimensionCount()); + + for (int i = 0; i < basicNumericDatasetRanges.getDimensionCount(); i++) { + Assert.assertEquals( + expectedMinValues[i], + mins[i], + DELTA); + } + + } + + @Test + public void testNumericRangesMaxValues() { + + int expectedCount = 3; + double[] expectedMaxValues = new double[] { + 50, + 95, + 50 + }; + double[] max = basicNumericDatasetRanges.getMaxValuesPerDimension(); + + Assert.assertEquals( + expectedCount, + basicNumericDatasetRanges.getDimensionCount()); + + for (int i = 0; i < 
basicNumericDatasetRanges.getDimensionCount(); i++) { + Assert.assertEquals( + expectedMaxValues[i], + max[i], + DELTA); + } + } + + @Test + public void testNumericRangesCentroidValues() { + + int expectedCount = 3; + double[] expectedCentroidValues = new double[] { + 30, + 60, + 0 + }; + double[] centroid = basicNumericDatasetRanges.getCentroidPerDimension(); + + Assert.assertEquals( + expectedCount, + basicNumericDatasetRanges.getDimensionCount()); + + for (int i = 0; i < basicNumericDatasetRanges.getDimensionCount(); i++) { + Assert.assertEquals( + expectedCentroidValues[i], + centroid[i], + DELTA); + } + + } + + @Test + public void testNumericValuesAllValues() { + + int expectedCount = 3; + + double[] expectedValues = new double[] { + 25, + 60, + 0 + }; + + double[] mins = basicNumericDatasetValues.getMinValuesPerDimension(); + double[] max = basicNumericDatasetValues.getMaxValuesPerDimension(); + double[] centroid = basicNumericDatasetValues.getCentroidPerDimension(); + + Assert.assertEquals( + expectedCount, + basicNumericDatasetValues.getDimensionCount()); + + for (int i = 0; i < basicNumericDatasetValues.getDimensionCount(); i++) { + Assert.assertEquals( + expectedValues[i], + mins[i], + DELTA); + Assert.assertEquals( + expectedValues[i], + max[i], + DELTA); + Assert.assertEquals( + expectedValues[i], + centroid[i], + DELTA); + } + + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericRangeTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericRangeTest.java new file mode 100644 index 0000000..db504e0 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericRangeTest.java @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright 
ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +import org.junit.Assert; +import org.junit.Test; + +public class NumericRangeTest +{ + + private double MINIMUM = 20; + private double MAXIMUM = 100; + private double CENTROID = 60; + private double DELTA = 1e-15; + + @Test + public void testNumericRangeValues() { + + NumericRange numericRange = new NumericRange( + MINIMUM, + MAXIMUM); + + Assert.assertEquals( + MINIMUM, + numericRange.getMin(), + DELTA); + Assert.assertEquals( + MAXIMUM, + numericRange.getMax(), + DELTA); + Assert.assertEquals( + CENTROID, + numericRange.getCentroid(), + DELTA); + Assert.assertTrue(numericRange.isRange()); + + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericValueTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericValueTest.java new file mode 100644 index 0000000..54567c0 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/data/NumericValueTest.java @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.data; + +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericValue; + +import org.junit.Assert; +import org.junit.Test; + +public class NumericValueTest +{ + + private double VALUE = 50; + private double DELTA = 1e-15; + + @Test + public void testNumericValue() { + + NumericValue numericValue = new NumericValue( + VALUE); + + Assert.assertEquals( + VALUE, + numericValue.getMin(), + DELTA); + Assert.assertEquals( + VALUE, + numericValue.getMax(), + DELTA); + Assert.assertEquals( + VALUE, + numericValue.getCentroid(), + DELTA); + Assert.assertFalse(numericValue.isRange()); + + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZOrderSFCTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZOrderSFCTest.java new file mode 100644 index 0000000..b5807c6 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/xz/XZOrderSFCTest.java @@ -0,0 +1,74 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.sfc.xz; + +import org.junit.Assert; +import org.junit.Test; + +import org.locationtech.sfcurve.geowave.index.dimension.BasicDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; + +public class XZOrderSFCTest +{ + + @Test + public void testIndex() { + double[] values = { + 42, + 43, + 57, + 59 + }; + // TODO Meaningful examination of results? + Assert.assertNotNull(createSFC().getId( + values)); + } + + @Test + public void testRangeDecomposition() { + NumericRange longBounds = new NumericRange( + 19.0, + 21.0); + NumericRange latBounds = new NumericRange( + 33.0, + 34.0); + NumericData[] dataPerDimension = { + longBounds, + latBounds + }; + MultiDimensionalNumericData query = new BasicNumericDataset( + dataPerDimension); + // TODO Meaningful examination of results? 
+ Assert.assertNotNull(createSFC().decomposeRangeFully( + query)); + } + + private XZOrderSFC createSFC() { + SFCDimensionDefinition[] dimensions = { + new SFCDimensionDefinition( + new BasicDimensionDefinition( + -180.0, + 180.0), + 32), + new SFCDimensionDefinition( + new BasicDimensionDefinition( + -90.0, + 90.0), + 32) + }; + return new XZOrderSFC( + dimensions); + } +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderSFCTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderSFCTest.java new file mode 100644 index 0000000..804f35d --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/sfc/zorder/ZOrderSFCTest.java @@ -0,0 +1,17 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
/**
 * Placeholder: this class currently contains no tests.
 */
public class ZOrderSFCTest
{
    // TODO: add unit tests for ZOrder implementation
}
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.CompoundIndexStrategy; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.NumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.dimension.BasicDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory.SFCType; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; +import org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexFactory; + +import org.junit.Assert; +import org.junit.Test; + +public class HashKeyIndexStrategyTest +{ + + private static final NumericDimensionDefinition[] SPATIAL_DIMENSIONS = new NumericDimensionDefinition[] { + new BasicDimensionDefinition( + -180, + 180), + new BasicDimensionDefinition( + -90, + 90) + }; + + private static final NumericIndexStrategy 
sfcIndexStrategy = TieredSFCIndexFactory.createSingleTierStrategy( + SPATIAL_DIMENSIONS, + new int[] { + 16, + 16 + }, + SFCType.HILBERT); + + private static final HashKeyIndexStrategy hashIdexStrategy = new HashKeyIndexStrategy( + 3); + private static final CompoundIndexStrategy compoundIndexStrategy = new CompoundIndexStrategy( + hashIdexStrategy, + sfcIndexStrategy); + + private static final NumericRange dimension1Range = new NumericRange( + 50.0, + 50.025); + private static final NumericRange dimension2Range = new NumericRange( + -20.5, + -20.455); + private static final MultiDimensionalNumericData sfcIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension1Range, + dimension2Range + }); + + @Test + public void testDistribution() { + final Map counts = new HashMap(); + int total = 0; + for (double x = 90; x < 180; x += 0.05) { + for (double y = 50; y < 90; y += 0.5) { + final NumericRange dimension1Range = new NumericRange( + x, + x + 0.002); + final NumericRange dimension2Range = new NumericRange( + y - 0.002, + y); + final MultiDimensionalNumericData sfcIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension1Range, + dimension2Range + }); + for (ByteArrayId id : hashIdexStrategy.getInsertionIds(sfcIndexedRange)) { + Long count = counts.get(id); + long nextcount = count == null ? 
1 : count + 1; + counts.put( + id, + nextcount); + total++; + } + } + } + + double mean = total / counts.size(); + double diff = 0.0; + for (Long count : counts.values()) { + diff += Math.pow( + mean - count, + 2); + } + double sd = Math.sqrt(diff / counts.size()); + assertTrue(sd < mean * 0.18); + } + + @Test + public void testBinaryEncoding() { + final byte[] bytes = PersistenceUtils.toBinary(compoundIndexStrategy); + final CompoundIndexStrategy deserializedStrategy = PersistenceUtils.fromBinary( + bytes, + CompoundIndexStrategy.class); + final byte[] bytes2 = PersistenceUtils.toBinary(deserializedStrategy); + Assert.assertArrayEquals( + bytes, + bytes2); + } + + @Test + public void testNumberOfDimensionsPerIndexStrategy() { + final int[] numDimensionsPerStrategy = compoundIndexStrategy.getNumberOfDimensionsPerIndexStrategy(); + Assert.assertEquals( + 0, + numDimensionsPerStrategy[0]); + Assert.assertEquals( + 2, + numDimensionsPerStrategy[1]); + } + + @Test + public void testGetNumberOfDimensions() { + final int numDimensions = compoundIndexStrategy.getNumberOfDimensions(); + Assert.assertEquals( + 2, + numDimensions); + } + + @Test + public void testGetCoordinatesPerDimension() { + + final NumericRange dimension1Range = new NumericRange( + 20.01, + 20.02); + final NumericRange dimension2Range = new NumericRange( + 30.51, + 30.59); + final MultiDimensionalNumericData sfcIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension1Range, + dimension2Range + }); + for (ByteArrayId id : compoundIndexStrategy.getInsertionIds(sfcIndexedRange)) { + MultiDimensionalCoordinates coords = compoundIndexStrategy.getCoordinatesPerDimension(id); + assertTrue(coords.getCoordinate( + 0).getCoordinate() > 0); + assertTrue(coords.getCoordinate( + 1).getCoordinate() > 0); + MultiDimensionalNumericData nd = compoundIndexStrategy.getRangeForId(id); + assertEquals( + 20.02, + nd.getMaxValuesPerDimension()[0], + 0.1); + assertEquals( + 30.59, + 
nd.getMaxValuesPerDimension()[1], + 0.2); + assertEquals( + 20.01, + nd.getMinValuesPerDimension()[0], + 0.1); + assertEquals( + 30.57, + nd.getMinValuesPerDimension()[1], + 0.2); + } + } + + @Test + public void testGetQueryRangesWithMaximumNumberOfRanges() { + final List sfcIndexRanges = sfcIndexStrategy.getQueryRanges(sfcIndexedRange); + final List ranges = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + for (final ByteArrayRange r2 : sfcIndexRanges) { + final ByteArrayId start = compoundIndexStrategy.composeByteArrayId( + new ByteArrayId( + new byte[] { + (byte) i + }), + r2.getStart()); + final ByteArrayId end = compoundIndexStrategy.composeByteArrayId( + new ByteArrayId( + new byte[] { + (byte) i + }), + r2.getEnd()); + ranges.add(new ByteArrayRange( + start, + end)); + } + } + final Set testRanges = new HashSet<>( + ranges); + final Set compoundIndexRanges = new HashSet<>( + compoundIndexStrategy.getQueryRanges(sfcIndexedRange)); + Assert.assertTrue(testRanges.containsAll(compoundIndexRanges)); + Assert.assertTrue(compoundIndexRanges.containsAll(testRanges)); + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/simple/RoundRobinKeyIndexStrategyTest.java b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/simple/RoundRobinKeyIndexStrategyTest.java new file mode 100644 index 0000000..43f52d6 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/simple/RoundRobinKeyIndexStrategyTest.java @@ -0,0 +1,201 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.junit.Assert; +import org.junit.Test; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.CompoundIndexStrategy; +import org.locationtech.sfcurve.geowave.index.MultiDimensionalCoordinates; +import org.locationtech.sfcurve.geowave.index.NumericIndexStrategy; +import org.locationtech.sfcurve.geowave.index.PersistenceUtils; +import org.locationtech.sfcurve.geowave.index.dimension.BasicDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.dimension.NumericDimensionDefinition; +import org.locationtech.sfcurve.geowave.index.sfc.SFCFactory.SFCType; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; +import org.locationtech.sfcurve.geowave.index.sfc.tiered.TieredSFCIndexFactory; + +public class RoundRobinKeyIndexStrategyTest +{ + + private static final NumericDimensionDefinition[] SPATIAL_DIMENSIONS = new NumericDimensionDefinition[] { + new BasicDimensionDefinition( + -180, + 180), + new BasicDimensionDefinition( + -90, + 90) + }; + + private static final NumericIndexStrategy sfcIndexStrategy = 
TieredSFCIndexFactory.createSingleTierStrategy( + SPATIAL_DIMENSIONS, + new int[] { + 16, + 16 + }, + SFCType.HILBERT); + + private static final CompoundIndexStrategy compoundIndexStrategy = new CompoundIndexStrategy( + new RoundRobinKeyIndexStrategy(), + sfcIndexStrategy); + + private static final NumericRange dimension1Range = new NumericRange( + 50.0, + 50.025); + private static final NumericRange dimension2Range = new NumericRange( + -20.5, + -20.455); + private static final MultiDimensionalNumericData sfcIndexedRange = new BasicNumericDataset( + new NumericData[] { + dimension1Range, + dimension2Range + }); + + @Test + public void testBinaryEncoding() { + final byte[] bytes = PersistenceUtils.toBinary(compoundIndexStrategy); + final CompoundIndexStrategy deserializedStrategy = PersistenceUtils.fromBinary( + bytes, + CompoundIndexStrategy.class); + final byte[] bytes2 = PersistenceUtils.toBinary(deserializedStrategy); + Assert.assertArrayEquals( + bytes, + bytes2); + } + + @Test + public void testNumberOfDimensionsPerIndexStrategy() { + final int[] numDimensionsPerStrategy = compoundIndexStrategy.getNumberOfDimensionsPerIndexStrategy(); + Assert.assertEquals( + 0, + numDimensionsPerStrategy[0]); + Assert.assertEquals( + 2, + numDimensionsPerStrategy[1]); + } + + @Test + public void testGetNumberOfDimensions() { + final int numDimensions = compoundIndexStrategy.getNumberOfDimensions(); + Assert.assertEquals( + 2, + numDimensions); + } + + @Test + public void testGetQueryRangesWithMaximumNumberOfRanges() { + final List sfcIndexRanges = sfcIndexStrategy.getQueryRanges(sfcIndexedRange); + final List ranges = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + for (final ByteArrayRange r2 : sfcIndexRanges) { + final ByteArrayId start = compoundIndexStrategy.composeByteArrayId( + new ByteArrayId( + new byte[] { + (byte) i + }), + r2.getStart()); + final ByteArrayId end = compoundIndexStrategy.composeByteArrayId( + new ByteArrayId( + new byte[] { + (byte) i + }), + 
r2.getEnd()); + ranges.add(new ByteArrayRange( + start, + end)); + } + } + final Set testRanges = new HashSet<>( + ranges); + final Set compoundIndexRanges = new HashSet<>( + compoundIndexStrategy.getQueryRanges(sfcIndexedRange)); + Assert.assertTrue(testRanges.containsAll(compoundIndexRanges)); + Assert.assertTrue(compoundIndexRanges.containsAll(testRanges)); + } + + @Test + public void testUniformityAndLargeKeySet() { + final RoundRobinKeyIndexStrategy strategy = new RoundRobinKeyIndexStrategy( + 512); + final Map countMap = new HashMap(); + for (int i = 0; i < 2048; i++) { + final List ids = strategy.getInsertionIds(sfcIndexedRange); + assertEquals( + 1, + ids.size()); + final ByteArrayId key = ids.get(0); + if (countMap.containsKey(key)) { + countMap.put( + key, + countMap.get(key) + 1); + } + else { + countMap.put( + key, + 1); + } + + } + for (final Integer i : countMap.values()) { + assertEquals( + 4, + i.intValue()); + } + } + + @Test + public void testGetInsertionIds() { + final List ids = new ArrayList<>(); + + final List ids2 = sfcIndexStrategy.getInsertionIds( + sfcIndexedRange, + 1); + for (int i = 0; i < 3; i++) { + for (final ByteArrayId id2 : ids2) { + ids.add(compoundIndexStrategy.composeByteArrayId( + new ByteArrayId( + new byte[] { + (byte) i + }), + id2)); + } + } + final Set testIds = new HashSet<>( + ids); + final Set compoundIndexIds = new HashSet<>( + compoundIndexStrategy.getInsertionIds( + sfcIndexedRange, + 8)); + Assert.assertTrue(testIds.containsAll(compoundIndexIds)); + + final MultiDimensionalCoordinates sfcIndexCoordinatesPerDim = sfcIndexStrategy.getCoordinatesPerDimension(ids2 + .get(0)); + final MultiDimensionalCoordinates coordinatesPerDim = compoundIndexStrategy.getCoordinatesPerDimension(ids + .get(0)); + + Assert.assertTrue(sfcIndexCoordinatesPerDim.equals(coordinatesPerDim)); + } + +} diff --git a/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/simple/SimpleNumericIndexStrategyTest.java 
b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/simple/SimpleNumericIndexStrategyTest.java new file mode 100644 index 0000000..89df174 --- /dev/null +++ b/geowave/src/test/java/org/locationtech/sfcurve/geowave/index/simple/SimpleNumericIndexStrategyTest.java @@ -0,0 +1,226 @@ +/******************************************************************************* + * Copyright (c) 2013-2017 Contributors to the Eclipse Foundation + * + * See the NOTICE file distributed with this work for additional + * information regarding copyright ownership. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Apache License, + * Version 2.0 which accompanies this distribution and is available at + * http://www.apache.org/licenses/LICENSE-2.0.txt + ******************************************************************************/ +package org.locationtech.sfcurve.geowave.index.simple; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.google.common.primitives.UnsignedBytes; + +import org.locationtech.sfcurve.geowave.index.ByteArrayId; +import org.locationtech.sfcurve.geowave.index.ByteArrayRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.BasicNumericDataset; +import org.locationtech.sfcurve.geowave.index.sfc.data.MultiDimensionalNumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericData; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericRange; +import org.locationtech.sfcurve.geowave.index.sfc.data.NumericValue; + +@RunWith(Parameterized.class) +public class SimpleNumericIndexStrategyTest +{ + + private final SimpleNumericIndexStrategy strategy; + + public SimpleNumericIndexStrategyTest( 
+ final SimpleNumericIndexStrategy strategy ) { + this.strategy = strategy; + } + + @Parameters + public static Collection instancesToTest() { + return Arrays.asList( + new Object[] { + new SimpleShortIndexStrategy() + }, + new Object[] { + new SimpleIntegerIndexStrategy() + }, + new Object[] { + new SimpleLongIndexStrategy() + }); + } + + private static long castToLong( + final Number n ) { + if (n instanceof Short) { + return ((short) n.shortValue()); + } + else if (n instanceof Integer) { + return ((int) n.intValue()); + } + else if (n instanceof Long) { + return (long) n.longValue(); + } + else { + throw new UnsupportedOperationException( + "only supports casting Short, Integer, and Long"); + } + } + + private static MultiDimensionalNumericData getIndexedRange( + final long value ) { + return getIndexedRange( + value, + value); + } + + private static MultiDimensionalNumericData getIndexedRange( + final long min, + final long max ) { + NumericData[] dataPerDimension; + if (min == max) { + dataPerDimension = new NumericData[] { + new NumericValue( + min) + }; + } + else { + dataPerDimension = new NumericData[] { + new NumericRange( + min, + max) + }; + } + return new BasicNumericDataset( + dataPerDimension); + } + + private byte[] getByteArray( + final long value ) { + final MultiDimensionalNumericData indexedRange = getIndexedRange(value); + final List insertionIds = strategy.getInsertionIds(indexedRange); + final ByteArrayId insertionId = insertionIds.get(0); + return insertionId.getBytes(); + } + + @Test + public void testGetQueryRangesPoint() { + final MultiDimensionalNumericData indexedRange = getIndexedRange(10l); + final List ranges = strategy.getQueryRanges(indexedRange); + Assert.assertEquals( + ranges.size(), + 1); + final ByteArrayRange range = ranges.get(0); + final ByteArrayId start = range.getStart(); + final ByteArrayId end = range.getEnd(); + Assert.assertTrue(Arrays.equals( + start.getBytes(), + end.getBytes())); + Assert.assertEquals( + 10L, + 
castToLong(strategy.getLexicoder().fromByteArray( + start.getBytes()))); + } + + @Test + public void testGetQueryRangesRange() { + final long startValue = 10; + final long endValue = 15; + final MultiDimensionalNumericData indexedRange = getIndexedRange( + startValue, + endValue); + final List ranges = strategy.getQueryRanges(indexedRange); + Assert.assertEquals( + ranges.size(), + 1); + final ByteArrayRange range = ranges.get(0); + final ByteArrayId start = range.getStart(); + final ByteArrayId end = range.getEnd(); + Assert.assertEquals( + castToLong(strategy.getLexicoder().fromByteArray( + start.getBytes())), + startValue); + Assert.assertEquals( + castToLong(strategy.getLexicoder().fromByteArray( + end.getBytes())), + endValue); + } + + /** + * Check that lexicographical sorting of the byte arrays yields the same + * sort order as sorting the values + */ + @Test + public void testRangeSortOrder() { + final List values = Arrays.asList( + 10l, + 0l, + 15l, + -275l, + 982l, + 430l, + -1l, + 1l, + 82l); + final List byteArrays = new ArrayList<>( + values.size()); + for (final long value : values) { + final byte[] bytes = getByteArray(value); + byteArrays.add(bytes); + } + Collections.sort(values); + Collections.sort( + byteArrays, + UnsignedBytes.lexicographicalComparator()); + final List convertedValues = new ArrayList<>( + values.size()); + for (final byte[] bytes : byteArrays) { + final long value = castToLong(strategy.getLexicoder().fromByteArray( + bytes)); + convertedValues.add(value); + } + Assert.assertTrue(values.equals(convertedValues)); + } + + @Test + public void testGetInsertionIdsPoint() { + final long pointValue = 5926; + final MultiDimensionalNumericData indexedData = getIndexedRange(pointValue); + final List insertionIds = strategy.getInsertionIds(indexedData); + Assert.assertEquals( + insertionIds.size(), + 1); + final ByteArrayId insertionId = insertionIds.get(0); + Assert.assertEquals( + castToLong(strategy.getLexicoder().fromByteArray( + 
insertionId.getBytes())), + pointValue); + } + + @Test + public void testGetInsertionIdsRange() { + final long startValue = 9876; + final long endValue = startValue + 15; + final MultiDimensionalNumericData indexedData = getIndexedRange( + startValue, + endValue); + final List insertionIds = strategy.getInsertionIds(indexedData); + Assert.assertEquals( + insertionIds.size(), + (int) ((endValue - startValue) + 1)); + int i = 0; + for (final ByteArrayId insertionId : insertionIds) { + Assert.assertEquals( + castToLong(strategy.getLexicoder().fromByteArray( + insertionId.getBytes())), + startValue + i++); + } + } +} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 3911c29..d7d1f5a 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,6 +1,10 @@ import sbt._ object Dependencies { - def scalaTest = "org.scalatest" %% "scalatest" % "2.2.0" - def uzaygezen = "com.google.uzaygezen" % "uzaygezen-core" % "0.2" + def jsonLib = "net.sf.json-lib" % "json-lib" % "2.4" classifier "jdk15" + def log4j12 = "org.slf4j" % "slf4j-log4j12" % "1.7.21" + def scalaTest = "org.scalatest" %% "scalatest" % "2.2.0" + def uzaygezen = "com.google.uzaygezen" % "uzaygezen-core" % "0.2" + def junit = "junit" % "junit" % "4.12" + def junitIface = "com.novocode" % "junit-interface" % "0.11" }