From e938645baa0ede65a0da69369ff9af80a74e88c9 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Tue, 25 Nov 2014 16:01:00 +0300 Subject: [PATCH 01/15] Add a new Catalog object GI - Created "GlobalIndex.java" class which implements "CatalogObject.java". - Implemented "CatalogObject.java" methods and added new methods for "GlobalIndex.java" class. - Created "GlobalIndexRecord.java" class to hold information about a single global index record. - Updated "CatalogObjects.thrift" to add a new catalog object type "GLOBAL_INDEX". --- common/thrift/CatalogObjects.thrift | 1 + .../spatialImpala/catalog/GlobalIndex.java | 68 +++++++++++++++++++ .../catalog/GlobalIndexRecord.java | 31 +++++++++ .../spatialImpala/catalog/Rectangle.java | 21 ++++++ 4 files changed, 121 insertions(+) create mode 100644 fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java create mode 100644 fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java create mode 100644 fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift index c051dc8383..18e0ee2e0b 100644 --- a/common/thrift/CatalogObjects.thrift +++ b/common/thrift/CatalogObjects.thrift @@ -32,6 +32,7 @@ enum TCatalogObjectType { TABLE, VIEW, FUNCTION, + GLOBAL_INDEX, DATA_SOURCE, ROLE, PRIVILEGE, diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java new file mode 100644 index 0000000000..1b897a31c6 --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -0,0 +1,68 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.catalog; + +import com.cloudera.impala.catalog.Catalog; +import com.cloudera.impala.catalog.CatalogObject; +import com.cloudera.impala.thrift.TCatalogObjectType; + +import java.util.ArrayList; +import java.util.List; +import java.util.HashMap; + +/* + * Global Index's catalog class responsible for holding the global indexes' + * records of a specific Spatial Table. + */ +public class GlobalIndex implements CatalogObject { + private static String GLOBAL_INDEX_SUFFIX = "_global_index"; + private HashMap globalIndexMap = new HashMap(); + private final String tableName_; + private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; + + private GlobalIndex(String tableName, + HashMap globalIndexMap) { + this.tableName_ = tableName; + this.globalIndexMap = globalIndexMap; + } + + public List getGIsforPoint(int x, int y) { + List globalIndexes = new ArrayList(); + for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { + if (gIRecord.getMBR().includesPoint(x, y)) + globalIndexes.add(gIRecord); + } + return globalIndexes; + } + + public GlobalIndexRecord getGIRecordforTag(String tag) { + return globalIndexMap.get(tag); + } + + public TCatalogObjectType getCatalogObjectType() { + return TCatalogObjectType.GLOBAL_INDEX; + } + + public String getName() { + return tableName_ + GLOBAL_INDEX_SUFFIX; + } + + public long getCatalogVersion() { + return catalogVersion_; + } + + public void setCatalogVersion(long newVersion) { + catalogVersion_ = newVersion; + } + + public boolean isLoaded() { + return true; + } + + // TODO: Add methods fromThrift and toThrift after creating TGlobalIndex. + + public static GlobalIndex loadAndCreateGlobalIndex(String tableName) { + // TODO: Add file path as a param. and load the indexes into an object. + return new GlobalIndex(tableName, null); + } +} \ No newline at end of file diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java new file mode 100644 index 0000000000..a0f4d7b5f1 --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java @@ -0,0 +1,31 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.catalog; + +/* + * Global Index Record class responsible for holding a single + * global index partition. + */ +public class GlobalIndexRecord { + private int id; + private String tag; + private Rectangle mbr; + + public GlobalIndexRecord(int id, String tag, Rectangle mbr) { + this.id = id; + this.tag = tag; + this.mbr = mbr; + } + + public int getId() { + return id; + } + + public String getTag() { + return tag; + } + + public Rectangle getMBR() { + return mbr; + } +} \ No newline at end of file diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java new file mode 100644 index 0000000000..07315698f2 --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -0,0 +1,21 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.catalog; + +public class Rectangle { + private double x1; + private double y1; + private double x2; + private double y2; + + public Rectangle(double x1, double y1, double x2, double y2) { + this.x1 = x1; + this.y1 = y1; + this.x2 = x2; + this.y2 = y2; + } + + public boolean includesPoint(int x, int y) { + return (x >= x1) && (x <= x2) && (y >= y1) && (y <= y2); + } +} \ No newline at end of file From d578ebba33beed5c35a65d9664690e9ded334bb7 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Wed, 26 Nov 2014 10:40:22 +0300 Subject: [PATCH 02/15] Add GI to catalog object Table and handle getting it - Added a reference to "GlobalIndex.java" instance in "Table.java" catalog object class. - Added a method to get that instance and to call the loading of the GlobalIndex from HDFS file path. --- .../com/cloudera/impala/catalog/Table.java | 18 ++++++++++++++++++ .../spatialImpala/catalog/GlobalIndex.java | 1 + 2 files changed, 19 insertions(+) diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Table.java b/fe/src/main/java/com/cloudera/impala/catalog/Table.java index cda21d8234..07d2f57cc6 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Table.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Table.java @@ -38,6 +38,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.gistic.spatialImpala.catalog.GlobalIndex; + /** * Base class for table metadata. * @@ -60,6 +62,7 @@ public abstract class Table implements CatalogObject { protected final Db db_; protected final String name_; protected final String owner_; + protected GlobalIndex global_index_; protected TTableDescriptor tableDesc_; protected List fields_; protected TAccessLevel accessLevel_ = TAccessLevel.READ_WRITE; @@ -93,6 +96,7 @@ protected Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, owner_ = owner; colsByPos_ = Lists.newArrayList(); colsByName_ = Maps.newHashMap(); + global_index_ = initializeGlobalIndex(name_, msTable); lastDdlTime_ = (msTable_ != null) ? CatalogServiceCatalog.getLastDdlTime(msTable_) : -1; } @@ -109,6 +113,20 @@ protected Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, public abstract void load(Table oldValue, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException; + public GlobalIndex getGlobalIndexIfAny() { + return global_index_; + } + + private GlobalIndex initializeGlobalIndex(String tableName, org.apache.hadoop.hive.metastore.api.Table msTbl) { + if (msTbl == null) + return null; + + Map params = msTbl.getParameters(); + String globalIndexPath = params.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); + // TODO: Pass the global index path. + return GlobalIndex.loadAndCreateGlobalIndex(tableName); + } + public void addColumn(Column col) { colsByPos_.add(col); colsByName_.put(col.getName().toLowerCase(), col); diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index 1b897a31c6..aaadeed3af 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -16,6 +16,7 @@ */ public class GlobalIndex implements CatalogObject { private static String GLOBAL_INDEX_SUFFIX = "_global_index"; + public static String GLOBAL_INDEX_TABLE_PARAM = "globalIndex"; private HashMap globalIndexMap = new HashMap(); private final String tableName_; private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; From b17a554da43269ef38beb58593a98228f500b20f Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Wed, 26 Nov 2014 11:25:46 +0300 Subject: [PATCH 03/15] Handle updating Analyzer of CreateTableStmt - Updated method analyze() in "CreateTableStmt.java" to handle GI path. - Used "HdfsUri.java" to analyze the path. --- .../com/cloudera/impala/analysis/CreateTableStmt.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java index 9c347dc98e..0c68777d21 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java @@ -37,6 +37,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import org.gistic.spatialImpala.catalog.GlobalIndex; + /** * Represents a CREATE TABLE statement. */ @@ -205,6 +207,12 @@ public void analyze(Analyzer analyzer) throws AnalysisException { } if (location_ != null) location_.analyze(analyzer, Privilege.ALL); + + // If the table is Spatial, ensure that the global index path exists + // and is valid. + String globalIndexPathIfAny = tblProperties_.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); + if (globalIndexPathIfAny != null) + (new HdfsUri(globalIndexPathIfAny)).analyze(analyzer, Privilege.ALL); analyzeRowFormatValue(rowFormat_.getFieldDelimiter()); analyzeRowFormatValue(rowFormat_.getLineDelimiter()); From 19f665aa68848e07073834c2e4fe0bfba04456be Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Wed, 26 Nov 2014 13:39:06 +0300 Subject: [PATCH 04/15] Create thrift objects for GlobalIndex objects - Created thrift structs: "TGlobalIndex", "TGlobalIndexRecord" and "TRectangle". - Handled the mapping between the java classes and the thrift structs. - Added methods "toThrift()" and "fromThrift()" to related java classes. - Added "TGlobalIndex" to "TTable" as an optional instance. - Handled transfroming "GlobalIndex" to thrift in "Table.java". - Handled transforming "TGlobalIndex" to "GlobalIndex" in "Table.java". --- common/thrift/CatalogObjects.thrift | 32 +++++++++++++++++++ .../com/cloudera/impala/catalog/Table.java | 7 ++++ .../spatialImpala/catalog/GlobalIndex.java | 21 +++++++++++- .../catalog/GlobalIndexRecord.java | 12 +++++++ .../spatialImpala/catalog/Rectangle.java | 10 ++++++ 5 files changed, 81 insertions(+), 1 deletion(-) diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift index 18e0ee2e0b..6331dc816f 100644 --- a/common/thrift/CatalogObjects.thrift +++ b/common/thrift/CatalogObjects.thrift @@ -266,6 +266,35 @@ struct TDataSourceTable { 2: required string init_string } +// Represents a rectangle used in a global index record for spatial tables. +struct TRectangle { + 1: required double x1 + 2: required double y1 + 3: required double x2 + 4: required double y2 +} + +// Represents a global index record for spatial tables. +struct TGlobalIndexRecord { + // Id of the record. + 1: required i32 id + + // Tag of the record. + 2: required string tag + + // MBR of the record. + 3: required TRectangle mbr +} + +// Represents a global index used for spatial tables. +struct TGlobalIndex { + // Name of the table that this global index belongs to. + 1: required string tbl_name + + // Map of global indexes' records. + 2: required map globalIndexMap +} + // Represents a table or view. struct TTable { // Name of the parent database. Case insensitive, expected to be stored as lowercase. @@ -310,6 +339,9 @@ struct TTable { // Set iff this is a table from an external data source 13: optional TDataSourceTable data_source_table + + // Set iff this table is spatial with initialized global index. + 14: optional TGlobalIndex globalIndex; } // Represents a database. diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Table.java b/fe/src/main/java/com/cloudera/impala/catalog/Table.java index 07d2f57cc6..149a105ed7 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Table.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Table.java @@ -35,6 +35,7 @@ import com.cloudera.impala.thrift.TTable; import com.cloudera.impala.thrift.TTableDescriptor; import com.cloudera.impala.thrift.TTableStats; +import com.cloudera.impala.thrift.TGlobalIndex; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -262,6 +263,8 @@ protected void loadFromThrift(TTable thriftTable) throws TableLoadingException { // Default to READ_WRITE access if the field is not set. accessLevel_ = thriftTable.isSetAccess_level() ? thriftTable.getAccess_level() : TAccessLevel.READ_WRITE; + + global_index_ = GlobalIndex.fromThrift(thriftTable.getGlobalIndex()); } /** @@ -300,6 +303,10 @@ public TTable toThrift() { table.setTable_stats(new TTableStats()); table.getTable_stats().setNum_rows(numRows_); } + + if (global_index_ != null) + table.setGlobalIndex(global_index_.toThrift()); + return table; } diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index aaadeed3af..c1ec990022 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -5,10 +5,13 @@ import com.cloudera.impala.catalog.Catalog; import com.cloudera.impala.catalog.CatalogObject; import com.cloudera.impala.thrift.TCatalogObjectType; +import com.cloudera.impala.thrift.TGlobalIndex; +import com.cloudera.impala.thrift.TGlobalIndexRecord; import java.util.ArrayList; import java.util.List; import java.util.HashMap; +import java.util.Map.Entry; /* * Global Index's catalog class responsible for holding the global indexes' @@ -17,7 +20,7 @@ public class GlobalIndex implements CatalogObject { private static String GLOBAL_INDEX_SUFFIX = "_global_index"; public static String GLOBAL_INDEX_TABLE_PARAM = "globalIndex"; - private HashMap globalIndexMap = new HashMap(); + private HashMap globalIndexMap; private final String tableName_; private long catalogVersion_ = Catalog.INITIAL_CATALOG_VERSION; @@ -59,6 +62,14 @@ public void setCatalogVersion(long newVersion) { public boolean isLoaded() { return true; } + + public TGlobalIndex toThrift() { + HashMap tGlobalIndexMap = new HashMap(); + for (Entry gIRecord : globalIndexMap.entrySet()) { + tGlobalIndexMap.put(gIRecord.getKey(), gIRecord.getValue().toThrift()); + } + return new TGlobalIndex(tableName_, tGlobalIndexMap); + } // TODO: Add methods fromThrift and toThrift after creating TGlobalIndex. @@ -66,4 +77,12 @@ public static GlobalIndex loadAndCreateGlobalIndex(String tableName) { // TODO: Add file path as a param. and load the indexes into an object. return new GlobalIndex(tableName, null); } + + public static GlobalIndex fromThrift(TGlobalIndex tGlobalIndex) { + HashMap gIMap = new HashMap(); + for (Entry gIRecord : tGlobalIndex.getGlobalIndexMap().entrySet()) { + gIMap.put(gIRecord.getKey(), GlobalIndexRecord.fromThrift(gIRecord.getValue())); + } + return new GlobalIndex(tGlobalIndex.getTbl_name(), gIMap); + } } \ No newline at end of file diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java index a0f4d7b5f1..aa8e19a4c9 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndexRecord.java @@ -2,6 +2,9 @@ package org.gistic.spatialImpala.catalog; +import com.cloudera.impala.thrift.TGlobalIndexRecord; +import com.cloudera.impala.thrift.TRectangle; + /* * Global Index Record class responsible for holding a single * global index partition. @@ -28,4 +31,13 @@ public String getTag() { public Rectangle getMBR() { return mbr; } + + public TGlobalIndexRecord toThrift() { + return new TGlobalIndexRecord(this.id, this.tag, this.mbr.toThrift()); + } + + public static GlobalIndexRecord fromThrift(TGlobalIndexRecord gIRecord) { + TRectangle rect = gIRecord.getMbr(); + return new GlobalIndexRecord(gIRecord.getId(), gIRecord.getTag(), Rectangle.fromThrift(rect)); + } } \ No newline at end of file diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index 07315698f2..700528e833 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -2,6 +2,8 @@ package org.gistic.spatialImpala.catalog; +import com.cloudera.impala.thrift.TRectangle; + public class Rectangle { private double x1; private double y1; @@ -18,4 +20,12 @@ public Rectangle(double x1, double y1, double x2, double y2) { public boolean includesPoint(int x, int y) { return (x >= x1) && (x <= x2) && (y >= y1) && (y <= y2); } + + public TRectangle toThrift() { + return new TRectangle(this.x1, this.y1, this.x2, this.y2); + } + + public static Rectangle fromThrift(TRectangle rect) { + return new Rectangle(rect.getX1(), rect.getY1(), rect.getX2(), rect.getY2()); + } } \ No newline at end of file From bf159c91eee69d95aae60b5e7f2a779d4e413df5 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Wed, 26 Nov 2014 15:44:54 +0300 Subject: [PATCH 05/15] Handle reading the Global Index file - Added method "loadGlobalIndex()" which loads the global index records provided the path of the file. - Handled returning loaded GlobalIndex to the Table. - Handled logging errors in case of a failure while parsing the file. --- .../com/cloudera/impala/catalog/Table.java | 3 +- .../spatialImpala/catalog/GlobalIndex.java | 69 +++++++++++++++++-- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Table.java b/fe/src/main/java/com/cloudera/impala/catalog/Table.java index 149a105ed7..95110d9d84 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Table.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Table.java @@ -124,8 +124,7 @@ private GlobalIndex initializeGlobalIndex(String tableName, org.apache.hadoop.hi Map params = msTbl.getParameters(); String globalIndexPath = params.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); - // TODO: Pass the global index path. - return GlobalIndex.loadAndCreateGlobalIndex(tableName); + return GlobalIndex.loadAndCreateGlobalIndex(tableName, globalIndexPath); } public void addColumn(Column col) { diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index c1ec990022..b26cca7926 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -2,12 +2,19 @@ package org.gistic.spatialImpala.catalog; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.Logger; + import com.cloudera.impala.catalog.Catalog; import com.cloudera.impala.catalog.CatalogObject; +import com.cloudera.impala.common.FileSystemUtil; import com.cloudera.impala.thrift.TCatalogObjectType; import com.cloudera.impala.thrift.TGlobalIndex; import com.cloudera.impala.thrift.TGlobalIndexRecord; +import java.io.IOException; +import java.util.Scanner; import java.util.ArrayList; import java.util.List; import java.util.HashMap; @@ -18,6 +25,9 @@ * records of a specific Spatial Table. */ public class GlobalIndex implements CatalogObject { + private static final Logger LOG = Logger.getLogger(GlobalIndex.class); + + private static String GLOBAL_INDEX_READ_EXCEPTION_MSG = "Couldn't parse Global Index file: "; private static String GLOBAL_INDEX_SUFFIX = "_global_index"; public static String GLOBAL_INDEX_TABLE_PARAM = "globalIndex"; private HashMap globalIndexMap; @@ -71,11 +81,60 @@ public TGlobalIndex toThrift() { return new TGlobalIndex(tableName_, tGlobalIndexMap); } - // TODO: Add methods fromThrift and toThrift after creating TGlobalIndex. - - public static GlobalIndex loadAndCreateGlobalIndex(String tableName) { - // TODO: Add file path as a param. and load the indexes into an object. - return new GlobalIndex(tableName, null); + private static HashMap loadGlobalIndex(String globalIndexPath) { + Path gIPath = new Path(globalIndexPath.trim()); + gIPath = FileSystemUtil.createFullyQualifiedPath(gIPath); + String data = null; + try { + data = FileSystemUtil.readFile(gIPath); + } catch (IOException e) { + LOG.error(GLOBAL_INDEX_READ_EXCEPTION_MSG + globalIndexPath); + return null; + } + + if (data == null || data.length() == 0) { + LOG.error(GLOBAL_INDEX_READ_EXCEPTION_MSG + globalIndexPath); + return null; + } + + HashMap gIMap = new HashMap(); + Scanner scanner = new Scanner(data); + while (scanner.hasNext()) { + String[] separatedRecord = scanner.next().split(","); + if (separatedRecord.length != 6) { + LOG.error(GLOBAL_INDEX_READ_EXCEPTION_MSG + globalIndexPath); + scanner.close(); + return null; + } + + int id = 0; + double x1 = 0; + double y1 = 0; + double x2 = 0; + double y2 = 0; + + try { + id = Integer.parseInt(separatedRecord[0]); + x1 = Double.parseDouble(separatedRecord[1]); + y1 = Double.parseDouble(separatedRecord[2]); + x2 = Double.parseDouble(separatedRecord[3]); + y2 = Double.parseDouble(separatedRecord[4]); + } catch (Exception e) { + LOG.error(GLOBAL_INDEX_READ_EXCEPTION_MSG + globalIndexPath); + scanner.close(); + return null; + } + + GlobalIndexRecord gIRecord = new GlobalIndexRecord(id, separatedRecord[5], new Rectangle(x1, y1, x2, y2)); + gIMap.put(separatedRecord[5], gIRecord); + } + scanner.close(); + return gIMap; + } + + public static GlobalIndex loadAndCreateGlobalIndex(String tableName, String globalIndexPath) { + HashMap gIMap = loadGlobalIndex(globalIndexPath); + return gIMap != null ? new GlobalIndex(tableName, gIMap) : null; } public static GlobalIndex fromThrift(TGlobalIndex tGlobalIndex) { From b16a369c28cd23b10f2022d7314602948c34332e Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Thu, 27 Nov 2014 16:03:08 +0300 Subject: [PATCH 06/15] Handling not creating global index if table properties not provided. --- .../com/cloudera/impala/analysis/CreateTableStmt.java | 10 ++++++---- .../main/java/com/cloudera/impala/catalog/Table.java | 6 ++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java index 0c68777d21..31f0c36345 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java @@ -210,10 +210,12 @@ public void analyze(Analyzer analyzer) throws AnalysisException { // If the table is Spatial, ensure that the global index path exists // and is valid. - String globalIndexPathIfAny = tblProperties_.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); - if (globalIndexPathIfAny != null) - (new HdfsUri(globalIndexPathIfAny)).analyze(analyzer, Privilege.ALL); - + if (tblProperties_ != null) { + String globalIndexPathIfAny = tblProperties_.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); + if (globalIndexPathIfAny != null) + (new HdfsUri(globalIndexPathIfAny)).analyze(analyzer, Privilege.ALL); + } + analyzeRowFormatValue(rowFormat_.getFieldDelimiter()); analyzeRowFormatValue(rowFormat_.getLineDelimiter()); analyzeRowFormatValue(rowFormat_.getEscapeChar()); diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Table.java b/fe/src/main/java/com/cloudera/impala/catalog/Table.java index 95110d9d84..c00729d996 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Table.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Table.java @@ -123,7 +123,13 @@ private GlobalIndex initializeGlobalIndex(String tableName, org.apache.hadoop.hi return null; Map params = msTbl.getParameters(); + if (params == null) + return null; + String globalIndexPath = params.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); + if (globalIndexPath == null) + return null; + return GlobalIndex.loadAndCreateGlobalIndex(tableName, globalIndexPath); } From 3c9adc1e8c78bb1ee4f652be767b3f9b18d2bc91 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Sun, 30 Nov 2014 13:24:19 +0300 Subject: [PATCH 07/15] Minimize changes in Impala packages - Created class "SpatialHdfsTable.java" to hold GlobalIndex if the Hdfs table is Spatial. - Removed any reference in "Table.java" of "GlobalIndex". - Handled creating "SpatialHdfsTable.java" in case the table is Hdfs table and Spatial. - "SpatialHdfsTable" extends "HdfsTable" class and overrides methods "toThrift()" and "loadFromThrift()". --- .../com/cloudera/impala/catalog/Table.java | 36 ++-------- .../catalog/SpatialHdfsTable.java | 71 +++++++++++++++++++ 2 files changed, 77 insertions(+), 30 deletions(-) create mode 100644 fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Table.java b/fe/src/main/java/com/cloudera/impala/catalog/Table.java index c00729d996..cf34842e3f 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Table.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Table.java @@ -35,11 +35,10 @@ import com.cloudera.impala.thrift.TTable; import com.cloudera.impala.thrift.TTableDescriptor; import com.cloudera.impala.thrift.TTableStats; -import com.cloudera.impala.thrift.TGlobalIndex; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.gistic.spatialImpala.catalog.GlobalIndex; +import org.gistic.spatialImpala.catalog.SpatialHdfsTable; /** * Base class for table metadata. @@ -63,7 +62,6 @@ public abstract class Table implements CatalogObject { protected final Db db_; protected final String name_; protected final String owner_; - protected GlobalIndex global_index_; protected TTableDescriptor tableDesc_; protected List fields_; protected TAccessLevel accessLevel_ = TAccessLevel.READ_WRITE; @@ -97,7 +95,6 @@ protected Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, owner_ = owner; colsByPos_ = Lists.newArrayList(); colsByName_ = Maps.newHashMap(); - global_index_ = initializeGlobalIndex(name_, msTable); lastDdlTime_ = (msTable_ != null) ? CatalogServiceCatalog.getLastDdlTime(msTable_) : -1; } @@ -113,25 +110,6 @@ protected Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, */ public abstract void load(Table oldValue, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException; - - public GlobalIndex getGlobalIndexIfAny() { - return global_index_; - } - - private GlobalIndex initializeGlobalIndex(String tableName, org.apache.hadoop.hive.metastore.api.Table msTbl) { - if (msTbl == null) - return null; - - Map params = msTbl.getParameters(); - if (params == null) - return null; - - String globalIndexPath = params.get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); - if (globalIndexPath == null) - return null; - - return GlobalIndex.loadAndCreateGlobalIndex(tableName, globalIndexPath); - } public void addColumn(Column col) { colsByPos_.add(col); @@ -219,7 +197,11 @@ public static Table fromMetastoreTable(TableId id, Db db, // data source. table = new DataSourceTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner()); } else if (HdfsFileFormat.isHdfsFormatClass(msTbl.getSd().getInputFormat())) { - table = new HdfsTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner()); + // Checking if the table is Spatial or not. + if (SpatialHdfsTable.isSpatial(msTbl)) + table = new SpatialHdfsTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner()); + else + table = new HdfsTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner()); } return table; } @@ -268,8 +250,6 @@ protected void loadFromThrift(TTable thriftTable) throws TableLoadingException { // Default to READ_WRITE access if the field is not set. accessLevel_ = thriftTable.isSetAccess_level() ? thriftTable.getAccess_level() : TAccessLevel.READ_WRITE; - - global_index_ = GlobalIndex.fromThrift(thriftTable.getGlobalIndex()); } /** @@ -308,10 +288,6 @@ public TTable toThrift() { table.setTable_stats(new TTableStats()); table.getTable_stats().setNum_rows(numRows_); } - - if (global_index_ != null) - table.setGlobalIndex(global_index_.toThrift()); - return table; } diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java new file mode 100644 index 0000000000..8ee48dff66 --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java @@ -0,0 +1,71 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.catalog; + +import com.cloudera.impala.catalog.Db; +import com.cloudera.impala.catalog.HdfsTable; +import com.cloudera.impala.catalog.TableId; +import com.cloudera.impala.catalog.TableLoadingException; +import com.cloudera.impala.thrift.TTable; + +import java.util.Map; + +public class SpatialHdfsTable extends HdfsTable { + protected GlobalIndex global_index_; + + public SpatialHdfsTable(TableId id, + org.apache.hadoop.hive.metastore.api.Table msTbl, Db db, + String name, String owner) { + + super(id, msTbl, db, name, owner); + global_index_ = initializeGlobalIndex(this.getName(), msTbl); + } + + public GlobalIndex getGlobalIndexIfAny() { + return global_index_; + } + + private GlobalIndex initializeGlobalIndex(String tableName, + org.apache.hadoop.hive.metastore.api.Table msTbl) { + + if (msTbl == null) + return null; + + Map params = msTbl.getParameters(); + if (params == null) + return null; + + String globalIndexPath = params + .get(GlobalIndex.GLOBAL_INDEX_TABLE_PARAM); + if (globalIndexPath == null) + return null; + + return GlobalIndex.loadAndCreateGlobalIndex(tableName, globalIndexPath); + } + + @Override + public TTable toThrift() { + // Send all metadata between the catalog service and the FE. + TTable table = super.toThrift(); + if (global_index_ != null) + table.setGlobalIndex(global_index_.toThrift()); + return table; + } + + @Override + protected void loadFromThrift(TTable thriftTable) + throws TableLoadingException { + + super.loadFromThrift(thriftTable); + global_index_ = GlobalIndex.fromThrift(thriftTable.getGlobalIndex()); + } + + public static boolean isSpatial( + org.apache.hadoop.hive.metastore.api.Table msTbl) { + + return msTbl != null + && msTbl.getParameters() != null + && msTbl.getParameters().get( + GlobalIndex.GLOBAL_INDEX_TABLE_PARAM) != null; + } +} \ No newline at end of file From fd2e318dc5cadd43c4a26f7261d245f563317739 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Mon, 1 Dec 2014 14:44:00 +0300 Subject: [PATCH 08/15] Handling logging in GlobalIndex - Added logging while loading the global index file. - Handled returning null in case the global index has errors in SpatialHdfsTable while loading from thrift object. --- .../java/org/gistic/spatialImpala/catalog/GlobalIndex.java | 3 ++- .../org/gistic/spatialImpala/catalog/SpatialHdfsTable.java | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index b26cca7926..090ca19fa3 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -82,6 +82,7 @@ public TGlobalIndex toThrift() { } private static HashMap loadGlobalIndex(String globalIndexPath) { + LOG.info("Loading global index file."); Path gIPath = new Path(globalIndexPath.trim()); gIPath = FileSystemUtil.createFullyQualifiedPath(gIPath); String data = null; @@ -124,7 +125,7 @@ private static HashMap loadGlobalIndex(String globalI scanner.close(); return null; } - + LOG.info("Reading Record: [" + id + ", " + separatedRecord[5] + ", " + x1 + ", " + y1 + ", " + x2 + ", " + y2 + "]"); GlobalIndexRecord gIRecord = new GlobalIndexRecord(id, separatedRecord[5], new Rectangle(x1, y1, x2, y2)); gIMap.put(separatedRecord[5], gIRecord); } diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java index 8ee48dff66..678617ded1 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/SpatialHdfsTable.java @@ -57,12 +57,14 @@ protected void loadFromThrift(TTable thriftTable) throws TableLoadingException { super.loadFromThrift(thriftTable); - global_index_ = GlobalIndex.fromThrift(thriftTable.getGlobalIndex()); + if (thriftTable.getGlobalIndex() != null) + global_index_ = GlobalIndex + .fromThrift(thriftTable.getGlobalIndex()); } public static boolean isSpatial( org.apache.hadoop.hive.metastore.api.Table msTbl) { - + return msTbl != null && msTbl.getParameters() != null && msTbl.getParameters().get( From b5deeb7db9e20fd923d12522b381709b3a419d9b Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Wed, 3 Dec 2014 16:56:57 +0300 Subject: [PATCH 09/15] Create point inclusion SQL statement - Added needed keywords to "sql-parser.y" and "sql-scanner.flex". - Created "SpatialPointInclusionStmt" to represent the new spatial query statement. - Handled parsing Rectangle as an object in "sql-parser.y". - Handled parsing the new spatial statement in "sql-parser.y". - Added "toString()" method to "Rectangle.java" to be used in "SpatialPointInclusionStmt". --- fe/src/main/cup/sql-parser.y | 35 +++++++++++++++++-- .../impala/analysis/StatementBase.java | 2 +- .../analysis/SpatialPointInclusionStmt.java | 35 +++++++++++++++++++ .../spatialImpala/catalog/Rectangle.java | 6 ++++ fe/src/main/jflex/sql-scanner.flex | 3 ++ 5 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index 22051f7089..b6b2c04a2d 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -14,6 +14,9 @@ package com.cloudera.impala.analysis; +import org.gistic.spatialImpala.catalog.Rectangle; +import org.gistic.spatialImpala.analysis.SpatialPointInclusionStmt; + import com.cloudera.impala.catalog.Type; import com.cloudera.impala.catalog.ScalarType; import com.cloudera.impala.catalog.ArrayType; @@ -241,9 +244,9 @@ terminal KW_INNER, KW_INPATH, KW_INSERT, KW_INT, KW_INTERMEDIATE, KW_INTERVAL, KW_INTO, KW_INVALIDATE, KW_IS, KW_JOIN, KW_LAST, KW_LEFT, KW_LIKE, KW_LIMIT, KW_LINES, KW_LOAD, KW_LOCATION, KW_MAP, KW_MERGE_FN, KW_METADATA, KW_NOT, KW_NULL, KW_NULLS, KW_OFFSET, - KW_ON, KW_OR, KW_ORDER, KW_OUTER, KW_OVER, KW_OVERWRITE, KW_PARQUET, KW_PARQUETFILE, - KW_PARTITION, KW_PARTITIONED, KW_PARTITIONS, KW_PRECEDING, - KW_PREPARE_FN, KW_PRODUCED, KW_RANGE, KW_RCFILE, KW_REFRESH, KW_REGEXP, KW_RENAME, + KW_ON, KW_OR, KW_ORDER, KW_OUTER, KW_OVER, KW_OVERLAPS, KW_OVERWRITE, KW_PARQUET, KW_PARQUETFILE, + KW_PARTITION, KW_PARTITIONED, KW_PARTITIONS, KW_POINTS, KW_PRECEDING, + KW_PREPARE_FN, KW_PRODUCED, KW_RANGE, KW_RCFILE, KW_RECTANGLE, KW_REFRESH, KW_REGEXP, KW_RENAME, KW_REPLACE, KW_RETURNS, KW_REVOKE, KW_RIGHT, KW_RLIKE, KW_ROLE, KW_ROLES, KW_ROW, KW_ROWS, KW_SCHEMA, KW_SCHEMAS, KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES, KW_SERIALIZE_FN, KW_SET, KW_SHOW, KW_SMALLINT, KW_STORED, KW_STRAIGHT_JOIN, @@ -438,6 +441,11 @@ nonterminal TFunctionCategory opt_function_category; nonterminal HashMap create_function_args_map; nonterminal CreateFunctionStmtBase.OptArg create_function_arg_key; +// For Spatial related queries +nonterminal SpatialPointInclusionStmt spatial_point_inclusion_stmt; +nonterminal Rectangle rectangle; +nonterminal NumericLiteral rectangle_arg; + precedence left KW_OR; precedence left KW_AND; precedence left KW_NOT, NOT; @@ -549,8 +557,29 @@ stmt ::= {: RESULT = grant_privilege; :} | revoke_privilege_stmt:revoke_privilege {: RESULT = revoke_privilege; :} + | spatial_point_inclusion_stmt:spatial_point_inclusion + {: RESULT = spatial_point_inclusion; :} + ; + +spatial_point_inclusion_stmt ::= + KW_LOAD KW_POINTS KW_FROM KW_TABLE table_name:tbl KW_OVERLAPS rectangle:rect + {: RESULT = new SpatialPointInclusionStmt(tbl, rect); :} + ; + +rectangle ::= + KW_RECTANGLE LPAREN rectangle_arg:x1 COMMA rectangle_arg:y1 COMMA + rectangle_arg:x2 COMMA rectangle_arg:y2 RPAREN + {: RESULT = new Rectangle(x1.getDoubleValue(), y1.getDoubleValue(), + x2.getDoubleValue(), y2.getDoubleValue()); :} ; +rectangle_arg ::= + INTEGER_LITERAL:l + {: RESULT = new NumericLiteral(l); :} + | DECIMAL_LITERAL:l + {: RESULT = new NumericLiteral(l); :} + ; + load_stmt ::= KW_LOAD KW_DATA KW_INPATH STRING_LITERAL:path overwrite_val:overwrite KW_INTO KW_TABLE table_name:table opt_partition_spec:partition diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StatementBase.java b/fe/src/main/java/com/cloudera/impala/analysis/StatementBase.java index 86effef23f..4c2732e06e 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/StatementBase.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/StatementBase.java @@ -19,7 +19,7 @@ /** * Base class for all Impala SQL statements. */ -abstract class StatementBase implements ParseNode { +public abstract class StatementBase implements ParseNode { // True if this Stmt is the top level of an explain stmt. protected boolean isExplain_ = false; diff --git a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java new file mode 100644 index 0000000000..f0d3662892 --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java @@ -0,0 +1,35 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.analysis; + +import org.gistic.spatialImpala.catalog.*; + +import com.cloudera.impala.common.AnalysisException; +import com.cloudera.impala.analysis.Analyzer; +import com.cloudera.impala.analysis.StatementBase; +import com.cloudera.impala.analysis.TableName; + +/** + * Represents a Spatial Point Inclusion statement + */ +public class SpatialPointInclusionStmt extends StatementBase { + private final TableName tableName_; + private final Rectangle rect_; + private String dbName_; + + public SpatialPointInclusionStmt(TableName tblName, Rectangle rect) { + this.tableName_ = tblName; + this.rect_ = rect; + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + + } + + @Override + public String toSql() { + return "load points from table " + tableName_.getTbl() + + " overlaps rectangle" + rect_.toString() + ";"; + } +} \ No newline at end of file diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index 700528e833..d6b0bde71a 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -28,4 +28,10 @@ public TRectangle toThrift() { public static Rectangle fromThrift(TRectangle rect) { return new Rectangle(rect.getX1(), rect.getY1(), rect.getX2(), rect.getY2()); } + + @Override + public String toString() { + return "(" + x1 + ", " + y1 + ", " + + x2 + ", " + y2 + ")"; + } } \ No newline at end of file diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index 07d327279a..547c1ea5e4 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -151,18 +151,21 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("order", new Integer(SqlParserSymbols.KW_ORDER)); keywordMap.put("outer", new Integer(SqlParserSymbols.KW_OUTER)); keywordMap.put("over", new Integer(SqlParserSymbols.KW_OVER)); + keywordMap.put("overlaps", new Integer(SqlParserSymbols.KW_OVERLAPS)); keywordMap.put("overwrite", new Integer(SqlParserSymbols.KW_OVERWRITE)); keywordMap.put("parquet", new Integer(SqlParserSymbols.KW_PARQUET)); keywordMap.put("parquetfile", new Integer(SqlParserSymbols.KW_PARQUETFILE)); keywordMap.put("partition", new Integer(SqlParserSymbols.KW_PARTITION)); keywordMap.put("partitioned", new Integer(SqlParserSymbols.KW_PARTITIONED)); keywordMap.put("partitions", new Integer(SqlParserSymbols.KW_PARTITIONS)); + keywordMap.put("points", new Integer(SqlParserSymbols.KW_POINTS)); keywordMap.put("preceding", new Integer(SqlParserSymbols.KW_PRECEDING)); keywordMap.put("prepare_fn", new Integer(SqlParserSymbols.KW_PREPARE_FN)); keywordMap.put("produced", new Integer(SqlParserSymbols.KW_PRODUCED)); keywordMap.put("range", new Integer(SqlParserSymbols.KW_RANGE)); keywordMap.put("rcfile", new Integer(SqlParserSymbols.KW_RCFILE)); keywordMap.put("real", new Integer(SqlParserSymbols.KW_DOUBLE)); + keywordMap.put("rectangle", new Integer(SqlParserSymbols.KW_RECTANGLE)); keywordMap.put("refresh", new Integer(SqlParserSymbols.KW_REFRESH)); keywordMap.put("regexp", new Integer(SqlParserSymbols.KW_REGEXP)); keywordMap.put("rename", new Integer(SqlParserSymbols.KW_RENAME)); From 769f7946c0732a6ed32c40d6b844d7ed8f643100 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Thu, 4 Dec 2014 15:07:57 +0300 Subject: [PATCH 10/15] Create point inclusion SQL statement - Handled analysis phase for "SpatialPointInclusionStmt". - Handled checking for table existence, table being spatial. - Handled checking for table containing GlobalIndex and containing required columns. - Updated "GlobalIndex" and "Rectangle" to return overlapping records with the provided Rectangle. --- .../analysis/SpatialPointInclusionStmt.java | 51 +++++++++++++++++-- .../spatialImpala/catalog/GlobalIndex.java | 9 ++++ .../spatialImpala/catalog/Rectangle.java | 10 ++++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java index f0d3662892..a6e032f793 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java +++ b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java @@ -4,18 +4,30 @@ import org.gistic.spatialImpala.catalog.*; +import com.cloudera.impala.catalog.Table; +import com.cloudera.impala.authorization.Privilege; import com.cloudera.impala.common.AnalysisException; import com.cloudera.impala.analysis.Analyzer; import com.cloudera.impala.analysis.StatementBase; +import com.cloudera.impala.analysis.SelectStmt; import com.cloudera.impala.analysis.TableName; +import java.util.List; + /** * Represents a Spatial Point Inclusion statement */ public class SpatialPointInclusionStmt extends StatementBase { - private final TableName tableName_; + private static final String TABLE_NOT_SPATIAL_ERROR_MSG + = "Table is not a spatial table."; + private static final String TAG = "tag"; + private static final String X = "x"; + private static final String Y = "y"; + private TableName tableName_; private final Rectangle rect_; - private String dbName_; + + // Initialized during analysis. + private SelectStmt selectStmt_; public SpatialPointInclusionStmt(TableName tblName, Rectangle rect) { this.tableName_ = tblName; @@ -24,7 +36,40 @@ public SpatialPointInclusionStmt(TableName tblName, Rectangle rect) { @Override public void analyze(Analyzer analyzer) throws AnalysisException { - + // Getting table and checking for existence. + Table table; + if (!tableName_.isFullyQualified()) { + tableName_ = new TableName(analyzer.getDefaultDb(), + tableName_.getTbl()); + } + table = analyzer.getTable(tableName_, Privilege.SELECT); + + // Table should be an instance of a Spatial table. + if (! (table instanceof SpatialHdfsTable)) + throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG); + + SpatialHdfsTable spatialTable = (SpatialHdfsTable) table; + + // Global index shouldn't be null. + GlobalIndex globalIndex = spatialTable.getGlobalIndexIfAny(); + if (globalIndex == null) + throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG + + " : Table doesn't have global indexes."); + + List columnNames = spatialTable.getColumnNames(); + if (! (columnNames.contains(TAG) && columnNames.contains(X) + && columnNames.contains(Y))) + throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG + + " : Table doesn't have the required columns."); + + List globalIndexes = globalIndex.getGIsforRectangle(rect_); + if (globalIndexes.size() == 0) { + // TODO: Create a select stmt to return an empty result. + return; + } + + // TODO: Create a select stmt containg information about + // the optimization done for the point inclusion statement. } @Override diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index 090ca19fa3..6563695a66 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -48,6 +48,15 @@ public List getGIsforPoint(int x, int y) { } return globalIndexes; } + + public List getGIsforRectangle(Rectangle rect) { + List globalIndexes = new ArrayList(); + for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { + if (gIRecord.getMBR().overlaps(rect)) + globalIndexes.add(gIRecord); + } + return globalIndexes; + } public GlobalIndexRecord getGIRecordforTag(String tag) { return globalIndexMap.get(tag); diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index d6b0bde71a..376c26e930 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -21,6 +21,16 @@ public boolean includesPoint(int x, int y) { return (x >= x1) && (x <= x2) && (y >= y1) && (y <= y2); } + public boolean overlaps(Rectangle rect) { + if (this.x1 > rect.x2 || this.x2 < rect.x1) + return false; + + if (this.y1 < rect.y2 || this.y2 > rect.y1) + return false; + + return true; + } + public TRectangle toThrift() { return new TRectangle(this.x1, this.y1, this.x2, this.y2); } From e9faf497ec36bf080d682610d5326d4fe00d8018 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Sun, 7 Dec 2014 15:28:18 +0300 Subject: [PATCH 11/15] Create point inclusion SQL statement - Changed modifier of both "SelectStmt" and "SelectListItem" to be used inside the new statement. - Added getters to "x1, y1, x2 and y2" to be used inside the point inclusion statement. - Updated "SpatialPointInclusionStmt" to return the "SelectStmt" and it will be only available after analysis. - Handled creating "SelectStmt" providing the table reference, select list and where predicates. - Handled creating where predicate for "SelectStmt" with no global index overlapping. - Handled creating where predicate using the overlapping global indexes and the boundaries of the provided "Recatngle". --- .../impala/analysis/SelectListItem.java | 2 +- .../cloudera/impala/analysis/SelectStmt.java | 2 +- .../analysis/SpatialPointInclusionStmt.java | 113 +++++++++++++++--- .../spatialImpala/catalog/Rectangle.java | 16 +++ 4 files changed, 112 insertions(+), 21 deletions(-) diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java b/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java index 03b6cd2505..5398be2303 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java @@ -16,7 +16,7 @@ import com.google.common.base.Preconditions; -class SelectListItem { +public class SelectListItem { private final Expr expr_; private String alias_; diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java index 9f5e5c50fb..9b27c8c906 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java @@ -65,7 +65,7 @@ public class SelectStmt extends QueryStmt { // directly private ExprSubstitutionMap baseTblSmap_ = new ExprSubstitutionMap(); - SelectStmt(SelectList selectList, + public SelectStmt(SelectList selectList, List tableRefList, Expr wherePredicate, ArrayList groupingExprs, Expr havingPredicate, ArrayList orderByElements, diff --git a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java index a6e032f793..0614d8212b 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java +++ b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java @@ -10,28 +10,41 @@ import com.cloudera.impala.analysis.Analyzer; import com.cloudera.impala.analysis.StatementBase; import com.cloudera.impala.analysis.SelectStmt; +import com.cloudera.impala.analysis.SelectListItem; +import com.cloudera.impala.analysis.SelectList; import com.cloudera.impala.analysis.TableName; +import com.cloudera.impala.analysis.TableRef; +import com.cloudera.impala.analysis.SlotRef; +import com.cloudera.impala.analysis.IsNullPredicate; +import com.cloudera.impala.analysis.Expr; +import com.cloudera.impala.analysis.CompoundPredicate; +import com.cloudera.impala.analysis.BinaryPredicate; +import com.cloudera.impala.analysis.IsNullPredicate; +import com.cloudera.impala.analysis.StringLiteral; +import com.cloudera.impala.analysis.NumericLiteral; import java.util.List; +import java.util.ArrayList; +import java.math.BigDecimal; /** * Represents a Spatial Point Inclusion statement */ public class SpatialPointInclusionStmt extends StatementBase { - private static final String TABLE_NOT_SPATIAL_ERROR_MSG - = "Table is not a spatial table."; + private static final String TABLE_NOT_SPATIAL_ERROR_MSG = "Table is not a spatial table."; private static final String TAG = "tag"; private static final String X = "x"; private static final String Y = "y"; private TableName tableName_; private final Rectangle rect_; - + // Initialized during analysis. private SelectStmt selectStmt_; public SpatialPointInclusionStmt(TableName tblName, Rectangle rect) { this.tableName_ = tblName; this.rect_ = rect; + this.selectStmt_ = null; } @Override @@ -43,33 +56,40 @@ public void analyze(Analyzer analyzer) throws AnalysisException { tableName_.getTbl()); } table = analyzer.getTable(tableName_, Privilege.SELECT); - + // Table should be an instance of a Spatial table. - if (! (table instanceof SpatialHdfsTable)) + if (!(table instanceof SpatialHdfsTable)) throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG); - + SpatialHdfsTable spatialTable = (SpatialHdfsTable) table; - + // Global index shouldn't be null. GlobalIndex globalIndex = spatialTable.getGlobalIndexIfAny(); if (globalIndex == null) throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG + " : Table doesn't have global indexes."); - + List columnNames = spatialTable.getColumnNames(); - if (! (columnNames.contains(TAG) && columnNames.contains(X) - && columnNames.contains(Y))) + if (!(columnNames.contains(TAG) && columnNames.contains(X) && columnNames + .contains(Y))) throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG + " : Table doesn't have the required columns."); - - List globalIndexes = globalIndex.getGIsforRectangle(rect_); - if (globalIndexes.size() == 0) { - // TODO: Create a select stmt to return an empty result. - return; - } - - // TODO: Create a select stmt containg information about - // the optimization done for the point inclusion statement. + + List globalIndexes = globalIndex + .getGIsforRectangle(rect_); + + // Preparing data for SelectStmt. + List tableRefs = new ArrayList(); + tableRefs.add(new TableRef(tableName_, null)); + + List items = new ArrayList(); + items.add(new SelectListItem(new SlotRef(tableName_, X), null)); + items.add(new SelectListItem(new SlotRef(tableName_, Y), null)); + + selectStmt_ = new SelectStmt(new SelectList(items), tableRefs, + createWherePredicate(globalIndexes), null, null, null, null); + + selectStmt_.analyze(analyzer); } @Override @@ -77,4 +97,59 @@ public String toSql() { return "load points from table " + tableName_.getTbl() + " overlaps rectangle" + rect_.toString() + ";"; } + + public SelectStmt getSelectStmtIfAny() { + return selectStmt_; + } + + private Expr createWherePredicate(List globalIndexes) { + SlotRef globalIndexSlotRef = new SlotRef(tableName_, TAG); + if (globalIndexes == null || globalIndexes.size() == 0) { + return new IsNullPredicate(globalIndexSlotRef, false); + } + + // Creating Where predicate. + Expr wherePredicate = null; + + // Create Global Index predicate. + wherePredicate = new BinaryPredicate(BinaryPredicate.Operator.EQ, + globalIndexSlotRef, new StringLiteral(globalIndexes.get(0) + .getTag())); + + for (int i = 1; i < globalIndexes.size(); i++) { + Expr globalIndexPredicate = new BinaryPredicate( + BinaryPredicate.Operator.EQ, globalIndexSlotRef, + new StringLiteral(globalIndexes.get(i).getTag())); + + wherePredicate = new CompoundPredicate( + CompoundPredicate.Operator.OR, wherePredicate, + globalIndexPredicate); + } + + // Create Rectangle predicate. + SlotRef xSlotRef = new SlotRef(tableName_, X); + SlotRef ySlotRef = new SlotRef(tableName_, Y); + + wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, + wherePredicate, new BinaryPredicate( + BinaryPredicate.Operator.LE, xSlotRef, + new NumericLiteral(new BigDecimal(rect_.getX1())))); + + wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, + wherePredicate, new BinaryPredicate( + BinaryPredicate.Operator.GE, xSlotRef, + new NumericLiteral(new BigDecimal(rect_.getX2())))); + + wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, + wherePredicate, new BinaryPredicate( + BinaryPredicate.Operator.LE, ySlotRef, + new NumericLiteral(new BigDecimal(rect_.getY1())))); + + wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, + wherePredicate, new BinaryPredicate( + BinaryPredicate.Operator.GE, ySlotRef, + new NumericLiteral(new BigDecimal(rect_.getY2())))); + + return wherePredicate; + } } \ No newline at end of file diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index 376c26e930..1e10c2f0a3 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -39,6 +39,22 @@ public static Rectangle fromThrift(TRectangle rect) { return new Rectangle(rect.getX1(), rect.getY1(), rect.getX2(), rect.getY2()); } + public double getX1() { + return x1; + } + + public double getY1() { + return y1; + } + + public double getX2() { + return x2; + } + + public double getY2() { + return y2; + } + @Override public String toString() { return "(" + x1 + ", " + y1 + ", " From d1b7c176814c8c3228444ab10927e259ed153e47 Mon Sep 17 00:00:00 2001 From: Ammar Bakeer Date: Mon, 8 Dec 2014 15:31:24 +0300 Subject: [PATCH 12/15] Create point inclusion SQL statement - Updated "AnalysisContext" to handle "SpatialPointInclusionStmt". - Added logging to "GlobalIndex" while checking for overlapping records. - Updated "overlaps()" method in "Rectangle" to be consistent with the "GlobalIndex" record. - Updated the where predicates in "SpatialPointInclusionStmt" to be consistent with the "GlobalIndex" records. - Tested the point inclusion statement on a partitioned spatial table. - Tested the statement in cases of rectangles out of the partitions, overlaps only one partition and overlaps two or more partitions. --- .../cloudera/impala/analysis/AnalysisContext.java | 14 ++++++++++++-- .../analysis/SpatialPointInclusionStmt.java | 8 ++++---- .../gistic/spatialImpala/catalog/GlobalIndex.java | 9 ++++++++- .../gistic/spatialImpala/catalog/Rectangle.java | 2 +- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java b/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java index 25cada1d78..9839242c9f 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java @@ -27,6 +27,8 @@ import com.cloudera.impala.thrift.TQueryCtx; import com.google.common.base.Preconditions; +import org.gistic.spatialImpala.analysis.*; + /** * Wrapper class for parser and analyzer. */ @@ -54,7 +56,10 @@ static public class AnalysisResult { public boolean isAlterTableStmt() { return stmt_ instanceof AlterTableStmt; } public boolean isAlterViewStmt() { return stmt_ instanceof AlterViewStmt; } public boolean isComputeStatsStmt() { return stmt_ instanceof ComputeStatsStmt; } - public boolean isQueryStmt() { return stmt_ instanceof QueryStmt; } + public boolean isQueryStmt() { + return (stmt_ instanceof QueryStmt + || stmt_ instanceof SpatialPointInclusionStmt); + } public boolean isInsertStmt() { return stmt_ instanceof InsertStmt; } public boolean isDropDbStmt() { return stmt_ instanceof DropDbStmt; } public boolean isDropTableOrViewStmt() { @@ -203,7 +208,12 @@ public LoadDataStmt getLoadDataStmt() { public QueryStmt getQueryStmt() { Preconditions.checkState(isQueryStmt()); - return (QueryStmt) stmt_; + if (stmt_ instanceof QueryStmt) + return (QueryStmt) stmt_; + else if (stmt_ instanceof SpatialPointInclusionStmt) + return ((SpatialPointInclusionStmt) stmt_).getSelectStmtIfAny(); + else + return null; } public InsertStmt getInsertStmt() { diff --git a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java index 0614d8212b..cfa6d9691a 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java +++ b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialPointInclusionStmt.java @@ -132,22 +132,22 @@ globalIndexSlotRef, new StringLiteral(globalIndexes.get(0) wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, wherePredicate, new BinaryPredicate( - BinaryPredicate.Operator.LE, xSlotRef, + BinaryPredicate.Operator.GE, xSlotRef, new NumericLiteral(new BigDecimal(rect_.getX1())))); wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, wherePredicate, new BinaryPredicate( - BinaryPredicate.Operator.GE, xSlotRef, + BinaryPredicate.Operator.LE, xSlotRef, new NumericLiteral(new BigDecimal(rect_.getX2())))); wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, wherePredicate, new BinaryPredicate( - BinaryPredicate.Operator.LE, ySlotRef, + BinaryPredicate.Operator.GE, ySlotRef, new NumericLiteral(new BigDecimal(rect_.getY1())))); wherePredicate = new CompoundPredicate(CompoundPredicate.Operator.AND, wherePredicate, new BinaryPredicate( - BinaryPredicate.Operator.GE, ySlotRef, + BinaryPredicate.Operator.LE, ySlotRef, new NumericLiteral(new BigDecimal(rect_.getY2())))); return wherePredicate; diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index 6563695a66..e42c817cd6 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -52,8 +52,15 @@ public List getGIsforPoint(int x, int y) { public List getGIsforRectangle(Rectangle rect) { List globalIndexes = new ArrayList(); for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { - if (gIRecord.getMBR().overlaps(rect)) + if (gIRecord.getMBR().overlaps(rect)) { + LOG.info("GI record: " + gIRecord.getMBR() + + " overlaps with: " + rect); globalIndexes.add(gIRecord); + } + else { + LOG.info("GI record: " + gIRecord.getMBR() + + " does not overlap with: " + rect); + } } return globalIndexes; } diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index 1e10c2f0a3..81d854ec47 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -25,7 +25,7 @@ public boolean overlaps(Rectangle rect) { if (this.x1 > rect.x2 || this.x2 < rect.x1) return false; - if (this.y1 < rect.y2 || this.y2 > rect.y1) + if (this.y1 > rect.y2 || this.y2 < rect.y1) return false; return true; From d74250e0c374bb69d3bac7171473edf07fe44c85 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 5 Jan 2015 11:39:24 +0000 Subject: [PATCH 13/15] adding the KNN spatial Query 1) Adding point class 2) Adding thrift point class 3) Adding KNN spatial stmt 4) Editting Global index 5) Editting Sql parser and scanner 6) Editting Analysis context --- common/thrift/CatalogObjects.thrift | 6 + fe/src/main/cup/sql-parser.y | 30 +++- .../impala/analysis/AnalysisContext.java | 5 +- .../analysis/SpatialKnnStmt.java | 133 ++++++++++++++++++ .../spatialImpala/catalog/GlobalIndex.java | 18 ++- .../gistic/spatialImpala/catalog/Point.java | 36 +++++ .../spatialImpala/catalog/Rectangle.java | 14 +- fe/src/main/jflex/sql-scanner.flex | 3 + 8 files changed, 238 insertions(+), 7 deletions(-) create mode 100644 fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java create mode 100644 fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift index 6331dc816f..e616e7c3bb 100644 --- a/common/thrift/CatalogObjects.thrift +++ b/common/thrift/CatalogObjects.thrift @@ -266,6 +266,12 @@ struct TDataSourceTable { 2: required string init_string } +// Represents a point +struct TPoint { + 1: required double x + 2: required double y +} + // Represents a rectangle used in a global index record for spatial tables. struct TRectangle { 1: required double x1 diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index b6b2c04a2d..cade8407fb 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -14,8 +14,10 @@ package com.cloudera.impala.analysis; +import org.gistoc.spatialImpala.catalog.Point; import org.gistic.spatialImpala.catalog.Rectangle; import org.gistic.spatialImpala.analysis.SpatialPointInclusionStmt; +import org.gistic.spatialImpala.analysis.SpatialKnnStmt; import com.cloudera.impala.catalog.Type; import com.cloudera.impala.catalog.ScalarType; @@ -242,10 +244,10 @@ terminal KW_FIRST, KW_FLOAT, KW_FOLLOWING, KW_FOR, KW_FORMAT, KW_FORMATTED, KW_FROM, KW_FULL, KW_FUNCTION, KW_FUNCTIONS, KW_GRANT, KW_GROUP, KW_HAVING, KW_IF, KW_IN, KW_INIT_FN, KW_INNER, KW_INPATH, KW_INSERT, KW_INT, KW_INTERMEDIATE, KW_INTERVAL, KW_INTO, - KW_INVALIDATE, KW_IS, KW_JOIN, KW_LAST, KW_LEFT, KW_LIKE, KW_LIMIT, KW_LINES, KW_LOAD, + KW_INVALIDATE, KW_IS, KW_JOIN, KW_KNN, KW_LAST, KW_LEFT, KW_LIKE, KW_LIMIT, KW_LINES, KW_LOAD, KW_LOCATION, KW_MAP, KW_MERGE_FN, KW_METADATA, KW_NOT, KW_NULL, KW_NULLS, KW_OFFSET, KW_ON, KW_OR, KW_ORDER, KW_OUTER, KW_OVER, KW_OVERLAPS, KW_OVERWRITE, KW_PARQUET, KW_PARQUETFILE, - KW_PARTITION, KW_PARTITIONED, KW_PARTITIONS, KW_POINTS, KW_PRECEDING, + KW_PARTITION, KW_PARTITIONED, KW_PARTITIONS, KW_POINT, KW_POINTS, KW_PRECEDING, KW_PREPARE_FN, KW_PRODUCED, KW_RANGE, KW_RCFILE, KW_RECTANGLE, KW_REFRESH, KW_REGEXP, KW_RENAME, KW_REPLACE, KW_RETURNS, KW_REVOKE, KW_RIGHT, KW_RLIKE, KW_ROLE, KW_ROLES, KW_ROW, KW_ROWS, KW_SCHEMA, KW_SCHEMAS, KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES, @@ -254,7 +256,7 @@ terminal KW_TEXTFILE, KW_THEN, KW_TIMESTAMP, KW_TINYINT, KW_STATS, KW_TO, KW_TRUE, KW_UNBOUNDED, KW_UNCACHED, KW_UNION, KW_UPDATE_FN, KW_USE, KW_USING, - KW_VALUES, KW_VARCHAR, KW_VIEW, KW_WHEN, KW_WHERE, KW_WITH; + KW_VALUES, KW_VARCHAR, KW_VIEW, KW_WHEN, KW_WHERE, KW_WITH, KW_WITHK; terminal COLON, COMMA, DOT, DOTDOTDOT, STAR, LPAREN, RPAREN, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; @@ -445,6 +447,9 @@ nonterminal CreateFunctionStmtBase.OptArg create_function_arg_key; nonterminal SpatialPointInclusionStmt spatial_point_inclusion_stmt; nonterminal Rectangle rectangle; nonterminal NumericLiteral rectangle_arg; +nonterminal SpatialKnnStmt spatial_knn_stmt; +nonterminal Point point; +nonterminal NumericLiteral point_arg; precedence left KW_OR; precedence left KW_AND; @@ -559,6 +564,8 @@ stmt ::= {: RESULT = revoke_privilege; :} | spatial_point_inclusion_stmt:spatial_point_inclusion {: RESULT = spatial_point_inclusion; :} + | spatial_knn_stmt:spatial_knn + {: RESULT = spatial_knn; :} ; spatial_point_inclusion_stmt ::= @@ -566,6 +573,11 @@ spatial_point_inclusion_stmt ::= {: RESULT = new SpatialPointInclusionStmt(tbl, rect); :} ; +spatial_knn_stmt ::= + LW_LOAD KW_POINTS KW_FROM KW_TABLE table_name:tbl KW_KNN point:p KW_WITHK expr:k + {: RESULT = new SpatialKnnStmt(tbl,p,k) :} + ; + rectangle ::= KW_RECTANGLE LPAREN rectangle_arg:x1 COMMA rectangle_arg:y1 COMMA rectangle_arg:x2 COMMA rectangle_arg:y2 RPAREN @@ -573,6 +585,11 @@ rectangle ::= x2.getDoubleValue(), y2.getDoubleValue()); :} ; +point ::= + KW_POINT LPAREN point_arg:x COMMA point_arg:y RPAREN + {: RESULT = new Point(X.getDoubleValue(), y1.getDoubleValue()); :} + ; + rectangle_arg ::= INTEGER_LITERAL:l {: RESULT = new NumericLiteral(l); :} @@ -580,6 +597,13 @@ rectangle_arg ::= {: RESULT = new NumericLiteral(l); :} ; +point_arg ::= + INTEGER_LITERAL:l + {: RESULT = new NumericLiteral(l); :} + | DECIMAL_LITERAL:l + {: RESULT = new NumericLiteral(l); :} + ; + load_stmt ::= KW_LOAD KW_DATA KW_INPATH STRING_LITERAL:path overwrite_val:overwrite KW_INTO KW_TABLE table_name:table opt_partition_spec:partition diff --git a/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java b/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java index 9839242c9f..a4103ab6f8 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/AnalysisContext.java @@ -58,7 +58,8 @@ static public class AnalysisResult { public boolean isComputeStatsStmt() { return stmt_ instanceof ComputeStatsStmt; } public boolean isQueryStmt() { return (stmt_ instanceof QueryStmt - || stmt_ instanceof SpatialPointInclusionStmt); + || stmt_ instanceof SpatialPointInclusionStmt + || stmt_ instanceof SpatialKnnStmt); } public boolean isInsertStmt() { return stmt_ instanceof InsertStmt; } public boolean isDropDbStmt() { return stmt_ instanceof DropDbStmt; } @@ -212,6 +213,8 @@ public QueryStmt getQueryStmt() { return (QueryStmt) stmt_; else if (stmt_ instanceof SpatialPointInclusionStmt) return ((SpatialPointInclusionStmt) stmt_).getSelectStmtIfAny(); + else if (stmt_ instanceof SpatialKnnStmt) + return ((SpatialKnnStmt) stmt_).getSelectStmtIfAny(); else return null; } diff --git a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java new file mode 100644 index 0000000000..5ca1c4dccf --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java @@ -0,0 +1,133 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.analysis; + +import org.gistic.spatialImpala.catalog.*; + +import com.cloudera.impala.catalog.Table; +import com.cloudera.impala.authorization.Privilege; +import com.cloudera.impala.common.AnalysisException; +import com.cloudera.impala.analysis.Analyzer; +import com.cloudera.impala.analysis.StatementBase; +import com.cloudera.impala.analysis.SelectStmt; +import com.cloudera.impala.analysis.SelectListItem; +import com.cloudera.impala.analysis.SelectList; +import com.cloudera.impala.analysis.TableName; +import com.cloudera.impala.analysis.TableRef; +import com.cloudera.impala.analysis.SlotRef; +import com.cloudera.impala.analysis.IsNullPredicate; +import com.cloudera.impala.analysis.Expr; +import com.cloudera.impala.analysis.CompoundPredicate; +import com.cloudera.impala.analysis.BinaryPredicate; +import com.cloudera.impala.analysis.IsNullPredicate; +import com.cloudera.impala.analysis.StringLiteral; +import com.cloudera.impala.analysis.NumericLiteral; + +import java.util.List; +import java.util.ArrayList; +import java.math.BigDecimal; + +/** + * Represents a Spatial Point Inclusion statement + */ +public class SpatialKnnStmt extends StatementBase { + private static final String TABLE_NOT_SPATIAL_ERROR_MSG = "Table is not a spatial table."; + private static final String TAG = "tag"; + private static final String X = "x"; + private static final String Y = "y"; + private TableName tableName_; + private final Point p_; + private final int k_; + + // Initialized during analysis. + private SelectStmt selectStmt_; + + public SpatialKnnStmt(TableName tblName, Point p, int k) { + this.tableName_ = tblName; + this.p_ = p; + this.k_ = k; + this.selectStmt_ = null; + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + // Getting table and checking for existence. + Table table; + if (!tableName_.isFullyQualified()) { + tableName_ = new TableName(analyzer.getDefaultDb(), + tableName_.getTbl()); + } + table = analyzer.getTable(tableName_, Privilege.SELECT); + + // Table should be an instance of a Spatial table. + if (!(table instanceof SpatialHdfsTable)) + throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG); + + SpatialHdfsTable spatialTable = (SpatialHdfsTable) table; + + // Global index shouldn't be null. + GlobalIndex globalIndex = spatialTable.getGlobalIndexIfAny(); + if (globalIndex == null) + throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG + + " : Table doesn't have global indexes."); + + List columnNames = spatialTable.getColumnNames(); + if (!(columnNames.contains(TAG) && columnNames.contains(X) && columnNames + .contains(Y))) + throw new AnalysisException(TABLE_NOT_SPATIAL_ERROR_MSG + + " : Table doesn't have the required columns."); + + List globalIndexes = globalIndex + .getGIsforKnn(p_); + + // Preparing data for SelectStmt. + List tableRefs = new ArrayList(); + tableRefs.add(new TableRef(tableName_, null)); + + List items = new ArrayList(); + items.add(new SelectListItem(new SlotRef(tableName_, X), null)); + items.add(new SelectListItem(new SlotRef(tableName_, Y), null)); + + selectStmt_ = new SelectStmt(new SelectList(items), tableRefs, + createWherePredicate(globalIndexes), null, null, null, new LimitElement(new NumericLiteral(BigDecimal.valueOf(this.k_)), null)); + + selectStmt_.analyze(analyzer); + } + + @Override + public String toSql() { + return "load points from table " + tableName_.getTbl() + + " knn " + p_.toString() + "with_k " + k_ + ";"; + } + + public SelectStmt getSelectStmtIfAny() { + return selectStmt_; + } + + private Expr createWherePredicate(List globalIndexes) { + SlotRef globalIndexSlotRef = new SlotRef(tableName_, TAG); + if (globalIndexes == null || globalIndexes.size() == 0) { + return new IsNullPredicate(globalIndexSlotRef, false); + } + + // Creating Where predicate. + Expr wherePredicate = null; + + // Create Global Index predicate. + wherePredicate = new BinaryPredicate(BinaryPredicate.Operator.EQ, + globalIndexSlotRef, new StringLiteral(globalIndexes.get(0) + .getTag())); + + for (int i = 1; i < globalIndexes.size(); i++) { + Expr globalIndexPredicate = new BinaryPredicate( + BinaryPredicate.Operator.EQ, globalIndexSlotRef, + new StringLiteral(globalIndexes.get(i).getTag())); + + wherePredicate = new CompoundPredicate( + CompoundPredicate.Operator.OR, wherePredicate, + globalIndexPredicate); + } + + return wherePredicate; + } +} diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index e42c817cd6..a173d92ad8 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -48,7 +48,21 @@ public List getGIsforPoint(int x, int y) { } return globalIndexes; } - + + public List getGIsforKnn(Point p) { + int maxdist = 0; + for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { + if (gIRecord.getMBR().includesPoint(p.getX(), p.getY())) + maxdist = Math.max(maxdist, gIRecord.getMaxDist(p.getX(), p.getY())); + } + List globalIndexes = new ArrayList(); + for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { + if (gIRecord.getMBR().getMinDist(p.getX(), p.getY())) + globalIndexes.add(gIRecord); + } + return globalIndexes; + } + public List getGIsforRectangle(Rectangle rect) { List globalIndexes = new ArrayList(); for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { @@ -161,4 +175,4 @@ public static GlobalIndex fromThrift(TGlobalIndex tGlobalIndex) { } return new GlobalIndex(tGlobalIndex.getTbl_name(), gIMap); } -} \ No newline at end of file +} diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java new file mode 100644 index 0000000000..1cb0667820 --- /dev/null +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java @@ -0,0 +1,36 @@ +// Copyright 2014 GISTIC. + +package org.gistic.spatialImpala.catalog; + +import com.cloudera.impala.thrift.TPoint; + +public class Point { + private double x; + private double y; + + public Point(double x, double y) { + this.x = x; + this.y = y; + } + + public TPoint toThrift() { + return new TRectangle(this.x, this.y); + } + + public static Point fromThrift(TPoint p) { + return new Point(p.getX(), p.getY()); + } + + public double getX1() { + return x; + } + + public double getY1() { + return y; + } + + @Override + public String toString() { + return "(" + x + ", " + y + ")"; + } +} diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index 81d854ec47..8f861fc305 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -20,6 +20,18 @@ public Rectangle(double x1, double y1, double x2, double y2) { public boolean includesPoint(int x, int y) { return (x >= x1) && (x <= x2) && (y >= y1) && (y <= y2); } + + public double getMaxDist(int x, int y) { + return Math.max(Math.max(dist(x,y,x1,y1),dist(x,y,x2,y1)),Math.max(dist(x,y,x1,y2),dist(x,y,x2,y2))); + } + + public double getMinDist(int x, int y) { + return Math.min(Math.min(dist(x,y,x1,y1),dist(x,y,x2,y1)),Math.min(dist(x,y,x1,y2),dist(x,y,x2,y2))); + } + + private double dist(int x1, int y1, int X2, int y2) { + return Math.sqrt( (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2) ); + } public boolean overlaps(Rectangle rect) { if (this.x1 > rect.x2 || this.x2 < rect.x1) @@ -60,4 +72,4 @@ public String toString() { return "(" + x1 + ", " + y1 + ", " + x2 + ", " + y2 + ")"; } -} \ No newline at end of file +} diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index 547c1ea5e4..5af3816e23 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -131,6 +131,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("invalidate", new Integer(SqlParserSymbols.KW_INVALIDATE)); keywordMap.put("is", new Integer(SqlParserSymbols.KW_IS)); keywordMap.put("join", new Integer(SqlParserSymbols.KW_JOIN)); + keywordMap.put("knn", new Integer(SqlParserSymbols.KW_KNN)); keywordMap.put("last", new Integer(SqlParserSymbols.KW_LAST)); keywordMap.put("left", new Integer(SqlParserSymbols.KW_LEFT)); keywordMap.put("like", new Integer(SqlParserSymbols.KW_LIKE)); @@ -158,6 +159,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("partition", new Integer(SqlParserSymbols.KW_PARTITION)); keywordMap.put("partitioned", new Integer(SqlParserSymbols.KW_PARTITIONED)); keywordMap.put("partitions", new Integer(SqlParserSymbols.KW_PARTITIONS)); + keywordMap.put("point", new Integer(SqlParserSymbols.KW_POINT)); keywordMap.put("points", new Integer(SqlParserSymbols.KW_POINTS)); keywordMap.put("preceding", new Integer(SqlParserSymbols.KW_PRECEDING)); keywordMap.put("prepare_fn", new Integer(SqlParserSymbols.KW_PREPARE_FN)); @@ -216,6 +218,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("when", new Integer(SqlParserSymbols.KW_WHEN)); keywordMap.put("where", new Integer(SqlParserSymbols.KW_WHERE)); keywordMap.put("with", new Integer(SqlParserSymbols.KW_WITH)); + keywordMap.put("with_k", new Integer(SqlParserSymbols.KW_WITHK)); } // map from token id to token description From 7d28befb8cdf191547e076b8c4819f01d64af62a Mon Sep 17 00:00:00 2001 From: root Date: Mon, 5 Jan 2015 19:39:07 +0000 Subject: [PATCH 14/15] adding < condition in the GI --- .../spatialImpala/catalog/GlobalIndex.java | 2 +- fe/src/test/resources/hive-log4j.properties | 62 +++++++++++++++++++ fe/src/test/resources/sentry-site.xml | 61 ++++++++++++++++++ 3 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 fe/src/test/resources/hive-log4j.properties create mode 100644 fe/src/test/resources/sentry-site.xml diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index a173d92ad8..ed2dbce4a4 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -57,7 +57,7 @@ public List getGIsforKnn(Point p) { } List globalIndexes = new ArrayList(); for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { - if (gIRecord.getMBR().getMinDist(p.getX(), p.getY())) + if (gIRecord.getMBR().getMinDist(p.getX(), p.getY()) < maxdist ) globalIndexes.add(gIRecord); } return globalIndexes; diff --git a/fe/src/test/resources/hive-log4j.properties b/fe/src/test/resources/hive-log4j.properties new file mode 100644 index 0000000000..ed532f8eee --- /dev/null +++ b/fe/src/test/resources/hive-log4j.properties @@ -0,0 +1,62 @@ +# Define some default values that can be overridden by system properties +hive.root.logger=INFO,DRFA +hive.log.dir=/home/ubuntu/SpatialImpala/cluster_logs/hive +hive.log.file=hive.log + +# Define the root logger to the system property "hadoop.root.logger". +log4j.rootLogger=${hive.root.logger}, EventCounter + +# Logging Threshold +log4j.threshhold=WARN + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file} + +# Rollver at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +# 30-day backup +#log4j.appender.DRFA.MaxBackupIndex=30 +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# console +# Add "console" to rootlogger above if you want to use this +# + +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n + +#custom logging levels +#log4j.logger.xxx=DEBUG + +# +# Event Counter Appender +# Sends counts of logging messages at different severity levels to Hadoop Metrics. +# +log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter + + +log4j.category.DataNucleus=ERROR,DRFA +log4j.category.Datastore=ERROR,DRFA +log4j.category.Datastore.Schema=ERROR,DRFA +log4j.category.JPOX.Datastore=ERROR,DRFA +log4j.category.JPOX.Plugin=ERROR,DRFA +log4j.category.JPOX.MetaData=ERROR,DRFA +log4j.category.JPOX.Query=ERROR,DRFA +log4j.category.JPOX.General=ERROR,DRFA +log4j.category.JPOX.Enhancer=ERROR,DRFA +log4j.logger.org.apache.hadoop.conf.Configuration=ERROR,DRFA + diff --git a/fe/src/test/resources/sentry-site.xml b/fe/src/test/resources/sentry-site.xml new file mode 100644 index 0000000000..d9c76cde1d --- /dev/null +++ b/fe/src/test/resources/sentry-site.xml @@ -0,0 +1,61 @@ + + + + + + + sentry.service.security.mode + none + + + sentry.service.admin.group + root + + + sentry.service.server.rpc-address + localhost + + + sentry.service.server.rpc-port + 30911 + + + sentry.service.client.server.rpc-address + localhost + + + sentry.service.client.server.rpc-port + 30911 + + + sentry.store.jdbc.url + jdbc:postgresql://localhost:5432/sentry_policy/;create=true + + + sentry.store.jdbc.user + hiveuser + + + sentry.store.jdbc.password + password + + + sentry.verify.schema.version + false + + From 24e0d481b25fe972edef931119b360c5bab8fa59 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 8 Jan 2015 11:12:36 +0000 Subject: [PATCH 15/15] Running Knn Query. Running Knn query with format: LOAD POINTS FROM TABLE [table_name] KNN [point(x,y)] WITH_K [k] WITH_DIST [distance equation/UDF] --- fe/src/main/cup/sql-parser.y | 10 +++++----- .../com/cloudera/impala/analysis/LimitElement.java | 2 +- .../gistic/spatialImpala/analysis/SpatialKnnStmt.java | 10 +++++++--- .../org/gistic/spatialImpala/catalog/GlobalIndex.java | 10 +++++----- .../java/org/gistic/spatialImpala/catalog/Point.java | 6 +++--- .../org/gistic/spatialImpala/catalog/Rectangle.java | 8 ++++---- fe/src/main/jflex/sql-scanner.flex | 1 + 7 files changed, 26 insertions(+), 21 deletions(-) diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index cade8407fb..1bd79527f9 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -14,7 +14,7 @@ package com.cloudera.impala.analysis; -import org.gistoc.spatialImpala.catalog.Point; +import org.gistic.spatialImpala.catalog.Point; import org.gistic.spatialImpala.catalog.Rectangle; import org.gistic.spatialImpala.analysis.SpatialPointInclusionStmt; import org.gistic.spatialImpala.analysis.SpatialKnnStmt; @@ -256,7 +256,7 @@ terminal KW_TEXTFILE, KW_THEN, KW_TIMESTAMP, KW_TINYINT, KW_STATS, KW_TO, KW_TRUE, KW_UNBOUNDED, KW_UNCACHED, KW_UNION, KW_UPDATE_FN, KW_USE, KW_USING, - KW_VALUES, KW_VARCHAR, KW_VIEW, KW_WHEN, KW_WHERE, KW_WITH, KW_WITHK; + KW_VALUES, KW_VARCHAR, KW_VIEW, KW_WHEN, KW_WHERE, KW_WITH, KW_WITHDIST, KW_WITHK; terminal COLON, COMMA, DOT, DOTDOTDOT, STAR, LPAREN, RPAREN, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; @@ -574,8 +574,8 @@ spatial_point_inclusion_stmt ::= ; spatial_knn_stmt ::= - LW_LOAD KW_POINTS KW_FROM KW_TABLE table_name:tbl KW_KNN point:p KW_WITHK expr:k - {: RESULT = new SpatialKnnStmt(tbl,p,k) :} + KW_LOAD KW_POINTS KW_FROM KW_TABLE table_name:tbl KW_KNN point:p KW_WITHK expr:k KW_WITHDIST order_by_elements:dist + {: RESULT = new SpatialKnnStmt(tbl,p,new LimitElement(k, null), dist); :} ; rectangle ::= @@ -587,7 +587,7 @@ rectangle ::= point ::= KW_POINT LPAREN point_arg:x COMMA point_arg:y RPAREN - {: RESULT = new Point(X.getDoubleValue(), y1.getDoubleValue()); :} + {: RESULT = new Point(x.getDoubleValue(), y.getDoubleValue()); :} ; rectangle_arg ::= diff --git a/fe/src/main/java/com/cloudera/impala/analysis/LimitElement.java b/fe/src/main/java/com/cloudera/impala/analysis/LimitElement.java index f2f1ef3c57..f7e211b867 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/LimitElement.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/LimitElement.java @@ -23,7 +23,7 @@ /** * Combination of limit and offset expressions. */ -class LimitElement { +public class LimitElement { private final Expr limitExpr_; private final Expr offsetExpr_; private long limit_; diff --git a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java index 5ca1c4dccf..781bc19408 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java +++ b/fe/src/main/java/org/gistic/spatialImpala/analysis/SpatialKnnStmt.java @@ -22,6 +22,8 @@ import com.cloudera.impala.analysis.IsNullPredicate; import com.cloudera.impala.analysis.StringLiteral; import com.cloudera.impala.analysis.NumericLiteral; +import com.cloudera.impala.analysis.LimitElement; +import com.cloudera.impala.analysis.OrderByElement; import java.util.List; import java.util.ArrayList; @@ -37,15 +39,17 @@ public class SpatialKnnStmt extends StatementBase { private static final String Y = "y"; private TableName tableName_; private final Point p_; - private final int k_; + private final LimitElement k_; + private final ArrayList dist_; // Initialized during analysis. private SelectStmt selectStmt_; - public SpatialKnnStmt(TableName tblName, Point p, int k) { + public SpatialKnnStmt(TableName tblName, Point p, LimitElement k, ArrayList dist) { this.tableName_ = tblName; this.p_ = p; this.k_ = k; + this.dist_=dist; this.selectStmt_ = null; } @@ -89,7 +93,7 @@ public void analyze(Analyzer analyzer) throws AnalysisException { items.add(new SelectListItem(new SlotRef(tableName_, Y), null)); selectStmt_ = new SelectStmt(new SelectList(items), tableRefs, - createWherePredicate(globalIndexes), null, null, null, new LimitElement(new NumericLiteral(BigDecimal.valueOf(this.k_)), null)); + createWherePredicate(globalIndexes), null, null, dist_, k_); selectStmt_.analyze(analyzer); } diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java index ed2dbce4a4..934ed83a93 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/GlobalIndex.java @@ -49,15 +49,15 @@ public List getGIsforPoint(int x, int y) { return globalIndexes; } - public List getGIsforKnn(Point p) { - int maxdist = 0; + public List getGIsforKnn(Point pt) { + double maxdist = 0; for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { - if (gIRecord.getMBR().includesPoint(p.getX(), p.getY())) - maxdist = Math.max(maxdist, gIRecord.getMaxDist(p.getX(), p.getY())); + if (gIRecord.getMBR().includesPoint(pt.getX(), pt.getY())) + maxdist = Math.max(maxdist, gIRecord.getMBR().getMaxDist(pt.getX(), pt.getY())); } List globalIndexes = new ArrayList(); for (GlobalIndexRecord gIRecord : globalIndexMap.values()) { - if (gIRecord.getMBR().getMinDist(p.getX(), p.getY()) < maxdist ) + if (gIRecord.getMBR().getMinDist(pt.getX(), pt.getY()) < maxdist ) globalIndexes.add(gIRecord); } return globalIndexes; diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java index 1cb0667820..48716b0944 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Point.java @@ -14,18 +14,18 @@ public Point(double x, double y) { } public TPoint toThrift() { - return new TRectangle(this.x, this.y); + return new TPoint(this.x, this.y); } public static Point fromThrift(TPoint p) { return new Point(p.getX(), p.getY()); } - public double getX1() { + public double getX() { return x; } - public double getY1() { + public double getY() { return y; } diff --git a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java index 8f861fc305..73114f7225 100644 --- a/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java +++ b/fe/src/main/java/org/gistic/spatialImpala/catalog/Rectangle.java @@ -17,19 +17,19 @@ public Rectangle(double x1, double y1, double x2, double y2) { this.y2 = y2; } - public boolean includesPoint(int x, int y) { + public boolean includesPoint(double x, double y) { return (x >= x1) && (x <= x2) && (y >= y1) && (y <= y2); } - public double getMaxDist(int x, int y) { + public double getMaxDist(double x, double y) { return Math.max(Math.max(dist(x,y,x1,y1),dist(x,y,x2,y1)),Math.max(dist(x,y,x1,y2),dist(x,y,x2,y2))); } - public double getMinDist(int x, int y) { + public double getMinDist(double x, double y) { return Math.min(Math.min(dist(x,y,x1,y1),dist(x,y,x2,y1)),Math.min(dist(x,y,x1,y2),dist(x,y,x2,y2))); } - private double dist(int x1, int y1, int X2, int y2) { + private double dist(double x1, double y1, double X2, double y2) { return Math.sqrt( (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2) ); } diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index 5af3816e23..904bba623a 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -218,6 +218,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("when", new Integer(SqlParserSymbols.KW_WHEN)); keywordMap.put("where", new Integer(SqlParserSymbols.KW_WHERE)); keywordMap.put("with", new Integer(SqlParserSymbols.KW_WITH)); + keywordMap.put("with_dist", new Integer(SqlParserSymbols.KW_WITHDIST)); keywordMap.put("with_k", new Integer(SqlParserSymbols.KW_WITHK)); }