From 06b1f02430d982f19759ead8286ebd495629dece Mon Sep 17 00:00:00 2001 From: XingY Date: Thu, 17 Jul 2025 16:10:34 -0700 Subject: [PATCH 1/7] Issue 52657: sample name case sensitivity query performance --- .../labkey/api/data/PropertyStorageSpec.java | 16 +++++++++++-- .../labkey/api/data/SchemaColumnMetaData.java | 7 ++++++ .../data/dialect/BasePostgreSqlDialect.java | 24 ++++++++++--------- .../labkey/api/data/dialect/SqlDialect.java | 3 ++- .../api/exp/api/SampleTypeDomainKind.java | 3 ++- .../experiment/api/DataClassDomainKind.java | 6 +++-- .../api/property/StorageProvisionerImpl.java | 5 +++- 7 files changed, 46 insertions(+), 18 deletions(-) diff --git a/api/src/org/labkey/api/data/PropertyStorageSpec.java b/api/src/org/labkey/api/data/PropertyStorageSpec.java index 66a40909c0e..2f7c9667399 100644 --- a/api/src/org/labkey/api/data/PropertyStorageSpec.java +++ b/api/src/org/labkey/api/data/PropertyStorageSpec.java @@ -431,6 +431,7 @@ public static class Index { final public String[] columnNames; final public boolean isUnique; + final public boolean isCaseInsensitive; // use for postgres only final public boolean isClustered; public Index(boolean unique, String... columnNames) @@ -438,6 +439,7 @@ public Index(boolean unique, String... columnNames) this.columnNames = columnNames; this.isUnique = unique; this.isClustered = false; + this.isCaseInsensitive = false; } /** If the set of names refers to propertydescriptors, then the PropertyDescriptors must be provided */ @@ -451,7 +453,7 @@ public Index translateToStorageNames(Domain domain) throw new IllegalStateException("Property " + propertyName + " has no storage column name"); return null == pd ? propertyName : pd.getStorageColumnName(); }).toArray(String[]::new); - return new Index(isUnique, isClustered, translatedNames); + return new Index(isUnique, isClustered, isCaseInsensitive, translatedNames); } public Index(boolean unique, Collection columnNames) @@ -459,6 +461,7 @@ public Index(boolean unique, Collection columnNames) this.columnNames = columnNames.toArray(new String[0]); this.isUnique = unique; this.isClustered = false; + this.isCaseInsensitive = false; } public Index(boolean unique, boolean clustered, String... columnNames) @@ -466,6 +469,15 @@ public Index(boolean unique, boolean clustered, String... columnNames) this.columnNames = columnNames; this.isUnique = unique; this.isClustered = clustered; + this.isCaseInsensitive = false; + } + + public Index(boolean unique, boolean clustered, boolean caseInsensitive, String... columnNames) + { + this.columnNames = columnNames; + this.isUnique = unique; + this.isClustered = clustered; + this.isCaseInsensitive = caseInsensitive; } /** @@ -474,7 +486,7 @@ public Index(boolean unique, boolean clustered, String... columnNames) */ public static boolean isSameIndex(Index propertyIndex, Index tableIndex) { - if (propertyIndex.isUnique != tableIndex.isUnique || propertyIndex.columnNames.length != tableIndex.columnNames.length) + if (propertyIndex.isCaseInsensitive != tableIndex.isCaseInsensitive || propertyIndex.isUnique != tableIndex.isUnique || propertyIndex.columnNames.length != tableIndex.columnNames.length) return false; Set piColumns = new CaseInsensitiveHashSet(Arrays.asList(propertyIndex.columnNames)); Set tiColumns = new CaseInsensitiveHashSet(Arrays.asList(tableIndex.columnNames)); diff --git a/api/src/org/labkey/api/data/SchemaColumnMetaData.java b/api/src/org/labkey/api/data/SchemaColumnMetaData.java index cba8e1638f2..5de634a960d 100644 --- a/api/src/org/labkey/api/data/SchemaColumnMetaData.java +++ b/api/src/org/labkey/api/data/SchemaColumnMetaData.java @@ -364,6 +364,13 @@ private void loadIndices(SchemaTableInfo ti) throws SQLException } ColumnInfo colInfo = getColumn(colName); + if (colInfo == null && colName.startsWith("lower(")) + { + colInfo = getColumn(colName.substring("lower(".length(), colName.length() - 1)); + if (colInfo == null && colName.endsWith(")::text)")) + colInfo = getColumn(colName.substring("lower(".length(), colName.length() - ")::text)".length())); + } + // Column will be null for indices over expressions, eg.: "lower(name)" if (colInfo == null) ignoreIndex.add(indexName); diff --git a/api/src/org/labkey/api/data/dialect/BasePostgreSqlDialect.java b/api/src/org/labkey/api/data/dialect/BasePostgreSqlDialect.java index f341051b1e8..662e7af7f78 100644 --- a/api/src/org/labkey/api/data/dialect/BasePostgreSqlDialect.java +++ b/api/src/org/labkey/api/data/dialect/BasePostgreSqlDialect.java @@ -1182,8 +1182,8 @@ private List getRenameColumnsStatement(TableChange change) { PropertyStorageSpec.Index oldIndex = oldToNew.getKey(); PropertyStorageSpec.Index newIndex = oldToNew.getValue(); - String oldName = nameIndex(change.getTableName(), oldIndex.columnNames); - String newName = nameIndex(change.getTableName(), newIndex.columnNames); + String oldName = nameIndex(change.getTableName(), oldIndex); + String newName = nameIndex(change.getTableName(), newIndex); if (!oldName.equals(newName)) { statements.add(String.format("ALTER INDEX %s.%s RENAME TO %s", @@ -1336,22 +1336,23 @@ private void addCreateIndexStatements(List statements, TableChange chang { statements.add(String.format("CREATE %sINDEX %s ON %s (%s);", index.isUnique ? "UNIQUE " : "", - nameIndex(change.getTableName(), index.columnNames), + nameIndex(change.getTableName(), index), makeTableIdentifier(change), - makePropertyIdentifiers(index.columnNames))); + makePropertyIdentifiers(index))); if (index.isClustered) { statements.add(String.format("%s %s.%s USING %s", PropertyStorageSpec.CLUSTER_TYPE.CLUSTER, change.getSchemaName(), - change.getTableName(), nameIndex(change.getTableName(), index.columnNames))); + change.getTableName(), nameIndex(change.getTableName(), index))); } } } @Override - public String nameIndex(String tableName, String[] indexedColumns) + public String nameIndex(String tableName, PropertyStorageSpec.Index index) { - return AliasManager.makeLegalName(tableName + '_' + StringUtils.join(indexedColumns, "_"), this); + String suffix = index.isCaseInsensitive ? "_lower" : ""; + return AliasManager.makeLegalName(tableName + '_' + StringUtils.join(index.columnNames, "_") + suffix, this); } private String getSqlColumnSpec(PropertyStorageSpec prop) @@ -1435,14 +1436,15 @@ else if (prop.getJdbcType() == JdbcType.VARCHAR && (prop.getSize() == -1 || prop } } - // Create comma-separated list of property identifiers - private String makePropertyIdentifiers(String[] names) + // Create comma-separated list of property identifiers for index + private String makePropertyIdentifiers(PropertyStorageSpec.Index index) { String sep = ""; StringBuilder sb = new StringBuilder(); - for (String name : names) + boolean isCaseInsensitive = index.isCaseInsensitive; + for (String name : index.columnNames) { - sb.append(sep).append(makePropertyIdentifier(name)); + sb.append(sep).append(isCaseInsensitive ? "lower(" : "").append(makePropertyIdentifier(name)).append(isCaseInsensitive ? ")" : ""); sep = ", "; } return sb.toString(); diff --git a/api/src/org/labkey/api/data/dialect/SqlDialect.java b/api/src/org/labkey/api/data/dialect/SqlDialect.java index 537929ef88e..48d70502009 100644 --- a/api/src/org/labkey/api/data/dialect/SqlDialect.java +++ b/api/src/org/labkey/api/data/dialect/SqlDialect.java @@ -149,7 +149,8 @@ public DatabaseTableType getTableType(String tableTypeName) return description; } - public String nameIndex(String tableName, String[] indexedColumns){ + public String nameIndex(String tableName, PropertyStorageSpec.Index index) + { throw new UnsupportedOperationException("Update " + this.getClass().getSimpleName() + " to add this functionality"); } diff --git a/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java b/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java index e3c55d3286c..039244d2762 100644 --- a/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java +++ b/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java @@ -152,7 +152,8 @@ public class SampleTypeDomainKind extends AbstractDomainKind getRequiredIndices(Domain domain, for (PropertyStorageSpec.Index index : requiredIndices) { + if (sqlDialect.isSqlServer() && index.isCaseInsensitive) // skip case insensitive index for sql server + continue; + // TODO: Bad!! Shouldn't be making up an index name here! Ideally, we use the AbstractAuditTypeProvider.updateIndices() approach instead. - requiredIndicesMap.put(sqlDialect.nameIndex(storageTableName, index.columnNames), index); + requiredIndicesMap.put(sqlDialect.nameIndex(storageTableName, index), index); } return requiredIndicesMap; } From 0ec4a37cafe642ba006861a976d39e84e816889b Mon Sep 17 00:00:00 2001 From: XingY Date: Thu, 17 Jul 2025 19:50:07 -0700 Subject: [PATCH 2/7] batch validate duplicate names --- .../labkey/api/data/SchemaColumnMetaData.java | 2 +- .../DuplicateNameCheckDataIterator.java | 143 ++++++++++++++++++ .../labkey/api/exp/api/ExperimentService.java | 4 + .../labkey/experiment/ExpDataIterators.java | 81 +--------- .../experiment/api/ExperimentServiceImpl.java | 47 +++++- 5 files changed, 196 insertions(+), 81 deletions(-) create mode 100644 api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java diff --git a/api/src/org/labkey/api/data/SchemaColumnMetaData.java b/api/src/org/labkey/api/data/SchemaColumnMetaData.java index 5de634a960d..88339766c5e 100644 --- a/api/src/org/labkey/api/data/SchemaColumnMetaData.java +++ b/api/src/org/labkey/api/data/SchemaColumnMetaData.java @@ -368,7 +368,7 @@ private void loadIndices(SchemaTableInfo ti) throws SQLException { colInfo = getColumn(colName.substring("lower(".length(), colName.length() - 1)); if (colInfo == null && colName.endsWith(")::text)")) - colInfo = getColumn(colName.substring("lower(".length(), colName.length() - ")::text)".length())); + colInfo = getColumn(colName.substring("lower((".length(), colName.length() - ")::text)".length())); } // Column will be null for indices over expressions, eg.: "lower(name)" diff --git a/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java b/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java new file mode 100644 index 00000000000..e0e8d0c6a0b --- /dev/null +++ b/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java @@ -0,0 +1,143 @@ +package org.labkey.api.dataiterator; + +import org.apache.commons.lang3.StringUtils; +import org.labkey.api.data.TableInfo; +import org.labkey.api.exp.api.ExperimentService; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.ValidationException; + +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +public class DuplicateNameCheckDataIterator extends WrapperDataIterator +{ + final static String NAME_FIELD = "name"; + private final DataIteratorContext _context; + private final Integer _nameCol; + private final Integer _lsidCol; + private final TableInfo _tableInfo; + private final boolean _isUpdateUsingLsid; + final CachingDataIterator _unwrapped; + int lastPrefetchRowNumber = -1; + + public DuplicateNameCheckDataIterator(DataIterator di, DataIteratorContext context, boolean isUpdateUsingLsid, TableInfo tableInfo) + { + super(di); + this._unwrapped = (CachingDataIterator)di; + _context = context; + _tableInfo = tableInfo; + _isUpdateUsingLsid = isUpdateUsingLsid; + Map map = DataIteratorUtil.createColumnNameMap(di); + _nameCol = map.get(NAME_FIELD); + _lsidCol = map.get("lsid"); + } + + protected void checkNames() throws BatchValidationException + { + Integer rowNumber = (Integer)_delegate.get(0); + if (rowNumber <= lastPrefetchRowNumber) + return; + + int rowsToFetch = 50; + Set names = new HashSet<>(); + Map nameLsidMap = new LinkedHashMap<>(); + do + { + lastPrefetchRowNumber = (Integer) _delegate.get(0); + + if (_nameCol == null) + continue; + + Object nameObj = get(_nameCol); + if (nameObj == null) + continue; + + String newName = String.valueOf(nameObj); + if (StringUtils.isEmpty(newName)) + continue; + + Map existingValues = getExistingRecord(); + if (existingValues != null && !existingValues.isEmpty() && existingValues.get(NAME_FIELD).equals(newName)) + continue; + + names.add(newName); + + if (_isUpdateUsingLsid && _lsidCol != null) + { + Object lsidObj = get(_lsidCol); + if (lsidObj != null) + { + String lsid = String.valueOf(lsidObj); + if (!StringUtils.isEmpty(lsid)) + nameLsidMap.put(newName, lsid); + } + } + } + while (--rowsToFetch > 0 && _delegate.next()); + + if (!names.isEmpty()) + { + String duplicateName = null; + if (!_context.getInsertOption().allowUpdate) // insert new + { + duplicateName = ExperimentService.get().getDuplicateNewOrExistingNames(names, _tableInfo, false); + } + else if (_isUpdateUsingLsid && _lsidCol != null) // update using rowId is not yet supported for DIB + { + Set newOrExistingNamesLc = new HashSet<>(); + for (String name : nameLsidMap.keySet()) + { + String newOrExistingNameLc = name.toLowerCase(); + if (newOrExistingNamesLc.contains(newOrExistingNameLc)) + { + duplicateName = name; + break; + } + newOrExistingNamesLc.add(newOrExistingNameLc); + } + + for (String name : nameLsidMap.keySet()) + { + if (!ExperimentService.get().canRename(nameLsidMap.get(name), name, _tableInfo)) + { + duplicateName = name; + break; + } + } + } + else if (_context.getInsertOption().mergeRows) // merge + { + duplicateName = ExperimentService.get().getDuplicateNewOrExistingNames(names, _tableInfo, true); + } + + if (duplicateName != null) + { + String error = String.format("The name '%s' already exists.", duplicateName); + if (_context.getInsertOption().mergeRows) + error = String.format("The name '%s' could not be resolved. Please check the casing of the provided name.", duplicateName); + _context.getErrors().addRowError(new ValidationException(error)); + } + + } + + // backup to where we started so caller can iterate through them one at a time + _unwrapped.reset(); // unwrapped _delegate + _delegate.next(); + } + + @Override + public boolean next() throws BatchValidationException + { + // NOTE: we have to call mark() before we call next() if we want the 'next' row to be cached + _unwrapped.mark(); // unwrapped _delegate + boolean ret = super.next(); + + if (ret) + checkNames(); + + return ret; + } + +} diff --git a/api/src/org/labkey/api/exp/api/ExperimentService.java b/api/src/org/labkey/api/exp/api/ExperimentService.java index 5e369154827..5999cd0a75c 100644 --- a/api/src/org/labkey/api/exp/api/ExperimentService.java +++ b/api/src/org/labkey/api/exp/api/ExperimentService.java @@ -140,6 +140,10 @@ static void setInstance(ExperimentService impl) boolean canMoveFileReference(User user, Container sourceContainer, File sourceFile, int moveCount); + String getDuplicateNewOrExistingNames(Set newOrExistingNames, @NotNull TableInfo tableInfo, boolean allowExisting); + + boolean canRename(String lsid, String newName, TableInfo tableInfo); + enum QueryOptions { UseLsidForUpdate, diff --git a/experiment/src/org/labkey/experiment/ExpDataIterators.java b/experiment/src/org/labkey/experiment/ExpDataIterators.java index b5c8ad6509f..c2622f9c754 100644 --- a/experiment/src/org/labkey/experiment/ExpDataIterators.java +++ b/experiment/src/org/labkey/experiment/ExpDataIterators.java @@ -41,10 +41,12 @@ import org.labkey.api.data.UpdateableTableInfo; import org.labkey.api.data.validator.ColumnValidator; import org.labkey.api.data.validator.RequiredValidator; +import org.labkey.api.dataiterator.CachingDataIterator; import org.labkey.api.dataiterator.DataIterator; import org.labkey.api.dataiterator.DataIteratorBuilder; import org.labkey.api.dataiterator.DataIteratorContext; import org.labkey.api.dataiterator.DataIteratorUtil; +import org.labkey.api.dataiterator.DuplicateNameCheckDataIterator; import org.labkey.api.dataiterator.ErrorIterator; import org.labkey.api.dataiterator.ExistingRecordDataIterator; import org.labkey.api.dataiterator.LoggingDataIterator; @@ -3071,84 +3073,7 @@ public DuplicateNameCheckIteratorBuilder(@NotNull DataIteratorBuilder in, boolea public DataIterator getDataIterator(DataIteratorContext context) { DataIterator pre = _in.getDataIterator(context); - return LoggingDataIterator.wrap(new DuplicateNameCheckDataIterator(pre, context, _isUpdateUsingLsid, _tableInfo)); - } - } - - public static class DuplicateNameCheckDataIterator extends WrapperDataIterator - { - final static String NAME_FIELD = "name"; - private final DataIteratorContext _context; - private final Integer _nameCol; - private final Integer _lsidCol; - private final TableInfo _tableInfo; - private final boolean _isUpdateUsingLsid; - - protected DuplicateNameCheckDataIterator(DataIterator di, DataIteratorContext context, boolean isUpdateUsingLsid, TableInfo tableInfo) - { - super(di); - _context = context; - _tableInfo = tableInfo; - _isUpdateUsingLsid = isUpdateUsingLsid; - Map map = DataIteratorUtil.createColumnNameMap(di); - _nameCol = map.get(NAME_FIELD); - _lsidCol = map.get("lsid"); - } - - @Override - public boolean next() throws BatchValidationException - { - boolean hasNext = super.next(); - if (!hasNext) - return false; - - if (_context.getErrors().hasErrors()) - return hasNext; - - if (_nameCol == null) - return hasNext; - - Object nameObj = get(_nameCol); - if (nameObj == null) - return hasNext; - - String newName = String.valueOf(nameObj); - if (StringUtils.isEmpty(newName)) - return hasNext; - - Map existingValues = getExistingRecord(); - if (existingValues != null && !existingValues.isEmpty() && existingValues.get(NAME_FIELD).equals(newName)) - return hasNext; - - boolean isNameValid = true; - if (!_context.getInsertOption().allowUpdate) // insert new - { - isNameValid = ExperimentServiceImpl.get().isValidNewOrExistingName(newName, _tableInfo, false); - } - else if (_isUpdateUsingLsid && _lsidCol != null) // update using rowId is not yet supported for DIB - { - Object lsidObj = get(_lsidCol); - if (lsidObj != null) - { - String lsid = String.valueOf(lsidObj); - if (!StringUtils.isEmpty(lsid) && !ExperimentServiceImpl.get().canRename(lsid, newName, _tableInfo)) - isNameValid = false; - } - } - else if (_context.getInsertOption().mergeRows) // merge - { - isNameValid = ExperimentServiceImpl.get().isValidNewOrExistingName(newName, _tableInfo,true); - } - - if (!isNameValid) - { - String error = String.format("The name '%s' already exists.", newName); - if (_context.getInsertOption().mergeRows) - error = String.format("The name '%s' could not be resolved. Please check the casing of the provided name.", newName); - _context.getErrors().addRowError(new ValidationException(error)); - } - - return hasNext; + return LoggingDataIterator.wrap(new DuplicateNameCheckDataIterator(new CachingDataIterator(pre), context, _isUpdateUsingLsid, _tableInfo)); } } diff --git a/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java b/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java index 0409a0d1fd3..f7510f57371 100644 --- a/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java +++ b/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java @@ -9142,7 +9142,7 @@ public Map> getDomainMetrics() return metrics; } - public boolean isValidNewOrExistingName(String newOrExistingName, @NotNull TableInfo tableInfo, boolean allowExisting) + public boolean isDuplicateNewOrExistingName(String newOrExistingName, @NotNull TableInfo tableInfo, boolean allowExisting) { SQLFragment dataRowSQL = new SQLFragment("SELECT name FROM ") .append(tableInfo) @@ -9155,9 +9155,52 @@ public boolean isValidNewOrExistingName(String newOrExistingName, @NotNull Table dataRowSQL.append(" COLLATE Latin1_General_BIN"); // force case sensitive comparison for <> operator to exclude existing name } - return !(new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).exists()); + return (new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).exists()); } + @Override + public String getDuplicateNewOrExistingNames(Set newOrExistingNames, @NotNull TableInfo tableInfo, boolean allowExisting) + { + if (newOrExistingNames == null || newOrExistingNames.isEmpty()) + return null; + + Map lowerToOriginalMap = new CaseInsensitiveHashMap<>(); + for (String newOrExistingName : newOrExistingNames) + { + String newOrExistingNameLc = newOrExistingName.toLowerCase(); + if (lowerToOriginalMap.containsKey(newOrExistingNameLc)) + return newOrExistingName; + lowerToOriginalMap.put(newOrExistingNameLc, newOrExistingName); + } + + SqlDialect dialect = tableInfo.getSqlDialect(); + SQLFragment dataRowSQL = new SQLFragment("SELECT name FROM ") + .append(tableInfo) + .append(" WHERE LOWER(name) "); + dialect.appendInClauseSql(dataRowSQL, lowerToOriginalMap.keySet()); + if (allowExisting) // allow existing name for merge + { + dataRowSQL.append(" AND name "); + if (dialect.isSqlServer()) + dataRowSQL.append("COLLATE Latin1_General_BIN "); // force case-sensitive comparison for <> operator to exclude existing name + dataRowSQL + .append("NOT IN(") + .append(StringUtils.repeat("?", ", ", newOrExistingNames.size())) + .append(")") + .addAll(newOrExistingNames); + } + + if (new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).exists()) + { + String duplicateName = new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).getArray(String.class)[0]; + return lowerToOriginalMap.get(duplicateName); // get original case that matches input + } + + return null; + } + + + @Override public boolean canRename(@NotNull String lsid, @NotNull String newName, @NotNull TableInfo tableInfo) { SQLFragment dataRowSQL = new SQLFragment("SELECT name FROM ") From 5648bf34453016c83665ad720984827d30b3e074 Mon Sep 17 00:00:00 2001 From: XingY Date: Fri, 18 Jul 2025 08:25:36 -0700 Subject: [PATCH 3/7] revert index --- api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java b/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java index 039244d2762..e3c55d3286c 100644 --- a/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java +++ b/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java @@ -152,8 +152,7 @@ public class SampleTypeDomainKind extends AbstractDomainKind Date: Fri, 18 Jul 2025 11:03:37 -0700 Subject: [PATCH 4/7] add index back --- api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java b/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java index e3c55d3286c..039244d2762 100644 --- a/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java +++ b/api/src/org/labkey/api/exp/api/SampleTypeDomainKind.java @@ -152,7 +152,8 @@ public class SampleTypeDomainKind extends AbstractDomainKind Date: Sat, 19 Jul 2025 12:26:17 -0700 Subject: [PATCH 5/7] Reject exact case match --- .../DuplicateNameCheckDataIterator.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java b/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java index e0e8d0c6a0b..858b7e0fa37 100644 --- a/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java +++ b/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java @@ -40,6 +40,7 @@ protected void checkNames() throws BatchValidationException if (rowNumber <= lastPrefetchRowNumber) return; + String duplicateName = null; int rowsToFetch = 50; Set names = new HashSet<>(); Map nameLsidMap = new LinkedHashMap<>(); @@ -62,7 +63,10 @@ protected void checkNames() throws BatchValidationException if (existingValues != null && !existingValues.isEmpty() && existingValues.get(NAME_FIELD).equals(newName)) continue; - names.add(newName); + if (names.contains(newName) && duplicateName == null) + duplicateName = newName; // reject exact case match + else + names.add(newName); if (_isUpdateUsingLsid && _lsidCol != null) { @@ -77,9 +81,8 @@ protected void checkNames() throws BatchValidationException } while (--rowsToFetch > 0 && _delegate.next()); - if (!names.isEmpty()) + if (!names.isEmpty() && duplicateName == null) { - String duplicateName = null; if (!_context.getInsertOption().allowUpdate) // insert new { duplicateName = ExperimentService.get().getDuplicateNewOrExistingNames(names, _tableInfo, false); @@ -111,17 +114,17 @@ else if (_context.getInsertOption().mergeRows) // merge { duplicateName = ExperimentService.get().getDuplicateNewOrExistingNames(names, _tableInfo, true); } + } - if (duplicateName != null) - { - String error = String.format("The name '%s' already exists.", duplicateName); - if (_context.getInsertOption().mergeRows) - error = String.format("The name '%s' could not be resolved. Please check the casing of the provided name.", duplicateName); - _context.getErrors().addRowError(new ValidationException(error)); - } - + if (duplicateName != null) + { + String error = String.format("The name '%s' already exists.", duplicateName); + if (_context.getInsertOption().mergeRows) + error = String.format("The name '%s' could not be resolved. Please check the casing of the provided name.", duplicateName); + _context.getErrors().addRowError(new ValidationException(error)); } + // backup to where we started so caller can iterate through them one at a time _unwrapped.reset(); // unwrapped _delegate _delegate.next(); From 28342b9fe90798004a033437f19cbef2ba7360c1 Mon Sep 17 00:00:00 2001 From: XingY Date: Sat, 19 Jul 2025 20:06:04 -0700 Subject: [PATCH 6/7] switch to use cache instead of batch due to name expression --- .../DuplicateNameCheckDataIterator.java | 146 ------------------ .../labkey/api/exp/api/ExperimentService.java | 4 - .../labkey/experiment/ExpDataIterators.java | 139 ++++++++++++++++- .../experiment/api/ExperimentServiceImpl.java | 47 +----- 4 files changed, 138 insertions(+), 198 deletions(-) delete mode 100644 api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java diff --git a/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java b/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java deleted file mode 100644 index 858b7e0fa37..00000000000 --- a/api/src/org/labkey/api/dataiterator/DuplicateNameCheckDataIterator.java +++ /dev/null @@ -1,146 +0,0 @@ -package org.labkey.api.dataiterator; - -import org.apache.commons.lang3.StringUtils; -import org.labkey.api.data.TableInfo; -import org.labkey.api.exp.api.ExperimentService; -import org.labkey.api.query.BatchValidationException; -import org.labkey.api.query.ValidationException; - -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.Set; - -public class DuplicateNameCheckDataIterator extends WrapperDataIterator -{ - final static String NAME_FIELD = "name"; - private final DataIteratorContext _context; - private final Integer _nameCol; - private final Integer _lsidCol; - private final TableInfo _tableInfo; - private final boolean _isUpdateUsingLsid; - final CachingDataIterator _unwrapped; - int lastPrefetchRowNumber = -1; - - public DuplicateNameCheckDataIterator(DataIterator di, DataIteratorContext context, boolean isUpdateUsingLsid, TableInfo tableInfo) - { - super(di); - this._unwrapped = (CachingDataIterator)di; - _context = context; - _tableInfo = tableInfo; - _isUpdateUsingLsid = isUpdateUsingLsid; - Map map = DataIteratorUtil.createColumnNameMap(di); - _nameCol = map.get(NAME_FIELD); - _lsidCol = map.get("lsid"); - } - - protected void checkNames() throws BatchValidationException - { - Integer rowNumber = (Integer)_delegate.get(0); - if (rowNumber <= lastPrefetchRowNumber) - return; - - String duplicateName = null; - int rowsToFetch = 50; - Set names = new HashSet<>(); - Map nameLsidMap = new LinkedHashMap<>(); - do - { - lastPrefetchRowNumber = (Integer) _delegate.get(0); - - if (_nameCol == null) - continue; - - Object nameObj = get(_nameCol); - if (nameObj == null) - continue; - - String newName = String.valueOf(nameObj); - if (StringUtils.isEmpty(newName)) - continue; - - Map existingValues = getExistingRecord(); - if (existingValues != null && !existingValues.isEmpty() && existingValues.get(NAME_FIELD).equals(newName)) - continue; - - if (names.contains(newName) && duplicateName == null) - duplicateName = newName; // reject exact case match - else - names.add(newName); - - if (_isUpdateUsingLsid && _lsidCol != null) - { - Object lsidObj = get(_lsidCol); - if (lsidObj != null) - { - String lsid = String.valueOf(lsidObj); - if (!StringUtils.isEmpty(lsid)) - nameLsidMap.put(newName, lsid); - } - } - } - while (--rowsToFetch > 0 && _delegate.next()); - - if (!names.isEmpty() && duplicateName == null) - { - if (!_context.getInsertOption().allowUpdate) // insert new - { - duplicateName = ExperimentService.get().getDuplicateNewOrExistingNames(names, _tableInfo, false); - } - else if (_isUpdateUsingLsid && _lsidCol != null) // update using rowId is not yet supported for DIB - { - Set newOrExistingNamesLc = new HashSet<>(); - for (String name : nameLsidMap.keySet()) - { - String newOrExistingNameLc = name.toLowerCase(); - if (newOrExistingNamesLc.contains(newOrExistingNameLc)) - { - duplicateName = name; - break; - } - newOrExistingNamesLc.add(newOrExistingNameLc); - } - - for (String name : nameLsidMap.keySet()) - { - if (!ExperimentService.get().canRename(nameLsidMap.get(name), name, _tableInfo)) - { - duplicateName = name; - break; - } - } - } - else if (_context.getInsertOption().mergeRows) // merge - { - duplicateName = ExperimentService.get().getDuplicateNewOrExistingNames(names, _tableInfo, true); - } - } - - if (duplicateName != null) - { - String error = String.format("The name '%s' already exists.", duplicateName); - if (_context.getInsertOption().mergeRows) - error = String.format("The name '%s' could not be resolved. Please check the casing of the provided name.", duplicateName); - _context.getErrors().addRowError(new ValidationException(error)); - } - - - // backup to where we started so caller can iterate through them one at a time - _unwrapped.reset(); // unwrapped _delegate - _delegate.next(); - } - - @Override - public boolean next() throws BatchValidationException - { - // NOTE: we have to call mark() before we call next() if we want the 'next' row to be cached - _unwrapped.mark(); // unwrapped _delegate - boolean ret = super.next(); - - if (ret) - checkNames(); - - return ret; - } - -} diff --git a/api/src/org/labkey/api/exp/api/ExperimentService.java b/api/src/org/labkey/api/exp/api/ExperimentService.java index 5999cd0a75c..5e369154827 100644 --- a/api/src/org/labkey/api/exp/api/ExperimentService.java +++ b/api/src/org/labkey/api/exp/api/ExperimentService.java @@ -140,10 +140,6 @@ static void setInstance(ExperimentService impl) boolean canMoveFileReference(User user, Container sourceContainer, File sourceFile, int moveCount); - String getDuplicateNewOrExistingNames(Set newOrExistingNames, @NotNull TableInfo tableInfo, boolean allowExisting); - - boolean canRename(String lsid, String newName, TableInfo tableInfo); - enum QueryOptions { UseLsidForUpdate, diff --git a/experiment/src/org/labkey/experiment/ExpDataIterators.java b/experiment/src/org/labkey/experiment/ExpDataIterators.java index c2622f9c754..6efd09da579 100644 --- a/experiment/src/org/labkey/experiment/ExpDataIterators.java +++ b/experiment/src/org/labkey/experiment/ExpDataIterators.java @@ -34,19 +34,19 @@ import org.labkey.api.data.DbScope; import org.labkey.api.data.NameGenerator; import org.labkey.api.data.RemapCache; +import org.labkey.api.data.SQLFragment; import org.labkey.api.data.SimpleFilter; +import org.labkey.api.data.SqlSelector; import org.labkey.api.data.TSVWriter; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; import org.labkey.api.data.UpdateableTableInfo; import org.labkey.api.data.validator.ColumnValidator; import org.labkey.api.data.validator.RequiredValidator; -import org.labkey.api.dataiterator.CachingDataIterator; import org.labkey.api.dataiterator.DataIterator; import org.labkey.api.dataiterator.DataIteratorBuilder; import org.labkey.api.dataiterator.DataIteratorContext; import org.labkey.api.dataiterator.DataIteratorUtil; -import org.labkey.api.dataiterator.DuplicateNameCheckDataIterator; import org.labkey.api.dataiterator.ErrorIterator; import org.labkey.api.dataiterator.ExistingRecordDataIterator; import org.labkey.api.dataiterator.LoggingDataIterator; @@ -3073,7 +3073,140 @@ public DuplicateNameCheckIteratorBuilder(@NotNull DataIteratorBuilder in, boolea public DataIterator getDataIterator(DataIteratorContext context) { DataIterator pre = _in.getDataIterator(context); - return LoggingDataIterator.wrap(new DuplicateNameCheckDataIterator(new CachingDataIterator(pre), context, _isUpdateUsingLsid, _tableInfo)); + return LoggingDataIterator.wrap(new DuplicateNameCheckDataIterator(pre, context, _isUpdateUsingLsid, _tableInfo)); + } + } + + public static class DuplicateNameCheckDataIterator extends WrapperDataIterator + { + final static String NAME_FIELD = "name"; + final static Integer INIT_CACHE_LIMIT = 100_000; + private final DataIteratorContext _context; + private final Integer _nameCol; + private final Integer _lsidCol; + private final TableInfo _tableInfo; + private final boolean _isUpdateUsingLsid; + private boolean _useNameCache = false; + private final Set newOrExistingName = new HashSet<>(); // use lower case + private final Set newOrExistingNameLc = new HashSet<>(); // keep case + + protected DuplicateNameCheckDataIterator(DataIterator di, DataIteratorContext context, boolean isUpdateUsingLsid, TableInfo tableInfo) + { + super(di); + _context = context; + _tableInfo = tableInfo; + _isUpdateUsingLsid = isUpdateUsingLsid; + Map map = DataIteratorUtil.createColumnNameMap(di); + _nameCol = map.get(NAME_FIELD); + _lsidCol = map.get("lsid"); + init(); + } + + private void init() + { + // do total count, if <= INIT_CACHE_LIMIT initial size, use cache + SQLFragment countSql = new SQLFragment("SELECT COUNT(*) FROM ").append(_tableInfo); + Long existingRows = new SqlSelector(ExperimentService.get().getSchema(), countSql).getObject(Long.class); + if (existingRows <= INIT_CACHE_LIMIT) + { + _useNameCache = true; + SQLFragment nameSql = new SQLFragment("SELECT DISTINCT(name) FROM ").append(_tableInfo); + String[] allExistingNames = new SqlSelector(ExperimentService.get().getSchema(), nameSql).getArray(String.class); + for (String existingName : allExistingNames) + { + newOrExistingNameLc.add(existingName.toLowerCase()); + if (_context.getInsertOption().mergeRows) + newOrExistingName.add(existingName); + } + } + } + + private boolean addNewName(String newName) + { + if (!_useNameCache) + return false; + + String newNameLc = newName.toLowerCase(); + if (!_context.getInsertOption().allowUpdate) // insert doesn't allow case different match + { + if (newOrExistingNameLc.contains(newNameLc)) + addNameError(newName); + newOrExistingNameLc.add(newNameLc); + return true; + } + else if (_context.getInsertOption().mergeRows) + { + if (newOrExistingName.contains(newName)) // OK to contain exact match, this is an update + return true; + if (!newName.equals(newNameLc) && newOrExistingNameLc.contains(newNameLc)) // not ok to contain case different match + addNameError(newName); + newOrExistingNameLc.add(newNameLc); + return true; + } + + return false; // for update action, always use sql + } + + private void addNameError(String newName) + { + String error = String.format("The name '%s' already exists.", newName); + if (_context.getInsertOption().mergeRows) + error = String.format("The name '%s' could not be resolved. Please check the casing of the provided name.", newName); + _context.getErrors().addRowError(new ValidationException(error)); + } + + @Override + public boolean next() throws BatchValidationException + { + boolean hasNext = super.next(); + if (!hasNext) + return false; + + if (_context.getErrors().hasErrors()) + return hasNext; + + if (_nameCol == null) + return hasNext; + + Object nameObj = get(_nameCol); + if (nameObj == null) + return hasNext; + + String newName = String.valueOf(nameObj); + if (StringUtils.isEmpty(newName)) + return hasNext; + + if (addNewName(newName)) // if name can be validated by cache, skip sql query + return hasNext; + + Map existingValues = getExistingRecord(); + if (existingValues != null && !existingValues.isEmpty() && existingValues.get(NAME_FIELD).equals(newName)) + return hasNext; + + boolean isNameValid = true; + if (!_context.getInsertOption().allowUpdate) // insert new + { + isNameValid = ExperimentServiceImpl.get().isValidNewOrExistingName(newName, _tableInfo, false); + } + else if (_isUpdateUsingLsid && _lsidCol != null) // update using rowId is not yet supported for DIB + { + Object lsidObj = get(_lsidCol); + if (lsidObj != null) + { + String lsid = String.valueOf(lsidObj); + if (!StringUtils.isEmpty(lsid) && !ExperimentServiceImpl.get().canRename(lsid, newName, _tableInfo)) + isNameValid = false; + } + } + else if (_context.getInsertOption().mergeRows) // merge + { + isNameValid = ExperimentServiceImpl.get().isValidNewOrExistingName(newName, _tableInfo,true); + } + + if (!isNameValid) + addNameError(newName); + + return hasNext; } } diff --git a/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java b/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java index f7510f57371..0409a0d1fd3 100644 --- a/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java +++ b/experiment/src/org/labkey/experiment/api/ExperimentServiceImpl.java @@ -9142,7 +9142,7 @@ public Map> getDomainMetrics() return metrics; } - public boolean isDuplicateNewOrExistingName(String newOrExistingName, @NotNull TableInfo tableInfo, boolean allowExisting) + public boolean isValidNewOrExistingName(String newOrExistingName, @NotNull TableInfo tableInfo, boolean allowExisting) { SQLFragment dataRowSQL = new SQLFragment("SELECT name FROM ") .append(tableInfo) @@ -9155,52 +9155,9 @@ public boolean isDuplicateNewOrExistingName(String newOrExistingName, @NotNull T dataRowSQL.append(" COLLATE Latin1_General_BIN"); // force case sensitive comparison for <> operator to exclude existing name } - return (new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).exists()); - } - - @Override - public String getDuplicateNewOrExistingNames(Set newOrExistingNames, @NotNull TableInfo tableInfo, boolean allowExisting) - { - if (newOrExistingNames == null || newOrExistingNames.isEmpty()) - return null; - - Map lowerToOriginalMap = new CaseInsensitiveHashMap<>(); - for (String newOrExistingName : newOrExistingNames) - { - String newOrExistingNameLc = newOrExistingName.toLowerCase(); - if (lowerToOriginalMap.containsKey(newOrExistingNameLc)) - return newOrExistingName; - lowerToOriginalMap.put(newOrExistingNameLc, newOrExistingName); - } - - SqlDialect dialect = tableInfo.getSqlDialect(); - SQLFragment dataRowSQL = new SQLFragment("SELECT name FROM ") - .append(tableInfo) - .append(" WHERE LOWER(name) "); - dialect.appendInClauseSql(dataRowSQL, lowerToOriginalMap.keySet()); - if (allowExisting) // allow existing name for merge - { - dataRowSQL.append(" AND name "); - if (dialect.isSqlServer()) - dataRowSQL.append("COLLATE Latin1_General_BIN "); // force case-sensitive comparison for <> operator to exclude existing name - dataRowSQL - .append("NOT IN(") - .append(StringUtils.repeat("?", ", ", newOrExistingNames.size())) - .append(")") - .addAll(newOrExistingNames); - } - - if (new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).exists()) - { - String duplicateName = new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).getArray(String.class)[0]; - return lowerToOriginalMap.get(duplicateName); // get original case that matches input - } - - return null; + return !(new SqlSelector(ExperimentService.get().getSchema(), dataRowSQL).exists()); } - - @Override public boolean canRename(@NotNull String lsid, @NotNull String newName, @NotNull TableInfo tableInfo) { SQLFragment dataRowSQL = new SQLFragment("SELECT name FROM ") From f0a657d47875a3c786e6018e3c83c05cab0c0b23 Mon Sep 17 00:00:00 2001 From: XingY Date: Sun, 20 Jul 2025 19:50:25 -0700 Subject: [PATCH 7/7] clean --- .../src/org/labkey/experiment/ExpDataIterators.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/experiment/src/org/labkey/experiment/ExpDataIterators.java b/experiment/src/org/labkey/experiment/ExpDataIterators.java index 6efd09da579..ab106da2a8b 100644 --- a/experiment/src/org/labkey/experiment/ExpDataIterators.java +++ b/experiment/src/org/labkey/experiment/ExpDataIterators.java @@ -3099,12 +3099,12 @@ protected DuplicateNameCheckDataIterator(DataIterator di, DataIteratorContext co Map map = DataIteratorUtil.createColumnNameMap(di); _nameCol = map.get(NAME_FIELD); _lsidCol = map.get("lsid"); - init(); + initCache(); } - private void init() + private void initCache() { - // do total count, if <= INIT_CACHE_LIMIT initial size, use cache + // if existing total row <= INIT_CACHE_LIMIT initial size, use cache SQLFragment countSql = new SQLFragment("SELECT COUNT(*) FROM ").append(_tableInfo); Long existingRows = new SqlSelector(ExperimentService.get().getSchema(), countSql).getObject(Long.class); if (existingRows <= INIT_CACHE_LIMIT) @@ -3121,7 +3121,7 @@ private void init() } } - private boolean addNewName(String newName) + private boolean validateNameUsingCache(String newName) { if (!_useNameCache) return false; @@ -3176,7 +3176,7 @@ public boolean next() throws BatchValidationException if (StringUtils.isEmpty(newName)) return hasNext; - if (addNewName(newName)) // if name can be validated by cache, skip sql query + if (validateNameUsingCache(newName)) return hasNext; Map existingValues = getExistingRecord();