/*
 * Provenance: consolidated final state of a four-commit patch series by Dan Galdi (2024-05-13):
 *   1/4 Implement plugin to fixup biom migration for VDI
 *   2/4 Log before migration
 *   3/4 Only process UDs
 *   4/4 Fixed the var ID regex
 */
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.apache.log4j.Logger;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONObject;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Analysis-table updater plugin that rewrites variable IDs inside analysis descriptors
 * according to a mapping file.
 *
 * <p>Arguments (see {@link #configure(WdkModel, List)}):
 * <ol>
 *   <li>path to a comma-separated mapping file, one {@code currentVariableId,newVariableId}
 *       pair per line;</li>
 *   <li>optional {@code -write}; without it the plugin only logs what it would do.</li>
 * </ol>
 *
 * <p>Only rows whose dataset ID starts with {@code EDAUD} are touched (presumably EDA user
 * datasets, per the originating commit "Only process UDs" - confirm against the data model).
 */
public class BiomVdiMigrationFixer extends AbstractAnalysisUpdater {
  private static final Logger LOG = Logger.getLogger(BiomVdiMigrationFixer.class);

  /** Captures the value of every {@code "variableId": "..."} entry in a serialized descriptor. */
  private static final Pattern VAR_ID_PATTERN = Pattern.compile("variableId\":\\s*\"([a-zA-Z0-9\\._-]+)");

  /** Dataset ID prefix of the rows this plugin is allowed to modify. */
  private static final String USER_DATASET_ID_PREFIX = "EDAUD";

  /** Current variable ID to replacement variable ID, loaded from the mapping file. */
  private Map<String, String> fixedUpMapping;

  /**
   * Loads the ID mapping and decides whether changes are written to the database.
   *
   * @param wdkModel model the updater runs against (not used by this plugin)
   * @param additionalArgs {@code [mappingFile]} or {@code [mappingFile, "-write"]}
   * @throws IllegalArgumentException if the mapping file argument is missing or a mapping
   *         line is malformed
   * @throws IOException if the mapping file cannot be read
   */
  @Override
  public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception {
    if (additionalArgs == null || additionalArgs.isEmpty()) {
      throw new IllegalArgumentException(
          "Missing required argument. Usage: <variableIdMappingFile> [-write]");
    }
    this.fixedUpMapping = readFixedUpMapping(Path.of(additionalArgs.get(0)));
    this._writeToDb = additionalArgs.size() == 2 && additionalArgs.get(1).equals("-write");
  }

  /** A table backup is only requested when this run will actually write. */
  @Override
  public boolean isPerformTableBackup() {
    return _writeToDb;
  }

  /**
   * Rewrites the variable IDs of one analysis row.
   *
   * @param nextRow row to inspect; its descriptor is replaced in place when it qualifies
   * @return a result wrapping {@code nextRow}; flagged for writing only when the row belongs
   *         to an {@code EDAUD} dataset and the plugin was configured with {@code -write}
   */
  @Override
  public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception {
    if (!nextRow.getDatasetId().startsWith(USER_DATASET_ID_PREFIX)) {
      // The originating patch built this result but never returned it, so every row
      // fell through and was migrated (and written) anyway.
      return new TableRowInterfaces.RowResult<>(nextRow)
          .setShouldWrite(false);
    }

    final String descriptorBefore = nextRow.getDescriptor().toString();
    LOG.info("Analysis descriptor before migration: " + descriptorBefore);

    final String descriptorAfter = migrateVariableIds(descriptorBefore);

    // Store the descriptor with its variable IDs swapped for their mapped counterparts.
    nextRow.setDescriptor(new JSONObject(descriptorAfter));

    LOG.info("Analysis descriptor after migration: " + descriptorAfter);

    return new TableRowInterfaces.RowResult<>(nextRow)
        .setShouldWrite(_writeToDb);
  }

  /**
   * Replaces every occurrence of each mapped variable ID found in the descriptor.
   *
   * <p>IDs are discovered through {@link #VAR_ID_PATTERN} but, as in the original
   * implementation, are then replaced wherever they occur in the descriptor text. The
   * replacement is done literally and in a single pass:
   * <ul>
   *   <li>literally, because IDs may contain {@code .} (a regex wildcard) and mapped values
   *       may contain {@code $} or {@code \} (replacement-template metacharacters);</li>
   *   <li>in one pass, so an ID produced by one replacement is never picked up and replaced
   *       again by another mapping entry.</li>
   * </ul>
   */
  private String migrateVariableIds(String descriptor) {
    // Only IDs that have a mapping entry can change anything.
    final Set<String> mappedIds = VAR_ID_PATTERN.matcher(descriptor).results()
        .map(match -> match.group(1))
        .filter(fixedUpMapping::containsKey)
        .collect(Collectors.toSet());

    if (mappedIds.isEmpty()) {
      return descriptor;
    }

    // Longest first, so an ID that is a prefix of another never shadows the longer one.
    final String anyMappedId = mappedIds.stream()
        .sorted((left, right) -> Integer.compare(right.length(), left.length()))
        .map(Pattern::quote)
        .collect(Collectors.joining("|"));

    return Pattern.compile(anyMappedId)
        .matcher(descriptor)
        .replaceAll(match -> Matcher.quoteReplacement(fixedUpMapping.get(match.group())));
  }

  /**
   * Reads the comma-separated ID mapping file. Blank lines are skipped.
   *
   * @throws IOException if the file cannot be read
   * @throws IllegalArgumentException if a non-blank line has fewer than two columns
   * @throws IllegalStateException if the same current ID appears on more than one line
   *         (behavior of {@link Collectors#toMap(java.util.function.Function, java.util.function.Function)})
   */
  private static Map<String, String> readFixedUpMapping(Path mappingFile) throws IOException {
    return Files.readAllLines(mappingFile).stream()
        .filter(line -> !line.isBlank())
        .map(line -> parseMappingLine(line, mappingFile))
        .collect(Collectors.toMap(tokens -> tokens[0], tokens -> tokens[1]));
  }

  /** Splits one mapping line into {@code [currentId, newId]}; columns past the second are ignored. */
  private static String[] parseMappingLine(String line, Path mappingFile) {
    final String[] tokens = line.split(",");
    if (tokens.length < 2) {
      throw new IllegalArgumentException(
          "Malformed line in " + mappingFile + " (expected 'currentId,newId'): " + line);
    }
    return new String[] { tokens[0].trim(), tokens[1].trim() };
  }

  /** This plugin collects no statistics. */
  @Override
  public void dumpStatistics() {
    // intentionally empty
  }
}