This repository was archived by the owner on Mar 12, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Scratch Pad Sample Code
Austin Lee edited this page Feb 12, 2021
·
8 revisions
Below is some scratch pad code that interacts with the various column files locally. NOTE: This is just scratch pad code; it can be used to quickly load and step through the code locally in an IDE.
/**
 * Scratch entry point: prints the shard ids found for a table, inspects one column through
 * the slow reader, then times fetching a string block through the fast reader.
 *
 * @param args {@code args[0]} is the path handed to the {@code FileReadStore}
 * @throws IOException declared for the store/reader calls below
 */
public static void main(String[] args) throws IOException {
    final FileReadStore readStore = new FileReadStore(Paths.get(args[0]));
    System.out.println(readStore.findShardIds("testorg1", "asset_sw"));

    // Slow reader: print the reported unique count, then the size of a set built from the raw values.
    final SlowArmorReader slowReader = new SlowArmorReader(readStore);
    final Column<?> assetIdColumn = slowReader.getColumn("testorg1", "asset_sw", "assetId", 9);
    System.out.println(assetIdColumn.countUnique());
    final Set<Object> distinctValues =
        Arrays.stream(assetIdColumn.asObjectArray()).collect(Collectors.toSet());
    System.out.println(distinctValues.size());

    // Fast reader: the timer covers reader construction, column lookup and the block fetch.
    final Instant startedAt = Instant.now();
    final FastArmorReader fastArmorReader = new FastArmorReader(readStore);
    final FastArmorColumnReader versionReader =
        fastArmorReader.getColumn("testorg2", "asset_sw", "version", 0);
    versionReader.getStringBlock(100);
    final Duration elapsed = Duration.between(startedAt, Instant.now());
    System.out.println("Took " + elapsed + " to load fast armor shard");
}
/**
 * Scratch entry point: loads two column files as writers, prints their entity record summaries
 * side by side, then prints the baseline's entity records sorted by offset.
 *
 * @param args {@code args[0]} is the baseline column file, {@code args[1]} the column file
 *     compared against it
 * @throws IOException if either column file cannot be opened
 */
public static void main(String[] args) throws IOException {
    Path p1 = Paths.get(args[0]);
    Path p2 = Paths.get(args[1]);
    // Both streams stay open for the whole comparison and are closed on every exit path.
    try (DataInputStream baselineIn = new DataInputStream(Files.newInputStream(p1, StandardOpenOption.READ));
         DataInputStream testIn = new DataInputStream(Files.newInputStream(p2, StandardOpenOption.READ))) {
        ColumnWriter writer = new ColumnWriter(baselineIn,
            new ColumnShardId(new ShardId(1, "myorg", "table"), new ColumnName("dd", "I")));
        Map<Integer, EntityRecord> baseLineRecords = writer.getEntites();
        List<EntityRecordSummary> baselineSummaries = writer.getEntityRecordSummaries();

        ColumnWriter writer1 = new ColumnWriter(testIn,
            new ColumnShardId(new ShardId(1, "myorg", "table"), new ColumnName("dd", "S")));
        List<EntityRecordSummary> testSummaries = writer1.getEntityRecordSummaries();

        // Only positions present in both lists can be compared pairwise; indexing the second
        // list by the baseline's size would throw when the second file has fewer summaries.
        if (baselineSummaries.size() != testSummaries.size()) {
            System.out.println("Summary counts differ: " + baselineSummaries.size() + " vs " + testSummaries.size());
        }
        int comparable = Math.min(baselineSummaries.size(), testSummaries.size());
        for (int i = 0; i < comparable; i++) {
            EntityRecordSummary baseline = baselineSummaries.get(i);
            EntityRecordSummary test = testSummaries.get(i);
            System.out.println(baseline.getId() + " vs " + test.getId());
            System.out.println(baseline.getNumRows() + " vs " + test.getNumRows());
            if (!baseline.getId().equals(test.getId())) {
                // A space now separates the index from the summary so the two do not run together.
                System.out.println("Difference here in order maybe expected at " + i + " " + baseline + " " + test);
            }
        }

        List<EntityRecord> records = EntityRecord.sortRecordsByOffset(baseLineRecords.values());
        for (EntityRecord er : records) {
            System.out.println("Sorted baseline " + er.getEntityId() + "_" + er.getRowGroupOffset() + "_" + er.getValueLength() + "_" + er.totalLength());
        }
    }
}
/**
 * Scratch driver that reads a CSV of asset software rows and writes them through an
 * {@code ArmorWriter} to the "asset_sw" table of "testorg1", declaring the family, vendor,
 * product and version columns with the string data type.
 *
 * <p>Consecutive CSV lines sharing the same assetId are grouped into one entity.
 */
public class SpecialDriverWrite {
    private static final Logger LOGGER = LoggerFactory.getLogger(SpecialDriverWrite.class);
    private static final long K = 1024;
    private static final long M = K * K;
    private static final long G = M * K;
    private static final long T = G * K;

    /**
     * Renders a size using the largest of TB/GB/MB/KB/B (1024-based) that does not exceed it.
     *
     * @param value the size to render; must be at least 1
     * @return the formatted size followed by a space and the unit
     * @throws IllegalArgumentException if {@code value} is less than 1
     */
    public static String convertToStringRepresentation(final long value) {
        final long[] dividers = new long[] {T, G, M, K, 1};
        final String[] units = new String[] {"TB", "GB", "MB", "KB", "B"};
        if (value < 1) {
            throw new IllegalArgumentException("Invalid file size: " + value);
        }
        String result = null;
        for (int i = 0; i < dividers.length; i++) {
            final long divider = dividers[i];
            if (value >= divider) {
                result = format(value, divider, units[i]);
                break;
            }
        }
        return result;
    }

    /** Divides {@code value} by {@code divider} and formats it with at most one fractional digit plus the unit. */
    private static String format(final long value,
                                 final long divider,
                                 final String unit) {
        final double result =
            divider > 1 ? (double) value / (double) divider : (double) value;
        return new DecimalFormat("#,##0.#").format(result) + " " + unit;
    }

    /** Builds the four column definitions (family, vendor, product, version) written for every entity. */
    private static List<ColumnName> buildColumns() {
        return Arrays.asList(
            new ColumnName("family", DataType.STRING.getCode()),
            new ColumnName("vendor", DataType.STRING.getCode()),
            new ColumnName("product", DataType.STRING.getCode()),
            new ColumnName("version", DataType.STRING.getCode()));
    }

    /** Converts a fully collected asset into an entity with one row per software item. */
    private static Entity toEntity(Asset asset) {
        Entity entity = Entity.buildEntity("assetId", asset.assetId, System.currentTimeMillis(), null, buildColumns());
        for (Software s : asset.software) {
            entity.addRow(s.family, s.vendor, s.product, s.version);
        }
        return entity;
    }

    /** Writes and saves the pending entities, printing how long each step took, then empties the list. */
    private static void writeAndSave(ArmorWriter aw, String transaction, List<Entity> entities) throws IOException {
        long mark = System.currentTimeMillis();
        aw.write(transaction, "testorg1", "asset_sw", entities);
        System.out.println("End write took " + (System.currentTimeMillis() - mark) / 1000 + " seconds");
        long mark2 = System.currentTimeMillis();
        aw.save(transaction, "testorg1", "asset_sw");
        entities.clear();
        System.out.println("End saving took " + (System.currentTimeMillis() - mark2) / 1000 + " seconds");
    }

    /**
     * Returns the cell at {@code column}, or {@code null} when the header did not contain that
     * column (index -1) or the row is too short to hold it.
     */
    private static String valueAt(String[] data, int column) {
        return column >= 0 && column < data.length ? data[column] : null;
    }

    /**
     * Reads the CSV and writes its assets in batches.
     *
     * @param args {@code args[0]} is the CSV input file, {@code args[1]} the target location of
     *     the file write store
     * @throws IOException if the CSV cannot be read
     */
    public static void main(String[] args) throws IOException {
        ModShardStrategy mss = new ModShardStrategy(10);
        String csvInput = args[0];
        String targetLocation = args[1];
        FileWriteStore fileStore = new FileWriteStore(Paths.get(targetLocation), mss);
        ArmorWriter aw = new ArmorWriter("test", fileStore, 10, false, null, null);
        String transaction = aw.startTransaction();
        Instant start = Instant.now();
        try (BufferedReader csvReader = new BufferedReader(new FileReader(csvInput))) {
            String row;
            boolean headersSet = false;
            int count = 0;
            int assetIdColumn = -1;
            int familyColumn = -1;
            int productColumn = -1;
            int vendorColumn = -1;
            int versionColumn = -1;
            Asset currentAsset = new Asset();
            List<Entity> entities = new ArrayList<>();
            while ((row = csvReader.readLine()) != null) {
                count++;
                // Intermediate flush every 10,000,000 lines read.
                if (count % 10000000 == 0) {
                    System.out.println("Traversed through " + count + " going to write for " + entities.size() + " entities");
                    writeAndSave(aw, transaction, entities);
                }
                String[] data = row.split(",", -1);
                if (!headersSet) {
                    // The first line must be a header naming at least the assetId column.
                    boolean isHeader = Arrays.stream(data).anyMatch(d -> d.equalsIgnoreCase("assetId"));
                    if (!isHeader) {
                        System.out.println("Need headers " + row);
                        // Arrays.toString renders the cells; concatenating the array itself only prints its identity.
                        throw new RuntimeException("Need headers, here are the rows " + Arrays.toString(data));
                    }
                    System.out.println("Detected headers " + row);
                    for (int ii = 0; ii < data.length; ii++) {
                        if (data[ii].equalsIgnoreCase("assetId")) {
                            assetIdColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("family")) {
                            familyColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("product")) {
                            productColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("version")) {
                            versionColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("vendor")) {
                            vendorColumn = ii;
                        }
                    }
                    LOGGER.info("The columns selected are {}:{}:{}:{}:{}", assetIdColumn, familyColumn, productColumn, versionColumn, vendorColumn);
                    headersSet = true;
                    continue;
                }
                try {
                    String assetId = data[assetIdColumn];
                    // A change of assetId closes the asset collected so far and starts a new one.
                    if (!assetId.equals(currentAsset.assetId)) {
                        if (currentAsset.assetId != null) {
                            entities.add(toEntity(currentAsset));
                        }
                        currentAsset = new Asset();
                        currentAsset.assetId = assetId;
                    }
                    Software sw = new Software();
                    sw.family = valueAt(data, familyColumn);
                    sw.product = valueAt(data, productColumn);
                    sw.vendor = valueAt(data, vendorColumn);
                    sw.version = valueAt(data, versionColumn);
                    currentAsset.software.add(sw);
                } catch (ArrayIndexOutOfBoundsException e) {
                    LOGGER.info("Unexpected error for {}", row, e);
                    throw e;
                }
            }
            // Final flush: the asset still being collected and every entity gathered since the
            // last intermediate flush would otherwise never reach the writer.
            if (currentAsset.assetId != null) {
                entities.add(toEntity(currentAsset));
            }
            if (!entities.isEmpty()) {
                System.out.println("Finished reading " + count + " lines, going to write remaining " + entities.size() + " entities");
                writeAndSave(aw, transaction, entities);
            }
            aw.close();
        }
        // Exit only after the try-with-resources has closed the CSV reader.
        System.out.println("going to exit took " + Duration.between(start, Instant.now()));
        System.exit(0);
    }

    /** One asset and the software rows collected for it. */
    public static class Asset {
        private String assetId;
        private final List<Software> software = new ArrayList<>();
    }

    /** One CSV row's software attributes; a field stays {@code null} when its column is absent. */
    public static class Software {
        private String family;
        private String vendor;
        private String product;
        private String version;
    }
}
/**
 * Scratch driver that reads a CSV of asset software rows and writes them through an
 * {@code ArmorWriter} to the "asset_sw_int" table of "testorg1". The family, vendor, product
 * and version columns are declared with the integer data type and hold the
 * {@link String#hashCode()} of each CSV cell.
 *
 * <p>Consecutive CSV lines sharing the same assetId are grouped into one entity.
 */
public class SpecialDriverWrite1 {
    private static final Logger LOGGER = LoggerFactory.getLogger(SpecialDriverWrite1.class);
    private static final long K = 1024;
    private static final long M = K * K;
    private static final long G = M * K;
    private static final long T = G * K;

    /**
     * Renders a size using the largest of TB/GB/MB/KB/B (1024-based) that does not exceed it.
     *
     * @param value the size to render; must be at least 1
     * @return the formatted size followed by a space and the unit
     * @throws IllegalArgumentException if {@code value} is less than 1
     */
    public static String convertToStringRepresentation(final long value) {
        final long[] dividers = new long[] {T, G, M, K, 1};
        final String[] units = new String[] {"TB", "GB", "MB", "KB", "B"};
        if (value < 1) {
            throw new IllegalArgumentException("Invalid file size: " + value);
        }
        String result = null;
        for (int i = 0; i < dividers.length; i++) {
            final long divider = dividers[i];
            if (value >= divider) {
                result = format(value, divider, units[i]);
                break;
            }
        }
        return result;
    }

    /** Divides {@code value} by {@code divider} and formats it with at most one fractional digit plus the unit. */
    private static String format(final long value,
                                 final long divider,
                                 final String unit) {
        final double result =
            divider > 1 ? (double) value / (double) divider : (double) value;
        return new DecimalFormat("#,##0.#").format(result) + " " + unit;
    }

    /** Builds the four column definitions (family, vendor, product, version) written for every entity. */
    private static List<ColumnName> buildColumns() {
        return Arrays.asList(
            new ColumnName("family", DataType.INTEGER.getCode()),
            new ColumnName("vendor", DataType.INTEGER.getCode()),
            new ColumnName("product", DataType.INTEGER.getCode()),
            new ColumnName("version", DataType.INTEGER.getCode()));
    }

    /** Converts a fully collected asset into an entity with one row per software item. */
    private static Entity toEntity(Asset asset) {
        Entity entity = Entity.buildEntity("assetId", asset.assetId, System.currentTimeMillis(), null, buildColumns());
        for (Software s : asset.software) {
            entity.addRow(s.family, s.vendor, s.product, s.version);
        }
        return entity;
    }

    /** Writes and saves the pending entities, printing how long each step took, then empties the list. */
    private static void writeAndSave(ArmorWriter aw, String transaction, List<Entity> entities) throws IOException {
        long mark = System.currentTimeMillis();
        aw.write(transaction, "testorg1", "asset_sw_int", entities);
        System.out.println("End write took " + (System.currentTimeMillis() - mark) / 1000 + " seconds");
        long mark2 = System.currentTimeMillis();
        aw.save(transaction, "testorg1", "asset_sw_int");
        entities.clear();
        System.out.println("End saving took " + (System.currentTimeMillis() - mark2) / 1000 + " seconds");
    }

    /**
     * Returns the hash code of the cell at {@code column}, or 0 when the header did not contain
     * that column (index -1) or the row is too short to hold it.
     */
    private static int hashAt(String[] data, int column) {
        return column >= 0 && column < data.length ? data[column].hashCode() : 0;
    }

    /**
     * Reads the CSV and writes its assets in batches.
     *
     * @param args {@code args[0]} is the CSV input file, {@code args[1]} the target directory of
     *     the file write store
     * @throws IOException if the CSV cannot be read
     */
    public static void main(String[] args) throws IOException {
        ModShardStrategy mss = new ModShardStrategy(10);
        String input = args[0];
        String targetDir = args[1];
        FileWriteStore fileStore = new FileWriteStore(Paths.get(targetDir), mss);
        ArmorWriter aw = new ArmorWriter("test", fileStore, 10, false, null, null);
        String transaction = aw.startTransaction();
        Instant start = Instant.now();
        try (BufferedReader csvReader = new BufferedReader(new FileReader(input))) {
            String row;
            boolean headersSet = false;
            int count = 0;
            int assetIdColumn = -1;
            int familyColumn = -1;
            int productColumn = -1;
            int vendorColumn = -1;
            int versionColumn = -1;
            Asset currentAsset = new Asset();
            List<Entity> entities = new ArrayList<>();
            while ((row = csvReader.readLine()) != null) {
                count++;
                // Intermediate flush every 10,000,000 lines read.
                if (count % 10000000 == 0) {
                    System.out.println("Traversed through " + count + " going to write for " + entities.size() + " entities");
                    writeAndSave(aw, transaction, entities);
                }
                String[] data = row.split(",", -1);
                if (!headersSet) {
                    // The first line must be a header naming at least the assetId column.
                    boolean isHeader = Arrays.stream(data).anyMatch(d -> d.equalsIgnoreCase("assetId"));
                    if (!isHeader) {
                        System.out.println("Need headers " + row);
                        // Arrays.toString renders the cells; concatenating the array itself only prints its identity.
                        throw new RuntimeException("Need headers, here are the rows " + Arrays.toString(data));
                    }
                    System.out.println("Detected headers " + row);
                    for (int ii = 0; ii < data.length; ii++) {
                        if (data[ii].equalsIgnoreCase("assetId")) {
                            assetIdColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("family")) {
                            familyColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("product")) {
                            productColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("version")) {
                            versionColumn = ii;
                        } else if (data[ii].equalsIgnoreCase("vendor")) {
                            vendorColumn = ii;
                        }
                    }
                    LOGGER.info("The columns selected are {}:{}:{}:{}:{}", assetIdColumn, familyColumn, productColumn, versionColumn, vendorColumn);
                    headersSet = true;
                    continue;
                }
                try {
                    String assetId = data[assetIdColumn];
                    // A change of assetId closes the asset collected so far and starts a new one.
                    if (!assetId.equals(currentAsset.assetId)) {
                        if (currentAsset.assetId != null) {
                            entities.add(toEntity(currentAsset));
                        }
                        currentAsset = new Asset();
                        currentAsset.assetId = assetId;
                    }
                    Software sw = new Software();
                    sw.family = hashAt(data, familyColumn);
                    sw.product = hashAt(data, productColumn);
                    sw.vendor = hashAt(data, vendorColumn);
                    sw.version = hashAt(data, versionColumn);
                    currentAsset.software.add(sw);
                } catch (ArrayIndexOutOfBoundsException e) {
                    LOGGER.info("Unexpected error for {}", row, e);
                    throw e;
                }
            }
            // Final flush: the asset still being collected and every entity gathered since the
            // last intermediate flush would otherwise never reach the writer.
            if (currentAsset.assetId != null) {
                entities.add(toEntity(currentAsset));
            }
            if (!entities.isEmpty()) {
                System.out.println("Finished reading " + count + " lines, going to write remaining " + entities.size() + " entities");
                writeAndSave(aw, transaction, entities);
            }
            aw.close();
        }
        // Exit only after the try-with-resources has closed the CSV reader.
        System.out.println("going to exit took " + Duration.between(start, Instant.now()));
        System.exit(0);
    }

    /** One asset and the software rows collected for it. */
    public static class Asset {
        private String assetId;
        private final List<Software> software = new ArrayList<>();
    }

    /** One CSV row's software attributes as string hash codes; a field stays 0 when its column is absent. */
    public static class Software {
        private int family;
        private int vendor;
        private int product;
        private int version;
    }
}
package com.rapid7.armor.util;
import com.rapid7.armor.entity.EntityRecord;
import com.rapid7.armor.entity.EntityRecordSummary;
import com.rapid7.armor.schema.ColumnId;
import com.rapid7.armor.shard.ColumnShardId;
import com.rapid7.armor.shard.ShardId;
import com.rapid7.armor.write.component.DictionaryWriter;
import com.rapid7.armor.write.writers.ColumnFileWriter;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Compares two column files, each loaded as a writer, for analysis. The two files should be the
 * same column type but different versions.
 *
 * <p>The analysis prints the entity record summaries of both columns, compares their entity
 * dictionaries (when both exist), and reports entity records that were removed, added, or moved
 * to a different row group offset.
 */
public class ColumnWriterComparison {
    /**
     * Runs the comparison against the column paths and column type hard-coded below.
     *
     * @param args currently unused; the trailing comments on the hard-coded values mark where
     *     arguments were meant to be read
     * @throws IOException if either column file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        String column1 = "/home/alee/下載/family_S_T"; // args[0];
        String column2 = "/home/alee/下載/family_S_T"; //args[1];
        String columnType = "S"; //args[3];
        // Both streams stay open for the whole analysis and are closed on every exit path.
        try (DataInputStream in1 = new DataInputStream(Files.newInputStream(Paths.get(column1), StandardOpenOption.READ));
             DataInputStream in2 = new DataInputStream(Files.newInputStream(Paths.get(column2), StandardOpenOption.READ))) {
            ColumnFileWriter writer1 = new ColumnFileWriter(in1,
                new ColumnShardId(new ShardId(1, "dummy", "dummy"), new ColumnId("1", columnType)));
            ColumnFileWriter writer2 = new ColumnFileWriter(in2,
                new ColumnShardId(new ShardId(1, "dummy", "dummy"), new ColumnId("2", columnType)));

            List<EntityRecordSummary> summaries1 = writer1.getEntityRecordSummaries();
            List<EntityRecordSummary> summaries2 = writer2.getEntityRecordSummaries();
            for (int i = 0; i < summaries1.size(); i++) {
                System.out.println(i + " " + summaries1.get(i));
            }
            for (int i = 0; i < summaries2.size(); i++) {
                System.out.println(i + " " + summaries2.get(i));
            }
            if (!summaries1.equals(summaries2)) {
                // Only positions present in both lists can be compared pairwise; indexing the
                // second list by the first list's size would throw when it is shorter.
                if (summaries1.size() != summaries2.size()) {
                    System.out.println("Summary counts differ: " + summaries1.size() + " vs " + summaries2.size());
                }
                int comparable = Math.min(summaries1.size(), summaries2.size());
                for (int i = 0; i < comparable; i++) {
                    EntityRecordSummary ers1 = summaries1.get(i);
                    EntityRecordSummary ers2 = summaries2.get(i);
                    System.out.println(i + ":" + ers1.getId() + ":" + ers1.getOffset() + " " + ers2.getId() + ":" + ers2.getOffset());
                    if (!ers1.equals(ers2)) {
                        System.out.println("!!!!!!");
                    }
                }
            }

            DictionaryWriter dw1 = writer1.getEntityDictionary();
            DictionaryWriter dw2 = writer2.getEntityDictionary();
            // The null check now comes before any use of the dictionaries, so a column without
            // a dictionary reaches the messages in the else branch instead of throwing.
            if (dw1 != null && dw2 != null) {
                // Ad-hoc probes for two specific entity ids. The unused locals are kept so they
                // can be inspected when stepping through in a debugger.
                Set<Object> ee = dw1.isCorrupted();
                Set<Object> ff = dw2.isCorrupted();
                Integer test1 = dw1.getSurrogate("920e6463-8df5-4137-becb-c2f4ea2f3688-default-asset-111383");
                String value1 = dw1.getValue(test1);
                Integer test3 = dw1.getSurrogate("920e6463-8df5-4137-becb-c2f4ea2f3688-default-asset-154162");
                String value2 = dw1.getValue(test3);
                for (Integer v1 : dw2.getStrToInt().values()) {
                    if (v1.equals(test1)) {
                        System.out.println("Got test1");
                    }
                    if (v1.equals(test3)) {
                        System.out.println("Got test3");
                    }
                }
                Integer test2 = dw2.getSurrogate("920e6463-8df5-4137-becb-c2f4ea2f3688-default-asset-111383");
                // NOTE(review): test2/test4 come from dw2 but are resolved through dw1 - confirm
                // this cross-lookup is intended rather than a copy/paste slip.
                String value3 = dw1.getValue(test2);
                Integer test4 = dw2.getSurrogate("920e6463-8df5-4137-becb-c2f4ea2f3688-default-asset-154162");
                String value4 = dw1.getValue(test4);
                for (Integer v1 : dw2.getStrToInt().values()) {
                    if (v1.equals(test2)) {
                        System.out.println("Got test2");
                    }
                    if (v1.equals(test4)) {
                        System.out.println("Got test4");
                    }
                }

                System.out.println("Analyzing dictionary for any changes");
                for (Map.Entry<String, Integer> entry : dw1.getStrToInt().entrySet()) {
                    String value = entry.getKey();
                    Integer surrogate = entry.getValue();
                    Integer otherSurrogate = dw2.getSurrogate(value);
                    if (otherSurrogate == null) {
                        System.out.println("The value " + value + " was removed from 2nd column");
                    } else if (surrogate.intValue() != otherSurrogate.intValue()) {
                        System.out.println("The value " + value + " surrogate was changed from " + surrogate + " to " + otherSurrogate);
                    }
                }
                // Do a check the other way now
                Set<String> differences = dw2.getStrToInt().keySet().stream()
                    .filter(key -> !dw1.getStrToInt().containsKey(key))
                    .collect(Collectors.toSet());
                for (String difference2 : differences) {
                    System.out.println("The value " + difference2 + " is new in the 2nd colunmn");
                }
            } else {
                if (dw1 == null && dw2 == null) {
                    System.out.println("No dictionary analysis can be applied");
                } else {
                    System.out.println("One of the columns dictionaries doesn't exist");
                }
            }

            System.out.println("Executing entity record analysis");
            Map<Integer, EntityRecord> entityRecords1 = writer1.getEntites();
            Map<Integer, EntityRecord> entityRecords2 = writer2.getEntites();
            // With records analysis we will now be looking at the changes, meaning records that have changed from 1 vs 2.
            Set<EntityRecord> changes2 = new HashSet<>();
            Set<EntityRecord> removed = new HashSet<>();
            Set<EntityRecord> added = new HashSet<>();
            for (EntityRecord entityRecord1 : entityRecords1.values()) {
                EntityRecord entityRecord2 = entityRecords2.get(entityRecord1.getEntityId());
                if (entityRecord2 != null) {
                    // Only a different row group offset counts as a change here.
                    if (entityRecord1.getRowGroupOffset() == entityRecord2.getRowGroupOffset()) {
                        continue;
                    }
                    changes2.add(entityRecord2);
                } else {
                    removed.add(entityRecord1);
                }
            }
            Set<Integer> erDiff = entityRecords2.keySet().stream()
                .filter(key -> !entityRecords1.containsKey(key))
                .collect(Collectors.toSet());
            for (Integer id2 : erDiff) {
                added.add(entityRecords2.get(id2));
            }
            System.out.println("Here are the entities that were removed");
            for (EntityRecord removedRecord : removed) {
                System.out.println(removedRecord);
            }
            System.out.println("Here are the entities that were added " + added.size() + " were added");
            for (EntityRecord add : added) {
                System.out.println(add);
            }
            // This section lists changed records; the header previously said "added".
            System.out.println("Here are the entities that were changed");
            for (EntityRecord change2 : changes2) {
                EntityRecord change1 = entityRecords1.get(change2.getEntityId());
                System.out.println("Changed from " + change1.getEntityId() + ":" + change1.getRowGroupOffset() + " to " + change2.getEntityId() + ":" + change2.getRowGroupOffset());
            }
            System.out.println("Finished analysis");
        }
    }
}