Skip to content
This repository was archived by the owner on Mar 12, 2024. It is now read-only.

Scratch Pad Sample Code

Austin Lee edited this page Feb 12, 2021 · 8 revisions

Scratch pad code

Below is some scratch pad code that interacts with the various column files locally. NOTE: This is just scratch pad code; it can be used to quickly load and step through the code locally in an IDE.

Read a column shard locally

  /**
   * Scratch driver: opens the local store rooted at {@code args[0]}, prints the shard ids of
   * {@code testorg1/asset_sw}, reads one column through {@link SlowArmorReader} and one through
   * {@link FastArmorReader}, and prints how long the fast read took.
   *
   * @param args {@code args[0]} is the directory handed to the {@link FileReadStore}
   * @throws IOException declared for the store and reader calls below
   */
  public static void main(String[] args) throws IOException {
    FileReadStore store = new FileReadStore(Paths.get(args[0]));
    System.out.println(store.findShardIds("testorg1", "asset_sw"));

    // Slow path: load the assetId column of shard 9 and print two unique counts for comparison.
    SlowArmorReader slowReader = new SlowArmorReader(store);
    Column<?> assetIds = slowReader.getColumn("testorg1", "asset_sw", "assetId", 9);
    System.out.println(assetIds.countUnique());
    Set<Object> distinctValues = Arrays.stream(assetIds.asObjectArray()).collect(Collectors.toSet());
    System.out.println(distinctValues.size());

    // Fast path: time reader construction, column lookup and the first string block together.
    Instant began = Instant.now();
    FastArmorReader fastArmorReader = new FastArmorReader(store);
    FastArmorColumnReader versionReader = fastArmorReader.getColumn("testorg2", "asset_sw", "version", 0);
    // The block is not used further; it is kept in a local so it can be inspected in a debugger.
    FastArmorBlock firstBlock = versionReader.getStringBlock(100);

    Duration elapsed = Duration.between(began, Instant.now());
    System.out.println("Took " + elapsed + " to load fast armor shard");
  }

Read some files on disk and print out the summaries

/**
 * Scratch driver: loads two column files from disk into {@link ColumnWriter}s, prints their entity
 * record summaries side by side, and then prints the first file's entity records sorted by offset.
 *
 * <p>Only the positions present in both summary lists are compared pairwise; a difference in the
 * number of summaries is reported instead of failing with an index error.
 *
 * @param args {@code args[0]} is the baseline column file, {@code args[1]} the column file to
 *     compare against it
 * @throws IOException if either file cannot be opened or read
 * @throws IllegalArgumentException if fewer than two paths are supplied
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      throw new IllegalArgumentException("Expected two column file paths but got " + args.length);
    }
    Path p1 = Paths.get(args[0]);
    Path p2 = Paths.get(args[1]);
    ColumnWriter writer = new ColumnWriter(new DataInputStream(Files.newInputStream(p1, StandardOpenOption.READ)),
        new ColumnShardId(new ShardId(1, "myorg", "table"), new ColumnName("dd", "I")));
    Map<Integer, EntityRecord> baseLineRecords = writer.getEntites();
    List<EntityRecordSummary> baselineSummaries = writer.getEntityRecordSummaries();

    ColumnWriter writer1 = new ColumnWriter(new DataInputStream(Files.newInputStream(p2, StandardOpenOption.READ)),
        new ColumnShardId(new ShardId(1, "myorg", "table"), new ColumnName("dd", "S")));
    List<EntityRecordSummary> testSummaries = writer1.getEntityRecordSummaries();

    // The two files may hold a different number of entities, so only walk the shared prefix.
    int comparable = Math.min(baselineSummaries.size(), testSummaries.size());
    if (baselineSummaries.size() != testSummaries.size()) {
      System.out.println("Summary counts differ: " + baselineSummaries.size() + " vs " + testSummaries.size()
          + ", comparing the first " + comparable);
    }
    for (int i = 0; i < comparable; i++) {
      EntityRecordSummary baseline = baselineSummaries.get(i);
      EntityRecordSummary test = testSummaries.get(i);
      System.out.println(baseline.getId() + " vs " + test.getId());
      System.out.println(baseline.getNumRows() + " vs " + test.getNumRows());

      if (!baseline.getId().equals(test.getId())) {
        System.out.println("Difference here in order maybe expected at " + i + baseline + " " + test);
      }
    }

    List<EntityRecord> records = EntityRecord.sortRecordsByOffset(baseLineRecords.values());
    for (EntityRecord er : records) {
      System.out.println("Sorted baseline " + er.getEntityId() + "_" + er.getRowGroupOffset() + "_" + er.getValueLength() + "_" + er.totalLength());
    }
  }

Read an asset software CSV file into a String-based table.

/**
 * Scratch driver that reads an asset-software CSV ({@code args[0]}) and writes it to the
 * {@code testorg1}/{@code asset_sw} table, with every column typed as STRING, through an
 * {@link ArmorWriter} over a {@link FileWriteStore} rooted at {@code args[1]}.
 *
 * <p>The first CSV line must be a header containing {@code assetId}; {@code family},
 * {@code vendor}, {@code product} and {@code version} are picked up when present. Consecutive rows
 * with the same asset id become one entity; rows for the same asset that are not adjacent produce
 * separate entities.
 */
public class SpecialDriverWrite {
  private static final Logger LOGGER = LoggerFactory.getLogger(SpecialDriverWrite.class);
  private static final long K = 1024;
  private static final long M = K * K;
  private static final long G = M * K;
  private static final long T = G * K;

  /** Organisation every batch is written to. */
  private static final String ORG = "testorg1";
  /** Table every batch is written to. */
  private static final String TABLE = "asset_sw";
  /** Number of CSV lines read between intermediate write/save cycles. */
  private static final int LINES_PER_BATCH = 10000000;

  /**
   * Renders a byte count using the largest binary unit (TB, GB, MB, KB, B) that does not exceed it.
   *
   * @param value the size in bytes, must be at least 1
   * @return the formatted size followed by a space and the unit
   * @throws IllegalArgumentException if {@code value} is less than 1
   */
  public static String convertToStringRepresentation(final long value) {
    if (value < 1) {
      throw new IllegalArgumentException("Invalid file size: " + value);
    }
    final long[] dividers = new long[] {T, G, M, K, 1};
    final String[] units = new String[] {"TB", "GB", "MB", "KB", "B"};
    for (int i = 0; i < dividers.length; i++) {
      if (value >= dividers[i]) {
        return format(value, dividers[i], units[i]);
      }
    }
    // Not reachable: value >= 1 always matches the final divider.
    return null;
  }

  /** Divides {@code value} by {@code divider} and formats it with at most one decimal digit. */
  private static String format(final long value,
                               final long divider,
                               final String unit) {
    final double result =
        divider > 1 ? (double) value / (double) divider : (double) value;
    return new DecimalFormat("#,##0.#").format(result) + " " + unit;
  }

  /** Column layout of the target table; the order matches the values passed to {@code addRow}. */
  private static List<ColumnName> buildColumns() {
    return Arrays.asList(
        new ColumnName("family", DataType.STRING.getCode()),
        new ColumnName("vendor", DataType.STRING.getCode()),
        new ColumnName("product", DataType.STRING.getCode()),
        new ColumnName("version", DataType.STRING.getCode()));
  }

  /**
   * Streams the CSV, groups rows by asset id and writes the resulting entities in batches.
   *
   * @param args {@code args[0]} is the CSV file, {@code args[1]} the target directory
   * @throws IOException if the CSV cannot be read or a writer call fails with it
   */
  public static void main(String[] args) throws IOException {
    ModShardStrategy mss = new ModShardStrategy(10);
    String csvInput = args[0];
    String targetLocation = args[1];

    FileWriteStore fileStore = new FileWriteStore(Paths.get(targetLocation), mss);
    ArmorWriter aw = new ArmorWriter("test", fileStore, 10, false, null, null);
    String transaction = aw.startTransaction();
    Instant start = Instant.now();
    try (BufferedReader csvReader = new BufferedReader(new FileReader(csvInput))) {
      String row;

      boolean headersSet = false;
      int count = 0;
      int assetIdColumn = -1;
      int familyColumn = -1;
      int productColumn = -1;
      int vendorColumn = -1;
      int versionColumn = -1;
      Asset currentAsset = new Asset();
      List<Entity> entities = new ArrayList<>();
      while ((row = csvReader.readLine()) != null) {
        count++;
        if (count % LINES_PER_BATCH == 0) {
          writeBatch(aw, transaction, entities, count);
        }
        String[] data = row.split(",", -1);
        if (!headersSet) {
          for (int ii = 0; ii < data.length; ii++) {
            if (data[ii].equalsIgnoreCase("assetId")) {
              assetIdColumn = ii;
            } else if (data[ii].equalsIgnoreCase("family")) {
              familyColumn = ii;
            } else if (data[ii].equalsIgnoreCase("product")) {
              productColumn = ii;
            } else if (data[ii].equalsIgnoreCase("version")) {
              versionColumn = ii;
            } else if (data[ii].equalsIgnoreCase("vendor")) {
              vendorColumn = ii;
            }
          }
          if (assetIdColumn < 0) {
            System.out.println("Need headers " + row);
            // Arrays.toString: concatenating the array itself would only print its identity.
            throw new RuntimeException("Need headers, here are the rows " + Arrays.toString(data));
          }
          System.out.println("Detected headers " + row);
          LOGGER.info("The columns selected are {}:{}:{}:{}:{}", assetIdColumn, familyColumn, productColumn, versionColumn, vendorColumn);
          headersSet = true;
          continue;
        }
        try {
          String assetId = data[assetIdColumn];
          if (!assetId.equals(currentAsset.assetId)) {
            // A new asset id starts here, so the previous asset is complete.
            addEntity(entities, currentAsset);
            currentAsset = new Asset();
            currentAsset.assetId = assetId;
          }

          Software sw = new Software();
          sw.family = cell(data, familyColumn);
          sw.product = cell(data, productColumn);
          sw.vendor = cell(data, vendorColumn);
          sw.version = cell(data, versionColumn);

          currentAsset.software.add(sw);
        } catch (ArrayIndexOutOfBoundsException e) {
          LOGGER.info("Unexpected error for {}", row, e);
          throw e;
        }
      }

      // The last asset has no following row to close it, and whatever was gathered since the last
      // batch boundary has not been handed to the writer yet: flush both before closing.
      addEntity(entities, currentAsset);
      if (!entities.isEmpty()) {
        writeBatch(aw, transaction, entities, count);
      }

      aw.close();
      System.out.println("going to exit took " + Duration.between(start, Instant.now()));
      // NOTE(review): explicit exit presumably stops lingering writer threads - confirm.
      System.exit(0);
    }
  }

  /**
   * Writes and saves the pending entities, printing how long each step took, then empties the list.
   * Declared with {@code IOException} because the writer calls may throw it.
   */
  private static void writeBatch(ArmorWriter aw, String transaction, List<Entity> entities, int linesRead)
      throws IOException {
    System.out.println("Traversed through " + linesRead + " going to write for " + entities.size() + " entities");
    long mark = System.currentTimeMillis();
    aw.write(transaction, ORG, TABLE, entities);
    System.out.println("End write took " + (System.currentTimeMillis() - mark) / 1000 + " seconds");
    long mark2 = System.currentTimeMillis();
    aw.save(transaction, ORG, TABLE);
    entities.clear();
    System.out.println("End saving took " + (System.currentTimeMillis() - mark2) / 1000 + " seconds");
  }

  /** Converts a finished asset into an entity and queues it; an asset without an id is ignored. */
  private static void addEntity(List<Entity> entities, Asset asset) throws IOException {
    if (asset.assetId == null) {
      return;
    }
    Entity entity = Entity.buildEntity("assetId", asset.assetId, System.currentTimeMillis(), null, buildColumns());
    for (Software s : asset.software) {
      entity.addRow(s.family, s.vendor, s.product, s.version);
    }
    entities.add(entity);
  }

  /**
   * Returns the cell at {@code column}, or {@code null} when the header did not contain that column
   * (index -1) or the row is too short to have it.
   */
  private static String cell(String[] data, int column) {
    return column >= 0 && column < data.length ? data[column] : null;
  }

  /** One asset and the software rows collected for it so far. */
  public static class Asset {
    private String assetId;
    private final List<Software> software = new ArrayList<>();
  }

  /** One CSV row's software attributes; a field stays {@code null} when the cell is absent. */
  public static class Software {
    private String family;
    private String vendor;
    private String product;
    private String version;
  }
}

Import an asset software CSV into a table of integers


/**
 * Scratch driver that reads an asset-software CSV ({@code args[0]}) and writes it to the
 * {@code testorg1}/{@code asset_sw_int} table, with every column typed as INTEGER, through an
 * {@link ArmorWriter} over a {@link FileWriteStore} rooted at {@code args[1]}. Each CSV cell is
 * stored as the {@link String#hashCode()} of its text.
 *
 * <p>The first CSV line must be a header containing {@code assetId}; {@code family},
 * {@code vendor}, {@code product} and {@code version} are picked up when present. Consecutive rows
 * with the same asset id become one entity; rows for the same asset that are not adjacent produce
 * separate entities.
 */
public class SpecialDriverWrite1 {
  private static final Logger LOGGER = LoggerFactory.getLogger(SpecialDriverWrite1.class);
  private static final long K = 1024;
  private static final long M = K * K;
  private static final long G = M * K;
  private static final long T = G * K;

  /** Organisation every batch is written to. */
  private static final String ORG = "testorg1";
  /** Table every batch is written to. */
  private static final String TABLE = "asset_sw_int";
  /** Number of CSV lines read between intermediate write/save cycles. */
  private static final int LINES_PER_BATCH = 10000000;

  /**
   * Renders a byte count using the largest binary unit (TB, GB, MB, KB, B) that does not exceed it.
   *
   * @param value the size in bytes, must be at least 1
   * @return the formatted size followed by a space and the unit
   * @throws IllegalArgumentException if {@code value} is less than 1
   */
  public static String convertToStringRepresentation(final long value) {
    if (value < 1) {
      throw new IllegalArgumentException("Invalid file size: " + value);
    }
    final long[] dividers = new long[] {T, G, M, K, 1};
    final String[] units = new String[] {"TB", "GB", "MB", "KB", "B"};
    for (int i = 0; i < dividers.length; i++) {
      if (value >= dividers[i]) {
        return format(value, dividers[i], units[i]);
      }
    }
    // Not reachable: value >= 1 always matches the final divider.
    return null;
  }

  /** Divides {@code value} by {@code divider} and formats it with at most one decimal digit. */
  private static String format(final long value,
                               final long divider,
                               final String unit) {
    final double result =
        divider > 1 ? (double) value / (double) divider : (double) value;
    return new DecimalFormat("#,##0.#").format(result) + " " + unit;
  }

  /** Column layout of the target table; the order matches the values passed to {@code addRow}. */
  private static List<ColumnName> buildColumns() {
    return Arrays.asList(
        new ColumnName("family", DataType.INTEGER.getCode()),
        new ColumnName("vendor", DataType.INTEGER.getCode()),
        new ColumnName("product", DataType.INTEGER.getCode()),
        new ColumnName("version", DataType.INTEGER.getCode()));
  }

  /**
   * Streams the CSV, groups rows by asset id and writes the resulting entities in batches.
   *
   * @param args {@code args[0]} is the CSV file, {@code args[1]} the target directory
   * @throws IOException if the CSV cannot be read or a writer call fails with it
   */
  public static void main(String[] args) throws IOException {
    ModShardStrategy mss = new ModShardStrategy(10);
    String input = args[0];
    String targetDir = args[1];
    FileWriteStore fileStore = new FileWriteStore(Paths.get(targetDir), mss);
    ArmorWriter aw = new ArmorWriter("test", fileStore, 10, false, null, null);
    String transaction = aw.startTransaction();
    Instant start = Instant.now();
    try (BufferedReader csvReader = new BufferedReader(new FileReader(input))) {
      String row;

      boolean headersSet = false;
      int count = 0;
      int assetIdColumn = -1;
      int familyColumn = -1;
      int productColumn = -1;
      int vendorColumn = -1;
      int versionColumn = -1;
      Asset currentAsset = new Asset();
      List<Entity> entities = new ArrayList<>();
      while ((row = csvReader.readLine()) != null) {
        count++;
        if (count % LINES_PER_BATCH == 0) {
          writeBatch(aw, transaction, entities, count);
        }
        String[] data = row.split(",", -1);
        if (!headersSet) {
          for (int ii = 0; ii < data.length; ii++) {
            if (data[ii].equalsIgnoreCase("assetId")) {
              assetIdColumn = ii;
            } else if (data[ii].equalsIgnoreCase("family")) {
              familyColumn = ii;
            } else if (data[ii].equalsIgnoreCase("product")) {
              productColumn = ii;
            } else if (data[ii].equalsIgnoreCase("version")) {
              versionColumn = ii;
            } else if (data[ii].equalsIgnoreCase("vendor")) {
              vendorColumn = ii;
            }
          }
          if (assetIdColumn < 0) {
            System.out.println("Need headers " + row);
            // Arrays.toString: concatenating the array itself would only print its identity.
            throw new RuntimeException("Need headers, here are the rows " + Arrays.toString(data));
          }
          System.out.println("Detected headers " + row);
          LOGGER.info("The columns selected are {}:{}:{}:{}:{}", assetIdColumn, familyColumn, productColumn, versionColumn, vendorColumn);
          headersSet = true;
          continue;
        }
        try {
          String assetId = data[assetIdColumn];
          if (!assetId.equals(currentAsset.assetId)) {
            // A new asset id starts here, so the previous asset is complete.
            addEntity(entities, currentAsset);
            currentAsset = new Asset();
            currentAsset.assetId = assetId;
          }

          Software sw = new Software();
          sw.family = hashOfCell(data, familyColumn);
          sw.product = hashOfCell(data, productColumn);
          sw.vendor = hashOfCell(data, vendorColumn);
          sw.version = hashOfCell(data, versionColumn);

          currentAsset.software.add(sw);
        } catch (ArrayIndexOutOfBoundsException e) {
          LOGGER.info("Unexpected error for {}", row, e);
          throw e;
        }
      }

      // The last asset has no following row to close it, and whatever was gathered since the last
      // batch boundary has not been handed to the writer yet: flush both before closing.
      addEntity(entities, currentAsset);
      if (!entities.isEmpty()) {
        writeBatch(aw, transaction, entities, count);
      }

      aw.close();
      System.out.println("going to exit took " + Duration.between(start, Instant.now()));
      // NOTE(review): explicit exit presumably stops lingering writer threads - confirm.
      System.exit(0);
    }
  }

  /**
   * Writes and saves the pending entities, printing how long each step took, then empties the list.
   * Declared with {@code IOException} because the writer calls may throw it.
   */
  private static void writeBatch(ArmorWriter aw, String transaction, List<Entity> entities, int linesRead)
      throws IOException {
    System.out.println("Traversed through " + linesRead + " going to write for " + entities.size() + " entities");
    long mark = System.currentTimeMillis();
    aw.write(transaction, ORG, TABLE, entities);
    System.out.println("End write took " + (System.currentTimeMillis() - mark) / 1000 + " seconds");
    long mark2 = System.currentTimeMillis();
    aw.save(transaction, ORG, TABLE);
    entities.clear();
    System.out.println("End saving took " + (System.currentTimeMillis() - mark2) / 1000 + " seconds");
  }

  /** Converts a finished asset into an entity and queues it; an asset without an id is ignored. */
  private static void addEntity(List<Entity> entities, Asset asset) throws IOException {
    if (asset.assetId == null) {
      return;
    }
    Entity entity = Entity.buildEntity("assetId", asset.assetId, System.currentTimeMillis(), null, buildColumns());
    for (Software s : asset.software) {
      entity.addRow(s.family, s.vendor, s.product, s.version);
    }
    entities.add(entity);
  }

  /**
   * Returns the hash code of the cell at {@code column}, or 0 when the header did not contain that
   * column (index -1) or the row is too short to have it.
   */
  private static int hashOfCell(String[] data, int column) {
    return column >= 0 && column < data.length ? data[column].hashCode() : 0;
  }

  /** One asset and the software rows collected for it so far. */
  public static class Asset {
    private String assetId;
    private final List<Software> software = new ArrayList<>();
  }

  /** One CSV row's software attributes as string hash codes; a field stays 0 when the cell is absent. */
  public static class Software {
    private int family;
    private int vendor;
    private int product;
    private int version;
  }
}

Compare two column files

package com.rapid7.armor.util;

import com.rapid7.armor.entity.EntityRecord;
import com.rapid7.armor.entity.EntityRecordSummary;
import com.rapid7.armor.schema.ColumnId;
import com.rapid7.armor.shard.ColumnShardId;
import com.rapid7.armor.shard.ShardId;
import com.rapid7.armor.write.component.DictionaryWriter;
import com.rapid7.armor.write.writers.ColumnFileWriter;

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Compares two column files by loading each into a {@link ColumnFileWriter} and printing the
 * differences between their entity record summaries, entity dictionaries and entity records.
 * Both files should hold the same column type, typically two versions of the same column.
 *
 * <p>Arguments are optional: {@code args[0]} and {@code args[1]} are the two column file paths and
 * {@code args[2]} is the column type code. Each one falls back to the built-in scratch default
 * when it is not supplied.
 */
public class ColumnWriterComparison {

  private static final String DEFAULT_COLUMN_FILE = "/home/alee/下載/family_S_T";
  private static final String DEFAULT_COLUMN_TYPE = "S";

  /** Entity ids looked up in both dictionaries while investigating surrogate assignments. */
  private static final String PROBE_ENTITY_A = "920e6463-8df5-4137-becb-c2f4ea2f3688-default-asset-111383";
  private static final String PROBE_ENTITY_B = "920e6463-8df5-4137-becb-c2f4ea2f3688-default-asset-154162";

  /**
   * Runs the comparison and prints the findings to standard output.
   *
   * @param args optional overrides: first path, second path, column type code
   * @throws IOException if either column file cannot be opened or read
   */
  public static void main(String[] args) throws IOException {
    String column1 = args.length > 0 ? args[0] : DEFAULT_COLUMN_FILE;
    String column2 = args.length > 1 ? args[1] : DEFAULT_COLUMN_FILE;
    String columnType = args.length > 2 ? args[2] : DEFAULT_COLUMN_TYPE;
    ColumnFileWriter writer1 = new ColumnFileWriter(new DataInputStream(Files.newInputStream(Paths.get(column1), StandardOpenOption.READ)),
        new ColumnShardId(new ShardId(1, "dummy", "dummy"), new ColumnId("1", columnType)));
    ColumnFileWriter writer2 = new ColumnFileWriter(new DataInputStream(Files.newInputStream(Paths.get(column2), StandardOpenOption.READ)),
        new ColumnShardId(new ShardId(1, "dummy", "dummy"), new ColumnId("2", columnType)));

    List<EntityRecordSummary> summaries1 = writer1.getEntityRecordSummaries();
    List<EntityRecordSummary> summaries2 = writer2.getEntityRecordSummaries();

    for (int i = 0; i < summaries1.size(); i++) {
      System.out.println(i + " " + summaries1.get(i));
    }
    for (int i = 0; i < summaries2.size(); i++) {
      System.out.println(i + " " + summaries2.get(i));
    }

    if (!summaries1.equals(summaries2)) {
      // The lists may differ in length, so only the shared prefix can be compared pairwise.
      int comparable = Math.min(summaries1.size(), summaries2.size());
      for (int i = 0; i < comparable; i++) {
        EntityRecordSummary ers1 = summaries1.get(i);
        EntityRecordSummary ers2 = summaries2.get(i);
        System.out.println(i + ":" + ers1.getId() + ":" + ers1.getOffset() + " " + ers2.getId() + ":" + ers2.getOffset());
        if (!ers1.equals(ers2)) {
          System.out.println("!!!!!!");
        }
      }
      if (summaries1.size() != summaries2.size()) {
        System.out.println("Summary counts differ: " + summaries1.size() + " vs " + summaries2.size());
      }
    }

    DictionaryWriter dw1 = writer1.getEntityDictionary();
    DictionaryWriter dw2 = writer2.getEntityDictionary();

    // Every dictionary access sits behind the null check so a column without a dictionary is
    // reported instead of failing with a NullPointerException.
    if (dw1 != null && dw2 != null) {
      // The results below are not printed; they are kept in locals for inspection in a debugger.
      Set<Object> ee = dw1.isCorrupted();
      Set<Object> ff = dw2.isCorrupted();

      Integer test1 = dw1.getSurrogate(PROBE_ENTITY_A);
      String value1 = dw1.getValue(test1);
      Integer test3 = dw1.getSurrogate(PROBE_ENTITY_B);
      String value2 = dw1.getValue(test3);

      // Are the surrogates assigned by the first dictionary in use in the second one?
      for (Integer v1 : dw2.getStrToInt().values()) {
        if (v1.equals(test1)) {
          System.out.println("Got test1");
        }
        if (v1.equals(test3)) {
          System.out.println("Got test3");
        }
      }

      // NOTE(review): the second dictionary's surrogates are resolved against the first
      // dictionary here - presumably a deliberate cross-check, confirm.
      Integer test2 = dw2.getSurrogate(PROBE_ENTITY_A);
      String value3 = dw1.getValue(test2);
      Integer test4 = dw2.getSurrogate(PROBE_ENTITY_B);
      String value4 = dw1.getValue(test4);

      for (Integer v1 : dw2.getStrToInt().values()) {
        if (v1.equals(test2)) {
          System.out.println("Got test2");
        }
        if (v1.equals(test4)) {
          System.out.println("Got test4");
        }
      }

      System.out.println("Analyzing dictionary for any changes");
      for (Map.Entry<String, Integer> entry : dw1.getStrToInt().entrySet()) {
        String value = entry.getKey();
        Integer surrogate = entry.getValue();
        Integer otherSurrogate = dw2.getSurrogate(value);
        if (otherSurrogate == null) {
          System.out.println("The value " + value + " was removed from 2nd column");
        } else if (surrogate.intValue() != otherSurrogate.intValue()) {
          System.out.println("The value " + value + " surrogate was changed from " + surrogate + " to " + otherSurrogate);
        }
      }

      // Do a check the other way now
      Set<String> differences = dw2.getStrToInt().keySet().stream()
          .filter(key -> !dw1.getStrToInt().containsKey(key))
          .collect(Collectors.toSet());
      for (String difference2 : differences) {
        System.out.println("The value " + difference2 + " is new in the 2nd colunmn");
      }
    } else if (dw1 == null && dw2 == null) {
      System.out.println("No dictionary analysis can be applied");
    } else {
      System.out.println("One of the columns dictionaries doesn't exist");
    }

    System.out.println("Executing entity record analysis");
    Map<Integer, EntityRecord> entityRecords1 = writer1.getEntites();
    Map<Integer, EntityRecord> entityRecords2 = writer2.getEntites();

    // With records analysis we will now be looking at the changes, meaning records that have changed from 1 vs 2.
    Set<EntityRecord> changes2 = new HashSet<>();
    Set<EntityRecord> removed = new HashSet<>();
    Set<EntityRecord> added = new HashSet<>();

    for (EntityRecord entityRecord1 : entityRecords1.values()) {
      EntityRecord entityRecord2 = entityRecords2.get(entityRecord1.getEntityId());
      if (entityRecord2 == null) {
        removed.add(entityRecord1);
      } else if (entityRecord1.getRowGroupOffset() != entityRecord2.getRowGroupOffset()) {
        // Present in both files but stored at a different row group offset.
        changes2.add(entityRecord2);
      }
    }

    for (Map.Entry<Integer, EntityRecord> entry2 : entityRecords2.entrySet()) {
      if (!entityRecords1.containsKey(entry2.getKey())) {
        added.add(entry2.getValue());
      }
    }

    System.out.println("Here are the entities that were removed");
    for (EntityRecord removedRecord : removed) {
      System.out.println(removedRecord);
    }
    System.out.println("Here are the entities that were added " + added.size() + " were added");
    for (EntityRecord add : added) {
      System.out.println(add);
    }
    // This section lists the changed records; the label used to repeat "added" by mistake.
    System.out.println("Here are the entities that were changed");
    for (EntityRecord change2 : changes2) {
      EntityRecord change1 = entityRecords1.get(change2.getEntityId());
      System.out.println("Changed from " + change1.getEntityId() + ":" + change1.getRowGroupOffset() + " to " + change2.getEntityId() + ":" + change2.getRowGroupOffset());
    }
    System.out.println("Finished analysis");
  }
}

Clone this wiki locally