diff --git a/.gitignore b/.gitignore index 8068abf..0dea21b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ target *.iml *.ipr .idea +*.db* +.shell_history diff --git a/copy-store.sh b/copy-store.sh new file mode 100755 index 0000000..e0723ee --- /dev/null +++ b/copy-store.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +EDITION=${1-community} +shift +SRC=$1 +DST=$2 +SKIP_RELS=$3 +SKIP_PROPS=$4 +SKIP_LABELS=$5 +DELETE_NODES=$6 +KEEP_NODE_IDS=$7 +HEAP=4G +CACHE=2G +CACHE_SRC=1G +#$CACHE +echo "Usage: copy-store.sh [community|enterprise] source.db target.db [RELS,TO,SKIP] [props,to,skip] [Labels,To,Skip] [Labels,To,Delete,Nodes]" + +if [[ "$EDITION" != "enterprise" && "$EDITION" != "community" ]] +then + echo "ATTENTION: The parameter '$EDITION' you passed in for the edition is neither 'community' nor 'enterprise'. Aborting." + exit +fi +if [[ "$SRC" = "" || "$DST" = "" ]] +then + echo "ATTENTION: Source '$SRC' or target '$DST' directory not provided. Aborting." + exit +fi + +if [[ ! -d $SRC ]] +then + echo "ATTENTION: Source '$SRC' is not a directory. Aborting." + exit +fi + +echo "Using: Heap $HEAP Pagecache $CACHE Edition '$EDITION' from '$SRC' to '$DST' skipping labels: '$SKIP_LABELS', removing nodes with labels: '$DELETE_NODES' rels: '$SKIP_RELS' props '$SKIP_PROPS' Keeping Node Ids: $KEEP_NODE_IDS" +echo +echo "Please note that you will need this memory ($CACHE + $CACHE_SRC + $HEAP) as it opens 2 databases one for reading and one for writing." 
+# heap config +export MAVEN_OPTS="-Xmx$HEAP -Xms$HEAP -XX:+UseG1GC" + +mvn clean compile exec:java -P${EDITION} -e -Dexec.mainClass="org.neo4j.tool.StoreCopy" -Ddbms.pagecache.memory=$CACHE -Ddbms.pagecache.memory.source=$CACHE_SRC \ + -Dexec.args="$SRC $DST $SKIP_RELS $SKIP_PROPS $SKIP_LABELS $DELETE_NODES $KEEP_NODE_IDS" + +#-Dneo4j.version=2.3.0 diff --git a/neo4j.properties b/neo4j.properties new file mode 100644 index 0000000..f3e1314 --- /dev/null +++ b/neo4j.properties @@ -0,0 +1,18 @@ +dbms.pagecache.memory=2G +dbms.pagecache.memory.source=2G + +cache_type=none +allow_store_upgrade=true + +source_db_dir= +target_db_dir= + +keep_node_ids=true + +properties_to_ignore= +labels_to_ignore= +labels_to_delete= +rel_types_to_ignore= + +store_copy_log_dir= +bad_entries_log_dir= diff --git a/pom.xml b/pom.xml index 112b95a..11232fd 100644 --- a/pom.xml +++ b/pom.xml @@ -4,15 +4,31 @@ org.neo4j store-util - 2.0.1 + 3.1.0 jar store-util UTF-8 + ${project.version} + + + enterprise + + + org.neo4j + neo4j-enterprise + ${neo4j.version} + + + + + community + + junit @@ -20,15 +36,20 @@ 4.11 test + + org.neo4j + neo4j-io + ${neo4j.version} + org.neo4j neo4j-kernel - ${project.version} + ${neo4j.version} org.neo4j neo4j-lucene-index - ${project.version} + ${neo4j.version} @@ -39,8 +60,8 @@ maven-compiler-plugin 2.3.2 - 1.7 - 1.7 + 1.8 + 1.8 -Xlint:all true false diff --git a/readme.md b/readme.md index d28ab0b..c2df9fc 100644 --- a/readme.md +++ b/readme.md @@ -1,22 +1,67 @@ -## Tools to copy and compare Neo4j Stores +## Tool to copy Neo4j Stores -Uses the GraphDatabaseService to read a store and the batch-inserter API to write the target store keeping the node-ids. -Copies the index-files as is. -Ignores broken nodes and relationships. +Uses the BatchInserterImpl to read a store and write the target store keeping the node-ids. +Copies the manual (legacy) index-files as is, please note it performs no index upgrade! 
-Also useful to skip no longer wanted properties or relationships with a certain type. Good for store compaction as it -rewrites the store file reclaiming space that is sitting empty. +You will have to recreate any schema indexes too. -Change the Neo4j version in pom.xml before running. (Currently 1.9.5) +Ignores broken nodes and relationships and records them in `target/store-copy.log` + +Also useful to skip no longer wanted properties or relationships of a certain type, +as well as certain labels and even whole nodes carrying certain labels. + +Good for store compaction and reorganization of relationships and properties as +it rewrites the store file reclaiming space that is sitting empty. + +NOTE: With Neo4j 3.x there are two different store formats, so you have to provide "enterprise" or "community" as first argument of the call! + +You can now also choose to compact the node-store; to do so, pass "false" as the parameter for keep-node-ids. + +Config is read from `neo4j.properties` file in current directory if it exists, but command line options override. + +neo4j.properties + +``` +source_db_dir= +target_db_dir= + +keep_node_ids=true + +properties_to_ignore= +labels_to_ignore= +labels_to_delete= +rel_types_to_ignore= + +store_copy_log_dir= +bad_entries_log_dir= +``` ### Store Copy -Usage: + copy-store.sh [enterprise|community] source.db target.db [RELS,TO,SKIP] [props,to,skip] [Labels,To,Skip] [Labels,To,Delete,Nodes] [keep-node-ids:true/false] + + +The provided script contains these settings for page-cache (note you can configure a different, smaller setting for the source store than the target store). 
+ + dbms.pagecache.memory.source=1G + dbms.pagecache.memory=2G + +Heap config is in the shell-script, default is: + + export MAVEN_OPTS="-Xmx4G -Xms4G -XX:+UseG1GC" + +**Please adapt the settings as needed for your store.** + +**Please note that you will need the memory for (source-page-cache + target-page-cache + 1x heap) as it opens 2 databases, one for reading and one for writing.** + +Change the Neo4j version in pom.xml before running as needed. (Currently 3.1.0) + +Optionally changeable from the outside with `-Dneo4j.version=3.1.0` on the `mvn` invocation. + +### Internally - mvn compile exec:java -Dexec.mainClass="org.neo4j.tool.StoreCopy" \ - -Dexec.args="source-dir target-dir [rel,types,to,ignore] [properties,to,ignore] [labels,to,ignore]" +Note: Under the hood it calls: -# Store Compare + mvn compile exec:java -Dexec.mainClass="org.neo4j.tool.StoreCopy" -Penterprise \ + -Dexec.args="source-dir target-dir [REL,TYPES,TO,IGNORE] [properties,to,ignore] [Labels,To,Ignore] [Labels,To,Delete,Nodes] [keep-node-ids:true/false]" - mvn compile exec:java -Dexec.mainClass="org.neo4j.tool.StoreComparer" \ - -Dexec.args="source-dir target-dir [rel,types,to,ignore] [properties,to,ignore]" diff --git a/src/main/java/org/neo4j/tool/DomainAnalyzer.java b/src/main/java/org/neo4j/tool/DomainAnalyzer.java index 1ce7f26..489810e 100644 --- a/src/main/java/org/neo4j/tool/DomainAnalyzer.java +++ b/src/main/java/org/neo4j/tool/DomainAnalyzer.java @@ -3,9 +3,9 @@ import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.factory.GraphDatabaseFactory; -import org.neo4j.helpers.collection.IteratorUtil; -import org.neo4j.kernel.EmbeddedGraphDatabase; +import org.neo4j.helpers.collection.Iterables; +import java.io.File; import java.util.*; /** @@ -74,13 +74,13 @@ private String toString(Object value) { } } public static void main(String[] args) { - graphDb = new 
GraphDatabaseFactory().newEmbeddedDatabase(new File(args[0])); long time = System.currentTimeMillis(); Map,Sample> statistics = new HashMap, Sample>(); int count = 0; for (Node node : graphDb.getAllNodes()) { - final HashSet keys = IteratorUtil.addToCollection(node.getPropertyKeys(), new HashSet()); + final HashSet keys = Iterables.addToCollection(node.getPropertyKeys(), new HashSet()); Sample sample = statistics.get(keys); if (sample==null) { sample = new Sample(node); diff --git a/src/main/java/org/neo4j/tool/GraphGenerator.java b/src/main/java/org/neo4j/tool/GraphGenerator.java index e54fe11..fa4ecfd 100644 --- a/src/main/java/org/neo4j/tool/GraphGenerator.java +++ b/src/main/java/org/neo4j/tool/GraphGenerator.java @@ -5,8 +5,8 @@ import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.Transaction; import org.neo4j.graphdb.factory.GraphDatabaseFactory; -import org.neo4j.kernel.EmbeddedGraphDatabase; +import java.io.File; import java.util.Arrays; /** @@ -17,7 +17,7 @@ public class GraphGenerator { public static final int MILLION = 1000 * 1000; public static void main(String[] args) { - final GraphDatabaseService gdb = new GraphDatabaseFactory().newEmbeddedDatabase("target/data"); + final GraphDatabaseService gdb = new GraphDatabaseFactory().newEmbeddedDatabase(new File("target/data")); createDatabase(gdb); gdb.shutdown(); } @@ -39,14 +39,15 @@ public static void createDatabase(GraphDatabaseService graphdb) { System.out.print("."); if ((i % 10000) == 0) { tx.success(); - tx.finish(); + tx.close(); System.out.println(" " + i); tx = graphdb.beginTx(); } } } } finally { - tx.finish(); + tx.success(); + tx.close(); } System.out.println(); long delta = (System.currentTimeMillis() - cpuTime); diff --git a/src/main/java/org/neo4j/tool/PropertyAnalyzer.java b/src/main/java/org/neo4j/tool/PropertyAnalyzer.java index abef9ff..f582c81 100644 --- a/src/main/java/org/neo4j/tool/PropertyAnalyzer.java +++ b/src/main/java/org/neo4j/tool/PropertyAnalyzer.java @@ -3,8 +3,8 
@@ import org.neo4j.graphdb.*; import org.neo4j.graphdb.factory.GraphDatabaseFactory; import org.neo4j.helpers.collection.MapUtil; -import org.neo4j.kernel.EmbeddedGraphDatabase; +import java.io.File; import java.lang.reflect.Array; import java.util.*; @@ -96,7 +96,7 @@ public static Map config() { } public static void main(String[] args) { - final GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(args[0]).setConfig(config()).newGraphDatabase(); + final GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(new File(args[0])).setConfig(config()).newGraphDatabase(); int withoutProps=0, nodes = 0, rels = 0; Map props=new HashMap(); for (Node node : db.getAllNodes()) { diff --git a/src/main/java/org/neo4j/tool/SingleRelationshipDeletion.java b/src/main/java/org/neo4j/tool/SingleRelationshipDeletion.java deleted file mode 100644 index b596b26..0000000 --- a/src/main/java/org/neo4j/tool/SingleRelationshipDeletion.java +++ /dev/null @@ -1,82 +0,0 @@ -package org.neo4j.tool; - -/** - * @author mh - * @since 12.08.11 - */ - -import org.neo4j.graphdb.GraphDatabaseService; -import org.neo4j.graphdb.NotFoundException; -import org.neo4j.graphdb.Relationship; -import org.neo4j.graphdb.Transaction; -import org.neo4j.graphdb.factory.GraphDatabaseFactory; -import org.neo4j.kernel.EmbeddedGraphDatabase; - -import java.io.File; -import java.util.HashSet; -import java.util.Random; -import java.util.Set; - -/** - * @author mh - * @since 11.08.11 - */ -public class SingleRelationshipDeletion { - - public static final int COUNT = 1000; - private static final int ROUNDS = 50; - - public static void main(String[] args) { - final File dir = new File("target/data"); - boolean mustCreate = !dir.exists(); - final GraphDatabaseService graphdb = new GraphDatabaseFactory().newEmbeddedDatabase(dir.getAbsolutePath()); - if (mustCreate) GraphGenerator.createDatabase(graphdb); - - for (int round = 0; round < ROUNDS; round++) { - long[] relIds = 
createRelIds(COUNT); - int success = 0, fail = 0; - long cpuTime = System.currentTimeMillis(); - final Transaction tx = graphdb.beginTx(); - try { - for (int i = 0; i < COUNT; i++) { - final Relationship rel; - try { - rel = graphdb.getRelationshipById(relIds[i]); - } catch (NotFoundException nfe) { - fail++; - continue; - } - try { - rel.delete(); - tx.success(); - success++; - } catch (NotFoundException nfe) { - fail++; -// tx.failure(); - } - } - } finally { - tx.finish(); - } - - long delta = (System.currentTimeMillis() - cpuTime); - System.out.printf("round %d delete %d relationships time = %d ms, succ %d failed %d%n", round, COUNT, delta, success, fail); - } - graphdb.shutdown(); - } - - private static long[] createRelIds(final int count) { - Random random = new Random(System.currentTimeMillis()); - Set values = new HashSet(); - do { - values.add(random.nextInt(GraphGenerator.MILLION)); - } while (values.size() < count); - final long[] result = new long[count]; - int i = 0; - for (Integer value : values) { - result[i++] = value.longValue(); - } - return result; - } - -} diff --git a/src/main/java/org/neo4j/tool/StoreComparer.java b/src/main/java/org/neo4j/tool/StoreComparer.java index b790ff5..90ce4d9 100644 --- a/src/main/java/org/neo4j/tool/StoreComparer.java +++ b/src/main/java/org/neo4j/tool/StoreComparer.java @@ -2,17 +2,15 @@ import org.neo4j.graphdb.*; import org.neo4j.graphdb.factory.GraphDatabaseFactory; -import org.neo4j.helpers.collection.IteratorUtil; +import org.neo4j.helpers.collection.Iterables; import org.neo4j.helpers.collection.MapUtil; -import org.neo4j.kernel.EmbeddedGraphDatabase; -import org.neo4j.kernel.impl.util.FileUtils; +import org.neo4j.io.fs.FileUtils; import java.io.File; import java.io.IOException; import java.util.*; import static java.util.Arrays.asList; -import static java.util.Arrays.toString; import static java.util.Collections.emptySet; public class StoreComparer { @@ -55,12 +53,15 @@ private static void 
compareStore(String sourceDir, String targetDir, Set if (!target.exists()) throw new IllegalArgumentException("Target Directory does not exists " + target); if (!source.exists()) throw new IllegalArgumentException("Source Database does not exist " + source); - GraphDatabaseService targetDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(target.getAbsolutePath()).setConfig(config()).newGraphDatabase(); - GraphDatabaseService sourceDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(sourceDir).setConfig(config()).newGraphDatabase(); + GraphDatabaseService targetDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(target).setConfig(config()).newGraphDatabase(); + GraphDatabaseService sourceDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(source).setConfig(config()).newGraphDatabase(); - compareCounts(sourceDb, targetDb, ignoreRelTypes, ignoreProperties); - compareNodes(sourceDb, targetDb, ignoreProperties); - compareRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties); + try (Transaction srcDbTx = sourceDb.beginTx(); + Transaction targetDbTx = targetDb.beginTx()) { + compareCounts(sourceDb, targetDb, ignoreRelTypes, ignoreProperties); + compareNodes(sourceDb, targetDb, ignoreProperties); + compareRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties); + } targetDb.shutdown(); sourceDb.shutdown(); @@ -98,7 +99,7 @@ private static Statistics count(GraphDatabaseService db, Set ignoreRelTy } private static int countProperties(Set ignoreProperties, PropertyContainer node) { - final Collection keys = IteratorUtil.addToCollection(node.getPropertyKeys(), new HashSet()); + final Collection keys = Iterables.addToCollection(node.getPropertyKeys(), new HashSet()); keys.removeAll(ignoreProperties); return keys.size(); } @@ -142,8 +143,8 @@ private static Relationship getTargetRel(GraphDatabaseService gdb,long startNode } private static void compareProperties(PropertyContainer pc1, PropertyContainer pc2, Set 
ignoreProperties) { - final Collection keys1 = IteratorUtil.addToCollection(pc1.getPropertyKeys(), new HashSet()); - final Collection keys2 = IteratorUtil.addToCollection(pc2.getPropertyKeys(), new HashSet()); + final Collection keys1 = Iterables.addToCollection(pc1.getPropertyKeys(), new HashSet()); + final Collection keys2 = Iterables.addToCollection(pc2.getPropertyKeys(), new HashSet()); keys2.removeAll(ignoreProperties); keys1.removeAll(ignoreProperties); if (!keys1.equals(keys2)) { diff --git a/src/main/java/org/neo4j/tool/StoreCopy.java b/src/main/java/org/neo4j/tool/StoreCopy.java index 95fe717..3960358 100644 --- a/src/main/java/org/neo4j/tool/StoreCopy.java +++ b/src/main/java/org/neo4j/tool/StoreCopy.java @@ -1,22 +1,22 @@ package org.neo4j.tool; +import org.neo4j.collection.primitive.Primitive; +import org.neo4j.collection.primitive.PrimitiveLongLongMap; import org.neo4j.graphdb.*; import org.neo4j.graphdb.factory.GraphDatabaseFactory; -import org.neo4j.helpers.Pair; -import org.neo4j.helpers.collection.IteratorUtil; +import org.neo4j.helpers.Exceptions; +import org.neo4j.helpers.collection.Iterables; import org.neo4j.helpers.collection.MapUtil; -import org.neo4j.kernel.GraphDatabaseAPI; -import org.neo4j.kernel.impl.core.NodeManager; -import org.neo4j.kernel.impl.nioneo.store.InvalidRecordException; -import org.neo4j.kernel.impl.util.FileUtils; -import org.neo4j.unsafe.batchinsert.BatchInserter; -import org.neo4j.unsafe.batchinsert.BatchInserters; -import org.neo4j.unsafe.batchinsert.BatchRelationship; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; +import org.neo4j.helpers.collection.Pair; +import org.neo4j.io.fs.FileUtils; +import org.neo4j.kernel.impl.store.id.IdGeneratorFactory; +import org.neo4j.kernel.impl.store.id.IdType; +import org.neo4j.kernel.internal.GraphDatabaseAPI; +import org.neo4j.unsafe.batchinsert.*; +import org.neo4j.unsafe.batchinsert.internal.BatchInserterImpl; + 
+import java.io.*; +import java.lang.reflect.Field; import java.util.*; import static java.util.Arrays.asList; @@ -27,40 +27,40 @@ public class StoreCopy { private static final Label[] NO_LABELS = new Label[0]; private static PrintWriter logs; - @SuppressWarnings("unchecked") - public static Map config() { - return (Map) MapUtil.map( - "neostore.nodestore.db.mapped_memory", "100M", - "neostore.relationshipstore.db.mapped_memory", "500M", - "neostore.propertystore.db.mapped_memory", "300M", - "neostore.propertystore.db.strings.mapped_memory", "1G", - "neostore.propertystore.db.arrays.mapped_memory", "300M", - "neostore.propertystore.db.index.keys.mapped_memory", "100M", - "neostore.propertystore.db.index.mapped_memory", "100M", - "cache_type", "weak" - ); - } - public static void main(String[] args) throws Exception { if (args.length < 2) { - System.err.println("Usage: StoryCopy source target [rel,types,to,ignore] [properties,to,ignore]"); + System.err.println("Usage: StoryCopy source target [rel,types,to,ignore] [properties,to,ignore] [labels,to,ignore] [labels,to,delete]"); return; } - String sourceDir = args[0]; - String targetDir = args[1]; - Set ignoreRelTypes = splitOptionIfExists(args, 2); - Set ignoreProperties = splitOptionIfExists(args, 3); - Set ignoreLabels = splitOptionIfExists(args, 4); - System.out.printf("Copying from %s to %s ingoring rel-types %s ignoring properties %s ignoring labels %s %n", sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels); - copyStore(sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels); + Properties properties = new Properties(); + properties.load(new FileReader("neo4j.properties")); + String sourceDir = getArgument(args,0,properties,"source_db_dir"); + String targetDir = getArgument(args,1,properties,"target_db_dir"); + + Set ignoreRelTypes = splitToSet(getArgument(args,2,properties,"rel_types_to_ignore")); + Set ignoreProperties = 
splitToSet(getArgument(args,3,properties,"properties_to_ignore")); + Set ignoreLabels = splitToSet(getArgument(args,4,properties,"labels_to_ignore")); + Set deleteNodesWithLabels = splitToSet(getArgument(args,5,properties,"labels_to_delete")); + String keepNodeIdsParam = getArgument(args, 6, properties, "keep_node_ids"); + boolean keepNodeIds = !("false".equalsIgnoreCase(keepNodeIdsParam)); + System.out.printf("Copying from %s to %s ingoring rel-types %s ignoring properties %s ignoring labels %s removing nodes with labels %s keep node ids %s %n", sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels, deleteNodesWithLabels,keepNodeIds); + copyStore(sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels,deleteNodesWithLabels, keepNodeIds); + } + + private static String getArgument(String[] args, int index, Properties properties, String key) { + if (args.length > index) return args[index]; + return properties.getProperty(key); } - private static Set splitOptionIfExists(String[] args, final int index) { - if (args.length <= index) return emptySet(); - return new HashSet(asList(args[index].toLowerCase().split(","))); + private static Set splitToSet(String value) { + if (value == null || value.trim().isEmpty()) return emptySet(); + return new HashSet<>(asList(value.trim().split(", *"))); } - private static void copyStore(String sourceDir, String targetDir, Set ignoreRelTypes, Set ignoreProperties, Set ignoreLabels) throws Exception { + interface Flusher { + void flush(); + } + private static void copyStore(String sourceDir, String targetDir, Set ignoreRelTypes, Set ignoreProperties, Set ignoreLabels, Set deleteNodesWithLabels, boolean stableNodeIds) throws Exception { final File target = new File(targetDir); final File source = new File(sourceDir); if (target.exists()) { @@ -70,24 +70,53 @@ private static void copyStore(String sourceDir, String targetDir, Set ig if (!source.exists()) throw new IllegalArgumentException("Source Database does not 
exist " + source); Pair highestIds = getHighestNodeId(source); - BatchInserter targetDb = BatchInserters.inserter(target.getAbsolutePath(), config()); - BatchInserter sourceDb = BatchInserters.inserter(source.getAbsolutePath(), config()); - logs = new PrintWriter(new FileWriter(new File(target, "store-copy.log"))); + String pageCacheSize = System.getProperty("dbms.pagecache.memory","2G"); + BatchInserter targetDb = BatchInserters.inserter(target, MapUtil.stringMap("dbms.pagecache.memory", pageCacheSize)); + BatchInserter sourceDb = BatchInserters.inserter(source, MapUtil.stringMap("dbms.pagecache.memory", System.getProperty("dbms.pagecache.memory.source",pageCacheSize))); + Flusher flusher = getFlusher(sourceDb); - copyNodes(sourceDb, targetDb, ignoreProperties, ignoreLabels, highestIds.first()); - copyRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties, highestIds.other()); + logs = new PrintWriter(new FileWriter(new File(target, "store-copy.log"))); + PrimitiveLongLongMap copiedNodeIds = copyNodes(sourceDb, targetDb, ignoreProperties, ignoreLabels, deleteNodesWithLabels, highestIds.first(),flusher, stableNodeIds); + copyRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties, copiedNodeIds, highestIds.other(), flusher); targetDb.shutdown(); - sourceDb.shutdown(); + try { + sourceDb.shutdown(); + } catch (Exception e) { + logs.append(String.format("Noncritical error closing the source database:%n%s", Exceptions.stringify(e))); + } logs.close(); copyIndex(source, target); } + private static Flusher getFlusher(final BatchInserter db) { + try { + Field field = BatchInserterImpl.class.getDeclaredField("recordAccess"); + field.setAccessible(true); + final DirectRecordAccessSet recordAccessSet = (DirectRecordAccessSet) field.get(db); + final Field cacheField = DirectRecordAccess.class.getDeclaredField("batch"); + cacheField.setAccessible(true); + return new Flusher() { + @Override public void flush() { + try { + ((Map) 
cacheField.get(recordAccessSet.getNodeRecords())).clear(); + ((Map) cacheField.get(recordAccessSet.getRelRecords())).clear(); + ((Map) cacheField.get(recordAccessSet.getPropertyRecords())).clear(); + } catch (IllegalAccessException e) { + throw new RuntimeException("Error clearing cache "+cacheField,e); + } + } + }; + } catch (IllegalAccessException | NoSuchFieldException e) { + throw new RuntimeException("Error accessing cache field ", e); + } + } + private static Pair getHighestNodeId(File source) { - GraphDatabaseAPI api = (GraphDatabaseAPI) new GraphDatabaseFactory().newEmbeddedDatabase(source.getAbsolutePath()); - NodeManager nodeManager = api.getDependencyResolver().resolveDependency(NodeManager.class); - long highestNodeId = nodeManager.getHighestPossibleIdInUse(Node.class); - long highestRelId = nodeManager.getHighestPossibleIdInUse(Relationship.class); + GraphDatabaseAPI api = (GraphDatabaseAPI) new GraphDatabaseFactory().newEmbeddedDatabase(source); + IdGeneratorFactory idGenerators = api.getDependencyResolver().resolveDependency(IdGeneratorFactory.class); + long highestNodeId = idGenerators.get(IdType.NODE).getHighestPossibleIdInUse(); + long highestRelId = idGenerators.get(IdType.RELATIONSHIP).getHighestPossibleIdInUse(); api.shutdown(); return Pair.of(highestNodeId, highestRelId); } @@ -103,59 +132,143 @@ private static void copyIndex(File source, File target) throws IOException { } } - private static void copyRelationships(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreRelTypes, Set ignoreProperties, long highestRelId) { + private static void copyRelationships(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreRelTypes, Set ignoreProperties, PrimitiveLongLongMap copiedNodeIds, long highestRelId, Flusher flusher) { long time = System.currentTimeMillis(); long relId = 0; long notFound = 0; + long removed = 0; while (relId <= highestRelId) { BatchRelationship rel = null; + String type = null; try { rel = 
sourceDb.getRelationshipById(relId++); - } catch (InvalidRecordException nfe) { - notFound++; - continue; + type = rel.getType().name(); + if (!ignoreRelTypes.contains(type)) { + if (!createRelationship(targetDb, sourceDb, rel, ignoreProperties, copiedNodeIds)) { + removed++; + } + } else { + removed++; + } + } catch (Exception e) { + if (e instanceof org.neo4j.kernel.impl.store.InvalidRecordException && e.getMessage().endsWith("not in use")) { + notFound++; + } else { + addLog(rel, "copy Relationship: " + (relId - 1) + "-[:" + type + "]" + "->?", e.getMessage()); + } + } + if (relId % 10000 == 0) { + System.out.print("."); + logs.flush(); + } + if (relId % 500000 == 0) { + flusher.flush(); + System.out.printf(" %d / %d (%d%%) unused %d removed %d%n", relId, highestRelId, percent(relId,highestRelId), notFound,removed); } - if (ignoreRelTypes.contains(rel.getType().name().toLowerCase())) continue; - createRelationship(targetDb, sourceDb, rel, ignoreProperties); - if (relId % 1000 == 0) System.out.print("."); - if (relId % 100000 == 0) System.out.println(" " + rel); } - System.out.println("\n copying of "+relId+" relationships took "+(System.currentTimeMillis()-time)+" ms. Not found "+notFound); + time = Math.max(1,(System.currentTimeMillis() - time)/1000); + System.out.printf("%n copying of %d relationship records took %d seconds (%d rec/s). 
Unused Records %d (%d%%) Removed Records %d (%d%%)%n", + relId, time, relId/time, notFound, percent(notFound,relId),removed, percent(removed,relId)); } - private static void createRelationship(BatchInserter targetDb, BatchInserter sourceDb, BatchRelationship rel, Set ignoreProperties) { - long startNodeId = rel.getStartNode(); - long endNodeId = rel.getEndNode(); + private static int percent(Number part, Number total) { + return (int) (100 * part.floatValue() / total.floatValue()); + } + + private static long firstNode(BatchInserter sourceDb, long highestNodeId) { + long node = -1; + while (++node <= highestNodeId) { + if (sourceDb.nodeExists(node) && !sourceDb.getNodeProperties(node).isEmpty()) return node; + } + return -1; + } + + private static void flushCache(BatchInserter sourceDb, long node) { + Map nodeProperties = sourceDb.getNodeProperties(node); + Iterator> iterator = nodeProperties.entrySet().iterator(); + if (iterator.hasNext()) { + Map.Entry firstProp = iterator.next(); + sourceDb.nodeHasProperty(node,firstProp.getKey()); + sourceDb.setNodeProperty(node, firstProp.getKey(), firstProp.getValue()); // force flush + System.out.print(" flush"); + } + } + + private static boolean createRelationship(BatchInserter targetDb, BatchInserter sourceDb, BatchRelationship rel, Set ignoreProperties, PrimitiveLongLongMap copiedNodeIds) { + long startNodeId = copiedNodeIds.get(rel.getStartNode()); + long endNodeId = copiedNodeIds.get(rel.getEndNode()); + if (startNodeId == -1L || endNodeId == -1L) return false; final RelationshipType type = rel.getType(); try { - targetDb.createRelationship(startNodeId, endNodeId, type, getProperties(sourceDb.getRelationshipProperties(rel.getId()), ignoreProperties)); - } catch (InvalidRecordException ire) { - addLog(rel, "create Relationship: " + startNodeId + "-[:" + type + "]" + "->" + endNodeId, ire.getMessage()); + Map props = getProperties(sourceDb.getRelationshipProperties(rel.getId()), ignoreProperties); +// if 
(props.isEmpty()) props = Collections.singletonMap("old_id",rel.getId()); else props.put("old_id",rel.getId()); + targetDb.createRelationship(startNodeId, endNodeId, type, props); + return true; + } catch (Exception e) { + addLog(rel, "create Relationship: " + startNodeId + "-[:" + type + "]" + "->" + endNodeId, e.getMessage()); + return false; } } - private static void copyNodes(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreProperties, Set ignoreLabels, long highestNodeId) { + private static PrimitiveLongLongMap copyNodes(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreProperties, Set ignoreLabels, Set deleteNodesWithLabels, long highestNodeId, Flusher flusher, boolean stableNodeIds) { + PrimitiveLongLongMap copiedNodes = Primitive.offHeapLongLongMap(); long time = System.currentTimeMillis(); - int node = -1; - while (++node <= highestNodeId) { - if (!sourceDb.nodeExists(node)) continue; - targetDb.createNode(node, getProperties(sourceDb.getNodeProperties(node), ignoreProperties), labelsArray(sourceDb, node,ignoreLabels)); - if (node % 1000 == 0) System.out.print("."); - if (node % 100000 == 0) { + long node = 0; + long notFound = 0; + long removed = 0; + while (node <= highestNodeId) { + try { + if (sourceDb.nodeExists(node)) { + if (labelInSet(sourceDb.getNodeLabels(node),deleteNodesWithLabels)) { + removed ++; + } else { + long newNodeId=node; + if (stableNodeIds) { + targetDb.createNode(node, getProperties(sourceDb.getNodeProperties(node), ignoreProperties), labelsArray(sourceDb, node, ignoreLabels)); + } else { + newNodeId = targetDb.createNode(getProperties(sourceDb.getNodeProperties(node), ignoreProperties), labelsArray(sourceDb, node, ignoreLabels)); + } + copiedNodes.put(node,newNodeId); + } + } else { + notFound++; + } + } catch (Exception e) { + if (e instanceof org.neo4j.kernel.impl.store.InvalidRecordException && e.getMessage().endsWith("not in use")) { + notFound++; + } else addLog(node, e.getMessage()); + } + node++; + if (node 
% 10000 == 0) { + System.out.print("."); + } + if (node % 500000 == 0) { + flusher.flush(); logs.flush(); - System.out.println(" " + node); + System.out.printf(" %d / %d (%d%%) unused %d removed %d%n", node, highestNodeId, percent(node,highestNodeId), notFound, removed); } } - System.out.println("\n copying of " + node + " nodes took " + (System.currentTimeMillis() - time) + " ms."); + time = Math.max(1,(System.currentTimeMillis() - time)/1000); + System.out.printf("%n copying of %d node records took %d seconds (%d rec/s). Unused Records %d (%d%%). Removed Records %d (%d%%).%n", + node, time, node/time, notFound, percent(notFound,node),removed, percent(removed,node)); + return copiedNodes; + } + + private static boolean labelInSet(Iterable