diff --git a/.gitignore b/.gitignore
index 8068abf..0dea21b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,6 @@ target
*.iml
*.ipr
.idea
+*.db*
+.shell_history
diff --git a/copy-store.sh b/copy-store.sh
new file mode 100755
index 0000000..e0723ee
--- /dev/null
+++ b/copy-store.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Copies a Neo4j store by running org.neo4j.tool.StoreCopy through Maven; all arguments are positional.
+EDITION=${1-community}
+shift
+SRC=$1
+DST=$2
+SKIP_RELS=$3
+SKIP_PROPS=$4
+SKIP_LABELS=$5
+DELETE_NODES=$6
+KEEP_NODE_IDS=$7
+HEAP=4G
+CACHE=2G
+CACHE_SRC=1G
+# HEAP sizes the JVM heap; CACHE and CACHE_SRC are passed on as dbms.pagecache.memory and dbms.pagecache.memory.source.
+echo "Usage: copy-store.sh [community|enterprise] source.db target.db [RELS,TO,SKIP] [props,to,skip] [Labels,To,Skip] [Labels,To,Delete,Nodes] [keep-node-ids:true/false]"
+
+if [[ "$EDITION" != "enterprise" && "$EDITION" != "community" ]]
+then
+ echo "ATTENTION: The parameter '$EDITION' you passed in for the edition is neither 'community' nor 'enterprise'. Aborting."
+ exit 1
+fi
+if [[ "$SRC" = "" || "$DST" = "" ]]
+then
+ echo "ATTENTION: Source '$SRC' or target '$DST' directory not provided. Aborting."
+ exit 1
+fi
+
+if [[ ! -d "$SRC" ]]
+then
+ echo "ATTENTION: Source '$SRC' is not a directory. Aborting."
+ exit 1
+fi
+
+echo "Using: Heap $HEAP Pagecache $CACHE Edition '$EDITION' from '$SRC' to '$DST' skipping labels: '$SKIP_LABELS', removing nodes with labels: '$DELETE_NODES' rels: '$SKIP_RELS' props '$SKIP_PROPS' Keeping Node Ids: $KEEP_NODE_IDS"
+echo
+echo "Please note that you will need this memory ($CACHE + $CACHE_SRC + $HEAP) as it opens 2 databases one for reading and one for writing."
+# heap config
+export MAVEN_OPTS="-Xmx$HEAP -Xms$HEAP -XX:+UseG1GC"
+
+mvn clean compile exec:java -P${EDITION} -e -Dexec.mainClass="org.neo4j.tool.StoreCopy" -Ddbms.pagecache.memory=$CACHE -Ddbms.pagecache.memory.source=$CACHE_SRC \
+ -Dexec.args="$SRC $DST $SKIP_RELS $SKIP_PROPS $SKIP_LABELS $DELETE_NODES $KEEP_NODE_IDS"
+
+# To build against another Neo4j version, append e.g. -Dneo4j.version=3.1.0 to the mvn call above.
diff --git a/neo4j.properties b/neo4j.properties
new file mode 100644
index 0000000..f3e1314
--- /dev/null
+++ b/neo4j.properties
@@ -0,0 +1,18 @@
+dbms.pagecache.memory=2G
+dbms.pagecache.memory.source=2G
+
+cache_type=none
+allow_store_upgrade=true
+
+source_db_dir=
+target_db_dir=
+
+keep_node_ids=true
+
+properties_to_ignore=
+labels_to_ignore=
+labels_to_delete=
+rel_types_to_ignore=
+
+store_copy_log_dir=
+bad_entries_log_dir=
diff --git a/pom.xml b/pom.xml
index 112b95a..11232fd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,15 +4,31 @@
org.neo4j
store-util
- 2.0.1
+ 3.1.0
jar
store-util
UTF-8
+ ${project.version}
+
+
+ enterprise
+
+
+ org.neo4j
+ neo4j-enterprise
+ ${neo4j.version}
+
+
+
+
+ community
+
+
junit
@@ -20,15 +36,20 @@
4.11
test
+
+ org.neo4j
+ neo4j-io
+ ${neo4j.version}
+
org.neo4j
neo4j-kernel
- ${project.version}
+ ${neo4j.version}
org.neo4j
neo4j-lucene-index
- ${project.version}
+ ${neo4j.version}
@@ -39,8 +60,8 @@
maven-compiler-plugin
2.3.2
- 1.7
- 1.7
+ 1.8
+ 1.8
-Xlint:all
true
false
diff --git a/readme.md b/readme.md
index d28ab0b..c2df9fc 100644
--- a/readme.md
+++ b/readme.md
@@ -1,22 +1,67 @@
-## Tools to copy and compare Neo4j Stores
+## Tool to copy Neo4j Stores
-Uses the GraphDatabaseService to read a store and the batch-inserter API to write the target store keeping the node-ids.
-Copies the index-files as is.
-Ignores broken nodes and relationships.
+Uses the BatchInserterImpl to read a store and write the target store keeping the node-ids.
+Copies the manual (legacy) index-files as is, please note it performs no index upgrade!
-Also useful to skip no longer wanted properties or relationships with a certain type. Good for store compaction as it
-rewrites the store file reclaiming space that is sitting empty.
+You will have to recreate any schema indexes too.
-Change the Neo4j version in pom.xml before running. (Currently 1.9.5)
+Ignores broken nodes and relationships and records them in `store-copy.log` inside the target store directory.
+
+Also useful to skip no longer wanted properties, relationships of certain types,
+certain labels, and even whole nodes that carry certain labels.
+
+Good for store compaction and reorganization of relationships and properties as
+it rewrites the store file reclaiming space that is sitting empty.
+
+NOTE: With Neo4j 3.x there are two different store formats, so you have to provide "enterprise" or "community" as first argument of the call!
+
+You can now also compact the node store: pass "false" for the keep-node-ids parameter, and nodes get new ids assigned by the target store instead of keeping their source ids.
+
+Config is read from the `neo4j.properties` file in the current directory if it exists; command-line arguments override it.
+
+neo4j.properties
+
+```
+source_db_dir=
+target_db_dir=
+
+keep_node_ids=true
+
+properties_to_ignore=
+labels_to_ignore=
+labels_to_delete=
+rel_types_to_ignore=
+
+store_copy_log_dir=
+bad_entries_log_dir=
+```
### Store Copy
-Usage:
+ copy-store.sh [enterprise|community] source.db target.db [RELS,TO,SKIP] [props,to,skip] [Labels,To,Skip] [Labels,To,Delete,Nodes] [keep-node-ids:true/false]
+
+
+The provided script contains these settings for page-cache (note you can configure a different, smaller setting for the source store than the target store).
+
+ dbms.pagecache.memory.source=1G
+ dbms.pagecache.memory=2G
+
+Heap config is in the shell-script, default is:
+
+ export MAVEN_OPTS="-Xmx4G -Xms4G -XX:+UseG1GC"
+
+**Please adapt the settings as needed for your store.**
+
+**Please note that you will need the memory for (source-page-cache + target-page-cache + 1x heap) as it opens 2 databases one for reading and one for writing.**
+
+Change the Neo4j version in pom.xml before running as needed. (Currently 3.1.0)
+
+Optionally changeable from the outside with `-Dneo4j.version=3.1.0` on the `mvn` invocation.
+
+### Internally
- mvn compile exec:java -Dexec.mainClass="org.neo4j.tool.StoreCopy" \
- -Dexec.args="source-dir target-dir [rel,types,to,ignore] [properties,to,ignore] [labels,to,ignore]"
+Note: It calls under the hood:
-# Store Compare
+ mvn compile exec:java -Dexec.mainClass="org.neo4j.tool.StoreCopy" -Penterprise \
+ -Dexec.args="source-dir target-dir [REL,TYPES,TO,IGNORE] [properties,to,ignore] [Labels,To,Ignore] [Labels,To,Delete,Nodes] [keep-node-ids:true/false]"
- mvn compile exec:java -Dexec.mainClass="org.neo4j.tool.StoreComparer" \
- -Dexec.args="source-dir target-dir [rel,types,to,ignore] [properties,to,ignore]"
diff --git a/src/main/java/org/neo4j/tool/DomainAnalyzer.java b/src/main/java/org/neo4j/tool/DomainAnalyzer.java
index 1ce7f26..489810e 100644
--- a/src/main/java/org/neo4j/tool/DomainAnalyzer.java
+++ b/src/main/java/org/neo4j/tool/DomainAnalyzer.java
@@ -3,9 +3,9 @@
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
-import org.neo4j.helpers.collection.IteratorUtil;
-import org.neo4j.kernel.EmbeddedGraphDatabase;
+import org.neo4j.helpers.collection.Iterables;
+import java.io.File;
import java.util.*;
/**
@@ -74,13 +74,13 @@ private String toString(Object value) {
}
}
public static void main(String[] args) {
- graphDb = new GraphDatabaseFactory().newEmbeddedDatabase(args[0]);
+ graphDb = new GraphDatabaseFactory().newEmbeddedDatabase(new File(args[0]));
long time = System.currentTimeMillis();
Map,Sample> statistics = new HashMap, Sample>();
int count = 0;
for (Node node : graphDb.getAllNodes()) {
- final HashSet keys = IteratorUtil.addToCollection(node.getPropertyKeys(), new HashSet());
+ final HashSet keys = Iterables.addToCollection(node.getPropertyKeys(), new HashSet());
Sample sample = statistics.get(keys);
if (sample==null) {
sample = new Sample(node);
diff --git a/src/main/java/org/neo4j/tool/GraphGenerator.java b/src/main/java/org/neo4j/tool/GraphGenerator.java
index e54fe11..fa4ecfd 100644
--- a/src/main/java/org/neo4j/tool/GraphGenerator.java
+++ b/src/main/java/org/neo4j/tool/GraphGenerator.java
@@ -5,8 +5,8 @@
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
-import org.neo4j.kernel.EmbeddedGraphDatabase;
+import java.io.File;
import java.util.Arrays;
/**
@@ -17,7 +17,7 @@ public class GraphGenerator {
public static final int MILLION = 1000 * 1000;
public static void main(String[] args) {
- final GraphDatabaseService gdb = new GraphDatabaseFactory().newEmbeddedDatabase("target/data");
+ final GraphDatabaseService gdb = new GraphDatabaseFactory().newEmbeddedDatabase(new File("target/data"));
createDatabase(gdb);
gdb.shutdown();
}
@@ -39,14 +39,15 @@ public static void createDatabase(GraphDatabaseService graphdb) {
System.out.print(".");
if ((i % 10000) == 0) {
tx.success();
- tx.finish();
+ tx.close();
System.out.println(" " + i);
tx = graphdb.beginTx();
}
}
}
} finally {
- tx.finish();
+ tx.success();
+ tx.close();
}
System.out.println();
long delta = (System.currentTimeMillis() - cpuTime);
diff --git a/src/main/java/org/neo4j/tool/PropertyAnalyzer.java b/src/main/java/org/neo4j/tool/PropertyAnalyzer.java
index abef9ff..f582c81 100644
--- a/src/main/java/org/neo4j/tool/PropertyAnalyzer.java
+++ b/src/main/java/org/neo4j/tool/PropertyAnalyzer.java
@@ -3,8 +3,8 @@
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.helpers.collection.MapUtil;
-import org.neo4j.kernel.EmbeddedGraphDatabase;
+import java.io.File;
import java.lang.reflect.Array;
import java.util.*;
@@ -96,7 +96,7 @@ public static Map config() {
}
public static void main(String[] args) {
- final GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(args[0]).setConfig(config()).newGraphDatabase();
+ final GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(new File(args[0])).setConfig(config()).newGraphDatabase();
int withoutProps=0, nodes = 0, rels = 0;
Map props=new HashMap();
for (Node node : db.getAllNodes()) {
diff --git a/src/main/java/org/neo4j/tool/SingleRelationshipDeletion.java b/src/main/java/org/neo4j/tool/SingleRelationshipDeletion.java
deleted file mode 100644
index b596b26..0000000
--- a/src/main/java/org/neo4j/tool/SingleRelationshipDeletion.java
+++ /dev/null
@@ -1,82 +0,0 @@
-package org.neo4j.tool;
-
-/**
- * @author mh
- * @since 12.08.11
- */
-
-import org.neo4j.graphdb.GraphDatabaseService;
-import org.neo4j.graphdb.NotFoundException;
-import org.neo4j.graphdb.Relationship;
-import org.neo4j.graphdb.Transaction;
-import org.neo4j.graphdb.factory.GraphDatabaseFactory;
-import org.neo4j.kernel.EmbeddedGraphDatabase;
-
-import java.io.File;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-/**
- * @author mh
- * @since 11.08.11
- */
-public class SingleRelationshipDeletion {
-
- public static final int COUNT = 1000;
- private static final int ROUNDS = 50;
-
- public static void main(String[] args) {
- final File dir = new File("target/data");
- boolean mustCreate = !dir.exists();
- final GraphDatabaseService graphdb = new GraphDatabaseFactory().newEmbeddedDatabase(dir.getAbsolutePath());
- if (mustCreate) GraphGenerator.createDatabase(graphdb);
-
- for (int round = 0; round < ROUNDS; round++) {
- long[] relIds = createRelIds(COUNT);
- int success = 0, fail = 0;
- long cpuTime = System.currentTimeMillis();
- final Transaction tx = graphdb.beginTx();
- try {
- for (int i = 0; i < COUNT; i++) {
- final Relationship rel;
- try {
- rel = graphdb.getRelationshipById(relIds[i]);
- } catch (NotFoundException nfe) {
- fail++;
- continue;
- }
- try {
- rel.delete();
- tx.success();
- success++;
- } catch (NotFoundException nfe) {
- fail++;
-// tx.failure();
- }
- }
- } finally {
- tx.finish();
- }
-
- long delta = (System.currentTimeMillis() - cpuTime);
- System.out.printf("round %d delete %d relationships time = %d ms, succ %d failed %d%n", round, COUNT, delta, success, fail);
- }
- graphdb.shutdown();
- }
-
- private static long[] createRelIds(final int count) {
- Random random = new Random(System.currentTimeMillis());
- Set values = new HashSet();
- do {
- values.add(random.nextInt(GraphGenerator.MILLION));
- } while (values.size() < count);
- final long[] result = new long[count];
- int i = 0;
- for (Integer value : values) {
- result[i++] = value.longValue();
- }
- return result;
- }
-
-}
diff --git a/src/main/java/org/neo4j/tool/StoreComparer.java b/src/main/java/org/neo4j/tool/StoreComparer.java
index b790ff5..90ce4d9 100644
--- a/src/main/java/org/neo4j/tool/StoreComparer.java
+++ b/src/main/java/org/neo4j/tool/StoreComparer.java
@@ -2,17 +2,15 @@
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
-import org.neo4j.helpers.collection.IteratorUtil;
+import org.neo4j.helpers.collection.Iterables;
import org.neo4j.helpers.collection.MapUtil;
-import org.neo4j.kernel.EmbeddedGraphDatabase;
-import org.neo4j.kernel.impl.util.FileUtils;
+import org.neo4j.io.fs.FileUtils;
import java.io.File;
import java.io.IOException;
import java.util.*;
import static java.util.Arrays.asList;
-import static java.util.Arrays.toString;
import static java.util.Collections.emptySet;
public class StoreComparer {
@@ -55,12 +53,15 @@ private static void compareStore(String sourceDir, String targetDir, Set
if (!target.exists()) throw new IllegalArgumentException("Target Directory does not exists " + target);
if (!source.exists()) throw new IllegalArgumentException("Source Database does not exist " + source);
- GraphDatabaseService targetDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(target.getAbsolutePath()).setConfig(config()).newGraphDatabase();
- GraphDatabaseService sourceDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(sourceDir).setConfig(config()).newGraphDatabase();
+ GraphDatabaseService targetDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(target).setConfig(config()).newGraphDatabase();
+ GraphDatabaseService sourceDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(source).setConfig(config()).newGraphDatabase();
- compareCounts(sourceDb, targetDb, ignoreRelTypes, ignoreProperties);
- compareNodes(sourceDb, targetDb, ignoreProperties);
- compareRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties);
+ try (Transaction srcDbTx = sourceDb.beginTx();
+ Transaction targetDbTx = targetDb.beginTx()) {
+ compareCounts(sourceDb, targetDb, ignoreRelTypes, ignoreProperties);
+ compareNodes(sourceDb, targetDb, ignoreProperties);
+ compareRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties);
+ }
targetDb.shutdown();
sourceDb.shutdown();
@@ -98,7 +99,7 @@ private static Statistics count(GraphDatabaseService db, Set ignoreRelTy
}
private static int countProperties(Set ignoreProperties, PropertyContainer node) {
- final Collection keys = IteratorUtil.addToCollection(node.getPropertyKeys(), new HashSet());
+ final Collection keys = Iterables.addToCollection(node.getPropertyKeys(), new HashSet());
keys.removeAll(ignoreProperties);
return keys.size();
}
@@ -142,8 +143,8 @@ private static Relationship getTargetRel(GraphDatabaseService gdb,long startNode
}
private static void compareProperties(PropertyContainer pc1, PropertyContainer pc2, Set ignoreProperties) {
- final Collection keys1 = IteratorUtil.addToCollection(pc1.getPropertyKeys(), new HashSet());
- final Collection keys2 = IteratorUtil.addToCollection(pc2.getPropertyKeys(), new HashSet());
+ final Collection keys1 = Iterables.addToCollection(pc1.getPropertyKeys(), new HashSet());
+ final Collection keys2 = Iterables.addToCollection(pc2.getPropertyKeys(), new HashSet());
keys2.removeAll(ignoreProperties);
keys1.removeAll(ignoreProperties);
if (!keys1.equals(keys2)) {
diff --git a/src/main/java/org/neo4j/tool/StoreCopy.java b/src/main/java/org/neo4j/tool/StoreCopy.java
index 95fe717..3960358 100644
--- a/src/main/java/org/neo4j/tool/StoreCopy.java
+++ b/src/main/java/org/neo4j/tool/StoreCopy.java
@@ -1,22 +1,22 @@
package org.neo4j.tool;
+import org.neo4j.collection.primitive.Primitive;
+import org.neo4j.collection.primitive.PrimitiveLongLongMap;
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
-import org.neo4j.helpers.Pair;
-import org.neo4j.helpers.collection.IteratorUtil;
+import org.neo4j.helpers.Exceptions;
+import org.neo4j.helpers.collection.Iterables;
import org.neo4j.helpers.collection.MapUtil;
-import org.neo4j.kernel.GraphDatabaseAPI;
-import org.neo4j.kernel.impl.core.NodeManager;
-import org.neo4j.kernel.impl.nioneo.store.InvalidRecordException;
-import org.neo4j.kernel.impl.util.FileUtils;
-import org.neo4j.unsafe.batchinsert.BatchInserter;
-import org.neo4j.unsafe.batchinsert.BatchInserters;
-import org.neo4j.unsafe.batchinsert.BatchRelationship;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
+import org.neo4j.helpers.collection.Pair;
+import org.neo4j.io.fs.FileUtils;
+import org.neo4j.kernel.impl.store.id.IdGeneratorFactory;
+import org.neo4j.kernel.impl.store.id.IdType;
+import org.neo4j.kernel.internal.GraphDatabaseAPI;
+import org.neo4j.unsafe.batchinsert.*;
+import org.neo4j.unsafe.batchinsert.internal.BatchInserterImpl;
+
+import java.io.*;
+import java.lang.reflect.Field;
import java.util.*;
import static java.util.Arrays.asList;
@@ -27,40 +27,40 @@ public class StoreCopy {
private static final Label[] NO_LABELS = new Label[0];
private static PrintWriter logs;
- @SuppressWarnings("unchecked")
- public static Map config() {
- return (Map) MapUtil.map(
- "neostore.nodestore.db.mapped_memory", "100M",
- "neostore.relationshipstore.db.mapped_memory", "500M",
- "neostore.propertystore.db.mapped_memory", "300M",
- "neostore.propertystore.db.strings.mapped_memory", "1G",
- "neostore.propertystore.db.arrays.mapped_memory", "300M",
- "neostore.propertystore.db.index.keys.mapped_memory", "100M",
- "neostore.propertystore.db.index.mapped_memory", "100M",
- "cache_type", "weak"
- );
- }
-
public static void main(String[] args) throws Exception {
if (args.length < 2) {
- System.err.println("Usage: StoryCopy source target [rel,types,to,ignore] [properties,to,ignore]");
+ System.err.println("Usage: StoreCopy source target [rel,types,to,ignore] [properties,to,ignore] [labels,to,ignore] [labels,to,delete] [keep-node-ids:true/false]");
return;
}
- String sourceDir = args[0];
- String targetDir = args[1];
- Set ignoreRelTypes = splitOptionIfExists(args, 2);
- Set ignoreProperties = splitOptionIfExists(args, 3);
- Set ignoreLabels = splitOptionIfExists(args, 4);
- System.out.printf("Copying from %s to %s ingoring rel-types %s ignoring properties %s ignoring labels %s %n", sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels);
- copyStore(sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels);
+ Properties properties = new Properties(); // fallback values for arguments missing on the command line
+ if (new File("neo4j.properties").isFile()) { // the config file is optional; without it only the arguments are used
+ try (Reader config = new FileReader("neo4j.properties")) { properties.load(config); } // reader is closed even if load fails
+ }
+ String sourceDir = getArgument(args,0,properties,"source_db_dir");
+ String targetDir = getArgument(args,1,properties,"target_db_dir");
+ Set ignoreRelTypes = splitToSet(getArgument(args,2,properties,"rel_types_to_ignore"));
+ Set ignoreProperties = splitToSet(getArgument(args,3,properties,"properties_to_ignore"));
+ Set ignoreLabels = splitToSet(getArgument(args,4,properties,"labels_to_ignore"));
+ Set deleteNodesWithLabels = splitToSet(getArgument(args,5,properties,"labels_to_delete"));
+ boolean keepNodeIds = !"false".equalsIgnoreCase(getArgument(args, 6, properties, "keep_node_ids")); // any value but "false" (or none at all) keeps the ids
+ System.out.printf("Copying from %s to %s ignoring rel-types %s ignoring properties %s ignoring labels %s removing nodes with labels %s keep node ids %s %n", sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels, deleteNodesWithLabels,keepNodeIds);
+ copyStore(sourceDir, targetDir, ignoreRelTypes, ignoreProperties,ignoreLabels,deleteNodesWithLabels, keepNodeIds);
+ }
+
+ private static String getArgument(String[] args, int index, Properties properties, String key) {
+ if (args.length > index) return args[index];
+ return properties.getProperty(key);
}
- private static Set splitOptionIfExists(String[] args, final int index) {
- if (args.length <= index) return emptySet();
- return new HashSet(asList(args[index].toLowerCase().split(",")));
+ private static Set splitToSet(String value) {
+ if (value == null || value.trim().isEmpty()) return emptySet();
+ return new HashSet<>(asList(value.trim().split(", *")));
}
- private static void copyStore(String sourceDir, String targetDir, Set ignoreRelTypes, Set ignoreProperties, Set ignoreLabels) throws Exception {
+ interface Flusher {
+ void flush();
+ }
+ private static void copyStore(String sourceDir, String targetDir, Set ignoreRelTypes, Set ignoreProperties, Set ignoreLabels, Set deleteNodesWithLabels, boolean stableNodeIds) throws Exception {
final File target = new File(targetDir);
final File source = new File(sourceDir);
if (target.exists()) {
@@ -70,24 +70,53 @@ private static void copyStore(String sourceDir, String targetDir, Set ig
if (!source.exists()) throw new IllegalArgumentException("Source Database does not exist " + source);
Pair highestIds = getHighestNodeId(source);
- BatchInserter targetDb = BatchInserters.inserter(target.getAbsolutePath(), config());
- BatchInserter sourceDb = BatchInserters.inserter(source.getAbsolutePath(), config());
- logs = new PrintWriter(new FileWriter(new File(target, "store-copy.log")));
+ String pageCacheSize = System.getProperty("dbms.pagecache.memory","2G");
+ BatchInserter targetDb = BatchInserters.inserter(target, MapUtil.stringMap("dbms.pagecache.memory", pageCacheSize));
+ BatchInserter sourceDb = BatchInserters.inserter(source, MapUtil.stringMap("dbms.pagecache.memory", System.getProperty("dbms.pagecache.memory.source",pageCacheSize)));
+ Flusher flusher = getFlusher(sourceDb);
- copyNodes(sourceDb, targetDb, ignoreProperties, ignoreLabels, highestIds.first());
- copyRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties, highestIds.other());
+ logs = new PrintWriter(new FileWriter(new File(target, "store-copy.log")));
+ PrimitiveLongLongMap copiedNodeIds = copyNodes(sourceDb, targetDb, ignoreProperties, ignoreLabels, deleteNodesWithLabels, highestIds.first(),flusher, stableNodeIds);
+ copyRelationships(sourceDb, targetDb, ignoreRelTypes, ignoreProperties, copiedNodeIds, highestIds.other(), flusher);
targetDb.shutdown();
- sourceDb.shutdown();
+ try {
+ sourceDb.shutdown();
+ } catch (Exception e) {
+ logs.append(String.format("Noncritical error closing the source database:%n%s", Exceptions.stringify(e)));
+ }
logs.close();
copyIndex(source, target);
}
+ private static Flusher getFlusher(final BatchInserter db) { // builds a Flusher that empties three internal record maps of db via reflection
+ try {
+ Field field = BatchInserterImpl.class.getDeclaredField("recordAccess"); // looked up by name; NOTE(review): assumes db is a BatchInserterImpl - confirm
+ field.setAccessible(true);
+ final DirectRecordAccessSet recordAccessSet = (DirectRecordAccessSet) field.get(db);
+ final Field cacheField = DirectRecordAccess.class.getDeclaredField("batch"); // NOTE(review): presumably the map of records loaded so far - confirm against the Neo4j version in use
+ cacheField.setAccessible(true);
+ return new Flusher() {
+ @Override public void flush() { // clears the "batch" map of the node, relationship and property record accessors
+ try {
+ ((Map) cacheField.get(recordAccessSet.getNodeRecords())).clear();
+ ((Map) cacheField.get(recordAccessSet.getRelRecords())).clear();
+ ((Map) cacheField.get(recordAccessSet.getPropertyRecords())).clear();
+ } catch (IllegalAccessException e) { // rethrown unchecked because flush() declares no checked exceptions
+ throw new RuntimeException("Error clearing cache "+cacheField,e);
+ }
+ }
+ };
+ } catch (IllegalAccessException | NoSuchFieldException e) { // a missing or inaccessible field fails here, before any Flusher is returned
+ throw new RuntimeException("Error accessing cache field ", e);
+ }
+ }
+
private static Pair getHighestNodeId(File source) {
- GraphDatabaseAPI api = (GraphDatabaseAPI) new GraphDatabaseFactory().newEmbeddedDatabase(source.getAbsolutePath());
- NodeManager nodeManager = api.getDependencyResolver().resolveDependency(NodeManager.class);
- long highestNodeId = nodeManager.getHighestPossibleIdInUse(Node.class);
- long highestRelId = nodeManager.getHighestPossibleIdInUse(Relationship.class);
+ GraphDatabaseAPI api = (GraphDatabaseAPI) new GraphDatabaseFactory().newEmbeddedDatabase(source);
+ IdGeneratorFactory idGenerators = api.getDependencyResolver().resolveDependency(IdGeneratorFactory.class);
+ long highestNodeId = idGenerators.get(IdType.NODE).getHighestPossibleIdInUse();
+ long highestRelId = idGenerators.get(IdType.RELATIONSHIP).getHighestPossibleIdInUse();
api.shutdown();
return Pair.of(highestNodeId, highestRelId);
}
@@ -103,59 +132,143 @@ private static void copyIndex(File source, File target) throws IOException {
}
}
- private static void copyRelationships(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreRelTypes, Set ignoreProperties, long highestRelId) {
+ private static void copyRelationships(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreRelTypes, Set ignoreProperties, PrimitiveLongLongMap copiedNodeIds, long highestRelId, Flusher flusher) {
long time = System.currentTimeMillis();
long relId = 0;
long notFound = 0;
+ long removed = 0;
while (relId <= highestRelId) {
BatchRelationship rel = null;
+ String type = null;
try {
rel = sourceDb.getRelationshipById(relId++);
- } catch (InvalidRecordException nfe) {
- notFound++;
- continue;
+ type = rel.getType().name();
+ if (!ignoreRelTypes.contains(type)) {
+ if (!createRelationship(targetDb, sourceDb, rel, ignoreProperties, copiedNodeIds)) {
+ removed++;
+ }
+ } else {
+ removed++;
+ }
+ } catch (Exception e) {
+ if (e instanceof org.neo4j.kernel.impl.store.InvalidRecordException && e.getMessage().endsWith("not in use")) {
+ notFound++;
+ } else {
+ addLog(rel, "copy Relationship: " + (relId - 1) + "-[:" + type + "]" + "->?", e.getMessage());
+ }
+ }
+ if (relId % 10000 == 0) {
+ System.out.print(".");
+ logs.flush();
+ }
+ if (relId % 500000 == 0) {
+ flusher.flush();
+ System.out.printf(" %d / %d (%d%%) unused %d removed %d%n", relId, highestRelId, percent(relId,highestRelId), notFound,removed);
}
- if (ignoreRelTypes.contains(rel.getType().name().toLowerCase())) continue;
- createRelationship(targetDb, sourceDb, rel, ignoreProperties);
- if (relId % 1000 == 0) System.out.print(".");
- if (relId % 100000 == 0) System.out.println(" " + rel);
}
- System.out.println("\n copying of "+relId+" relationships took "+(System.currentTimeMillis()-time)+" ms. Not found "+notFound);
+ time = Math.max(1,(System.currentTimeMillis() - time)/1000);
+ System.out.printf("%n copying of %d relationship records took %d seconds (%d rec/s). Unused Records %d (%d%%) Removed Records %d (%d%%)%n",
+ relId, time, relId/time, notFound, percent(notFound,relId),removed, percent(removed,relId));
}
- private static void createRelationship(BatchInserter targetDb, BatchInserter sourceDb, BatchRelationship rel, Set ignoreProperties) {
- long startNodeId = rel.getStartNode();
- long endNodeId = rel.getEndNode();
+ private static int percent(Number part, Number total) {
+ return (int) (100 * part.floatValue() / total.floatValue());
+ }
+
+ private static long firstNode(BatchInserter sourceDb, long highestNodeId) {
+ long node = -1;
+ while (++node <= highestNodeId) {
+ if (sourceDb.nodeExists(node) && !sourceDb.getNodeProperties(node).isEmpty()) return node;
+ }
+ return -1;
+ }
+
+ private static void flushCache(BatchInserter sourceDb, long node) {
+ Map nodeProperties = sourceDb.getNodeProperties(node);
+ Iterator> iterator = nodeProperties.entrySet().iterator();
+ if (iterator.hasNext()) {
+ Map.Entry firstProp = iterator.next();
+ sourceDb.nodeHasProperty(node,firstProp.getKey());
+ sourceDb.setNodeProperty(node, firstProp.getKey(), firstProp.getValue()); // force flush
+ System.out.print(" flush");
+ }
+ }
+
+ private static boolean createRelationship(BatchInserter targetDb, BatchInserter sourceDb, BatchRelationship rel, Set ignoreProperties, PrimitiveLongLongMap copiedNodeIds) {
+ long startNodeId = copiedNodeIds.get(rel.getStartNode());
+ long endNodeId = copiedNodeIds.get(rel.getEndNode());
+ if (startNodeId == -1L || endNodeId == -1L) return false;
final RelationshipType type = rel.getType();
try {
- targetDb.createRelationship(startNodeId, endNodeId, type, getProperties(sourceDb.getRelationshipProperties(rel.getId()), ignoreProperties));
- } catch (InvalidRecordException ire) {
- addLog(rel, "create Relationship: " + startNodeId + "-[:" + type + "]" + "->" + endNodeId, ire.getMessage());
+ Map props = getProperties(sourceDb.getRelationshipProperties(rel.getId()), ignoreProperties);
+// if (props.isEmpty()) props = Collections.singletonMap("old_id",rel.getId()); else props.put("old_id",rel.getId());
+ targetDb.createRelationship(startNodeId, endNodeId, type, props);
+ return true;
+ } catch (Exception e) {
+ addLog(rel, "create Relationship: " + startNodeId + "-[:" + type + "]" + "->" + endNodeId, e.getMessage());
+ return false;
}
}
- private static void copyNodes(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreProperties, Set ignoreLabels, long highestNodeId) {
+ private static PrimitiveLongLongMap copyNodes(BatchInserter sourceDb, BatchInserter targetDb, Set ignoreProperties, Set ignoreLabels, Set deleteNodesWithLabels, long highestNodeId, Flusher flusher, boolean stableNodeIds) {
+ PrimitiveLongLongMap copiedNodes = Primitive.offHeapLongLongMap();
long time = System.currentTimeMillis();
- int node = -1;
- while (++node <= highestNodeId) {
- if (!sourceDb.nodeExists(node)) continue;
- targetDb.createNode(node, getProperties(sourceDb.getNodeProperties(node), ignoreProperties), labelsArray(sourceDb, node,ignoreLabels));
- if (node % 1000 == 0) System.out.print(".");
- if (node % 100000 == 0) {
+ long node = 0;
+ long notFound = 0;
+ long removed = 0;
+ while (node <= highestNodeId) {
+ try {
+ if (sourceDb.nodeExists(node)) {
+ if (labelInSet(sourceDb.getNodeLabels(node),deleteNodesWithLabels)) {
+ removed ++;
+ } else {
+ long newNodeId=node;
+ if (stableNodeIds) {
+ targetDb.createNode(node, getProperties(sourceDb.getNodeProperties(node), ignoreProperties), labelsArray(sourceDb, node, ignoreLabels));
+ } else {
+ newNodeId = targetDb.createNode(getProperties(sourceDb.getNodeProperties(node), ignoreProperties), labelsArray(sourceDb, node, ignoreLabels));
+ }
+ copiedNodes.put(node,newNodeId);
+ }
+ } else {
+ notFound++;
+ }
+ } catch (Exception e) {
+ if (e instanceof org.neo4j.kernel.impl.store.InvalidRecordException && e.getMessage().endsWith("not in use")) {
+ notFound++;
+ } else addLog(node, e.getMessage());
+ }
+ node++;
+ if (node % 10000 == 0) {
+ System.out.print(".");
+ }
+ if (node % 500000 == 0) {
+ flusher.flush();
logs.flush();
- System.out.println(" " + node);
+ System.out.printf(" %d / %d (%d%%) unused %d removed %d%n", node, highestNodeId, percent(node,highestNodeId), notFound, removed);
}
}
- System.out.println("\n copying of " + node + " nodes took " + (System.currentTimeMillis() - time) + " ms.");
+ time = Math.max(1,(System.currentTimeMillis() - time)/1000);
+ System.out.printf("%n copying of %d node records took %d seconds (%d rec/s). Unused Records %d (%d%%). Removed Records %d (%d%%).%n",
+ node, time, node/time, notFound, percent(notFound,node),removed, percent(removed,node));
+ return copiedNodes;
+ }
+
+ private static boolean labelInSet(Iterable