diff --git a/.gitignore b/.gitignore
index 95d6abb..0ed8d93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,53 @@
.classpath
.project
target
+
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion
+
+*.iml
+
+## Directory-based project format:
+.idea/
+# if you remove the above rule, at least ignore the following:
+
+# User-specific stuff:
+# .idea/workspace.xml
+# .idea/tasks.xml
+# .idea/dictionaries
+
+# Sensitive or high-churn files:
+# .idea/dataSources.ids
+# .idea/dataSources.xml
+# .idea/sqlDataSources.xml
+# .idea/dynamic.xml
+# .idea/uiDesigner.xml
+
+# Gradle:
+# .idea/gradle.xml
+# .idea/libraries
+
+# Mongo Explorer plugin:
+# .idea/mongoSettings.xml
+
+## File-based project format:
+*.ipr
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+
+
diff --git a/pom.xml b/pom.xml
index 4bb8bd2..8b2877f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -41,27 +41,64 @@
0.2.3
+ 1.7.12
+ 1.3
+ 4.13.1
+ 2.1.6
+ 18.0
com.google.guava
guava
- r03
+ ${guava.version}
org.slf4j
slf4j-api
- 1.6.1
+ ${slf4j.version}
org.slf4j
slf4j-log4j12
- 1.6.1
+ ${slf4j.version}
+ test
- com.github.tony19
- named-regexp
- ${named.regex.version}
+ org.jruby.joni
+ joni
+ ${joni.version}
+
+
+ junit
+ junit
+ ${junit.version}
+ test
+
+
+ org.hamcrest
+ hamcrest-library
+ ${hamcrest.version}
+ test
+
+
+ org.hamcrest
+ hamcrest-core
+ ${hamcrest.version}
+ test
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.0
+
+ 1.7
+ 1.7
+
+
+
+
diff --git a/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java b/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java
index db9ccba..0991cef 100644
--- a/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java
+++ b/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java
@@ -24,23 +24,17 @@
*/
package org.aicer.grok.dictionary;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.util.HashMap;
-import java.util.Map;
-
+import com.google.common.io.CharStreams;
+import com.google.common.io.Closeables;
import org.aicer.grok.exception.GrokCompilationException;
import org.aicer.grok.util.Grok;
+import org.joni.Regex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.code.regexp.Pattern;
-import com.google.common.io.CharStreams;
-import com.google.common.io.Closeables;
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
/**
* Grok Dictionary
@@ -75,7 +69,6 @@ public Map getRegexDictionary() {
/**
* Digests all the dictionaries loaded so far
*
- * @param file
* @throws GrokCompilationException if there is a problem
*/
public void bind() {
@@ -110,7 +103,7 @@ public Grok compileExpression(final String expression) {
logger.debug("Digested [" + expression + "] into [" + digestedExpression + "] before compilation");
- return new Grok(Pattern.compile(digestedExpression));
+ return new Grok(new Regex(digestedExpression));
}
private void digestExpressions() {
diff --git a/src/main/java/org/aicer/grok/util/Grok.java b/src/main/java/org/aicer/grok/util/Grok.java
index 3dd590c..e282d66 100644
--- a/src/main/java/org/aicer/grok/util/Grok.java
+++ b/src/main/java/org/aicer/grok/util/Grok.java
@@ -15,13 +15,15 @@
*/
package org.aicer.grok.util;
-import java.util.Map;
-
+import com.google.common.base.Strings;
import org.aicer.grok.dictionary.GrokDictionary;
+import org.joni.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import com.google.code.regexp.MatchResult;
-import com.google.code.regexp.Matcher;
-import com.google.code.regexp.Pattern;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
/**
*
@@ -30,41 +32,61 @@
*/
public final class Grok {
- private final Pattern compiledPattern;
+ private final Regex compiledPattern;
+ private final static Logger LOGGER = LoggerFactory.getLogger(Grok.class);
/**
* Constructor
*/
- public Grok(final Pattern compiledPattern) {
+ public Grok(final Regex compiledPattern) {
this.compiledPattern = compiledPattern;
}
/**
* Extracts named groups from the raw data
*
- * @param rawData
+ * @param rawData String to match pattern against
* @return A map of group names mapped to their extracted values or null if there are no matches
+ *         (named groups that did not take part in the match are left out of the map)
*/
public Map extractNamedGroups(final CharSequence rawData) {
-
- Matcher matcher = compiledPattern.matcher(rawData);
-
- if (matcher.find()) {
-
- MatchResult r = matcher.toMatchResult();
-
- if (r != null && r.namedGroups() != null) {
- return r.namedGroups();
+        // Joni matches on bytes and reports byte offsets, so the input is encoded once
+        // and every group is sliced from that same byte array.
+        // NOTE(review): assumes the pattern was compiled with Joni's UTF-8 default - confirm.
+        final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
+        final byte[] input = rawData.toString().getBytes(utf8);
+        final Matcher matcher = compiledPattern.matcher(input);
+        if (matcher.search(0, input.length, Option.DEFAULT) != -1) {
+            final Map<String, String> namedGroups = new HashMap<>();
+            final Region region = matcher.getEagerRegion();
+
+            for (Iterator<NameEntry> entry = compiledPattern.namedBackrefIterator(); entry.hasNext(); ) {
+                final NameEntry e = entry.next();
+                // The group name is the byte slice [nameP, nameEnd) of e.name.
+                final String name = new String(e.name, e.nameP, e.nameEnd - e.nameP, utf8);
+                for (final int backRef : e.getBackRefs()) {
+                    final int start = region.beg[backRef];
+                    final int end = region.end[backRef];
+                    // A negative offset means this group did not take part in the match.
+                    if (start < 0 || end < start) {
+                        continue;
+                    }
+                    final String value = new String(input, start, end - start, utf8);
+                    LOGGER.debug("{} = {}", name, value);
+                    namedGroups.put(name, value);
+                    break;
+                }
+            }
+            return namedGroups;
+        }
- }
- return null;
+        return null;
}
private static final void displayResults(final Map results) {
if (results != null) {
for(Map.Entry entry : results.entrySet()) {
- System.out.println(entry.getKey() + "=" + entry.getValue());
+ System.out.println(Strings.padEnd(entry.getKey(), 11, ' ') + " = " + entry.getValue());
}
}
}
diff --git a/src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java b/src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java
new file mode 100644
index 0000000..30ffeb1
--- /dev/null
+++ b/src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java
@@ -0,0 +1,69 @@
+package org.aicer.grok.dictionary;
+
+import org.aicer.grok.util.Grok;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.nullValue;
+
+/**
+ * Unit tests for {@link GrokDictionary}: binding dictionaries and compiling or
+ * digesting expressions against the built-in dictionaries.
+ */
+public class GrokDictionaryTest {
+
+    /** Number of expressions these tests expect once the built-in dictionaries are bound. */
+    private static final int BUILT_IN_EXPRESSION_COUNT = 91;
+
+    private GrokDictionary grokDictionary;
+
+    @Before
+    public void setUp() throws Exception {
+        grokDictionary = new GrokDictionary();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        // Nothing to release; the dictionary is rebuilt before every test.
+    }
+
+    @Test
+    public void testGetRegexDictionary() throws Exception {
+        // TODO: placeholder - there are no assertions yet, so this test always passes.
+    }
+
+    /** Binding with nothing loaded yields no expressions; binding the built-ins yields all of them. */
+    @Test
+    public void testBind() throws Exception {
+        grokDictionary.bind();
+        assertThat(grokDictionary.getRegexDictionary().keySet(), hasSize(0));
+
+        grokDictionary.addBuiltInDictionaries();
+        grokDictionary.bind();
+        assertThat(grokDictionary.getRegexDictionary().keySet(), hasSize(BUILT_IN_EXPRESSION_COUNT));
+    }
+
+    /** Compiling a simple named expression returns a non-null {@link Grok}. */
+    @Test
+    public void testCompileExpression() throws Exception {
+        grokDictionary.addBuiltInDictionaries();
+        grokDictionary.bind();
+        Grok grok = grokDictionary.compileExpression("%{WORD:test}");
+        assertThat(grok, is(not(nullValue())));
+    }
+
+    /** A named reference is digested into a named capture group wrapping the referenced pattern. */
+    @Test
+    public void testDigestExpression() throws Exception {
+        grokDictionary.addBuiltInDictionaries();
+        grokDictionary.bind();
+        String digestExpression = grokDictionary.digestExpression("%{WORD:test}");
+        assertThat(digestExpression, is(equalTo("(?<test>\\b\\w+\\b)")));
+    }
+}
diff --git a/src/test/java/org/aicer/grok/util/GrokIT.java b/src/test/java/org/aicer/grok/util/GrokIT.java
new file mode 100644
index 0000000..02707d0
--- /dev/null
+++ b/src/test/java/org/aicer/grok/util/GrokIT.java
@@ -0,0 +1,63 @@
+package org.aicer.grok.util;
+
+import org.aicer.grok.dictionary.GrokDictionary;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.is;
+
+/**
+ * Runs a compiled Grok expression against sample log lines and checks the
+ * named groups extracted from each of them.
+ */
+public class GrokIT {
+
+    public static final String rawDataLine1 = "1234567 - israel.ekpo@massivelogdata.net cc55ZZ35 1789 Hello Grok";
+    public static final String rawDataLine2 = "98AA541 - israel-ekpo@israelekpo.com mmddgg22 8800 Hello Grok";
+    public static final String rawDataLine3 = "55BB778 - ekpo.israel@example.net secret123 4439 Valid Data Stream";
+
+    /** Number of expressions this test expects once the built-in dictionaries are bound. */
+    private static final int BUILT_IN_EXPRESSION_COUNT = 91;
+
+    private GrokDictionary dictionary;
+
+    @Before
+    public void setUp() throws Exception {
+        dictionary = new GrokDictionary();
+
+        // Load the built-in dictionaries
+        dictionary.addBuiltInDictionaries();
+
+        // Resolve all expressions loaded
+        dictionary.bind();
+
+        // Take a look at how many expressions have been loaded
+        System.out.println("Dictionary Size: " + dictionary.getDictionarySize());
+        assertThat(dictionary.getDictionarySize(), is(equalTo(BUILT_IN_EXPRESSION_COUNT)));
+    }
+
+    /** Each sample line must yield the expected username, password and yearOfBirth groups. */
+    @Test
+    public void testTestData() throws Exception {
+        final String expression = "%{EMAIL:username} %{USERNAME:password} %{INT:yearOfBirth}";
+        final Grok compiledPattern = dictionary.compileExpression(expression);
+
+        assertExtracted(compiledPattern, rawDataLine1, "israel.ekpo@massivelogdata.net", "cc55ZZ35", "1789");
+        assertExtracted(compiledPattern, rawDataLine2, "israel-ekpo@israelekpo.com", "mmddgg22", "8800");
+        assertExtracted(compiledPattern, rawDataLine3, "ekpo.israel@example.net", "secret123", "4439");
+    }
+
+    /** Extracts the named groups from one line and asserts the three expected entries. */
+    private static void assertExtracted(final Grok compiledPattern, final String rawData,
+            final String username, final String password, final String yearOfBirth) {
+        final Map<String, String> map = compiledPattern.extractNamedGroups(rawData);
+        assertThat(map, hasEntry("username", username));
+        assertThat(map, hasEntry("password", password));
+        assertThat(map, hasEntry("yearOfBirth", yearOfBirth));
+    }
+}
diff --git a/src/test/java/org/aicer/grok/util/GrokTest.java b/src/test/java/org/aicer/grok/util/GrokTest.java
new file mode 100644
index 0000000..4eeb224
--- /dev/null
+++ b/src/test/java/org/aicer/grok/util/GrokTest.java
@@ -0,0 +1,46 @@
+package org.aicer.grok.util;
+
+import org.aicer.grok.dictionary.GrokDictionary;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.notNullValue;
+
+/**
+ * Unit tests for {@link Grok} named-group extraction, using the built-in dictionaries.
+ */
+public class GrokTest {
+
+    private static final String TESTSTRING = "This is a test";
+    private GrokDictionary grokDictionary;
+
+    @Before
+    public void setUp() throws Exception {
+        grokDictionary = new GrokDictionary();
+        grokDictionary.addBuiltInDictionaries();
+        grokDictionary.bind();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        // Nothing to release; the dictionary is rebuilt before every test.
+    }
+
+    /** The WORD expression should capture the first word of the input under the requested name. */
+    @Test
+    public void testExtractNamedGroups() throws Exception {
+        Grok grok = grokDictionary.compileExpression("%{WORD:testName}");
+        Map<String, String> stringStringMap = grok.extractNamedGroups(TESTSTRING);
+        assertThat(stringStringMap, is(notNullValue()));
+        assertThat(stringStringMap.keySet(), is(not(empty())));
+        assertThat(stringStringMap, hasEntry("testName", "This"));
+    }
+}
diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties
new file mode 100644
index 0000000..61fbf6f
--- /dev/null
+++ b/src/test/resources/log4j.properties
@@ -0,0 +1,9 @@
+# Root logger option
+log4j.rootLogger=WARN, stdout
+
+# Redirect log messages to console
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
+