From 8f90b930e6bce126524ce954dae2b2433cf8a3eb Mon Sep 17 00:00:00 2001 From: Per Abich Date: Tue, 2 Jun 2015 14:22:45 +0200 Subject: [PATCH 1/2] Switched regex library and reduced compile time dependencies. --- .gitignore | 50 ++++++++++++++++ pom.xml | 49 +++++++++++++-- .../aicer/grok/dictionary/GrokDictionary.java | 21 +++---- src/main/java/org/aicer/grok/util/Grok.java | 49 ++++++++------- .../grok/dictionary/GrokDictionaryTest.java | 56 ++++++++++++++++++ src/test/java/org/aicer/grok/util/GrokIT.java | 59 +++++++++++++++++++ .../java/org/aicer/grok/util/GrokTest.java | 38 ++++++++++++ src/test/resources/log4j.properties | 9 +++ 8 files changed, 291 insertions(+), 40 deletions(-) create mode 100644 src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java create mode 100644 src/test/java/org/aicer/grok/util/GrokIT.java create mode 100644 src/test/java/org/aicer/grok/util/GrokTest.java create mode 100644 src/test/resources/log4j.properties diff --git a/.gitignore b/.gitignore index 95d6abb..0ed8d93 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,53 @@ .classpath .project target + +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion + +*.iml + +## Directory-based project format: +.idea/ +# if you remove the above rule, at least ignore the following: + +# User-specific stuff: +# .idea/workspace.xml +# .idea/tasks.xml +# .idea/dictionaries + +# Sensitive or high-churn files: +# .idea/dataSources.ids +# .idea/dataSources.xml +# .idea/sqlDataSources.xml +# .idea/dynamic.xml +# .idea/uiDesigner.xml + +# Gradle: +# .idea/gradle.xml +# .idea/libraries + +# Mongo Explorer plugin: +# .idea/mongoSettings.xml + +## File-based project format: +*.ipr +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties + + diff --git a/pom.xml b/pom.xml index 4bb8bd2..4cd7018 100644 --- a/pom.xml +++ b/pom.xml @@ -41,27 +41,64 @@ 0.2.3 + 1.7.12 + 1.3 + 4.12 + 2.1.6 + 18.0 com.google.guava guava - r03 + ${guava.version} org.slf4j slf4j-api - 1.6.1 + ${slf4j.version} org.slf4j slf4j-log4j12 - 1.6.1 + ${slf4j.version} + test - com.github.tony19 - named-regexp - ${named.regex.version} + org.jruby.joni + joni + ${joni.version} + + + junit + junit + ${junit.version} + test + + + org.hamcrest + hamcrest-library + ${hamcrest.version} + test + + + org.hamcrest + hamcrest-core + ${hamcrest.version} + test + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.0 + + 1.7 + 1.7 + + + + diff --git a/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java b/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java index db9ccba..0991cef 100644 --- a/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java +++ b/src/main/java/org/aicer/grok/dictionary/GrokDictionary.java @@ -24,23 +24,17 @@ */ package org.aicer.grok.dictionary; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; - +import com.google.common.io.CharStreams; +import com.google.common.io.Closeables; import org.aicer.grok.exception.GrokCompilationException; import org.aicer.grok.util.Grok; +import org.joni.Regex; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.code.regexp.Pattern; -import com.google.common.io.CharStreams; -import com.google.common.io.Closeables; +import java.io.*; +import java.util.HashMap; +import java.util.Map; /** * Grok Dictionary @@ -75,7 +69,6 @@ public Map getRegexDictionary() { /** * Digests all the dictionaries loaded so far * - * @param file * @throws GrokCompilationException if there is a problem */ public void bind() { @@ -110,7 +103,7 @@ public Grok compileExpression(final String expression) { logger.debug("Digested [" + expression + "] into [" + digestedExpression + "] before compilation"); - return new Grok(Pattern.compile(digestedExpression)); + return new Grok(new Regex(digestedExpression)); } private void digestExpressions() { diff --git a/src/main/java/org/aicer/grok/util/Grok.java b/src/main/java/org/aicer/grok/util/Grok.java index 3dd590c..e282d66 100644 --- a/src/main/java/org/aicer/grok/util/Grok.java +++ b/src/main/java/org/aicer/grok/util/Grok.java @@ -15,13 +15,15 @@ */ package org.aicer.grok.util; -import java.util.Map; - +import com.google.common.base.Strings; import org.aicer.grok.dictionary.GrokDictionary; +import org.joni.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.google.code.regexp.MatchResult; -import com.google.code.regexp.Matcher; -import com.google.code.regexp.Pattern; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; /** * @@ -30,41 +32,48 @@ */ public final class Grok { - private final Pattern compiledPattern; + private final Regex compiledPattern; + private final static Logger LOGGER = LoggerFactory.getLogger(Grok.class); /** * Constructor */ - public Grok(final Pattern compiledPattern) { + public Grok(final Regex compiledPattern) { this.compiledPattern = compiledPattern; } /** * Extracts named groups from the raw data * - * @param rawData + * @param rawData String to match pattern against * @return A map of group names mapped to their extracted values or null if there are no matches */ public Map extractNamedGroups(final CharSequence rawData) { - - Matcher matcher = compiledPattern.matcher(rawData); - - if (matcher.find()) { - - MatchResult r = matcher.toMatchResult(); - - if (r != null && r.namedGroups() != null) { - return r.namedGroups(); + Map namedGroups = new HashMap<>(); + Matcher matcher = compiledPattern.matcher(rawData.toString().getBytes()); + if (matcher.search(0, rawData.length(), Option.DEFAULT) != -1) { + Region region = matcher.getEagerRegion(); + + for (Iterator entry = compiledPattern.namedBackrefIterator(); entry.hasNext(); ) { + NameEntry e = entry.next(); + int backRef = e.getBackRefs()[0]; + int start = region.beg[backRef]; + int end = region.end[backRef]; + CharSequence charSequence = rawData.subSequence(start, end); + String name = new String(e.name).substring(e.nameP, e.nameEnd); + LOGGER.debug("{} = {}", name, charSequence); + namedGroups.put(name, charSequence.toString()); + } + return namedGroups; } - } - return null; + return null; } private static final void displayResults(final Map results) { if (results != null) { for(Map.Entry entry : results.entrySet()) { - System.out.println(entry.getKey() + "=" + entry.getValue()); + System.out.println(Strings.padEnd(entry.getKey(), 11, ' ') + " = " + entry.getValue()); } } } diff --git a/src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java b/src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java new file mode 100644 index 0000000..30ffeb1 --- /dev/null +++ b/src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java @@ -0,0 +1,56 @@ +package org.aicer.grok.dictionary; + +import org.aicer.grok.util.Grok; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; +import static org.hamcrest.Matchers.*; +public class GrokDictionaryTest { + + private GrokDictionary grokDictionary; + + @Before + public void setUp() throws Exception { + grokDictionary = new GrokDictionary(); + + } + + @After + public void tearDown() throws Exception { + + } + + @Test + public void testGetRegexDictionary() throws Exception { + + } + + @Test + public void testBind() throws Exception { + grokDictionary.bind(); + assertThat(grokDictionary.getRegexDictionary().keySet(), hasSize(0)); + + grokDictionary.addBuiltInDictionaries(); + grokDictionary.bind(); + assertThat(grokDictionary.getRegexDictionary().keySet(), hasSize(91)); + } + + @Test + public void testCompileExpression() throws Exception { + grokDictionary.addBuiltInDictionaries(); + grokDictionary.bind(); + Grok grok = grokDictionary.compileExpression("%{WORD:test}"); + assertThat(grok,is(not(nullValue()))); + } + + @Test + public void testDigestExpression() throws Exception { + grokDictionary.addBuiltInDictionaries(); + grokDictionary.bind(); + String digestExpression = grokDictionary.digestExpression("%{WORD:test}"); + assertThat(digestExpression,is(equalTo("(?\\b\\w+\\b)"))); + + } +} \ No newline at end of file diff --git a/src/test/java/org/aicer/grok/util/GrokIT.java b/src/test/java/org/aicer/grok/util/GrokIT.java new file mode 100644 index 0000000..02707d0 --- /dev/null +++ b/src/test/java/org/aicer/grok/util/GrokIT.java @@ -0,0 +1,59 @@ +package org.aicer.grok.util; + +import org.aicer.grok.dictionary.GrokDictionary; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.theories.DataPoint; + +import java.util.Map; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +public class GrokIT { + @DataPoint + public static final String rawDataLine1 = "1234567 - israel.ekpo@massivelogdata.net cc55ZZ35 1789 Hello Grok"; + @DataPoint + public static final String rawDataLine2 = "98AA541 - israel-ekpo@israelekpo.com mmddgg22 8800 Hello Grok"; + @DataPoint + public static final String rawDataLine3 = "55BB778 - ekpo.israel@example.net secret123 4439 Valid Data Stream"; + + private GrokDictionary dictionary; + + @Before + public void setUp() throws Exception { + dictionary = new GrokDictionary(); + + // Load the built-in dictionaries + dictionary.addBuiltInDictionaries(); + + // Resolve all expressions loaded + dictionary.bind(); + + // Take a look at how many expressions have been loaded + System.out.println("Dictionary Size: " + dictionary.getDictionarySize()); + assertThat(dictionary.getDictionarySize(), is(equalTo(91))); + } + + @Test + public void testTestData() throws Exception { + + final String expression = "%{EMAIL:username} %{USERNAME:password} %{INT:yearOfBirth}"; + + + Grok compiledPattern = dictionary.compileExpression(expression); + + Map map = compiledPattern.extractNamedGroups(rawDataLine1); + assertThat(map, hasEntry("username", "israel.ekpo@massivelogdata.net")); + assertThat(map, hasEntry("password", "cc55ZZ35")); + assertThat(map, hasEntry("yearOfBirth", "1789")); + map = compiledPattern.extractNamedGroups(rawDataLine2); + assertThat(map, hasEntry("username", "israel-ekpo@israelekpo.com")); + assertThat(map, hasEntry("password", "mmddgg22")); + assertThat(map, hasEntry("yearOfBirth", "8800")); + map = compiledPattern.extractNamedGroups(rawDataLine3); + assertThat(map, hasEntry("username", "ekpo.israel@example.net")); + assertThat(map, hasEntry("password", "secret123")); + assertThat(map, hasEntry("yearOfBirth", "4439")); + } +} diff --git a/src/test/java/org/aicer/grok/util/GrokTest.java b/src/test/java/org/aicer/grok/util/GrokTest.java new file mode 100644 index 0000000..4eeb224 --- /dev/null +++ b/src/test/java/org/aicer/grok/util/GrokTest.java @@ -0,0 +1,38 @@ +package org.aicer.grok.util; + +import org.aicer.grok.dictionary.GrokDictionary; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.util.Map; + +import static org.hamcrest.Matchers.*; +import static org.junit.Assert.assertThat; + +public class GrokTest { + + private static final String TESTSTRING = "This is a test"; + private GrokDictionary grokDictionary; + + @Before + public void setUp() throws Exception { + grokDictionary = new GrokDictionary(); + grokDictionary.addBuiltInDictionaries(); + grokDictionary.bind(); + } + + @After + public void tearDown() throws Exception { + + } + + @Test + public void testExtractNamedGroups() throws Exception { + Grok grok = grokDictionary.compileExpression("%{WORD:testName}"); + Map stringStringMap = grok.extractNamedGroups(TESTSTRING); + assertThat(stringStringMap,is(notNullValue())); + assertThat(stringStringMap.keySet(), is(not(empty()))); + } + +} \ No newline at end of file diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 0000000..61fbf6f --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,9 @@ +# Root logger option +log4j.rootLogger=WARN, stdout + +# Redirect log messages to console +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n + From e338dcfe658e5773580202e110935ffd8bfaaf96 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Nov 2021 08:21:37 +0000 Subject: [PATCH 2/2] Bump junit from 4.12 to 4.13.1 Bumps [junit](https://github.com/junit-team/junit4) from 4.12 to 4.13.1. - [Release notes](https://github.com/junit-team/junit4/releases) - [Changelog](https://github.com/junit-team/junit4/blob/main/doc/ReleaseNotes4.12.md) - [Commits](https://github.com/junit-team/junit4/compare/r4.12...r4.13.1) --- updated-dependencies: - dependency-name: junit:junit dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4cd7018..8b2877f 100644 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,7 @@ 0.2.3 1.7.12 1.3 - 4.12 + 4.13.1 2.1.6 18.0