Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,53 @@
.classpath
.project
target

### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion

*.iml

## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties


49 changes: 43 additions & 6 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,27 +41,64 @@
</issueManagement>
<properties>
<named.regex.version>0.2.3</named.regex.version>
<slf4j.version>1.7.12</slf4j.version>
<hamcrest.version>1.3</hamcrest.version>
<junit.version>4.13.1</junit.version>
<joni.version>2.1.6</joni.version>
<guava.version>18.0</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>r03</version>
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.1</version>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.1</version>
<version>${slf4j.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.github.tony19</groupId>
<artifactId>named-regexp</artifactId>
<version>${named.regex.version}</version>
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<version>${joni.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-library</artifactId>
<version>${hamcrest.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<version>${hamcrest.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
21 changes: 7 additions & 14 deletions src/main/java/org/aicer/grok/dictionary/GrokDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,17 @@
*/
package org.aicer.grok.dictionary;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;

import com.google.common.io.CharStreams;
import com.google.common.io.Closeables;
import org.aicer.grok.exception.GrokCompilationException;
import org.aicer.grok.util.Grok;
import org.joni.Regex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.code.regexp.Pattern;
import com.google.common.io.CharStreams;
import com.google.common.io.Closeables;
import java.io.*;
import java.util.HashMap;
import java.util.Map;

/**
* Grok Dictionary
Expand Down Expand Up @@ -75,7 +69,6 @@ public Map<String, String> getRegexDictionary() {
/**
* Digests all the dictionaries loaded so far
*
* @param file
* @throws GrokCompilationException if there is a problem
*/
public void bind() {
Expand Down Expand Up @@ -110,7 +103,7 @@ public Grok compileExpression(final String expression) {

logger.debug("Digested [" + expression + "] into [" + digestedExpression + "] before compilation");

return new Grok(Pattern.compile(digestedExpression));
return new Grok(new Regex(digestedExpression));
}

private void digestExpressions() {
Expand Down
49 changes: 29 additions & 20 deletions src/main/java/org/aicer/grok/util/Grok.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
*/
package org.aicer.grok.util;

import java.util.Map;

import com.google.common.base.Strings;
import org.aicer.grok.dictionary.GrokDictionary;
import org.joni.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.code.regexp.MatchResult;
import com.google.code.regexp.Matcher;
import com.google.code.regexp.Pattern;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
*
Expand All @@ -30,41 +32,48 @@
*/
public final class Grok {

private final Pattern compiledPattern;
private final Regex compiledPattern;
private final static Logger LOGGER = LoggerFactory.getLogger(Grok.class);

/**
* Constructor
*/
public Grok(final Pattern compiledPattern) {
public Grok(final Regex compiledPattern) {
this.compiledPattern = compiledPattern;
}

/**
* Extracts named groups from the raw data
*
* @param rawData
* @param rawData String to match pattern against
* @return A map of group names mapped to their extracted values or null if there are no matches
*/
public Map<String, String> extractNamedGroups(final CharSequence rawData) {

Matcher matcher = compiledPattern.matcher(rawData);

if (matcher.find()) {

MatchResult r = matcher.toMatchResult();

if (r != null && r.namedGroups() != null) {
return r.namedGroups();
Map<String, String> namedGroups = new HashMap<>();
Matcher matcher = compiledPattern.matcher(rawData.toString().getBytes());
if (matcher.search(0, rawData.length(), Option.DEFAULT) != -1) {
Region region = matcher.getEagerRegion();

for (Iterator<NameEntry> entry = compiledPattern.namedBackrefIterator(); entry.hasNext(); ) {
NameEntry e = entry.next();
int backRef = e.getBackRefs()[0];
int start = region.beg[backRef];
int end = region.end[backRef];
CharSequence charSequence = rawData.subSequence(start, end);
String name = new String(e.name).substring(e.nameP, e.nameEnd);
LOGGER.debug("{} = {}", name, charSequence);
namedGroups.put(name, charSequence.toString());
}
return namedGroups;
}
}

return null;
return null;
}

private static final void displayResults(final Map<String, String> results) {
if (results != null) {
for(Map.Entry<String, String> entry : results.entrySet()) {
System.out.println(entry.getKey() + "=" + entry.getValue());
System.out.println(Strings.padEnd(entry.getKey(), 11, ' ') + " = " + entry.getValue());
}
}
}
Expand Down
56 changes: 56 additions & 0 deletions src/test/java/org/aicer/grok/dictionary/GrokDictionaryTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package org.aicer.grok.dictionary;

import org.aicer.grok.util.Grok;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.*;
import static org.hamcrest.Matchers.*;
/**
 * Unit tests for {@link GrokDictionary}: binding, expression digestion and compilation.
 */
public class GrokDictionaryTest {

    private GrokDictionary grokDictionary;

    /** Creates a fresh, empty dictionary before every test. */
    @Before
    public void setUp() throws Exception {
        grokDictionary = new GrokDictionary();
    }

    /** No resources to release. */
    @After
    public void tearDown() throws Exception {
    }

    /** Placeholder: currently makes no assertions. */
    @Test
    public void testGetRegexDictionary() throws Exception {
    }

    /**
     * Binding an empty dictionary yields no entries; binding after the built-in
     * dictionaries are added yields 91 entries.
     */
    @Test
    public void testBind() throws Exception {
        grokDictionary.bind();
        assertRegexDictionarySize(0);

        loadBuiltInDictionaries();
        assertRegexDictionarySize(91);
    }

    /** A simple expression compiles to a non-null {@link Grok}. */
    @Test
    public void testCompileExpression() throws Exception {
        loadBuiltInDictionaries();

        final Grok compiled = grokDictionary.compileExpression("%{WORD:test}");

        assertThat(compiled, is(notNullValue()));
    }

    /** A named WORD reference is digested into the corresponding named capture group. */
    @Test
    public void testDigestExpression() throws Exception {
        loadBuiltInDictionaries();

        final String digested = grokDictionary.digestExpression("%{WORD:test}");

        assertThat(digested, is(equalTo("(?<test>\\b\\w+\\b)")));
    }

    /** Adds the built-in dictionaries and binds them. */
    private void loadBuiltInDictionaries() {
        grokDictionary.addBuiltInDictionaries();
        grokDictionary.bind();
    }

    /** Asserts the number of keys currently held by the regex dictionary. */
    private void assertRegexDictionarySize(final int expectedSize) {
        assertThat(grokDictionary.getRegexDictionary().keySet(), hasSize(expectedSize));
    }
}
59 changes: 59 additions & 0 deletions src/test/java/org/aicer/grok/util/GrokIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.aicer.grok.util;

import org.aicer.grok.dictionary.GrokDictionary;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.theories.DataPoint;

import java.util.Map;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.*;

/**
 * Integration test: compiles a grok expression against the built-in dictionaries and
 * checks the named groups extracted from three sample log lines.
 *
 * <p>NOTE(review): the {@code @DataPoint} annotations presumably have no effect here, since
 * no Theories runner is declared on this class -- confirm.
 */
public class GrokIT {
    @DataPoint
    public static final String rawDataLine1 = "1234567 - israel.ekpo@massivelogdata.net cc55ZZ35 1789 Hello Grok";
    @DataPoint
    public static final String rawDataLine2 = "98AA541 - israel-ekpo@israelekpo.com mmddgg22 8800 Hello Grok";
    @DataPoint
    public static final String rawDataLine3 = "55BB778 - ekpo.israel@example.net secret123 4439 Valid Data Stream";

    private GrokDictionary dictionary;

    /**
     * Builds a dictionary from the built-in definitions, binds it, and verifies that
     * 91 expressions were loaded.
     */
    @Before
    public void setUp() throws Exception {
        dictionary = new GrokDictionary();
        dictionary.addBuiltInDictionaries();

        // Resolve every expression that has been loaded.
        dictionary.bind();

        System.out.println("Dictionary Size: " + dictionary.getDictionarySize());
        assertThat(dictionary.getDictionarySize(), is(equalTo(91)));
    }

    /** Each sample line must yield the expected username, password and year of birth. */
    @Test
    public void testTestData() throws Exception {
        final Grok grok = dictionary.compileExpression("%{EMAIL:username} %{USERNAME:password} %{INT:yearOfBirth}");

        assertExtracted(grok, rawDataLine1, "israel.ekpo@massivelogdata.net", "cc55ZZ35", "1789");
        assertExtracted(grok, rawDataLine2, "israel-ekpo@israelekpo.com", "mmddgg22", "8800");
        assertExtracted(grok, rawDataLine3, "ekpo.israel@example.net", "secret123", "4439");
    }

    /** Extracts the named groups from one line and asserts the three expected entries. */
    private static void assertExtracted(final Grok grok,
                                        final String line,
                                        final String username,
                                        final String password,
                                        final String yearOfBirth) {
        final Map<String, String> groups = grok.extractNamedGroups(line);
        assertThat(groups, hasEntry("username", username));
        assertThat(groups, hasEntry("password", password));
        assertThat(groups, hasEntry("yearOfBirth", yearOfBirth));
    }
}
Loading