Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name := "ScalaApacheAccessLogParser"

version := "1.0"

scalaVersion := "2.10.0"
scalaVersion := "2.10.4"

resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/"

Expand Down
31 changes: 26 additions & 5 deletions src/main/scala/AccessLogParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,23 @@ import scala.util.{Try, Success, Failure}
*
*/

/**
* For record like:
* 94.102.63.11 - - [21/Jul/2009:02:48:13 -0700] "GET / HTTP/1.1" 200 18209 "http://acme.com/foo.php" "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)"
* the parser prepends '- ' to the beginning of the log line, so the botnet field is '-'.
*
*
* For records like:
* Expiro 5.102.63.11 - - [3/Jan/2014:10:06:55 +0000] "GET /?f=x HTTP/1.1" 200 3594 "http://www.foo.it/foo.php" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E; InfoPath.2)"
* Botnet is 'Expiro'
*/



@SerialVersionUID(100L)
class AccessLogParser extends Serializable {

private val bot = "(\\S+)" // leading botnet token: any run of non-whitespace characters, like 'Expiro'
private val ddd = "\\d{1,3}" // one to three digits (plain greedy quantifier, not possessive)
private val ip = s"($ddd\\.$ddd\\.$ddd\\.$ddd)?" // dotted quad like `123.456.7.89`; the trailing '?' makes the whole group optional
private val client = "(\\S+)" // '\S' is 'non-whitespace character'
Expand All @@ -30,7 +44,7 @@ class AccessLogParser extends Serializable {
private val bytes = "(\\S+)" // this can be a "-"
private val referer = "\"(.*?)\""
private val agent = "\"(.*?)\""
private val regex = s"$ip $client $user $dateTime $request $status $bytes $referer $agent"
private val regex = s"$bot $ip $client $user $dateTime $request $status $bytes $referer $agent"
private val p = Pattern.compile(regex)

/**
Expand All @@ -39,7 +53,10 @@ class AccessLogParser extends Serializable {
* @return An AccessLogRecord instance wrapped in an Option.
*/
def parseRecord(record: String): Option[AccessLogRecord] = {
val matcher = p.matcher(record)
val isbot = "(\\D+)".r
val check_record = isbot findFirstIn record
val matcher = if(check_record == Some(".")) p.matcher("- " + record)
else p.matcher(record)
if (matcher.find) {
Some(buildAccessLogRecord(matcher))
} else {
Expand All @@ -57,7 +74,10 @@ class AccessLogParser extends Serializable {
* will be empty strings.
*/
def parseRecordReturningNullObjectOnFailure(record: String): AccessLogRecord = {
val matcher = p.matcher(record)
val isbot = "(\\D+)".r
val check_record = isbot findFirstIn record
val matcher = if(check_record == Some(".")) p.matcher("- " + record)
else p.matcher(record)
if (matcher.find) {
buildAccessLogRecord(matcher)
} else {
Expand All @@ -75,7 +95,8 @@ class AccessLogParser extends Serializable {
matcher.group(6),
matcher.group(7),
matcher.group(8),
matcher.group(9))
matcher.group(9),
matcher.group(10))
}
}

Expand All @@ -85,7 +106,7 @@ class AccessLogParser extends Serializable {
*/
object AccessLogParser {

val nullObjectAccessLogRecord = AccessLogRecord("", "", "", "", "", "", "", "", "")
val nullObjectAccessLogRecord = AccessLogRecord("", "", "", "", "", "", "", "", "", "")

/**
* @param A String like "GET /the-uri-here HTTP/1.1"
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/AccessLogRecord.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package com.alvinalexander.accesslogparser
* @see http://httpd.apache.org/docs/2.2/logs.html for details
*/
case class AccessLogRecord (
botnet: String, // string or empty
clientIpAddress: String, // should be an ip address, but may also be the hostname if hostname-lookups are enabled
rfc1413ClientIdentity: String, // typically `-`
remoteUser: String, // typically `-`
Expand Down
34 changes: 34 additions & 0 deletions src/test/scala/AccessLogRecordSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ApacheCombinedAccessLogRecordSpec extends FunSpec with BeforeAndAfter with
val parser = new AccessLogParser
val rec = parser.parseRecord(records(0))
println("IP ADDRESS: " + rec.get.clientIpAddress)
println("BOTNET: " + rec.get.botnet)
Then("parsing record(0) should not return None")
assert(rec != None)
And("the ip address should be correct")
Expand All @@ -42,6 +43,39 @@ class ApacheCombinedAccessLogRecordSpec extends FunSpec with BeforeAndAfter with
assert(rec.get.userAgent == "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 GTB5")
}
}

// Parses the first botnet-labelled sample record and checks every field asserted below.
describe("Testing the access log record with botnet ...") {
  it("the data fields should be correct") {
    Given("the first sample log record")
    records = SampleCombinedAccessLogRecords.botnetRecord
    val parser = new AccessLogParser
    val rec = parser.parseRecord(records(0))
    Then("parsing record(0) should not return None")
    assert(rec != None)
    // Unwrap only after the assertion above: a parse failure is then reported as a
    // failed assertion instead of a NoSuchElementException thrown by Option.get.
    val parsed = rec.get
    println("IP ADDRESS: " + parsed.clientIpAddress)
    println("BOTNET: " + parsed.botnet)
    And("botnet")
    assert(parsed.botnet == "Expiro")
    And("the ip address should be correct")
    assert(parsed.clientIpAddress == "5.102.63.11")
    And("client identity")
    assert(parsed.rfc1413ClientIdentity == "-")
    And("remote user")
    assert(parsed.remoteUser == "-")
    And("date/time")
    assert(parsed.dateTime == "[31/Jan/2014:10:06:55 +0000]")
    And("request")
    assert(parsed.request == "GET /?f=x HTTP/1.1")
    And("status code should be 200")
    assert(parsed.httpStatusCode == "200")
    And("bytes sent should be 3594")
    assert(parsed.bytesSent == "3594")
    And("referer")
    assert(parsed.referer == "http://www.foo.it/foo.php")
    And("user agent")
    assert(parsed.userAgent == "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E; InfoPath.2)")
  }
}

describe("Testing a second access log record ...") {
records = SampleCombinedAccessLogRecords.data
Expand Down
6 changes: 6 additions & 0 deletions src/test/scala/SampleData.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ object SampleCombinedAccessLogRecords {
66.249.70.10 - - [23/Feb/2014:03:21:59 -0700] "GET /blog/post/java/how-load-multiple-spring-context-files-standalone/ HTTP/1.0" 301 - "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
""".split("\n").filter(_ != "")

// Sample combined-format access log lines that start with a botnet label
// (Expiro, Pandora, FakeM, Qakbot). The literal is split on newlines and the
// empty entries are discarded, leaving one array element per record.
val botnetRecord = """
Expiro 5.102.63.11 - - [31/Jan/2014:10:06:55 +0000] "GET /?f=x HTTP/1.1" 200 3594 "http://www.foo.it/foo.php" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E; InfoPath.2)"
Pandora 5.102.63.11 - - [31/Jan/2014:10:06:55 +0000] "GET /?f=x HTTP/1.1" 200 3594 "http://www.foo.it/foo.php" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E; InfoPath.2)"
FakeM 5.102.63.11 - - [31/Jan/2014:10:06:55 +0000] "GET /?f=x HTTP/1.1" 200 3594 "http://www.foo.it/foo.php" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E; InfoPath.2)"
Qakbot 5.102.63.11 - - [31/Jan/2014:10:06:55 +0000] "GET /?f=x HTTP/1.1" 200 3594 "http://www.foo.it/foo.php" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E; InfoPath.2)"
""".split("\n").filterNot(_.isEmpty)

}