From 760868a3b7ee79676a49ff463328b3ef58f0114d Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Thu, 6 Oct 2016 16:34:06 -0500
Subject: [PATCH 01/11] Add Chunker basic structure and some Edison-based
 features.

---
 .../saulexamples/nlp/Chunker/ChunkerApp.scala | 120 ++++++++++++++++++
 .../nlp/Chunker/ChunkerClassifiers.scala      |  33 +++++
 .../nlp/Chunker/ChunkerDataModel.scala        |  49 +++++++
 .../nlp/Chunker/ChunkerSensors.scala          |  24 ++++
 4 files changed, 226 insertions(+)
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
new file mode 100644
index 00000000..2cc0b9a5
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
@@ -0,0 +1,120 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation._
+import edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer
+import edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder
+import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSTaggerApp
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable
+import scala.io.{ Source, StdIn }
+
+object ChunkerApp extends App {
+  val trainFile = "../data/conll2000chunking/train.txt"
+  val testFile = "../data/conll2000chunking/test.txt"
+
+  def parseData(fileName: String): Seq[TextAnnotation] = {
+    val arrayBuffer = mutable.Buffer[TextAnnotation]()
+
+    val tokenConstituents = mutable.ArrayBuffer[String]()
+    val posLabels = mutable.ArrayBuffer[String]()
+    val chunkLabels = mutable.ArrayBuffer[String]()
+    var numSentences = 0
+
+    Source.fromFile(fileName)
+      .getLines()
+      .foreach({ line: String =>
+        if (line.isEmpty) {
+          val sentenceList = List(tokenConstituents.toArray[String])
+          val textAnnotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentenceList)
+
+          val posView = new TokenLabelView(ViewNames.POS, textAnnotation)
+          val chunkLabelView = new SpanLabelView(ViewNames.SHALLOW_PARSE, textAnnotation)
+
+          textAnnotation.getView(ViewNames.TOKENS)
+            .getConstituents
+            .zipWithIndex
+            .foreach({
+              case (constituent: Constituent, idx: Int) =>
+                val posCons = constituent.cloneForNewViewWithDestinationLabel(ViewNames.POS, posLabels(idx))
+                posView.addConstituent(posCons)
+
+                val chunkCons = constituent.cloneForNewViewWithDestinationLabel(ViewNames.SHALLOW_PARSE, chunkLabels(idx))
+                chunkLabelView.addConstituent(chunkCons)
+            })
+
+          textAnnotation.addView(ViewNames.POS, posView)
+          textAnnotation.addView(ViewNames.SHALLOW_PARSE, chunkLabelView)
+
+          arrayBuffer.append(textAnnotation)
+          tokenConstituents.clear()
+          posLabels.clear()
+          chunkLabels.clear()
+
+          numSentences += 1
+        } else {
+          val reader = line.split(" ")
+          tokenConstituents.append(reader(0))
+          posLabels.append(reader(1))
+          chunkLabels.append(reader(2))
+        }
+      })
+
+    println("Number of sentences = " + numSentences)
+
+    arrayBuffer
+  }
+
+  lazy val trainData = parseData(trainFile)
+  lazy val testData = parseData(testFile)
+
+  val jarModelPath = ""
+
+  trainData.foreach({ textAnnotation: TextAnnotation =>
+    val numberOfSentences = textAnnotation.getNumberOfSentences
+    val sentences = (0 until numberOfSentences).map(textAnnotation.getSentence)
+    ChunkerDataModel.sentence.populate(sentences, train = true)
+  })
+
+  testData.foreach({ textAnnotation: TextAnnotation =>
+    val numberOfSentences = textAnnotation.getNumberOfSentences
+    val sentences = (0 until numberOfSentences).map(textAnnotation.getSentence)
+    ChunkerDataModel.sentence.populate(sentences, train = false)
+  })
+
+  ChunkerClassifiers.ChunkerClassifier.learn(10)
+  println(ChunkerClassifiers.ChunkerClassifier.test())
+
+  /** Interactive model to annotate input sentences with Pre-trained models
+    */
+  def interactiveWithPretrainedModels(): Unit = {
+    val posAnnotator = POSTaggerApp.getPretrainedAnnotator(ViewNames.POS)
+    val taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer())
+
+    while (true) {
+      println("Enter a sentence to annotate (or Press Enter to exit)")
+      val input = StdIn.readLine()
+
+      input match {
+        case sentence: String if sentence.trim.nonEmpty =>
+          // Create a Text Annotation with the current input sentence.
+          val ta = taBuilder.createTextAnnotation(sentence.trim)
+          posAnnotator.addView(ta)
+
+          val tokens = ta.getView(ViewNames.TOKENS).getConstituents
+          ChunkerDataModel.tokens.populate(tokens)
+
+          println("Tokens: " + ta.getView(ViewNames.TOKENS))
+        case _ => return
+      }
+    }
+  }
+}
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
new file mode 100644
index 00000000..bdbf460c
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
@@ -0,0 +1,33 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent
+import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner }
+import edu.illinois.cs.cogcomp.saul.classifier.Learnable
+
+object ChunkerClassifiers {
+  import ChunkerDataModel._
+
+  object ChunkerClassifier extends Learnable[Constituent](tokens) {
+
+    override lazy val classifier = {
+      // Parameters
+      val params = new SparseAveragedPerceptron.Parameters()
+      params.learningRate = 0.1
+      params.thickness = 0.2
+      val baseLTU = new SparseAveragedPerceptron(params)
+
+      new SparseNetworkLearner(baseLTU)
+    }
+
+    /** Label property for users classifier */
+    override def label = chunkLabel
+
+    override def feature = using(wordTypeInformation, affixes, posWindow)
+  }
+}
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
new file mode 100644
index 00000000..408b213a
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -0,0 +1,49 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Sentence }
+import edu.illinois.cs.cogcomp.edison.features.lrec.{ Affixes, POSWindow, WordTypeInformation }
+import edu.illinois.cs.cogcomp.saul.datamodel.DataModel
+
+import scala.collection.JavaConversions._
+
+object ChunkerDataModel extends DataModel {
+  val sentence = node[Sentence]
+  val tokens = node[Constituent]
+
+  val sentenceToTokens = edge(sentence, tokens)
+  sentenceToTokens.addSensor(ChunkerSensors.getTokensInSentence _)
+
+  // Label
+  val chunkLabel = property(tokens, "ChunkLabel") { token: Constituent => token.getLabel }
+
+  // Affixes feature
+  private val affixFeatureExtractor = new Affixes(ViewNames.TOKENS)
+  val affixes = property(tokens, "Affixes") { token: Constituent =>
+    affixFeatureExtractor.getFeatures(token)
+      .map(_.getName)
+      .toList
+  }
+
+  // WordTypeInformation feature
+  private val wordTypeInformationExtractor = new WordTypeInformation(ViewNames.TOKENS)
+  val wordTypeInformation = property(tokens, "WordTypeInformation") { token: Constituent =>
+    wordTypeInformationExtractor.getFeatures(token)
+      .map(_.getName)
+      .toList
+  }
+
+  // POS Window features
+  private val posWindowExtractor = new POSWindow(ViewNames.POS)
+  val posWindow = property(tokens, "POSWindow") { token: Constituent =>
+    posWindowExtractor.getFeatures(token)
+      .map(_.getName)
+      .toList
+  }
+}
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
new file mode 100644
index 00000000..85ade7a7
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
@@ -0,0 +1,24 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Sentence, TextAnnotation }
+
+import scala.collection.JavaConversions._
+
+object ChunkerSensors {
+
+  def getSentencesInDocument(document: TextAnnotation): Seq[Sentence] = {
+    val numberOfSentences = document.getNumberOfSentences
+    (0 until numberOfSentences).map(document.getSentence)
+  }
+
+  def getTokensInSentence(sentence: Sentence): Seq[Constituent] = {
+    sentence.getView(ViewNames.SHALLOW_PARSE).getConstituents
+  }
+}

From 4e6b4927e5dbf747c99fb3412f63b7b757d54757 Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Fri, 7 Oct 2016 04:47:00 -0500
Subject: [PATCH 02/11] Chunker Context feature + Training.

---
 .../saulexamples/nlp/Chunker/ChunkerApp.scala | 32 ++-----------------
 .../nlp/Chunker/ChunkerClassifiers.scala      |  2 +-
 .../nlp/Chunker/ChunkerDataModel.scala        | 26 +++++++++++++++
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
index 2cc0b9a5..ca833d26 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
@@ -9,13 +9,11 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
 import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation._
-import edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer
-import edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder
-import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSTaggerApp
+import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable
-import scala.io.{ Source, StdIn }
+import scala.io.Source
 
 object ChunkerApp extends App {
   val trainFile = "../data/conll2000chunking/train.txt"
@@ -92,29 +90,5 @@ object ChunkerApp extends App {
 
   ChunkerClassifiers.ChunkerClassifier.learn(10)
   println(ChunkerClassifiers.ChunkerClassifier.test())
-
-  /** Interactive model to annotate input sentences with Pre-trained models
-    */
-  def interactiveWithPretrainedModels(): Unit = {
-    val posAnnotator = POSTaggerApp.getPretrainedAnnotator(ViewNames.POS)
-    val taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer())
-
-    while (true) {
-      println("Enter a sentence to annotate (or Press Enter to exit)")
-      val input = StdIn.readLine()
-
-      input match {
-        case sentence: String if sentence.trim.nonEmpty =>
-          // Create a Text Annotation with the current input sentence.
-          val ta = taBuilder.createTextAnnotation(sentence.trim)
-          posAnnotator.addView(ta)
-
-          val tokens = ta.getView(ViewNames.TOKENS).getConstituents
-          ChunkerDataModel.tokens.populate(tokens)
-
-          println("Tokens: " + ta.getView(ViewNames.TOKENS))
-        case _ => return
-      }
-    }
-  }
+  ClassifierUtils.SaveClassifiers(ChunkerClassifiers.ChunkerClassifier)
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
index bdbf460c..17d5f274 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
@@ -28,6 +28,6 @@ object ChunkerClassifiers {
     /** Label property for users classifier */
     override def label = chunkLabel
 
-    override def feature = using(wordTypeInformation, affixes, posWindow)
+    override def feature = using(wordTypeInformation, affixes, posWindow, capitalizationWindowProperty, previousTags)
   }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
index 408b213a..a7d40e38 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -8,6 +8,8 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
 
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Sentence }
+import edu.illinois.cs.cogcomp.edison.features.ContextFeatureExtractor
+import edu.illinois.cs.cogcomp.edison.features.factory.WordFeatureExtractorFactory
 import edu.illinois.cs.cogcomp.edison.features.lrec.{ Affixes, POSWindow, WordTypeInformation }
 import edu.illinois.cs.cogcomp.saul.datamodel.DataModel
 
@@ -46,4 +48,28 @@ object ChunkerDataModel extends DataModel {
       .map(_.getName)
       .toList
   }
+
+  // Capitalization features
+  private val capitalizationExtractor = new ContextFeatureExtractor(2, true, true,
+    WordFeatureExtractorFactory.capitalization)
+  val capitalizationWindowProperty = property(tokens, "Capitalization") { token: Constituent =>
+    capitalizationExtractor.getFeatures(token)
+      .map(_.getName)
+      .toList
+  }
+
+  // Filter to restrict window to current sentence's tokens only.
+  val previousTagsFilter = Seq({ token: Constituent => tokens(token) ~> -sentenceToTokens })
+  val previousTags = property(tokens, "PreviousTags", cache = true) { token: Constituent =>
+    tokens.getWithWindow(token, -2, -1, previousTagsFilter)
+      .flatten
+      .map({ previousCons: Constituent =>
+        // Use Label while training and prediction while testing.
+        if (ChunkerClassifiers.ChunkerClassifier.isTraining) {
+          chunkLabel(previousCons)
+        } else {
+          ChunkerClassifiers.ChunkerClassifier(previousCons)
+        }
+      })
+  }
 }

From b48e21e60d8f9b8a2d7b085b73660a4ff0fc9e36 Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 15 Feb 2017 19:51:53 -0600
Subject: [PATCH 03/11] Update some features.

---
 .../nlp/Chunker/ChunkerDataModel.scala        | 37 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
index a7d40e38..fe230581 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -59,9 +59,11 @@ object ChunkerDataModel extends DataModel {
   }
 
   // Filter to restrict window to current sentence's tokens only.
-  val previousTagsFilter = Seq({ token: Constituent => tokens(token) ~> -sentenceToTokens })
+  val sameSentenceTokensFilter = Seq({ token: Constituent => tokens(token) ~> -sentenceToTokens })
+
+  // Get Previous Chunk labels
   val previousTags = property(tokens, "PreviousTags", cache = true) { token: Constituent =>
-    tokens.getWithWindow(token, -2, -1, previousTagsFilter)
+    tokens.getWithWindow(token, -2, -1, sameSentenceTokensFilter)
       .flatten
       .map({ previousCons: Constituent =>
         // Use Label while training and prediction while testing.
@@ -72,4 +74,35 @@ object ChunkerDataModel extends DataModel {
         }
       })
   }
+
+  // Get surface forms in context window
+  val forms = property(tokens, "Forms") { token: Constituent =>
+    tokens.getWithWindow(token, -2, +2, sameSentenceTokensFilter)
+      .flatten
+      .map(_.getSurfaceForm)
+  }
+
+  // Formpp Feature
+  val formpp = property(tokens, "Formpp") { token: Constituent =>
+    val window = 2
+    val surfaceForms: List[String] = forms(token)
+
+    // Feature range
+    val range = for {
+      j <- 0 until window
+      i <- surfaceForms.indices
+    } yield (j, i)
+
+    range.map({ case (j: Int, i: Int) =>
+      val contextStrings = for {
+        context <- 0 until window
+        if i + context < surfaceForms.length
+      } yield s"${i}_${j}:${surfaceForms(i + context)}"
+
+      contextStrings.mkString("_")
+    })
+      .toList
+  }
+
+
 }

From 0b6cf41bef76be5641f1d792ac315284f1f1353f Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 05:24:22 -0600
Subject: [PATCH 04/11] Add evaluation using ConstituentLabelingEvaluator.

---
 .../nlp/Chunker/ChunkerAnnotator.scala        | 53 +++++++++++++++
 .../saulexamples/nlp/Chunker/ChunkerApp.scala | 45 +++++++++++--
 .../nlp/Chunker/ChunkerSensors.scala          | 10 +--
 .../nlp/Chunker/ChunkerUtilities.scala        | 64 +++++++++++++++++++
 4 files changed, 160 insertions(+), 12 deletions(-)
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
new file mode 100644
index 00000000..eea96de0
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
@@ -0,0 +1,53 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.annotation.Annotator
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{Constituent, TextAnnotation, TokenLabelView}
+import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
+import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.{POSAnnotator, POSTaggerApp}
+
+import scala.collection.JavaConversions._
+
+class ChunkerAnnotator extends Annotator(ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW, Array(ViewNames.TOKENS)) {
+
+  override def initialize(rm: ResourceManager): Unit = {}
+
+  /** Adds the POS view to a TextAnnotation
+    * Note: Assumes that the classifiers are populated with required models
+    * @param ta TextAnnotation instance
+    */
+  override def addView(ta: TextAnnotation): Unit = {
+    if (!ta.hasView(ViewNames.POS)) {
+      ChunkerAnnotator.localPOSAnnotator.addView(ta)
+    }
+
+    val tokens = ta.getView(ChunkerConstants.SHALLOW_PARSE_GOLD_BIO_VIEW).getConstituents
+
+    ChunkerDataModel.sentence.clear()
+    val sentences = (0 until ta.getNumberOfSentences).map(ta.getSentence)
+    ChunkerDataModel.sentence.populate(sentences, train = false)
+
+    val chunkerBIOView = new TokenLabelView(ChunkerConstants.SHALLOW_PARSE_ANNOTATED_BIO_VIEW, ta)
+
+    tokens.foreach({ cons: Constituent =>
+      val label = ChunkerClassifiers.ChunkerClassifier(cons)
+      val posCons = cons.cloneForNewViewWithDestinationLabel(chunkerBIOView.getViewName, label)
+      chunkerBIOView.addConstituent(posCons)
+    })
+
+    ta.addView(chunkerBIOView.getViewName, chunkerBIOView)
+
+    ChunkerUtilities.addGoldSpanLabelView(ta, chunkerBIOView.getViewName, ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
+  }
+}
+
+object ChunkerAnnotator {
+  /** Instance of a local POS Annotator if required */
+  private lazy val localPOSAnnotator: POSAnnotator = POSTaggerApp.getPretrainedAnnotator()
+}
\ No newline at end of file
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
index ca833d26..d71bd07a 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
@@ -9,13 +9,25 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
 import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation._
+import edu.illinois.cs.cogcomp.core.experiments.ClassificationTester
+import edu.illinois.cs.cogcomp.core.experiments.evaluators.ConstituentLabelingEvaluator
 import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable
 import scala.io.Source
 
+object ChunkerConstants {
+  val SHALLOW_PARSE_GOLD_SPAN_VIEW = "SHALLOW_PARSE_GOLD"
+  val SHALLOW_PARSE_GOLD_BIO_VIEW = "SHALLOW_PARSE_GOLD_BIO"
+
+  val SHALLOW_PARSE_ANNOTATED_SPAN_VIEW = "SHALLOW_PARSE_ANNOTATED"
+  val SHALLOW_PARSE_ANNOTATED_BIO_VIEW = "SHALLOW_PARSE_ANNOTATED_BIO"
+}
+
 object ChunkerApp extends App {
+  import ChunkerConstants._
+
   val trainFile = "../data/conll2000chunking/train.txt"
   val testFile = "../data/conll2000chunking/test.txt"
 
@@ -35,7 +47,7 @@ object ChunkerApp extends App {
           val textAnnotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentenceList)
 
           val posView = new TokenLabelView(ViewNames.POS, textAnnotation)
-          val chunkLabelView = new SpanLabelView(ViewNames.SHALLOW_PARSE, textAnnotation)
+          val chunkLabelView = new SpanLabelView(SHALLOW_PARSE_GOLD_BIO_VIEW, textAnnotation)
 
           textAnnotation.getView(ViewNames.TOKENS)
             .getConstituents
@@ -45,12 +57,14 @@ object ChunkerApp extends App {
                 val posCons = constituent.cloneForNewViewWithDestinationLabel(ViewNames.POS, posLabels(idx))
                 posView.addConstituent(posCons)
 
-                val chunkCons = constituent.cloneForNewViewWithDestinationLabel(ViewNames.SHALLOW_PARSE, chunkLabels(idx))
+                val chunkCons = constituent.cloneForNewViewWithDestinationLabel(SHALLOW_PARSE_GOLD_BIO_VIEW, chunkLabels(idx))
                 chunkLabelView.addConstituent(chunkCons)
             })
 
           textAnnotation.addView(ViewNames.POS, posView)
-          textAnnotation.addView(ViewNames.SHALLOW_PARSE, chunkLabelView)
+          textAnnotation.addView(SHALLOW_PARSE_GOLD_BIO_VIEW, chunkLabelView)
+
+          ChunkerUtilities.addGoldSpanLabelView(textAnnotation, SHALLOW_PARSE_GOLD_BIO_VIEW, SHALLOW_PARSE_GOLD_SPAN_VIEW)
 
           arrayBuffer.append(textAnnotation)
           tokenConstituents.clear()
@@ -90,5 +104,28 @@ object ChunkerApp extends App {
 
   ChunkerClassifiers.ChunkerClassifier.learn(10)
   println(ChunkerClassifiers.ChunkerClassifier.test())
-  ClassifierUtils.SaveClassifiers(ChunkerClassifiers.ChunkerClassifier)
+
+  val evaluator = new ConstituentLabelingEvaluator()
+  val tester = new ClassificationTester()
+
+  val chunkerAnnotator = new ChunkerAnnotator()
+  testData.foreach({ textAnnotation: TextAnnotation =>
+    // Remove POS View before evaluation.
+    textAnnotation.removeView(ViewNames.POS)
+
+    chunkerAnnotator.addView(textAnnotation)
+
+    val goldView = textAnnotation.getView(SHALLOW_PARSE_GOLD_SPAN_VIEW)
+    val annotatedView = textAnnotation.getView(SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
+
+    // Workaround for incorrect ConstituentLabelingEvaluator behaviour.
+    val predictedView = new SpanLabelView(SHALLOW_PARSE_GOLD_SPAN_VIEW, textAnnotation)
+    annotatedView.getConstituents.foreach({ cons: Constituent =>
+      predictedView.addConstituent(cons.cloneForNewView(SHALLOW_PARSE_GOLD_SPAN_VIEW))
+    })
+
+    evaluator.evaluate(tester, goldView, predictedView)
+  })
+
+  println(tester.getPerformanceTable.toOrgTable)
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
index 85ade7a7..b6d44d01 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
@@ -6,19 +6,13 @@
   */
 package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
 
-import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
-import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Sentence, TextAnnotation }
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Sentence }
 
 import scala.collection.JavaConversions._
 
 object ChunkerSensors {
 
-  def getSentencesInDocument(document: TextAnnotation): Seq[Sentence] = {
-    val numberOfSentences = document.getNumberOfSentences
-    (0 until numberOfSentences).map(document.getSentence)
-  }
-
   def getTokensInSentence(sentence: Sentence): Seq[Constituent] = {
-    sentence.getView(ViewNames.SHALLOW_PARSE).getConstituents
+    sentence.getView(ChunkerConstants.SHALLOW_PARSE_GOLD_BIO_VIEW).getConstituents
   }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
new file mode 100644
index 00000000..14374cd4
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
@@ -0,0 +1,64 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{SpanLabelView, TextAnnotation, TokenLabelView}
+import edu.illinois.cs.cogcomp.saul.util.Logging
+
+import scala.collection.JavaConversions._
+
+object ChunkerUtilities extends Logging {
+
+  /** Convert the gold BIO labelling to Span Label View
+    * Note: Use this method only for the GOLD view as this does not perform error handling. */
+  def addGoldSpanLabelView(ta: TextAnnotation, sourceBIOView: String, destView: String): Unit = {
+    assert(ta.hasView(sourceBIOView))
+    assert(!ta.hasView(destView))
+
+    val destinationView = new SpanLabelView(destView, ta)
+
+    var currentSpanStart = -1
+    var currentSpanEnd = -1
+    var currentTag = ""
+
+    ta.getView(sourceBIOView).getConstituents.foreach({ constituent =>
+      val inASpan = currentSpanStart != -1
+
+      if (inASpan) {
+        if (constituent.getLabel.startsWith("O") || constituent.getLabel.startsWith("B-")) {
+          destinationView.addSpanLabel(currentSpanStart, currentSpanEnd, currentTag, 1.0d)
+          currentSpanStart = -1
+          currentSpanEnd = -1
+          currentTag = ""
+        } else {
+          // Label Starts with I-
+          if (constituent.getLabel.endsWith(currentTag)) {
+            currentSpanEnd = constituent.getEndSpan
+          } else {
+            destinationView.addSpanLabel(currentSpanStart, currentSpanEnd, currentTag, 1.0d)
+            logger.info("Dangling I-label")
+
+            currentSpanStart = -1
+            currentSpanEnd = -1
+            currentTag = ""
+          }
+        }
+      }
+
+      if (constituent.getLabel.startsWith("B-")) {
+        currentSpanStart = constituent.getStartSpan
+        currentSpanEnd = constituent.getEndSpan
+        currentTag = constituent.getLabel.substring(2)
+      }
+      else if (!inASpan && constituent.getLabel.startsWith("I-")) {
+        logger.info(s"Dangling I- label for constituent - $constituent")
+      }
+    })
+
+    ta.addView(destView, destinationView)
+  }
+}

From 0ff8d2f2e54ca06cdc1c54d99a135d0ff543cbcb Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 05:59:05 -0600
Subject: [PATCH 05/11] Add heuristic decoding of BIO annotation.

---
 .../nlp/Chunker/ChunkerAnnotator.scala        | 19 +++-
 .../nlp/Chunker/ChunkerUtilities.scala        | 87 +++++++++++++++----
 2 files changed, 86 insertions(+), 20 deletions(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
index eea96de0..8c818d6f 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
@@ -14,7 +14,12 @@ import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.{POSAnnotator, POSTagg
 
 import scala.collection.JavaConversions._
 
-class ChunkerAnnotator extends Annotator(ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW, Array(ViewNames.TOKENS)) {
+/** Chunker Annotator implementation
+  *
+  * @param useHeuristics To use heuristics to fix BIO annotation.
+  */
+class ChunkerAnnotator(val useHeuristics: Boolean = true)
+  extends Annotator(ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW, Array(ViewNames.TOKENS)) {
 
   override def initialize(rm: ResourceManager): Unit = {}
 
@@ -43,7 +48,17 @@ class ChunkerAnnotator extends Annotator(ChunkerConstants.SHALLOW_PARSE_ANNOTATE
 
     ta.addView(chunkerBIOView.getViewName, chunkerBIOView)
 
-    ChunkerUtilities.addGoldSpanLabelView(ta, chunkerBIOView.getViewName, ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
+    if (useHeuristics) {
+      ChunkerUtilities.addSpanLabelViewUsingHeuristics(
+        ta,
+        chunkerBIOView.getViewName,
+        ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
+    } else {
+      ChunkerUtilities.addGoldSpanLabelView(
+        ta,
+        chunkerBIOView.getViewName,
+        ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
+    }
   }
 }
 
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
index 14374cd4..a48e81ff 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
@@ -6,7 +6,7 @@
   */
 package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
 
-import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{SpanLabelView, TextAnnotation, TokenLabelView}
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, SpanLabelView, TextAnnotation }
 import edu.illinois.cs.cogcomp.saul.util.Logging
 
 import scala.collection.JavaConversions._
@@ -21,44 +21,95 @@ object ChunkerUtilities extends Logging {
 
     val destinationView = new SpanLabelView(destView, ta)
 
-    var currentSpanStart = -1
-    var currentSpanEnd = -1
-    var currentTag = ""
+    var currentChunkStart = -1
+    var currentChunkEnd = -1
+    var cLabel = ""
 
     ta.getView(sourceBIOView).getConstituents.foreach({ constituent =>
-      val inASpan = currentSpanStart != -1
+      val inASpan = currentChunkStart != -1
 
       if (inASpan) {
         if (constituent.getLabel.startsWith("O") || constituent.getLabel.startsWith("B-")) {
-          destinationView.addSpanLabel(currentSpanStart, currentSpanEnd, currentTag, 1.0d)
-          currentSpanStart = -1
-          currentSpanEnd = -1
-          currentTag = ""
+          destinationView.addSpanLabel(currentChunkStart, currentChunkEnd, cLabel, 1.0d)
+          currentChunkStart = -1
+          currentChunkEnd = -1
+          cLabel = ""
         } else {
           // Label Starts with I-
-          if (constituent.getLabel.endsWith(currentTag)) {
-            currentSpanEnd = constituent.getEndSpan
+          if (constituent.getLabel.endsWith(cLabel)) {
+            currentChunkEnd = constituent.getEndSpan
           } else {
-            destinationView.addSpanLabel(currentSpanStart, currentSpanEnd, currentTag, 1.0d)
+            destinationView.addSpanLabel(currentChunkStart, currentChunkEnd, cLabel, 1.0d)
             logger.info("Dangling I-label")
 
-            currentSpanStart = -1
-            currentSpanEnd = -1
-            currentTag = ""
+            currentChunkStart = -1
+            currentChunkEnd = -1
+            cLabel = ""
           }
         }
       }
 
       if (constituent.getLabel.startsWith("B-")) {
-        currentSpanStart = constituent.getStartSpan
-        currentSpanEnd = constituent.getEndSpan
-        currentTag = constituent.getLabel.substring(2)
+        currentChunkStart = constituent.getStartSpan
+        currentChunkEnd = constituent.getEndSpan
+        cLabel = constituent.getLabel.substring(2)
       }
       else if (!inASpan && constituent.getLabel.startsWith("I-")) {
         logger.info(s"Dangling I- label for constituent - $constituent")
       }
     })
 
+    if (currentChunkStart != -1 && currentChunkEnd != -1 && cLabel.nonEmpty) {
+      destinationView.addSpanLabel(currentChunkStart, currentChunkEnd, cLabel, 1.0d)
+    }
+
+    ta.addView(destView, destinationView)
+  }
+
+  def addSpanLabelViewUsingHeuristics(ta: TextAnnotation, sourceBIOView: String, destView: String): Unit = {
+    assert(ta.hasView(sourceBIOView))
+    assert(!ta.hasView(destView))
+
+    val destinationView = new SpanLabelView(destView, ta)
+
+    var currentChunkStart = -1
+    var currentChunkEnd = -1
+    var cLabel = ""
+    var previousConstituent: Option[Constituent] = None
+
+    ta.getView(sourceBIOView).getConstituents.foreach({ constituent =>
+      // Running version of current constituent's predicted label.
+      var currentLabel = constituent.getLabel
+
+      if (currentLabel.startsWith("I-")) {
+        if (cLabel.isEmpty) {
+          currentLabel = "B" + currentLabel.substring(1)
+        } else if (!currentLabel.endsWith(cLabel)) {
+          currentLabel = "B" + currentLabel.substring(1)
+        }
+      }
+
+      if ((currentLabel.startsWith("B-") || currentLabel.startsWith("O")) && cLabel.nonEmpty) {
+        if (previousConstituent.nonEmpty) {
+          currentChunkEnd = previousConstituent.get.getEndSpan
+          destinationView.addSpanLabel(currentChunkStart, currentChunkEnd, cLabel, 1.0d)
+          cLabel = ""
+        }
+      }
+
+      if (currentLabel.startsWith("B-")) {
+        currentChunkStart = constituent.getStartSpan
+        cLabel = currentLabel.substring(2)
+      }
+
+      previousConstituent = Some(constituent)
+    })
+
+    if (cLabel.nonEmpty && previousConstituent.nonEmpty) {
+      currentChunkEnd = previousConstituent.get.getEndSpan
+      destinationView.addSpanLabel(currentChunkStart, currentChunkEnd, cLabel, 1.0d)
+    }
+
     ta.addView(destView, destinationView)
   }
 }

From 6b23f6c7164788fbb04618ca934b76acc0787c34 Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 15:38:43 -0600
Subject: [PATCH 06/11] Clean up the main class and add some logical separation.

---
 .../saulexamples/nlp/Chunker/ChunkerApp.scala | 177 +++++++++---------
 .../nlp/Chunker/ChunkerClassifiers.scala      |   4 +-
 .../nlp/Chunker/ChunkerConstants.scala        |  16 ++
 .../nlp/Chunker/ChunkerDataModel.scala        |  24 ++-
 .../nlp/Chunker/ChunkerDataReader.scala       |  78 ++++++++
 .../nlp/Chunker/ChunkerSensors.scala          |   1 +
 .../nlp/Chunker/ChunkerUtilities.scala        |   1 +
 7 files changed, 201 insertions(+), 100 deletions(-)
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerConstants.scala
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataReader.scala

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
index d71bd07a..49195f3e 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
@@ -6,126 +6,119 @@
   */
 package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
 
-import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder
+import java.util.Properties
+
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation._
 import edu.illinois.cs.cogcomp.core.experiments.ClassificationTester
 import edu.illinois.cs.cogcomp.core.experiments.evaluators.ConstituentLabelingEvaluator
+import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
 import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils
+import edu.illinois.cs.cogcomp.saul.util.Logging
 
 import scala.collection.JavaConversions._
-import scala.collection.mutable
-import scala.io.Source
-
-object ChunkerConstants {
-  val SHALLOW_PARSE_GOLD_SPAN_VIEW = "SHALLOW_PARSE_GOLD"
-  val SHALLOW_PARSE_GOLD_BIO_VIEW = "SHALLOW_PARSE_GOLD_BIO"
 
-  val SHALLOW_PARSE_ANNOTATED_SPAN_VIEW = "SHALLOW_PARSE_ANNOTATED"
-  val SHALLOW_PARSE_ANNOTATED_BIO_VIEW = "SHALLOW_PARSE_ANNOTATED_BIO"
-}
 
-object ChunkerApp extends App {
+object ChunkerApp extends Logging {
   import ChunkerConstants._
 
   val trainFile = "../data/conll2000chunking/train.txt"
   val testFile = "../data/conll2000chunking/test.txt"
 
-  def parseData(fileName: String): Seq[TextAnnotation] = {
-    val arrayBuffer = mutable.Buffer[TextAnnotation]()
-
-    val tokenConstituents = mutable.ArrayBuffer[String]()
-    val posLabels = mutable.ArrayBuffer[String]()
-    val chunkLabels = mutable.ArrayBuffer[String]()
-    var numSentences = 0
-
-    Source.fromFile(fileName)
-      .getLines()
-      .foreach({ line: String =>
-        if (line.isEmpty) {
-          val sentenceList = List(tokenConstituents.toArray[String])
-          val textAnnotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentenceList)
-
-          val posView = new TokenLabelView(ViewNames.POS, textAnnotation)
-          val chunkLabelView = new SpanLabelView(SHALLOW_PARSE_GOLD_BIO_VIEW, textAnnotation)
-
-          textAnnotation.getView(ViewNames.TOKENS)
-            .getConstituents
-            .zipWithIndex
-            .foreach({
-              case (constituent: Constituent, idx: Int) =>
-                val posCons = constituent.cloneForNewViewWithDestinationLabel(ViewNames.POS, posLabels(idx))
-                posView.addConstituent(posCons)
-
-                val chunkCons = constituent.cloneForNewViewWithDestinationLabel(SHALLOW_PARSE_GOLD_BIO_VIEW, chunkLabels(idx))
-                chunkLabelView.addConstituent(chunkCons)
-            })
-
-          textAnnotation.addView(ViewNames.POS, posView)
-          textAnnotation.addView(SHALLOW_PARSE_GOLD_BIO_VIEW, chunkLabelView)
-
-          ChunkerUtilities.addGoldSpanLabelView(textAnnotation, SHALLOW_PARSE_GOLD_BIO_VIEW, SHALLOW_PARSE_GOLD_SPAN_VIEW)
-
-          arrayBuffer.append(textAnnotation)
-          tokenConstituents.clear()
-          posLabels.clear()
-          chunkLabels.clear()
-
-          numSentences += 1
-        } else {
-          val reader = line.split(" ")
-          tokenConstituents.append(reader(0))
-          posLabels.append(reader(1))
-          chunkLabels.append(reader(2))
-        }
-      })
+  val jarModelPath = ""
 
-    println("Number of sentences = " + numSentences)
+  object ChunkerExperimentType extends Enumeration {
+    val TrainAndTest, TestFromModel, Interactive = Value
 
-    arrayBuffer
+    def withNameOpt(s: String): Option[Value] = values.find(_.toString == s)
   }
 
-  lazy val trainData = parseData(trainFile)
-  lazy val testData = parseData(testFile)
+  def main(args: Array[String]): Unit = {
+    /** Try to parse the experiment type as input argument or use default */
+    val testType = args.headOption
+      .flatMap(ChunkerExperimentType.withNameOpt)
+      .getOrElse(ChunkerExperimentType.Interactive)
+
+    testType match {
+      case ChunkerExperimentType.TrainAndTest => trainAndTest()
+      case ChunkerExperimentType.TestFromModel => testWithPretrainedModels()
+      case ChunkerExperimentType.Interactive => interactiveWithPretrainedModels()
+    }
+  }
 
-  val jarModelPath = ""
+  private def loadModelFromJarPath(): Unit = {
+    // Load model from jar path
+//    ClassifierUtils.LoadClassifier(
+//      jarModelPath,
+//      ChunkerClassifiers.ChunkerClassifier)
+    ChunkerClassifiers.ChunkerClassifier.load()
+  }
+
+  private def getSentencesInTextAnnotation(taSeq: Seq[TextAnnotation]) = {
+    taSeq.flatMap({ textAnnotation: TextAnnotation =>
+      (0 until textAnnotation.getNumberOfSentences).map(textAnnotation.getSentence)
+    })
+  }
+
+  lazy val trainData = ChunkerDataReader.parseData(trainFile)
+  lazy val testData = ChunkerDataReader.parseData(testFile)
+
+  lazy val preTrainedAnnotator: ChunkerAnnotator = {
+    loadModelFromJarPath()
 
-  trainData.foreach({ textAnnotation: TextAnnotation =>
-    val numberOfSentences = textAnnotation.getNumberOfSentences
-    val sentences = (0 until numberOfSentences).map(textAnnotation.getSentence)
-    ChunkerDataModel.sentence.populate(sentences, train = true)
-  })
+    val annotatorInstance = new ChunkerAnnotator()
+    annotatorInstance.initialize(new ResourceManager(new Properties()))
+    annotatorInstance
+  }
 
-  testData.foreach({ textAnnotation: TextAnnotation =>
-    val numberOfSentences = textAnnotation.getNumberOfSentences
-    val sentences = (0 until numberOfSentences).map(textAnnotation.getSentence)
-    ChunkerDataModel.sentence.populate(sentences, train = false)
-  })
+  /** Note: This function does NOT populate testing instances.
+    * Also does not use GOLD POS tags. Instead a trained POSAnnotater is used. */
+  private def testModelImpl(): Unit = {
+    ClassifierUtils.TestClassifiers(ChunkerClassifiers.ChunkerClassifier)
 
-  ChunkerClassifiers.ChunkerClassifier.learn(10)
-  println(ChunkerClassifiers.ChunkerClassifier.test())
+    val evaluator = new ConstituentLabelingEvaluator()
+    val tester = new ClassificationTester()
 
-  val evaluator = new ConstituentLabelingEvaluator()
-  val tester = new ClassificationTester()
+    testData.foreach({ textAnnotation: TextAnnotation =>
+      // Remove POS View before evaluation.
+      textAnnotation.removeView(ViewNames.POS)
 
-  val chunkerAnnotator = new ChunkerAnnotator()
-  testData.foreach({ textAnnotation: TextAnnotation =>
-    // Remove POS View before evaluation.
-    textAnnotation.removeView(ViewNames.POS)
+      preTrainedAnnotator.addView(textAnnotation)
 
-    chunkerAnnotator.addView(textAnnotation)
+      val goldView = textAnnotation.getView(SHALLOW_PARSE_GOLD_SPAN_VIEW)
+      val annotatedView = textAnnotation.getView(SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
 
-    val goldView = textAnnotation.getView(SHALLOW_PARSE_GOLD_SPAN_VIEW)
-    val annotatedView = textAnnotation.getView(SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)
+      // Workaround for incorrect ConstituentLabelingEvaluator behaviour.
+      val predictedView = new SpanLabelView(SHALLOW_PARSE_GOLD_SPAN_VIEW, textAnnotation)
+      annotatedView.getConstituents.foreach({ cons: Constituent =>
+        predictedView.addConstituent(cons.cloneForNewView(SHALLOW_PARSE_GOLD_SPAN_VIEW))
+      })
 
-    // Workaround for incorrect ConstituentLabelingEvaluator behaviour.
-    val predictedView = new SpanLabelView(SHALLOW_PARSE_GOLD_SPAN_VIEW, textAnnotation)
-    annotatedView.getConstituents.foreach({ cons: Constituent =>
-      predictedView.addConstituent(cons.cloneForNewView(SHALLOW_PARSE_GOLD_SPAN_VIEW))
+      evaluator.evaluate(tester, goldView, predictedView)
     })
 
-    evaluator.evaluate(tester, goldView, predictedView)
-  })
+    println(tester.getPerformanceTable.toOrgTable)
+  }
+
+  def trainAndTest(): Unit = {
+    ChunkerDataModel.sentence.populate(getSentencesInTextAnnotation(trainData), train = true)
+    ChunkerDataModel.sentence.populate(getSentencesInTextAnnotation(testData), train = false)
+
+    ChunkerClassifiers.ChunkerClassifier.learn(50)
+    ClassifierUtils.SaveClassifiers(ChunkerClassifiers.ChunkerClassifier)
+
+    testModelImpl()
+  }
 
-  println(tester.getPerformanceTable.toOrgTable)
+  def testWithPretrainedModels(): Unit = {
+    loadModelFromJarPath()
+
+    ChunkerDataModel.sentence.populate(getSentencesInTextAnnotation(testData), train = false)
+
+    testModelImpl()
+  }
+
+  def interactiveWithPretrainedModels(): Unit = {
+
+  }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
index 17d5f274..392781c7 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
@@ -28,6 +28,8 @@ object ChunkerClassifiers {
     /** Label property for users classifier */
     override def label = chunkLabel
 
-    override def feature = using(wordTypeInformation, affixes, posWindow, capitalizationWindowProperty, previousTags)
+    override def feature = using(
+      wordTypeInformation, affixes, posWindow, capitalizationWindowProperty, previousTags,
+      mixed, SOPrevious)
   }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerConstants.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerConstants.scala
new file mode 100644
index 00000000..bd83ba40
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerConstants.scala
@@ -0,0 +1,16 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+/** Constants used by the Chunker experiment */
+object ChunkerConstants {
+  val SHALLOW_PARSE_GOLD_SPAN_VIEW = "SHALLOW_PARSE_GOLD"
+  val SHALLOW_PARSE_GOLD_BIO_VIEW = "SHALLOW_PARSE_GOLD_BIO"
+
+  val SHALLOW_PARSE_ANNOTATED_SPAN_VIEW = "SHALLOW_PARSE_ANNOTATED"
+  val SHALLOW_PARSE_ANNOTATED_BIO_VIEW = "SHALLOW_PARSE_ANNOTATED_BIO"
+}
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
index fe230581..24878d3f 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -13,6 +13,7 @@ import edu.illinois.cs.cogcomp.edison.features.factory.WordFeatureExtractorFacto
 import edu.illinois.cs.cogcomp.edison.features.lrec.{ Affixes, POSWindow, WordTypeInformation }
 import edu.illinois.cs.cogcomp.saul.datamodel.DataModel
 
+import scala.collection.mutable
 import scala.collection.JavaConversions._
 
 object ChunkerDataModel extends DataModel {
@@ -85,24 +86,33 @@ object ChunkerDataModel extends DataModel {
   // Formpp Feature
   val formpp = property(tokens, "Formpp") { token: Constituent =>
     val window = 2
+    val contextBuffer = new mutable.ArrayBuffer[String]()
+
     val surfaceForms: List[String] = forms(token)
 
     // Feature range
-    val range = for {
+    for {
       j <- 0 until window
       i <- surfaceForms.indices
-    } yield (j, i)
-
-    range.map({ case (j: Int, i: Int) =>
+    } {
       val contextStrings = for {
         context <- 0 until window
         if i + context < surfaceForms.length
       } yield s"${i}_${j}:${surfaceForms(i + context)}"
 
-      contextStrings.mkString("_")
-    })
-      .toList
+      contextBuffer.append(contextStrings.mkString("_"))
+    }
+
+    contextBuffer.toList
   }
 
+  // Mixed Feature
+  val mixed = property(tokens, "Mixed") { token: Constituent =>
+    ""
+  }
 
+  // SO Previous Feature
+  val SOPrevious = property(tokens, "SOPrevious") { token: Constituent =>
+    ""
+  }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataReader.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataReader.scala
new file mode 100644
index 00000000..4df460a7
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataReader.scala
@@ -0,0 +1,78 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
+
+import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{Constituent, SpanLabelView, TextAnnotation, TokenLabelView}
+import edu.illinois.cs.cogcomp.saul.util.Logging
+
+import scala.collection.mutable
+import scala.collection.JavaConversions._
+import scala.io.Source
+
+/** Data Reader for the CONLL format for training the Chunker */
+object ChunkerDataReader extends Logging {
+  import ChunkerConstants._
+
+  /** Parse the input data and create the POS View and GOLD Shallow Parse BIO View */
+  def parseData(fileName: String): Seq[TextAnnotation] = {
+    logger.info(s"Parsing file - $fileName")
+
+    val arrayBuffer = mutable.Buffer[TextAnnotation]()
+
+    val tokenConstituents = mutable.ArrayBuffer[String]()
+    val posLabels = mutable.ArrayBuffer[String]()
+    val chunkLabels = mutable.ArrayBuffer[String]()
+    var numSentences = 0
+
+    Source.fromFile(fileName)
+      .getLines()
+      .foreach({ line: String =>
+        if (line.isEmpty) {
+          val sentenceList = List(tokenConstituents.toArray[String])
+          val textAnnotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentenceList)
+
+          val posView = new TokenLabelView(ViewNames.POS, textAnnotation)
+          val chunkLabelView = new SpanLabelView(SHALLOW_PARSE_GOLD_BIO_VIEW, textAnnotation)
+
+          textAnnotation.getView(ViewNames.TOKENS)
+            .getConstituents
+            .zipWithIndex
+            .foreach({
+              case (constituent: Constituent, idx: Int) =>
+                val posCons = constituent.cloneForNewViewWithDestinationLabel(ViewNames.POS, posLabels(idx))
+                posView.addConstituent(posCons)
+
+                val chunkCons = constituent.cloneForNewViewWithDestinationLabel(SHALLOW_PARSE_GOLD_BIO_VIEW, chunkLabels(idx))
+                chunkLabelView.addConstituent(chunkCons)
+            })
+
+          textAnnotation.addView(ViewNames.POS, posView)
+          textAnnotation.addView(SHALLOW_PARSE_GOLD_BIO_VIEW, chunkLabelView)
+
+          ChunkerUtilities.addGoldSpanLabelView(textAnnotation, SHALLOW_PARSE_GOLD_BIO_VIEW, SHALLOW_PARSE_GOLD_SPAN_VIEW)
+
+          arrayBuffer.append(textAnnotation)
+          tokenConstituents.clear()
+          posLabels.clear()
+          chunkLabels.clear()
+
+          numSentences += 1
+        } else {
+          val reader = line.split(" ")
+          tokenConstituents.append(reader(0))
+          posLabels.append(reader(1))
+          chunkLabels.append(reader(2))
+        }
+      })
+
+    logger.info(s"Number of sentences : $numSentences")
+
+    arrayBuffer
+  }
+}
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
index b6d44d01..c5cc479a 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
@@ -12,6 +12,7 @@ import scala.collection.JavaConversions._
 
 object ChunkerSensors {
 
+  /** Sensor to populate tokens node from a Sentence instance */
   def getTokensInSentence(sentence: Sentence): Seq[Constituent] = {
     sentence.getView(ChunkerConstants.SHALLOW_PARSE_GOLD_BIO_VIEW).getConstituents
   }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
index a48e81ff..269f6323 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerUtilities.scala
@@ -66,6 +66,7 @@ object ChunkerUtilities extends Logging {
     ta.addView(destView, destinationView)
   }
 
+  /** Convert BIO labelled annotation to SpanLabelView using some heuristics to handle error scenarios */
   def addSpanLabelViewUsingHeuristics(ta: TextAnnotation, sourceBIOView: String, destView: String): Unit = {
     assert(ta.hasView(sourceBIOView))
     assert(!ta.hasView(destView))

From e1a82e3de3a0e3c74351a0edbf93ff49452ccf7b Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 15:40:00 -0600
Subject: [PATCH 07/11] Add all required features.

---
 .../cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
index 392781c7..5dc3a964 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerClassifiers.scala
@@ -30,6 +30,6 @@ object ChunkerClassifiers {
 
     override def feature = using(
       wordTypeInformation, affixes, posWindow, capitalizationWindowProperty, previousTags,
-      mixed, SOPrevious)
+      forms, formpp, mixed, SOPrevious)
   }
 }

From 6f9e56278495b96085d82c9e6ecf0a8b2079a5da Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 16:09:23 -0600
Subject: [PATCH 08/11] Some fixes. Remove redundant files etc.

---
 .../nlp/Chunker/ChunkerAnnotator.scala        |  2 +-
 .../saulexamples/nlp/Chunker/ChunkerApp.scala | 21 ++++++++++++++++++-
 .../nlp/Chunker/ChunkerDataModel.scala        | 15 +++++++++----
 .../nlp/Chunker/ChunkerSensors.scala          | 19 -----------------
 4 files changed, 32 insertions(+), 25 deletions(-)
 delete mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
index 8c818d6f..ef8a37f5 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerAnnotator.scala
@@ -32,7 +32,7 @@ class ChunkerAnnotator(val useHeuristics: Boolean = true)
       ChunkerAnnotator.localPOSAnnotator.addView(ta)
     }
 
-    val tokens = ta.getView(ChunkerConstants.SHALLOW_PARSE_GOLD_BIO_VIEW).getConstituents
+    val tokens = ta.getView(ViewNames.TOKENS).getConstituents
 
     ChunkerDataModel.sentence.clear()
     val sentences = (0 until ta.getNumberOfSentences).map(ta.getSentence)
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
index 49195f3e..bb9fef14 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerApp.scala
@@ -13,10 +13,13 @@ import edu.illinois.cs.cogcomp.core.datastructures.textannotation._
 import edu.illinois.cs.cogcomp.core.experiments.ClassificationTester
 import edu.illinois.cs.cogcomp.core.experiments.evaluators.ConstituentLabelingEvaluator
 import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
+import edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer
+import edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder
 import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils
 import edu.illinois.cs.cogcomp.saul.util.Logging
 
 import scala.collection.JavaConversions._
+import scala.io.StdIn
 
 
 object ChunkerApp extends Logging {
@@ -119,6 +122,22 @@ object ChunkerApp extends Logging {
   }
 
   def interactiveWithPretrainedModels(): Unit = {
-
+    val taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer())
+
+    while (true) {
+      println("Enter a sentence to annotate (or Press Enter to exit)")
+      val input = StdIn.readLine()
+
+      input match {
+        case sentence: String if sentence.trim.nonEmpty =>
+          // Create a Text Annotation with the current input sentence.
+          val ta = taBuilder.createTextAnnotation(sentence.trim)
+          preTrainedAnnotator.addView(ta)
+          println("POS View            : " + ta.getView(ViewNames.POS).toString)
+          println("Annotated BIO View  : " + ta.getView(SHALLOW_PARSE_ANNOTATED_BIO_VIEW))
+          println("Annotated Span View : " + ta.getView(SHALLOW_PARSE_ANNOTATED_SPAN_VIEW))
+        case _ => return
+      }
+    }
   }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
index 24878d3f..a3ff1a20 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -12,6 +12,7 @@ import edu.illinois.cs.cogcomp.edison.features.ContextFeatureExtractor
 import edu.illinois.cs.cogcomp.edison.features.factory.WordFeatureExtractorFactory
 import edu.illinois.cs.cogcomp.edison.features.lrec.{ Affixes, POSWindow, WordTypeInformation }
 import edu.illinois.cs.cogcomp.saul.datamodel.DataModel
+import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors
 
 import scala.collection.mutable
 import scala.collection.JavaConversions._
@@ -21,10 +22,16 @@ object ChunkerDataModel extends DataModel {
   val tokens = node[Constituent]
 
   val sentenceToTokens = edge(sentence, tokens)
-  sentenceToTokens.addSensor(ChunkerSensors.getTokensInSentence _)
-
-  // Label
-  val chunkLabel = property(tokens, "ChunkLabel") { token: Constituent => token.getLabel }
+  sentenceToTokens.addSensor(CommonSensors.sentenceToTokens _)
+
+  // GOLD BIO label for SHALLOW_PARSE
+  val chunkLabel = property(tokens, "ChunkLabel") { token: Constituent =>
+    token.getTextAnnotation
+      .getView(ChunkerConstants.SHALLOW_PARSE_GOLD_BIO_VIEW)
+      .getConstituentsCovering(token)
+      .head
+      .getLabel
+  }
 
   // Affixes feature
   private val affixFeatureExtractor = new Affixes(ViewNames.TOKENS)
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
deleted file mode 100644
index c5cc479a..00000000
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerSensors.scala
+++ /dev/null
@@ -1,19 +0,0 @@
-/** This software is released under the University of Illinois/Research and Academic Use License. See
-  * the LICENSE file in the root folder for details. Copyright (c) 2016
-  *
-  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
-  * http://cogcomp.cs.illinois.edu/
-  */
-package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker
-
-import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Sentence }
-
-import scala.collection.JavaConversions._
-
-object ChunkerSensors {
-
-  /** Sensor to populate tokens node from a Sentence instance */
-  def getTokensInSentence(sentence: Sentence): Seq[Constituent] = {
-    sentence.getView(ChunkerConstants.SHALLOW_PARSE_GOLD_BIO_VIEW).getConstituents
-  }
-}

From ed0d77f27855f96c5b0aca01e7ee02d3a5e5acaa Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 17:05:33 -0600
Subject: [PATCH 09/11] Add the remaining features.

---
 .../nlp/Chunker/ChunkerDataModel.scala        | 86 ++++++++++++++++++-
 1 file changed, 83 insertions(+), 3 deletions(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
index a3ff1a20..bc4e0284 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -114,12 +114,92 @@ object ChunkerDataModel extends DataModel {
   }
 
   // Mixed Feature
+  private val mixedBefore = 2
+  private val mixedAfter = 2
+  private val mixedK = 2
   val mixed = property(tokens, "Mixed") { token: Constituent =>
-    ""
+    val tokenNeighborhood = tokens.getWithWindow(token, -mixedBefore, mixedAfter, sameSentenceTokensFilter).flatten
+
+    val tags = new mutable.ArrayBuffer[String](mixedBefore + mixedAfter + 1)
+    val forms = new mutable.ArrayBuffer[String](mixedBefore + mixedAfter + 1)
+
+    tokenNeighborhood.foreach({ tokenNear: Constituent =>
+      val posCons = tokenNear.getTextAnnotation
+        .getView(ViewNames.POS)
+        .getConstituentsCovering(tokenNear)
+        .head
+
+      tags.append(posCons.getLabel)
+      forms.append(tokenNear.getSurfaceForm)
+    })
+
+    val mixedFeatures = new mutable.ArrayBuffer[String]()
+
+    for {
+      j <- 1 to mixedK
+      x <- 0 to 2
+    } {
+      var t: Boolean = true
+      tags.zipWithIndex
+        .foreach({ case (tag: String, i: Int) =>
+            val stringBuffer = new StringBuffer()
+
+            for {
+              context <- 0 until j
+              if i + context < tags.size
+            } {
+              if (context != 0) stringBuffer.append("_")
+
+              if (t && x == 0) {
+                stringBuffer.append(tags(i + context))
+              } else {
+                stringBuffer.append(forms(i + context))
+              }
+
+              t = !t
+            }
+
+            mixedFeatures.append(s"${i}_${j}:${stringBuffer.toString}")
+        })
+    }
+
+    mixedFeatures.toList
   }
 
   // SO Previous Feature
-  val SOPrevious = property(tokens, "SOPrevious") { token: Constituent =>
-    ""
+  val SOPrevious = property(tokens, "SOPrevious", cache = true) { token: Constituent =>
+    val tokenNeighborhood = tokens.getWithWindow(token, -2, 0, sameSentenceTokensFilter).flatten
+
+    val tags = new mutable.ArrayBuffer[String](3)
+    val labels = new mutable.ArrayBuffer[String](2)
+
+    tokenNeighborhood.foreach({ tokenNear: Constituent =>
+      val posCons = tokenNear.getTextAnnotation
+        .getView(ViewNames.POS)
+        .getConstituentsCovering(tokenNear)
+        .head
+
+      tags.append(posCons.getLabel)
+
+      // Use Label while training and prediction while testing.
+      if (ChunkerClassifiers.ChunkerClassifier.isTraining) {
+        labels.append(chunkLabel(tokenNear))
+      } else {
+        labels.append(ChunkerClassifiers.ChunkerClassifier(tokenNear))
+      }
+    })
+
+    tags.append(tags.last)
+
+    val features = new mutable.ArrayBuffer[String]()
+
+    if (labels.size >= 2) {
+      features.append(s"ll:${labels(0)}_${labels(1)}")
+      features.append(s"lt2:${labels(1)}_${tags(2)}")
+    }
+
+    features.append(s"lt1:${labels(0)}_${tags(1)}")
+
+    features.toList
   }
 }

From cfdef89094cc20cf23df443dfca6bf4dca37a865 Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 18:55:17 -0600
Subject: [PATCH 10/11] Add Readme for Chunker.

---
 saul-examples/README.md                       |  7 ++-
 .../nlp/Chunker/ChunkerDataModel.scala        | 25 ++++------
 .../saulexamples/nlp/Chunker/README.md        | 46 +++++++++++++++++++
 3 files changed, 61 insertions(+), 17 deletions(-)
 create mode 100644 saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md

diff --git a/saul-examples/README.md b/saul-examples/README.md
index fbca5a9e..16a0094e 100644
--- a/saul-examples/README.md
+++ b/saul-examples/README.md
@@ -31,12 +31,15 @@ In Spatial Role Labeling, we try to find spatial relations and label spatial rol
 6. [Part-of-Speech Tagging](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/README.md): 
 Part-of-Speech Tagging is the identification of words as nouns, verbs, adjectives, adverbs, etc.
 
-7. [Twitter Sentiment Analysis] (src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TwitterSentimentAnalysis/README.md):
+7. [Twitter Sentiment Analysis](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TwitterSentimentAnalysis/README.md):
 This example trains models for classifying twitter posts as positive, negative or neutral. It also includes a twitter client for real time processing of the tweets.
 
-8. [Question Type Classification] (src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/README.md):
+8. [Question Type Classification](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/README.md):
 This example contains a classifications to categorize questions into different semantic classes based on the possible semantic types of the answers. 
 
+9. [Chunker](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md):
+This example contains an implementation of a Shallow Parsing system.
+
 
 * Note: Examples are under active development. 
 
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
index bc4e0284..5dff966c 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/ChunkerDataModel.scala
@@ -124,12 +124,7 @@ object ChunkerDataModel extends DataModel {
     val forms = new mutable.ArrayBuffer[String](mixedBefore + mixedAfter + 1)
 
     tokenNeighborhood.foreach({ tokenNear: Constituent =>
-      val posCons = tokenNear.getTextAnnotation
-        .getView(ViewNames.POS)
-        .getConstituentsCovering(tokenNear)
-        .head
-
-      tags.append(posCons.getLabel)
+      tags.append(CommonSensors.getPosTag(tokenNear))
       forms.append(tokenNear.getSurfaceForm)
     })
 
@@ -168,18 +163,13 @@ object ChunkerDataModel extends DataModel {
 
   // SO Previous Feature
   val SOPrevious = property(tokens, "SOPrevious", cache = true) { token: Constituent =>
-    val tokenNeighborhood = tokens.getWithWindow(token, -2, 0, sameSentenceTokensFilter).flatten
+    val tokenNeighborhood = tokens.getWithWindow(token, -2, -1, sameSentenceTokensFilter).flatten
 
     val tags = new mutable.ArrayBuffer[String](3)
     val labels = new mutable.ArrayBuffer[String](2)
 
     tokenNeighborhood.foreach({ tokenNear: Constituent =>
-      val posCons = tokenNear.getTextAnnotation
-        .getView(ViewNames.POS)
-        .getConstituentsCovering(tokenNear)
-        .head
-
-      tags.append(posCons.getLabel)
+      tags.append(CommonSensors.getPosTag(tokenNear))
 
       // Use Label while training and prediction while testing.
       if (ChunkerClassifiers.ChunkerClassifier.isTraining) {
@@ -189,16 +179,21 @@ object ChunkerDataModel extends DataModel {
       }
     })
 
-    tags.append(tags.last)
+    tags.append(CommonSensors.getPosTag(token))
 
     val features = new mutable.ArrayBuffer[String]()
 
     if (labels.size >= 2) {
       features.append(s"ll:${labels(0)}_${labels(1)}")
+    }
+
+    if (labels.size >= 2 && tags.size >= 3) {
       features.append(s"lt2:${labels(1)}_${tags(2)}")
     }
 
-    features.append(s"lt1:${labels(0)}_${tags(1)}")
+    if (tags.size >= 2) {
+      features.append(s"lt1:${labels(0)}_${tags(1)}")
+    }
 
     features.toList
   }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md
new file mode 100644
index 00000000..a38939b3
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md
@@ -0,0 +1,46 @@
+# Chunker (Shallow Parser)
+
+Chunking (Shallow Parsing) is the identification of constituents (noun groups, verbs, verb groups etc.) in a sentence. 
+The system implemented here is based on the following paper:
+
+```
+@inproceedings{PunyakanokRo01,
+    author = {V. Punyakanok and D. Roth},
+    title = {The Use of Classifiers in Sequential Inference},
+    booktitle = {NIPS},
+    pages = {995--1001},
+    year = {2001},
+    publisher = {MIT Press},
+    acceptance = {25/514 (4.8\%) Oral Presentations; 152/514 (29\%) overall},
+    url = "http://cogcomp.cs.illinois.edu/papers/nips01.pdf",
+    funding = {NSF98 CAREER},
+    projects = {LnI,SI,IE,NE,NLP,CCM},
+    comment = {Structured, sequential output; Sequence Prediction: HMM with classifiers, Conditional Models, Constraint Satisfaction},
+}
+```
+
+## Performance
+
+
+The data for the experiments was extracted from the dataset for the [CONLL 2000 Chunking Shared Task](http://www.cnts.ua.ac.be/conll2000/chunking/).
+The training corpus consists of 8,936 sentences composed of 210,996 tokens totally. 
+The test corpus consists of 2,012 sentences composed of $$ tokens totally.
+
+### Evaluation: BIO Labeling
+
+### Evaluation: Span Labeling
+
+Note: While evaluation (testing), POS tags are provided by an implementation of [POSTagger in Saul](https://github.com/IllinoisCogComp/saul/blob/master/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/README.md).
+
+## Testing the Chunker interactively
+
+For a quick demo of the Chunker Tagger, you can run the following command in the project's root folder.
+
+```shell
+sbt "project saulExamples" "runMain edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker.ChunkerApp"
+```
+
+
+## Related
+
+If you are looking for an implementation of the Chunker in Java, have a look at [this repository](https://github.com/IllinoisCogComp/illinois-cogcomp-nlp/blob/master/chunker/README.md).
\ No newline at end of file

From 93bacfeaa800d240e73e80fdc36d3b8791a1afc9 Mon Sep 17 00:00:00 2001
From: Bhargav Mangipudi <mangipu2@illinois.edu>
Date: Wed, 1 Mar 2017 22:27:49 -0600
Subject: [PATCH 11/11] Evaluation.

---
 .../saulexamples/nlp/Chunker/README.md        | 43 ++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md
index a38939b3..b5a9ff17 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md
@@ -24,12 +24,53 @@ The system implemented here is based of the following paper:
 
 The data for the experiments was extracted from the dataset for the [CONLL 2000 Chunking Shared Task](http://www.cnts.ua.ac.be/conll2000/chunking/).
 The training corpus consists of 8,936 sentences composed of 210,996 tokens totally. 
-The test corpus consists of 2,012 sentences composed of $$ tokens totally.
+The test corpus consists of 2,012 sentences composed of 47,372 tokens totally.
 
 ### Evaluation: BIO Labeling
 
+| Label     |  Precision | Recall |  F1    | LCount | PCount |
+|-----------|-----------:|-------:|-------:|-------:|-------:|
+| B-ADJP    |  80.323    | 68.192 | 73.762 |   437  |  371   |
+| B-ADVP    |  82.275    | 79.330 | 80.776 |   866  |  835   |
+| B-CONJP   |  40.000    | 66.667 | 50.000 |     9  |   15   |
+| B-INTJ    | 100.000    | 50.000 | 66.667 |     2  |    1   |
+| B-LST     |   0.000    | 0.000  | 0.000  |    5   |   3    |
+| B-NP      |  95.718    | 96.412 | 96.064 | 12404  | 12494  |
+| B-PP      |  96.456    | 97.359 | 96.905 |  4808  | 4853   |
+| B-PRT     |  79.048    | 78.302 | 78.673 |   106  | 105    |
+| B-SBAR    |  87.674    | 82.430 | 84.971 |   535  |  503   |
+| B-UCP     |   0.000    | 0.000  | 0.000  |    0   |  52    |
+| B-VP      |  94.581    | 95.292 | 94.935 |  4652  | 4687   |
+| I-ADJP    |  77.982    | 50.898 | 61.594 |   167  |  109   |
+| I-ADVP    |  60.274    | 49.438 | 54.321 |    89  |   73   |
+| I-CONJP   |  55.556    | 76.923 | 64.516 |    13  |   18   |
+| I-LST     |   0.000    | 0.000  | 0.000  |    2   |   0    |
+| I-NP      |  96.251    | 95.795 | 96.023 | 14365  | 14297  |
+| I-PP      |  86.111    | 64.583 | 73.810 |    48  |   36   |
+| I-PRT     |   0.000    | 0.000  | 0.000  |    0   |   1    |
+| I-SBAR    |  10.526    | 50.000 | 17.391 |     4  |   19   |
+| I-UCP     |   0.000    | 0.000  | 0.000  |    0   |   9    |
+| I-VP      |  94.935    | 93.712 | 94.319 |  2640  | 2606   |
+| O         |  95.172    |96.174  | 95.670 |  6169  | 6234   |
+| **Accuracy**  |  **94.945**     |  **-**     | **-**      | **-**      | **47321**  |
+
 ### Evaluation: Span Labeling
 
+| Label | Total Gold | Total Predicted | Correct Prediction | Precision | Recall | F1 |
+| ----- | ---:| ---:| ---:| ---:| ---:| ---:|
+| ADJP | 438 | 515 | 296 | 57.48 | 67.58 | 62.12 |
+| ADVP | 866 | 1032 | 670 | 64.92 | 77.37 | 70.6 |
+| CONJP | 9 | 19 | 6 | 31.58 | 66.67 | 42.86 |
+| INTJ | 2 | 2 | 1 | 50 | 50 | 50 |
+| LST | 5 | 7 | 0 | 0 | 0 | 0 |
+| NP | 12422 | 13376 | 11574 | 86.53 | 93.17 | 89.73 |
+| PP | 4811 | 4994 | 4684 | 93.79 | 97.36 | 95.54 |
+| PRT | 106 | 138 | 86 | 62.32 | 81.13 | 70.49 |
+| SBAR | 535 | 603 | 444 | 73.63 | 82.99 | 78.03 |
+| UCP | 0 | 63 | 0 | 0 | 0 | 0 |
+| VP | 4658 | 5014 | 4335 | 86.46 | 93.07 | 89.64 |
+| **All** | **23852** | **25763** | **22096** | **85.77** | **92.64** | **89.07** |
+
 Note: While evaluation (testing), POS tags are provided by an implementation of [POSTagger in Saul](https://github.com/IllinoisCogComp/saul/blob/master/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/README.md).
 
 ## Testing the Chunker interactively