Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions saul-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,15 @@ In Spatial Role Labeling, we try to find spatial relations and label spatial rol
6. [Part-of-Speech Tagging](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/README.md):
Part-of-Speech Tagging is the identification of words as nouns, verbs, adjectives, adverbs, etc.

7. [Twitter Sentiment Analysis] (src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TwitterSentimentAnalysis/README.md):
7. [Twitter Sentiment Analysis](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TwitterSentimentAnalysis/README.md):
This example trains models for classifying Twitter posts as positive, negative, or neutral. It also includes a Twitter client for real-time processing of tweets.

8. [Question Type Classification] (src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/README.md):
8. [Question Type Classification](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/README.md):
This example contains a classifier that categorizes questions into different semantic classes based on the possible semantic types of the answers.

9. [Chunker](src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/Chunker/README.md):
This example contains an implementation of a Shallow Parsing system.


* Note: Examples are under active development.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/** This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker

import edu.illinois.cs.cogcomp.annotation.Annotator
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{Constituent, TextAnnotation, TokenLabelView}
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.{POSAnnotator, POSTaggerApp}

import scala.collection.JavaConversions._

/** Annotator that adds the chunker's BIO and span views to a TextAnnotation.
  *
  * It is registered under `ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW` and declares the
  * TOKENS view as its only required view.
  *
  * @param useHeuristics Selects how the span view is derived from the predicted BIO view:
  *                      `ChunkerUtilities.addSpanLabelViewUsingHeuristics` when true (meant to
  *                      fix up the BIO annotation), `ChunkerUtilities.addGoldSpanLabelView`
  *                      otherwise.
  */
class ChunkerAnnotator(val useHeuristics: Boolean = true)
  extends Annotator(ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW, Array(ViewNames.TOKENS)) {

  /** Nothing to set up: the resource manager is not consulted. */
  override def initialize(rm: ResourceManager): Unit = ()

  /** Adds the predicted BIO view and the derived span view to a TextAnnotation.
    * A POS view is added first when the TextAnnotation does not have one yet.
    *
    * Note: Assumes that the classifiers are populated with required models.
    * Note: The contents of `ChunkerDataModel.sentence` are replaced by the sentences of `ta`.
    *
    * @param ta TextAnnotation instance
    */
  override def addView(ta: TextAnnotation): Unit = {
    val posViewMissing = !ta.hasView(ViewNames.POS)
    if (posViewMissing) ChunkerAnnotator.localPOSAnnotator.addView(ta)

    val tokenConstituents = ta.getView(ViewNames.TOKENS).getConstituents

    // Reset the data model and load only this text's sentences, as non-training instances.
    ChunkerDataModel.sentence.clear()
    val taSentences =
      for (sentenceId <- 0 until ta.getNumberOfSentences) yield ta.getSentence(sentenceId)
    ChunkerDataModel.sentence.populate(taSentences, train = false)

    val bioView = new TokenLabelView(ChunkerConstants.SHALLOW_PARSE_ANNOTATED_BIO_VIEW, ta)
    val bioViewName = bioView.getViewName

    // One constituent per token, labelled with the classifier's prediction for that token.
    tokenConstituents.foreach { token: Constituent =>
      val predictedLabel = ChunkerClassifiers.ChunkerClassifier(token)
      bioView.addConstituent(token.cloneForNewViewWithDestinationLabel(bioViewName, predictedLabel))
    }

    ta.addView(bioViewName, bioView)

    // Derive the span view from the BIO view that was just attached.
    val spanViewName = ChunkerConstants.SHALLOW_PARSE_ANNOTATED_SPAN_VIEW
    if (useHeuristics)
      ChunkerUtilities.addSpanLabelViewUsingHeuristics(ta, bioViewName, spanViewName)
    else
      ChunkerUtilities.addGoldSpanLabelView(ta, bioViewName, spanViewName)
  }
}

object ChunkerAnnotator {

  /** POS annotator obtained from `POSTaggerApp.getPretrainedAnnotator()`.
    * Being a lazy val, it is only created the first time it is accessed.
    */
  private lazy val localPOSAnnotator: POSAnnotator =
    POSTaggerApp.getPretrainedAnnotator()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/** This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker

import java.util.Properties

import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
import edu.illinois.cs.cogcomp.core.datastructures.textannotation._
import edu.illinois.cs.cogcomp.core.experiments.ClassificationTester
import edu.illinois.cs.cogcomp.core.experiments.evaluators.ConstituentLabelingEvaluator
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
import edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer
import edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder
import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils
import edu.illinois.cs.cogcomp.saul.util.Logging

import scala.collection.JavaConversions._
import scala.io.StdIn


/** Command-line driver for the Chunker example: trains, evaluates, or interactively runs the
  * chunker depending on the first program argument.
  */
object ChunkerApp extends Logging {
  import ChunkerConstants._

  /** Path of the file parsed into `trainData`. */
  val trainFile = "../data/conll2000chunking/train.txt"

  /** Path of the file parsed into `testData`. */
  val testFile = "../data/conll2000chunking/test.txt"

  /** Only referenced by the disabled jar-loading code in `loadModelFromJarPath`. */
  val jarModelPath = ""

  /** Modes that can be selected through the first program argument. */
  object ChunkerExperimentType extends Enumeration {
    val TrainAndTest, TestFromModel, Interactive = Value

    /** Looks a mode up by its exact name; `None` when no mode is called `s`. */
    def withNameOpt(s: String): Option[Value] = values.find(mode => mode.toString == s)
  }

  /** Runs the mode named by the first argument.
    * A missing or unrecognised argument selects the interactive mode.
    */
  def main(args: Array[String]): Unit = {
    val requestedMode = args.headOption.flatMap(ChunkerExperimentType.withNameOpt)

    requestedMode.getOrElse(ChunkerExperimentType.Interactive) match {
      case ChunkerExperimentType.TrainAndTest => trainAndTest()
      case ChunkerExperimentType.TestFromModel => testWithPretrainedModels()
      case ChunkerExperimentType.Interactive => interactiveWithPretrainedModels()
    }
  }

  /** Loads the chunker classifier through its own `load()` method. */
  private def loadModelFromJarPath(): Unit = {
    // Loading from `jarModelPath` is currently disabled:
    // ClassifierUtils.LoadClassifier(
    //   jarModelPath,
    //   ChunkerClassifiers.ChunkerClassifier)
    ChunkerClassifiers.ChunkerClassifier.load()
  }

  /** Collects every sentence of every given TextAnnotation, in order. */
  private def getSentencesInTextAnnotation(taSeq: Seq[TextAnnotation]) = {
    for {
      ta <- taSeq
      sentenceId <- 0 until ta.getNumberOfSentences
    } yield ta.getSentence(sentenceId)
  }

  /** Training data, parsed from `trainFile` on first access. */
  lazy val trainData = ChunkerDataReader.parseData(trainFile)

  /** Testing data, parsed from `testFile` on first access. */
  lazy val testData = ChunkerDataReader.parseData(testFile)

  /** Annotator created after the classifier model has been loaded; initialised with an empty
    * configuration.
    */
  lazy val preTrainedAnnotator: ChunkerAnnotator = {
    loadModelFromJarPath()

    val emptyConfig = new ResourceManager(new Properties())
    val annotator = new ChunkerAnnotator()
    annotator.initialize(emptyConfig)
    annotator
  }

  /** Tests the classifier and prints a span-level performance table for `testData`.
    *
    * Note: This function does NOT populate testing instances.
    * Also does not use GOLD POS tags. Instead a trained POSAnnotator is used.
    */
  private def testModelImpl(): Unit = {
    ClassifierUtils.TestClassifiers(ChunkerClassifiers.ChunkerClassifier)

    val spanEvaluator = new ConstituentLabelingEvaluator()
    val spanTester = new ClassificationTester()

    testData.foreach { ta: TextAnnotation =>
      // Drop the existing POS view so that the annotator has to produce its own.
      ta.removeView(ViewNames.POS)
      preTrainedAnnotator.addView(ta)

      val goldSpans = ta.getView(SHALLOW_PARSE_GOLD_SPAN_VIEW)
      val annotatedSpans = ta.getView(SHALLOW_PARSE_ANNOTATED_SPAN_VIEW)

      // Workaround for incorrect ConstituentLabelingEvaluator behaviour: the predicted
      // constituents are copied into a view that carries the gold view's name.
      val predictedSpans = new SpanLabelView(SHALLOW_PARSE_GOLD_SPAN_VIEW, ta)
      annotatedSpans.getConstituents.foreach { predicted: Constituent =>
        predictedSpans.addConstituent(predicted.cloneForNewView(SHALLOW_PARSE_GOLD_SPAN_VIEW))
      }

      spanEvaluator.evaluate(spanTester, goldSpans, predictedSpans)
    }

    println(spanTester.getPerformanceTable.toOrgTable)
  }

  /** Populates the data model with training and testing sentences, trains the classifier,
    * saves it, and then evaluates it.
    */
  def trainAndTest(): Unit = {
    val sentenceNode = ChunkerDataModel.sentence
    sentenceNode.populate(getSentencesInTextAnnotation(trainData), train = true)
    sentenceNode.populate(getSentencesInTextAnnotation(testData), train = false)

    val chunker = ChunkerClassifiers.ChunkerClassifier
    chunker.learn(50)
    ClassifierUtils.SaveClassifiers(chunker)

    testModelImpl()
  }

  /** Loads the saved classifier, populates the testing sentences, and evaluates. */
  def testWithPretrainedModels(): Unit = {
    loadModelFromJarPath()

    val testSentences = getSentencesInTextAnnotation(testData)
    ChunkerDataModel.sentence.populate(testSentences, train = false)

    testModelImpl()
  }

  /** Reads sentences from standard input and prints their POS, BIO and span views.
    * Stops at the first blank line or at end of input.
    */
  def interactiveWithPretrainedModels(): Unit = {
    val taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer())

    // Lazily evaluated: the prompt is printed right before each read, and the stream ends on
    // the first null (end of input) or blank line.
    val enteredSentences = Iterator
      .continually {
        println("Enter a sentence to annotate (or Press Enter to exit)")
        StdIn.readLine()
      }
      .takeWhile(line => line != null && line.trim.nonEmpty)

    enteredSentences.foreach { line =>
      // Create a Text Annotation with the current input sentence.
      val ta = taBuilder.createTextAnnotation(line.trim)
      preTrainedAnnotator.addView(ta)

      println("POS View : " + ta.getView(ViewNames.POS).toString)
      println("Annotated BIO View : " + ta.getView(SHALLOW_PARSE_ANNOTATED_BIO_VIEW))
      println("Annotated Span View : " + ta.getView(SHALLOW_PARSE_ANNOTATED_SPAN_VIEW))
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/** This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker

import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent
import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner }
import edu.illinois.cs.cogcomp.saul.classifier.Learnable

/** Classifiers of the Chunker example. */
object ChunkerClassifiers {
  import ChunkerDataModel._

  /** Learnable over the `tokens` member of `ChunkerDataModel`, labelled by `chunkLabel`. */
  object ChunkerClassifier extends Learnable[Constituent](tokens) {

    /** Sparse network learner whose base learner is an averaged perceptron configured with
      * learning rate 0.1 and thickness 0.2.
      */
    override lazy val classifier = {
      val perceptronParams = new SparseAveragedPerceptron.Parameters()
      perceptronParams.learningRate = 0.1
      perceptronParams.thickness = 0.2

      new SparseNetworkLearner(new SparseAveragedPerceptron(perceptronParams))
    }

    /** Property used as the label of this classifier. */
    override def label = chunkLabel

    /** Properties used as the features of this classifier. */
    override def feature = using(
      wordTypeInformation,
      affixes,
      posWindow,
      capitalizationWindowProperty,
      previousTags,
      forms,
      formpp,
      mixed,
      SOPrevious
    )
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/** This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.saulexamples.nlp.Chunker

/** View names used by the Chunker experiment. */
object ChunkerConstants {

  /** Name of the gold span view. */
  val SHALLOW_PARSE_GOLD_SPAN_VIEW: String = "SHALLOW_PARSE_GOLD"

  /** Name of the gold BIO view. */
  val SHALLOW_PARSE_GOLD_BIO_VIEW: String = "SHALLOW_PARSE_GOLD_BIO"

  /** Name of the span view produced by the chunker annotator. */
  val SHALLOW_PARSE_ANNOTATED_SPAN_VIEW: String = "SHALLOW_PARSE_ANNOTATED"

  /** Name of the BIO view produced by the chunker annotator. */
  val SHALLOW_PARSE_ANNOTATED_BIO_VIEW: String = "SHALLOW_PARSE_ANNOTATED_BIO"
}
Loading