diff --git a/build.sbt b/build.sbt
index 4746a096..8ce478d1 100644
--- a/build.sbt
+++ b/build.sbt
@@ -102,6 +102,8 @@ lazy val saulExamples = (project in file("saul-examples")).
       ccgGroupId % "saul-pos-tagger-models" % "1.4",
       ccgGroupId % "saul-er-models" % "1.8",
       ccgGroupId % "saul-srl-models" % "1.3",
+      ccgGroupId % "saul-qaTypeClassification-models" % "2.0",
+      ccgGroupId % "qustionTypeClassification-resources" % "1.0",
       "org.json" % "json" % "20140107",
       "com.twitter" % "hbc-core" % "2.2.0",
       "org.rogach" %% "scallop" % "2.0.5"
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeAnnotator.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeAnnotator.scala
new file mode 100644
index 00000000..51d1c405
--- /dev/null
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeAnnotator.scala
@@ -0,0 +1,67 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.QuestionTypeClassification
+
+import edu.illinois.cs.cogcomp.annotation.Annotator
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, SpanLabelView, TextAnnotation }
+import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
+import edu.illinois.cs.cogcomp.saulexamples.nlp.QuestionTypeClassification.QuestionTypeClassificationClassifiers.{ CoarseTypeClassifier, FineTypeClassifier }
+
+/** Annotator that labels a whole question with its fine and coarse question type.
+  *
+  * `addView` builds a [[SpanLabelView]] named `finalViewName` holding two constituents that both
+  * span all tokens of the text: one labelled by the fine classifier and one by the coarse
+  * classifier, each scored with that classifier's score for the label it predicted.
+  *
+  * @param finalViewName name of the view added to the annotated text
+  */
+class QuestionTypeAnnotator(val finalViewName: String = "QUESTION_TYPE")
+  extends Annotator(finalViewName, Array(ViewNames.TOKENS, ViewNames.NER_CONLL,
+    ViewNames.SHALLOW_PARSE, ViewNames.POS, ViewNames.LEMMA)) {
+
+  /** Directory both classifiers load their saved models from. */
+  private val classifierModelDir =
+    "models/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/models/"
+
+  /** No eager setup: the classifiers below are lazy and load their models on first use. */
+  override def initialize(rm: ResourceManager): Unit = {}
+
+  /** Coarse-type classifier; its model is loaded from `classifierModelDir` on first access. */
+  lazy val coarseClassifier = {
+    val c = new CoarseTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
+    c.modelDir = classifierModelDir
+    c.load()
+    c
+  }
+
+  /** Fine-type classifier; its model is loaded from `classifierModelDir` on first access. */
+  lazy val fineClassifier = {
+    val c = new FineTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
+    c.modelDir = classifierModelDir
+    c.load()
+    c
+  }
+
+  /** Classifies the question held by `ta` and adds the resulting view to it.
+    *
+    * @param ta text annotation of the question; its text and the annotation itself are wrapped
+    *           in a `QuestionTypeInstance` that both classifiers are applied to
+    */
+  override def addView(ta: TextAnnotation): Unit = {
+    val question = QuestionTypeInstance(ta.getText, None, None, Some(ta))
+    QuestionTypeClassificationDataModel.question.populate(List(question)) // TODO: is this step necessary?
+    val view = new SpanLabelView(finalViewName, finalViewName, ta, 1.0)
+    val fineLabel = fineClassifier(question)
+    val fineScore = fineClassifier.classifier.scores(question).get(fineLabel)
+    val coarseLabel = coarseClassifier(question)
+    val coarseScore = coarseClassifier.classifier.scores(question).get(coarseLabel)
+    view.addConstituent(new Constituent(fineLabel, fineScore, finalViewName, ta, 0, ta.getTokens.length))
+    view.addConstituent(new Constituent(coarseLabel, coarseScore, finalViewName, ta, 0, ta.getTokens.length))
+    ta.addView(finalViewName, view)
+  }
+}
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationApp.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationApp.scala
index 840393f8..c95f31df 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationApp.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationApp.scala
@@ -25,13 +25,14 @@ object QuestionTypeClassificationApp {
   def evaluate(classifier: TypeClassifier) = {
     populateInstances()
     classifier.learn(20)
+    classifier.save()
     classifier.test()
   }
 
   def classifySampleQuestions() = {
-    val coarseClassifier = new CoarseTypeClassifier(propertyList)
+    val coarseClassifier = new CoarseTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
     coarseClassifier.load()
-    val fineClassifier = new FineTypeClassifier(propertyList)
+    val fineClassifier = new FineTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
     fineClassifier.load()
     import QuestionTypeClassificationSensors._
     val rawQuestions = Seq(
@@ -49,53 +50,29 @@ object QuestionTypeClassificationApp {
       pipeline.addView(ta, ViewNames.POS)
       pipeline.addView(ta, ViewNames.SHALLOW_PARSE)
       pipeline.addView(ta, ViewNames.NER_CONLL)
-      val questioin = QuestionTypeInstance(q, None, None, Some(ta))
+      val question = QuestionTypeInstance(q, None, None, Some(ta))
       println(q)
-      println(coarseClassifier(questioin))
-      println(fineClassifier(questioin))
+      println(coarseClassifier(question))
+      println(fineClassifier(question))
     }
   }
 
-  val propertyList = List(
-    QuestionTypeClassificationDataModel.surfaceWords,
-    QuestionTypeClassificationDataModel.lemma,
-    QuestionTypeClassificationDataModel.pos,
-    QuestionTypeClassificationDataModel.chunks,
-    QuestionTypeClassificationDataModel.headChunks,
-    QuestionTypeClassificationDataModel.ner,
-    QuestionTypeClassificationDataModel.containsFoodterm,
-    QuestionTypeClassificationDataModel.containsMountain,
-    QuestionTypeClassificationDataModel.containsProfession,
-    QuestionTypeClassificationDataModel.numberNormalizer,
-    QuestionTypeClassificationDataModel.wordnetSynsetsFirstSense,
-    QuestionTypeClassificationDataModel.wordnetSynsetsAllSenses,
-    QuestionTypeClassificationDataModel.wordnetLexicographerFileNamesFirstSense,
-    QuestionTypeClassificationDataModel.wordnetLexicographerFileNamesAllSenses,
-    QuestionTypeClassificationDataModel.wordnetHypernymFirstSenseLexicographerFileNames,
-    QuestionTypeClassificationDataModel.wordnetHypernymAllSensesLexicographerFileNames,
-    QuestionTypeClassificationDataModel.wordnetHypernymsFirstSense,
-    QuestionTypeClassificationDataModel.wordnetHypernymsAllSenses,
-    QuestionTypeClassificationDataModel.wordnetPointersFirstSense,
-    QuestionTypeClassificationDataModel.wordnetSynonymsFirstSense,
-    QuestionTypeClassificationDataModel.wordnetSynonymsAllSenses,
-    QuestionTypeClassificationDataModel.wordnetSynonymsAllSenses,
-    QuestionTypeClassificationDataModel.wordGroups
-  )
-
   def coarseClassifier(): Unit = {
-    val classifier = new CoarseTypeClassifier(propertyList)
+    val classifier = new CoarseTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
     evaluate(classifier)
   }
 
   def fineClassifier(): Unit = {
-    val classifier = new FineTypeClassifier(propertyList)
+    val classifier = new FineTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
     evaluate(classifier)
   }
 
   def main(args: Array[String]): Unit = {
     val parser = new ArgumentParser(args)
     parser.experimentType() match {
-      case 1 => coarseClassifier()
+      case 1 => // NOTE(review): now learns, saves and tests both classifiers; overlaps case 2 - confirm intent
+        coarseClassifier()
+        fineClassifier()
       case 2 => fineClassifier()
       case 3 => classifySampleQuestions()
     }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationDataModel.scala
index 32d249ef..72b32764 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationDataModel.scala
@@ -66,7 +66,7 @@ object QuestionTypeClassificationDataModel extends DataModel {
 
   val containsProfession = property(question) { x: QuestionTypeInstance =>
     val lemmas = x.textAnnotationOpt.get.getView(ViewNames.LEMMA).getConstituents.asScala.map { _.getSurfaceForm }.toList
-    lemmas.exists(lemma => QuestionTypeClassificationSensors.professons.contains(lemma)).toString
+    lemmas.exists(lemma => QuestionTypeClassificationSensors.professions.contains(lemma)).toString
   }
 
   val containsFoodterm = property(question) { x: QuestionTypeInstance =>
@@ -144,4 +144,32 @@ object QuestionTypeClassificationDataModel extends DataModel {
     val cons = x.textAnnotationOpt.get.getView(ViewNames.TOKENS).getConstituents.asScala.map { _.getSurfaceForm.toLowerCase.trim }.toSet
     QuestionTypeClassificationSensors.wordGroupLists.collect { case (label, set) if set.intersect(cons).nonEmpty => label }
   }
+
+  /** Properties handed to both the coarse and the fine question-type classifier. */
+  val propertyList = List(
+    surfaceWords,
+    lemma,
+    pos,
+    chunks,
+    headChunks,
+    ner,
+    containsFoodterm,
+    containsMountain,
+    containsProfession,
+    numberNormalizer,
+    wordnetSynsetsFirstSense,
+    wordnetSynsetsAllSenses,
+    wordnetLexicographerFileNamesFirstSense,
+    wordnetLexicographerFileNamesAllSenses,
+    wordnetHypernymFirstSenseLexicographerFileNames,
+    wordnetHypernymAllSensesLexicographerFileNames,
+    wordnetHypernymsFirstSense,
+    wordnetHypernymsAllSenses,
+    wordnetPointersFirstSense,
+    wordnetSynonymsFirstSense,
+    wordnetSynonymsAllSenses,
+    wordnetSynonymsAllSenses, // NOTE(review): listed twice, as in the list this replaces - confirm intent
+    wordGroups
+  )
+
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala
index 6271a5cd..208ddb61 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala
@@ -10,17 +10,17 @@ import java.io.File
 import java.util.Properties
 
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import edu.illinois.cs.cogcomp.core.io.LineIO
 import edu.illinois.cs.cogcomp.nlp.common.PipelineConfigurator._
 import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory
 
 import scala.io.Source
 
 object QuestionTypeClassificationSensors {
-  val dataFolder = "../data/QuestionTypeClassification/"
-  lazy val professons = Source.fromFile(new File(dataFolder + "prof.txt")).getLines().toSet
-  lazy val mountainKeywords = Source.fromFile(new File(dataFolder + "mount.txt")).getLines().toSet
-  lazy val foodKeywords = Source.fromFile(new File(dataFolder + "food.txt")).getLines().toSet
-
+  val resourceFolder = "lists/"
+  lazy val professions = openFileFromClassPath("prof.txt").toSet
+  lazy val mountainKeywords = openFileFromClassPath("mount.txt").toSet
+  lazy val foodKeywords = openFileFromClassPath("food.txt").toSet
   lazy val pipeline = {
     val settings = new Properties()
     TextAnnotationFactory.disableSettings(settings, USE_SRL_NOM)
@@ -43,7 +43,8 @@ object QuestionTypeClassificationSensors {
   }
 
   def getInstances(fileName: String): List[QuestionTypeInstance] = {
-    val allLines = Source.fromFile(new File(dataFolder + fileName), "ISO-8859-1").getLines().toList
+    println("reading instances . . . ")
+    val allLines = openFileFromClassPath(fileName)
     allLines.map { line =>
       val split = line.split(" ")
       val splitLabel = split(0).split(":")
@@ -57,18 +58,31 @@ object QuestionTypeClassificationSensors {
     }
   }
 
-  def getListOfFiles(dir: String): List[File] = {
-    val d = new File(dir)
-    if (d.exists && d.isDirectory) {
-      d.listFiles.filter(_.isFile).toList
-    } else {
-      List[File]()
-    }
+  import scala.collection.JavaConverters._
+
+  /** Names of the word-group list resources; each one is looked up under `resourceFolder`. */
+  val fileList = List("At", "Why", "body", "currency", "eff", "last", "ord", "prod", "stand", "title",
+    "How", "abb", "cause", "date", "event", "letter", "other", "prof", "state", "unit",
+    "In", "act", "city", "def", "fast", "pastBe", "quot", "substance", "univ",
+    "InOn", "an", "code", "desc", "food", "loca", "peop", "religion", "symbol", "vessel",
+    "On", "anim", "color", "dimen", "group", "money", "perc", "singleBe", "tech", "weight",
+    "What", "art", "comp", "dise", "instrument", "mount", "plant", "speak", "temp", "word",
+    "Where", "be", "country", "dist", "job", "name", "popu", "speed", "term",
+    "Who", "big", "culture", "do", "lang", "num", "presentBe", "sport", "time")
+
+  /** Reads the classpath resource `fileName` and returns its lines.
+    *
+    * @param fileName resource name, resolved by `LineIO.readFromClasspath`
+    * @return the lines of the resource as a Scala list
+    */
+  def openFileFromClassPath(fileName: String): List[String] = {
+    LineIO.readFromClasspath(fileName).asScala.toList
   }
 
+  /** Pairs of (word-group name, lower-cased and trimmed lines of that group's list). Kept lazy,
+    * like the keyword sets above, so the lists are only read when first needed.
+    */
   lazy val wordGroupLists = {
-    val files = getListOfFiles(dataFolder + "publish/lists")
-    assert(files.nonEmpty, "list of files not found")
-    files.map { f: File => f.getName -> Source.fromFile(f).getLines().toSet.map { line: String => line.toLowerCase.trim } }
+    fileList.map { f: String => f -> openFileFromClassPath(resourceFolder + f).toSet.map { line: String => line.toLowerCase.trim } }
   }
 }
diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeAnnotatorTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeAnnotatorTest.scala
new file mode 100644
index 00000000..28f2bcf9
--- /dev/null
+++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeAnnotatorTest.scala
@@ -0,0 +1,42 @@
+/** This software is released under the University of Illinois/Research and Academic Use License. See
+  * the LICENSE file in the root folder for details. Copyright (c) 2016
+  *
+  * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+  * http://cogcomp.cs.illinois.edu/
+  */
+package edu.illinois.cs.cogcomp.saulexamples.nlp.QuestionTypeClassification
+
+import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
+import org.scalatest._
+
+/** Checks that [[QuestionTypeAnnotator]] adds its view to pipeline-annotated sample questions. */
+class QuestionTypeAnnotatorTest extends FlatSpec with Matchers {
+
+  val questionTypeAnnotator = new QuestionTypeAnnotator()
+
+  "questionTypeClassifier " should " correctly add a view to TextAnnotation instances " in {
+
+    val rawQuestions = Seq(
+      "How's the weather in Champaign-Urbana?",
+      "How far is Champaign to Chicago?",
+      "Who found dinasours?", "Which day is Christmas?",
+      "What can be cured by cheap pizza?",
+      "What can be cured by cheese pizza?",
+      "Who is Michael?",
+      "When is Easter in 2017?"
+    )
+    import QuestionTypeClassificationSensors._
+    rawQuestions.foreach { q =>
+      val ta = pipeline.createBasicTextAnnotation("", "", q)
+      pipeline.addView(ta, ViewNames.LEMMA)
+      pipeline.addView(ta, ViewNames.POS)
+      pipeline.addView(ta, ViewNames.SHALLOW_PARSE)
+      pipeline.addView(ta, ViewNames.NER_CONLL)
+      questionTypeAnnotator.addView(ta)
+      // NOTE(review): 7 presumably counts the basic views, the four added above and the new
+      // question-type view - confirm against what createBasicTextAnnotation provides.
+      ta.getAvailableViews.size() should be >= 7
+      println(ta.getView(questionTypeAnnotator.finalViewName))
+    }
+  }
+}