diff --git a/build.sbt b/build.sbt index 3a188a46..b800c2a6 100644 --- a/build.sbt +++ b/build.sbt @@ -3,8 +3,8 @@ import sbtrelease.ReleaseStateTransformations._ scalaVersion in ThisBuild := "2.11.7" -val cogcompNLPVersion = "3.0.71" -val cogcompPipelineVersion = "0.1.25" +val cogcompNLPVersion = "3.1.7" +val lbjavaVersion = "1.2.27" val ccgGroupId = "edu.illinois.cs.cogcomp" val headerMsg = """/** This software is released under the University of Illinois/Research and Academic Use License. See | * the LICENSE file in the root folder for details. Copyright (c) 2016 @@ -61,12 +61,12 @@ lazy val commonSettings = Seq( ), javaOptions ++= List("-Xmx11g"), libraryDependencies ++= Seq( - ccgGroupId % "LBJava" % "1.2.25" withSources, + ccgGroupId % "LBJava" % lbjavaVersion withSources, ccgGroupId % "illinois-core-utilities" % cogcompNLPVersion withSources, - "com.gurobi" % "gurobi" % "6.0", + "com.gurobi" % "gurobi" % "7.0.1", "org.apache.commons" % "commons-math3" % "3.0", "org.scalatest" % "scalatest_2.11" % "2.2.4", - "ch.qos.logback" % "logback-classic" % "1.1.7" + "ch.qos.logback" % "logback-classic" % "1.2.3" ), fork := true, connectInput in run := true, @@ -95,14 +95,14 @@ lazy val saulExamples = (project in file("saul-examples")). 
settings( name := "saul-examples", libraryDependencies ++= Seq( - ccgGroupId % "illinois-nlp-pipeline" % cogcompPipelineVersion withSources, + ccgGroupId % "illinois-nlp-pipeline" % cogcompNLPVersion withSources, ccgGroupId % "illinois-curator" % cogcompNLPVersion, ccgGroupId % "illinois-edison" % cogcompNLPVersion, ccgGroupId % "illinois-corpusreaders" % cogcompNLPVersion, ccgGroupId % "illinois-pos" % cogcompNLPVersion, ccgGroupId % "saul-pos-tagger-models" % "1.4", ccgGroupId % "saul-er-models" % "1.8", - ccgGroupId % "saul-srl-models" % "1.3", + ccgGroupId % "saul-srl-models" % "1.4" classifier "verb-gold", "org.json" % "json" % "20140107", "com.twitter" % "hbc-core" % "2.2.0", "org.rogach" %% "scallop" % "2.0.5" diff --git a/saul-examples/src/main/java/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConfigurator.java b/saul-examples/src/main/java/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConfigurator.java deleted file mode 100644 index a1ad3b03..00000000 --- a/saul-examples/src/main/java/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConfigurator.java +++ /dev/null @@ -1,77 +0,0 @@ -/** This software is released under the University of Illinois/Research and Academic Use License. See - * the LICENSE file in the root folder for details. 
Copyright (c) 2016 - * - * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling; - -import edu.illinois.cs.cogcomp.core.datastructures.ViewNames; -import edu.illinois.cs.cogcomp.core.utilities.configuration.Configurator; -import edu.illinois.cs.cogcomp.core.utilities.configuration.Property; -import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager; - -/** - * The default properties used for all the examples - * - * @author Parisa Kordjamshidi - * @author Christos Christodoulopoulos - */ -public class SRLConfigurator extends Configurator { - - public static final Property TREEBANK_HOME = new Property("treebankHome", "../saul-examples/src/test/resources/SRLToy/treebank"); - public static final Property PROPBANK_HOME = new Property("propbankHome","../saul-examples/src/test/resources/SRLToy/propbank"); - public static final Property TEST_SECTION = new Property("testSection","00"); - - public static final Property MODELS_DIR = new Property("modelsDir", "../models"); - public static final Property USE_CURATOR = new Property("useCurator", Configurator.FALSE); - - // The running mode of the program. Can be "true" for only testing, or "false" for training - public static final Property RUN_MODE = new Property("runMode", Configurator.TRUE); - - // The training mode for the examples. 
Can be "pipeline", "joint", "jointLoss" or "other" - public static final Property TRAINING_MODE = new Property("trainingMode", "joint"); - - /*********** SRL PROPERTIES ***********/ - // The (sub)directory to store and retrieve the trained SRL models (to be used with MODELS_DIR) - public static final Property SRL_MODEL_DIR = new Property("srlModelDir", "srl"); - - public static final Property SRL_JAR_MODEL_PATH = new Property("jarModelPath","models"); - - // This is used to determine the parse view in SRL experiments (can be ViewNames.GOLD or ViewNames.STANFORD) - // For replicating the published experiments this needs to be GOLD - public static final Property SRL_PARSE_VIEW = new Property("srlParseView", ViewNames.PARSE_GOLD); - - // A file to store the predictions of the SRL classifier (for argument types only) - public static final Property SRL_OUTPUT_FILE = new Property("srlOutputFile", "srl-predictions.txt"); - - // Whether to use gold predicates (if FALSE, predicateClassifier will be used instead) - public static final Property SRL_GOLD_PREDICATES = new Property("srlGoldPredicates", Configurator.TRUE); - - // Whether to use gold argument boundaries (if FALSE, argumentXuIdentifierGivenApredicate will be used instead) - public static final Property SRL_GOLD_ARG_BOUNDARIES = new Property("srlGoldArgBoundaries", Configurator.TRUE); - - // Should we use the pipeline during testing - public static final Property SRL_TEST_PIPELINE = new Property("srlTestPipeLine", Configurator.FALSE); - - // Should we use constraints during testing - public static final Property SRL_TEST_CONSTRAINTS = new Property("srlTestConstraints", Configurator.FALSE); - - // Should we train a predicate classifier given predicate candidates - public static final Property SRL_TRAIN_PREDICATES = new Property("srlTrainPredicates", Configurator.FALSE); - - // Should we train an argument identifier given the XuPalmer argument candidates - public static final Property SRL_TRAIN_ARG_IDENTIFIERS = new 
Property("srlArgIdentifier", Configurator.FALSE); - - // Should we train an argument type classifier given the XuPalmer argument candidates - public static final Property SRL_TRAIN_ARG_TYPE = new Property("srlArgIdentifier", Configurator.FALSE); - - @Override - public ResourceManager getDefaultConfig() { - Property[] properties = {TREEBANK_HOME, PROPBANK_HOME, MODELS_DIR, USE_CURATOR, TRAINING_MODE, - SRL_MODEL_DIR, SRL_PARSE_VIEW, SRL_OUTPUT_FILE, SRL_GOLD_PREDICATES, SRL_GOLD_ARG_BOUNDARIES, - SRL_TEST_PIPELINE, SRL_TEST_CONSTRAINTS,SRL_JAR_MODEL_PATH, RUN_MODE, SRL_TRAIN_PREDICATES, - SRL_TRAIN_ARG_IDENTIFIERS,SRL_TRAIN_ARG_TYPE,TEST_SECTION}; - return new ResourceManager(generateProperties(properties)); - } -} diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/CommonSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/CommonSensors.scala index 19d4c902..0b9c8892 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/CommonSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/CommonSensors.scala @@ -12,7 +12,7 @@ import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, import edu.illinois.cs.cogcomp.curator.CuratorFactory import edu.illinois.cs.cogcomp.edison.features.factory.WordFeatureExtractorFactory import edu.illinois.cs.cogcomp.edison.features.{ FeatureExtractor, FeatureUtilities } -import edu.illinois.cs.cogcomp.nlp.pipeline.IllinoisPipelineFactory +import edu.illinois.cs.cogcomp.pipeline.main.PipelineFactory import edu.illinois.cs.cogcomp.saul.util.Logging import edu.illinois.cs.cogcomp.saulexamples.data.Document @@ -87,9 +87,8 @@ object CommonSensors extends Logging { } def annotateWithPipeline(content: String, id: String): TextAnnotation = { - val annotatorService = IllinoisPipelineFactory.buildPipeline() + val annotatorService = PipelineFactory.buildPipeline() processDocumentWith(annotatorService, 
"corpus", id, content) } - } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala index 6271a5cd..a69a55c0 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/QuestionTypeClassificationSensors.scala @@ -10,7 +10,7 @@ import java.io.File import java.util.Properties import edu.illinois.cs.cogcomp.core.datastructures.ViewNames -import edu.illinois.cs.cogcomp.nlp.common.PipelineConfigurator._ +import edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator._ import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory import scala.io.Source @@ -23,7 +23,9 @@ object QuestionTypeClassificationSensors { lazy val pipeline = { val settings = new Properties() - TextAnnotationFactory.disableSettings(settings, USE_SRL_NOM) + TextAnnotationFactory.enableSettings(settings, USE_POS, USE_DEP, USE_LEMMA, USE_SHALLOW_PARSE, USE_NER_CONLL, + USE_NER_ONTONOTES, USE_STANFORD_DEP, USE_STANFORD_PARSE, USE_SRL_VERB, USE_SRL_PREP, USE_SRL_COMMA, + USE_QUANTIFIER) TextAnnotationFactory.createPipelineAnnotatorService(settings) } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/GreedyDecoder.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/GreedyDecoder.scala new file mode 100644 index 00000000..cc3c1948 --- /dev/null +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/GreedyDecoder.scala @@ -0,0 +1,50 @@ +/** This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling + +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation +import edu.illinois.cs.cogcomp.lbjava.classify.{ Score, ScoreSet } + +import scala.collection.mutable + +// Greedy no-overlap decoder: takes each relation's highest-scoring label, drops relations whose label is in labelsToExclude, then keeps relations in descending score order whose target span does not overlap an already kept one. Intended for PredicateArgumentView relations only. +// Used by SRLAnnotator for greedy inference. +object GreedyDecoder { + def decodeNoOverlap(inputRelationWithScores: Seq[(Relation, ScoreSet)], labelsToExclude: Set[String] = Set.empty): Seq[(Relation, Score)] = { + val filterExcludes = inputRelationWithScores.map({ + case (relation, scoreset) => + val highScoreLabel = scoreset.highScoreValue() + (relation, scoreset.getScore(highScoreLabel)) + }).filterNot(x => labelsToExclude.contains(x._2.value)) + + if (filterExcludes.isEmpty) { + return Seq.empty + } + + val minSpan = filterExcludes.map(_._1.getTarget.getStartSpan).min + val maxSpan = filterExcludes.map(_._1.getTarget.getEndSpan).max + + val range = maxSpan - minSpan + 1 + val spanPosition = new mutable.BitSet(range) + + // Scala's sortBy is a stable sort, so relations with equal scores keep their input order + filterExcludes.sortBy(x => -x._2.score) // Sort from largest to lowest scores + .flatMap({ + case (relation, score) => + val startSpan = relation.getTarget.getStartSpan + val endSpan = relation.getTarget.getEndSpan - 1 + val hasOverlap = (startSpan to endSpan).exists(sp => spanPosition.contains(sp - minSpan)) + + if (!hasOverlap) { + spanPosition ++= Range(startSpan - minSpan, endSpan - minSpan + 1) + Some((relation, score)) + } else { + None + } + }) + } +} diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PopulateSRLDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PopulateSRLDataModel.scala index
cb706447..fe730d1b 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PopulateSRLDataModel.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PopulateSRLDataModel.scala @@ -8,22 +8,22 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling import java.util.Properties -import edu.illinois.cs.cogcomp.annotation.AnnotatorException +import edu.illinois.cs.cogcomp.annotation.{ AnnotatorException, AnnotatorServiceConfigurator } import edu.illinois.cs.cogcomp.core.datastructures.ViewNames -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, TextAnnotation, TreeView } +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, PredicateArgumentView, TextAnnotation, TreeView } import edu.illinois.cs.cogcomp.core.datastructures.trees.Tree -import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager import edu.illinois.cs.cogcomp.curator.CuratorConfigurator._ import edu.illinois.cs.cogcomp.edison.annotators.ClauseViewGenerator -import edu.illinois.cs.cogcomp.nlp.common.PipelineConfigurator._ +import edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator._ import edu.illinois.cs.cogcomp.nlp.utilities.ParseUtils import edu.illinois.cs.cogcomp.saul.util.Logging -import edu.illinois.cs.cogcomp.saulexamples.data.{ SRLDataReader, SRLFrameManager } -import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors._ +import edu.illinois.cs.cogcomp.saulexamples.data.SRLDataReader import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLSensors._ -import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLscalaConfigurator._ +import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory + import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ /** Created by Parisa 
on 1/17/16. */ @@ -32,12 +32,13 @@ object PopulateSRLDataModel extends Logging { testOnly: Boolean = false, useGoldPredicate: Boolean = false, useGoldArgBoundaries: Boolean = false, - rm: ResourceManager = new SRLConfigurator().getDefaultConfig - ): SRLMultiGraphDataModel = { - val frameManager: SRLFrameManager = new SRLFrameManager(PROPBANK_HOME) - val useCurator = rm.getBoolean(SRLConfigurator.USE_CURATOR) - val parseViewName = rm.getString(SRLConfigurator.SRL_PARSE_VIEW) - val graphs = new SRLMultiGraphDataModel(parseViewName, frameManager) + usePipelineCaching: Boolean = true + ): Unit = { + + val useCurator = SRLscalaConfigurator.USE_CURATOR + val parseViewName = SRLscalaConfigurator.SRL_PARSE_VIEW + val srlGoldViewName = ViewNames.SRL_VERB + val annotatorService = useCurator match { case true => val nonDefaultProps = new Properties() @@ -45,38 +46,54 @@ object PopulateSRLDataModel extends Logging { TextAnnotationFactory.createCuratorAnnotatorService(nonDefaultProps) case false => val nonDefaultProps = new Properties() - TextAnnotationFactory.disableSettings(nonDefaultProps, USE_NER_CONLL, USE_NER_ONTONOTES, USE_SRL_VERB, USE_SRL_NOM, USE_STANFORD_DEP) - if (parseViewName.equals(ViewNames.PARSE_GOLD)) - TextAnnotationFactory.disableSettings(nonDefaultProps, USE_POS, USE_STANFORD_PARSE) + TextAnnotationFactory.enableSettings(nonDefaultProps, USE_LEMMA, USE_SHALLOW_PARSE) + if (!parseViewName.equals(ViewNames.PARSE_GOLD)) { + TextAnnotationFactory.enableSettings(nonDefaultProps, USE_POS, USE_STANFORD_PARSE) + } + if (!usePipelineCaching) { + TextAnnotationFactory.enableSettings(nonDefaultProps, AnnotatorServiceConfigurator.DISABLE_CACHE) + } TextAnnotationFactory.createPipelineAnnotatorService(nonDefaultProps) } + val clauseViewGenerator = parseViewName match { case ViewNames.PARSE_GOLD => new ClauseViewGenerator(parseViewName, "CLAUSES_GOLD") case ViewNames.PARSE_STANFORD => ClauseViewGenerator.STANFORD + case ViewNames.PARSE_CHARNIAK => new 
ClauseViewGenerator(parseViewName, ViewNames.CLAUSES_CHARNIAK) } + + /** Add required views to the text annotations and filter out failed text annotations. + * + * @param taAll Input text annotations. + * @return Text annotations with required views populated. + */ def addViewAndFilter(taAll: Iterable[TextAnnotation]): Iterable[TextAnnotation] = { - taAll.map { ta => + taAll.flatMap({ ta => try { annotatorService.addView(ta, ViewNames.LEMMA) annotatorService.addView(ta, ViewNames.SHALLOW_PARSE) + if (!parseViewName.equals(ViewNames.PARSE_GOLD)) { annotatorService.addView(ta, ViewNames.POS) - annotatorService.addView(ta, ViewNames.PARSE_STANFORD) + annotatorService.addView(ta, parseViewName) } + // Add a clause view (needed for the clause relative position feature) clauseViewGenerator.addView(ta) + + // Clean up the trees + val tree: Tree[String] = ta.getView(parseViewName).asInstanceOf[TreeView].getTree(0) + val parseView = new TreeView(parseViewName, ta) + parseView.setParseTree(0, ParseUtils.stripFunctionTags(ParseUtils.snipNullNodes(tree))) + ta.addView(parseViewName, parseView) + + Some(ta) } catch { - case e: AnnotatorException => + case e: Exception => logger.warn(s"Annotation failed for sentence ${ta.getId}; removing it from the list.") - taAll.remove(ta) + None } - // Clean up the trees - val tree: Tree[String] = ta.getView(parseViewName).asInstanceOf[TreeView].getTree(0) - val parseView = new TreeView(parseViewName, ta) - parseView.setParseTree(0, ParseUtils.stripFunctionTags(ParseUtils.snipNullNodes(tree))) - ta.addView(parseViewName, parseView) - ta - } + }) } def printNumbers(reader: SRLDataReader, readerType: String) = { @@ -86,36 +103,69 @@ object PopulateSRLDataModel extends Logging { logger.debug(s"Number of $readerType data arguments: $numArguments") } - var gr: SRLMultiGraphDataModel = null + /** Adds a single Text Annotation to the DataModel graph.
+ * + * @param a Text Annotation instance to add to the graph. + * @param isTrainingInstance Boolean indicating if the instance is a training instance. + */ + def populateDocument(a: TextAnnotation, isTrainingInstance: Boolean): Unit = { + // Data Model graph for a single sentence + val singleInstanceGraph = new SRLMultiGraphDataModel(parseViewName) + + // Populate the sentence node. + // Note: This does not populate the relation/predicates/arguments nodes. + singleInstanceGraph.sentences.populate(Seq(a), train = isTrainingInstance) + + val predicateTrainCandidates = { + if (useGoldPredicate) { + a.getView(srlGoldViewName).asInstanceOf[PredicateArgumentView].getPredicates.asScala + } else { + (singleInstanceGraph.sentences(a) ~> singleInstanceGraph.sentencesToTokens).collect({ + case x: Constituent if singleInstanceGraph.posTag(x).startsWith("VB") => x.cloneForNewView(ViewNames.SRL_VERB) + }) + } + } + + if (useGoldArgBoundaries) { + if (!useGoldPredicate) { + logger.error("Predicted Predicates with Gold Argument Boundaries is not supported.") + throw new UnsupportedOperationException("Predicted Predicates with Gold Argument Boundaries is not supported.") + } + + val goldRelations = a.getView(srlGoldViewName).asInstanceOf[PredicateArgumentView].getRelations.asScala + singleInstanceGraph.relations.populate(goldRelations, train = isTrainingInstance) + } else { + // Get XuPalmer Candidates for each predicate and populate the relations in the graph. + val XuPalmerCandidateArgsTraining = predicateTrainCandidates.flatMap({ + x => xuPalmerCandidate(x, (singleInstanceGraph.sentences(x.getTextAnnotation) ~> singleInstanceGraph.sentencesToStringTree).head) + }) + + singleInstanceGraph.relations.populate(XuPalmerCandidateArgsTraining, train = isTrainingInstance) + } + + logger.debug("all relations for this test:" + (singleInstanceGraph.sentences(a) ~> singleInstanceGraph.sentencesToRelations).size) + + // Populate the classifier DataModel with this single instance graph.
+ // This is done due to performance reasons while populating a big data model graph directly. + SRLClassifiers.SRLDataModel.addFromModel(singleInstanceGraph) + + if (SRLClassifiers.SRLDataModel.sentences().size % 1000 == 0) { + logger.info("loaded graphs in memory:" + SRLClassifiers.SRLDataModel.sentences().size) + } + } + if (!testOnly) { + logger.info(s"Reading training data from sections $TRAIN_SECTION_S to $TRAIN_SECTION_E") - val trainReader = new SRLDataReader(TREEBANK_HOME, PROPBANK_HOME, - TRAIN_SECTION_S, TRAIN_SECTION_E) + val trainReader = new SRLDataReader(TREEBANK_HOME, PROPBANK_HOME, TRAIN_SECTION_S, TRAIN_SECTION_E) trainReader.readData() + logger.info(s"Annotating ${trainReader.textAnnotations.size} training sentences") val filteredTa = addViewAndFilter(trainReader.textAnnotations) printNumbers(trainReader, "training") - logger.info("Populating SRLDataModel with training data.") - filteredTa.foreach { a => - gr = new SRLMultiGraphDataModel(parseViewName, frameManager) - if (!useGoldPredicate) { - gr.sentences.populate(Seq(a)) - val predicateTrainCandidates = gr.tokens.getTrainingInstances. 
- collect { case x: Constituent if gr.posTag(x).startsWith("VB") => x.cloneForNewView(ViewNames.SRL_VERB) } - gr.predicates.populate(predicateTrainCandidates) - } else { - gr.sentences.populate(Seq(a)) - } - logger.debug("gold relations for this train:" + gr.relations().size) - if (!useGoldArgBoundaries) { - val XuPalmerCandidateArgsTraining = gr.predicates.getTrainingInstances.flatMap(x => xuPalmerCandidate(x, (gr.sentences(x.getTextAnnotation) ~> gr.sentencesToStringTree).head)) - gr.relations.populate(XuPalmerCandidateArgsTraining) - } - logger.debug("all relations for this test:" + gr.relations().size) - graphs.addFromModel(gr) - if (graphs.sentences().size % 1000 == 0) logger.info("loaded graphs in memory:" + graphs.sentences().size) - } + logger.info("Populating SRLDataModel with training data.") + filteredTa.foreach(populateDocument(_, isTrainingInstance = true)) } val testReader = new SRLDataReader(TREEBANK_HOME, PROPBANK_HOME, TEST_SECTION, TEST_SECTION) @@ -128,24 +178,6 @@ object PopulateSRLDataModel extends Logging { printNumbers(testReader, "test") logger.info("Populating SRLDataModel with test data.") - filteredTest.foreach { a => - gr = new SRLMultiGraphDataModel(parseViewName, frameManager) - if (!useGoldPredicate) { - gr.sentences.populate(Seq(a), train = false) - val predicateTestCandidates = gr.tokens.getTestingInstances. 
- collect { case x: Constituent if gr.posTag(x).startsWith("VB") => x.cloneForNewView(ViewNames.SRL_VERB) } - gr.predicates.populate(predicateTestCandidates, train = false) - } else { - gr.sentences.populate(Seq(a), train = false) - } - logger.debug("gold relations for this test:" + gr.relations().size) - if (!useGoldArgBoundaries) { - val XuPalmerCandidateArgsTesting = gr.predicates.getTestingInstances.flatMap(x => xuPalmerCandidate(x, (gr.sentences(x.getTextAnnotation) ~> gr.sentencesToStringTree).head)) - gr.relations.populate(XuPalmerCandidateArgsTesting, train = false) - } - logger.debug("all relations for this test:" + gr.relations().size) - graphs.addFromModel(gr) - } - graphs + filteredTest.foreach(populateDocument(_, isTrainingInstance = false)) } } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PredArgViewGenerator.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PredArgViewGenerator.scala index 9abf8a81..77661f95 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PredArgViewGenerator.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/PredArgViewGenerator.scala @@ -18,8 +18,9 @@ import scala.collection.JavaConversions._ */ object PredArgViewGenerator { - def toPredArgList(graph: SRLMultiGraphDataModel, labelProp: TypedProperty[Relation, String]): Iterable[PredicateArgumentView] = { - import graph._ + import SRLClassifiers.SRLDataModel._ + + def toPredArgList(labelProp: TypedProperty[Relation, String]): Iterable[PredicateArgumentView] = { sentences().map { ta => val predArgView: PredicateArgumentView = new PredicateArgumentView(ViewNames.SRL_VERB, ta) (sentences(ta) ~> sentencesToRelations ~> relationsToPredicates).foreach { pred => diff --git 
a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLAnnotator.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLAnnotator.scala new file mode 100644 index 00000000..8abeabb6 --- /dev/null +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLAnnotator.scala @@ -0,0 +1,272 @@ +/** This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling + +import edu.illinois.cs.cogcomp.annotation.{ Annotator, AnnotatorConfigurator, AnnotatorException } +import edu.illinois.cs.cogcomp.core.datastructures.ViewNames +import edu.illinois.cs.cogcomp.core.datastructures.textannotation._ +import edu.illinois.cs.cogcomp.core.utilities.configuration.{ Configurator, Property, ResourceManager } +import edu.illinois.cs.cogcomp.edison.annotators.ClauseViewGenerator +import edu.illinois.cs.cogcomp.nlp.corpusreaders.AbstractSRLAnnotationReader +import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils +import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLClassifiers.SRLDataModel + +import scala.collection.JavaConverters._ + +class SRLAnnotatorConfigurator extends AnnotatorConfigurator { + override def getDefaultConfig: ResourceManager = { + val props = Array[Property]( + SRLAnnotatorConfigurator.USE_PREDICATE_CLASSIFIER, + SRLAnnotatorConfigurator.USE_ARGUMENT_IDENTIFIER, + SRLAnnotatorConfigurator.USE_VERB_SENSE_CLASSIFIER, + SRLAnnotatorConfigurator.USE_CONSTRAINTS, + SRLAnnotatorConfigurator.USE_GREEDY_INFERENCE_BINARY, + SRLAnnotatorConfigurator.USE_GREEDY_INFERENCE_TYPE + ) + + val defaultRm = 
super.getDefaultConfig + Configurator.mergeProperties(defaultRm, new ResourceManager(generateProperties(props))) + } +} + +object SRLAnnotatorConfigurator { + // Use the predicate classifier if true else use all verbs as predicates + val USE_PREDICATE_CLASSIFIER = new Property("usePredicateClassifier", Configurator.TRUE) + + // Boolean denoting if we should perform Binary Argument Identification + val USE_ARGUMENT_IDENTIFIER = new Property("useArgumentIdentifier", Configurator.TRUE) + + // Verb Sense Classifier is not trained currently + val USE_VERB_SENSE_CLASSIFIER = new Property("useVerbSenseClassifier", Configurator.FALSE) + + // Constrained Inference + val USE_CONSTRAINTS = new Property("useConstraints", Configurator.TRUE) + + // Use Greedy Inference at the Binary Argument Identifier + val USE_GREEDY_INFERENCE_BINARY = new Property("useGreedyInferenceBinary", Configurator.FALSE) + + // Use Greedy Inference at the Argument Type Identifier + val USE_GREEDY_INFERENCE_TYPE = new Property("useGreedyInferenceType", Configurator.FALSE) +} + +class SRLAnnotator(finalViewName: String = ViewNames.SRL_VERB, resourceManager: ResourceManager = new SRLAnnotatorConfigurator().getDefaultConfig) + extends Annotator(finalViewName, SRLAnnotator.requiredViews, resourceManager) { + val requiredViewSet: Set[String] = getRequiredViews.toSet + + lazy val clauseViewGenerator: ClauseViewGenerator = { + SRLscalaConfigurator.SRL_PARSE_VIEW match { + case ViewNames.PARSE_STANFORD => ClauseViewGenerator.STANFORD + case ViewNames.PARSE_GOLD => new ClauseViewGenerator(ViewNames.PARSE_GOLD, "CLAUSES_GOLD") + case ViewNames.PARSE_CHARNIAK => new ClauseViewGenerator(ViewNames.PARSE_CHARNIAK, ViewNames.CLAUSES_CHARNIAK) + } + } + + override def addView(ta: TextAnnotation): Unit = { + checkPrerequisites(ta) + + SRLDataModel.clearInstances() + + val finalView = new PredicateArgumentView(getViewName, SRLAnnotator.getClass.getCanonicalName, ta, 1.0) + + // Get Predicates in the sentence. 
+ val allPredicates = getPredicates(ta) + + allPredicates.foreach({ predicate: Constituent => + + // Get arguments for each predicate detected. + val argumentList = getArguments(ta, predicate) + finalView.addPredicateArguments( + predicate, + argumentList.map(_.getTarget).toList.asJava, + argumentList.map(_.getRelationName).toArray, + argumentList.map(_.getScore).toArray + ) + + // Add additional attributes + val lemmaOrToken = ta.getView(ViewNames.LEMMA) + .getConstituentsCovering(predicate) + .asScala + .headOption + .orElse(ta.getView(ViewNames.TOKENS).getConstituentsCovering(predicate).asScala.headOption) + + predicate.addAttribute(AbstractSRLAnnotationReader.SenseIdentifier, "XX") + predicate.addAttribute(AbstractSRLAnnotationReader.LemmaIdentifier, lemmaOrToken.map(_.getLabel).getOrElse("")) + }) + + assert(finalView.getConstituents.asScala.forall(_.getViewName == getViewName), "Verify correct constituent view names.") + ta.addView(getViewName, finalView) + + SRLDataModel.clearInstances() + } + + override def initialize(rm: ResourceManager): Unit = { + // Load models and other things + ClassifierUtils.LoadClassifier( + SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_dTr/", + SRLClassifiers.predicateClassifier + ) + ClassifierUtils.LoadClassifier( + SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_bTr/", + SRLClassifiers.argumentXuIdentifierGivenApredicate + ) + ClassifierUtils.LoadClassifier( + SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_cTr/", + SRLClassifiers.argumentTypeLearner + ) + } + + def checkPrerequisites(ta: TextAnnotation): Unit = { + val missingRequirements = requiredViewSet.diff(ta.getAvailableViews.asScala) + if (missingRequirements.nonEmpty) { + throw new AnnotatorException(s"Document ${ta.getId} is missing required views: $missingRequirements") + } + + clauseViewGenerator.addView(ta) + + // Check if the Annotator Configuration is compatible + val useConstraint = resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_CONSTRAINTS) 
+ val useGreedyInferenceBinary = resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_GREEDY_INFERENCE_BINARY) + val useGreedyInferenceType = resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_GREEDY_INFERENCE_TYPE) + // Constrained inference and the two greedy modes are mutually exclusive: fail fast when more than one is enabled (the exception must be thrown, not merely constructed). + if (useConstraint && (useGreedyInferenceBinary || useGreedyInferenceType)) { + throw new UnsupportedOperationException("Incompatible configuration") + } else if (useGreedyInferenceBinary && (useConstraint || useGreedyInferenceType)) { + throw new UnsupportedOperationException("Incompatible configuration") + } else if (useGreedyInferenceType && (useConstraint || useGreedyInferenceBinary)) { + throw new UnsupportedOperationException("Incompatible configuration") + } + } + + /** @param ta Input Text Annotation instance. + * @return Constituents that are not attached to any view yet. + */ + private def getPredicates(ta: TextAnnotation): Iterable[Constituent] = { + // Filter only verbs as candidates to the predicate classifier + val predicateCandidates = ta.getView(ViewNames.POS) + .getConstituents + .asScala + .filter(_.getLabel.startsWith("VB")) + .map(_.cloneForNewView(getViewName)) + + if (resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_PREDICATE_CLASSIFIER)) { + SRLDataModel.clearInstances() + SRLDataModel.predicates.populate(predicateCandidates, train = false) + + predicateCandidates.filter(SRLClassifiers.predicateClassifier(_) == "true").map({ candidate: Constituent => + candidate.cloneForNewViewWithDestinationLabel(getViewName, "Predicate") + }) + } else { + predicateCandidates + } + } + + /** @param ta Input Text Annotation instance. + * @param predicate Input Predicate instance. + * @return Relation between unattached predicate and arguments. + */ + private def getArguments(ta: TextAnnotation, predicate: Constituent): Iterable[Relation] = { + SRLDataModel.clearInstances() + + // Prevent duplicate clearing of graphs.
+ SRLDataModel.sentences.populate(Seq(ta), train = false) + + val stringTree = (SRLDataModel.sentences(ta) ~> SRLDataModel.sentencesToStringTree).head + + val candidateRelations = SRLSensors.xuPalmerCandidate(predicate, stringTree) + SRLDataModel.relations.populate(candidateRelations, train = false) + + val finalRelationList = { + if (resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_ARGUMENT_IDENTIFIER)) { + val filteredCandidates = { + if (resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_GREEDY_INFERENCE_BINARY)) { + val candidatesWithScores = candidateRelations.map({ + candidate => (candidate, SRLClassifiers.argumentXuIdentifierGivenApredicate.classifier.scores(candidate)) + }) + + // Greedy No Overlap decode + GreedyDecoder.decodeNoOverlap(candidatesWithScores, Set("false")) + .filter(x => x._2.value == "true") + .map(_._1) + } else { + candidateRelations.filter({ candidate: Relation => + SRLClassifiers.argumentXuIdentifierGivenApredicate(candidate) == "true" + }) + } + } + + // Re-create graph if the size of candidates are different after filtering + if (filteredCandidates.size != candidateRelations.size) { + SRLDataModel.clearInstances() + + // Prevent duplicate clearing of graphs. 
+ SRLDataModel.sentences.populate(Seq(ta), train = false) + SRLDataModel.relations.populate(filteredCandidates, train = false) + } + + filteredCandidates + } else { + candidateRelations + } + } + + if (resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_CONSTRAINTS)) { + finalRelationList.flatMap({ relation: Relation => + val label = SRLConstrainedClassifiers.argTypeConstraintClassifier(relation) + if (label == "candidate") + None + else + Some(SRLAnnotator.cloneRelationWithNewLabelAndArgument(relation, label, 1.0, getViewName)) + }) + } else { + val relationWithScores = finalRelationList.map({ relation: Relation => + (relation, SRLClassifiers.argumentTypeLearner.classifier.scores(relation)) + }) + + if (resourceManager.getBoolean(SRLAnnotatorConfigurator.USE_GREEDY_INFERENCE_TYPE)) { + GreedyDecoder.decodeNoOverlap(relationWithScores, Set("candidate")) + .filterNot(_._2.value == "candidate") + .map({ + case (relation, score) => + SRLAnnotator.cloneRelationWithNewLabelAndArgument(relation, score.value, score.score, getViewName) + }) + } else { + relationWithScores.map({ + case (relation, scoreset) => + val label = scoreset.highScoreValue() + (relation, scoreset.getScore(label)) + }).filterNot(_._2.value == "candidate") + .map({ + case (relation, score) => + SRLAnnotator.cloneRelationWithNewLabelAndArgument(relation, score.value, score.score, getViewName) + }) + } + } + } +} + +object SRLAnnotator { + private val requiredViews = Array( + ViewNames.POS, + ViewNames.LEMMA, + ViewNames.SHALLOW_PARSE, + SRLscalaConfigurator.SRL_PARSE_VIEW + ) + + private def cloneRelationWithNewLabelAndArgument( + sourceRelation: Relation, + label: String, + score: Double, + targetViewName: String + ): Relation = { + val newTargetConstituent = sourceRelation.getTarget.cloneForNewView(targetViewName) + val newRelation = new Relation(label, sourceRelation.getSource, newTargetConstituent, score) + sourceRelation.getAttributeKeys.asScala.foreach({ key: String => + 
newRelation.addAttribute(key, sourceRelation.getAttribute(key)) + }) + newRelation + } +} diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLApps.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLApps.scala index 9970b93b..500e5561 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLApps.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLApps.scala @@ -9,10 +9,15 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling import java.io.File import edu.illinois.cs.cogcomp.core.datastructures.ViewNames +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, TreeView } import edu.illinois.cs.cogcomp.saul.classifier.{ ClassifierUtils, JointTrainSparseNetwork } import edu.illinois.cs.cogcomp.saul.util.Logging +import edu.illinois.cs.cogcomp.saulexamples.data.SRLFrameManager import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLClassifiers._ import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLConstrainedClassifiers.argTypeConstraintClassifier +import edu.illinois.cs.cogcomp.pipeline.server.ServerClientAnnotator + +import scala.collection.JavaConverters._ object SRLscalaConfigurator { @@ -37,7 +42,7 @@ object SRLscalaConfigurator { val SRL_MODEL_DIR = "srl" val SRL_JAR_MODEL_PATH = "models" - // This is used to determine the parse view in SRL experiments (can be ViewNames.GOLD or ViewNames.STANFORD) + // This is used to determine the parse view in SRL experiments (can be ViewNames.GOLD or ViewNames.STANFORD or ViewNames.CHARNIAK) // For replicating the published experiments this needs to be GOLD val SRL_PARSE_VIEW = ViewNames.PARSE_GOLD @@ -66,9 +71,10 @@ object SRLscalaConfigurator { // Should we train an argument type classifier val SRL_TRAIN_ARG_TYPE = true + lazy val 
SRL_FRAME_MANAGER: SRLFrameManager = new SRLFrameManager(PROPBANK_HOME) } -object SRLApps extends Logging { +object RunningApps extends App with Logging { import SRLscalaConfigurator._ @@ -89,25 +95,22 @@ object SRLApps extends Logging { else "" } + logger.info(s"Running experiment $expName") + val startTime = System.currentTimeMillis() logger.info("population starts.") // Here, the data is loaded into the graph - val srlDataModelObject = PopulateSRLDataModel(testOnly = TEST_MODE, SRL_GOLD_PREDICATES, SRL_GOLD_ARG_BOUNDARIES) + PopulateSRLDataModel(testOnly = TEST_MODE, SRL_GOLD_PREDICATES, SRL_GOLD_ARG_BOUNDARIES) - import srlDataModelObject._ + import SRLDataModel._ logger.info("all relations number after population:" + relations().size) logger.info("all sentences number after population:" + sentences().size) logger.info("all predicates number after population:" + predicates().size) logger.info("all arguments number after population:" + arguments().size) logger.info("all tokens number after population:" + tokens().size) -} -object RunningApps extends App with Logging { - import SRLApps._ - import SRLApps.srlDataModelObject._ - import SRLscalaConfigurator._ // TRAINING if (!TEST_MODE) { expName match { @@ -160,7 +163,7 @@ object RunningApps extends App with Logging { argumentTypeLearner.save() case "pTr" => - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_bTr/", argumentXuIdentifierGivenApredicate) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_bTr/", argumentXuIdentifierGivenApredicate) val training = relations.getTrainingInstances.filter(x => argumentXuIdentifierGivenApredicate(x).equals("true")) argumentTypeLearner.modelDir = modelDir argumentTypeLearner.learn(100, training) @@ -197,27 +200,27 @@ object RunningApps extends App with Logging { (SRL_TEST_PIPELINE, SRL_TEST_CONSTRAINTS) match { case (true, true) => - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + 
"/models_bTr/", argumentXuIdentifierGivenApredicate) - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_aTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_bTr/", argumentXuIdentifierGivenApredicate) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_aTr/", argumentTypeLearner) argumentTypeLearner.test( prediction = typeArgumentPipeGivenGoldPredicateConstrained, groundTruth = argumentLabelGold, exclude = "candidate" ) case (true, false) => - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_bTr/", argumentXuIdentifierGivenApredicate) - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_aTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_bTr/", argumentXuIdentifierGivenApredicate) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_aTr/", argumentTypeLearner) argumentTypeLearner.test( prediction = typeArgumentPipeGivenGoldPredicate, groundTruth = argumentLabelGold, exclude = "candidate" ) case (false, true) => - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_aTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_aTr/", argumentTypeLearner) argTypeConstraintClassifier.test(outputGranularity = 100, exclude = "candidate") case (false, false) => - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_aTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_aTr/", argumentTypeLearner) argumentTypeLearner.test(exclude = "candidate") } } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLClassifiers.scala 
b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLClassifiers.scala index a4d4155a..4b73d427 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLClassifiers.scala @@ -14,7 +14,10 @@ import edu.illinois.cs.cogcomp.saul.datamodel.property.Property /** Created by Parisa on 12/30/15. */ object SRLClassifiers { - import SRLApps.srlDataModelObject._ + // Singleton data model instance for SRL classifiers. + val SRLDataModel: SRLMultiGraphDataModel = new SRLMultiGraphDataModel() + import SRLDataModel._ + //TODO This needs to be overriden by the user; change it to be dynamic val parameters = new SparseAveragedPerceptron.Parameters() object predicateClassifier extends Learnable[Constituent](predicates, parameters) { @@ -39,13 +42,11 @@ object SRLClassifiers { } object argumentXuIdentifierGivenApredicate extends Learnable[Relation](relations, parameters) { - def label = isArgumentXuGold override def feature = using(headwordRelation, syntacticFrameRelation, pathRelation, phraseTypeRelation, predPosTag, predLemmaR, linearPosition, argWordWindow, argPOSWindow, constituentLength, chunkLength, chunkEmbedding, chunkPathPattern, clauseFeatures, containsNEG, containsMOD) override lazy val classifier = new SparseNetworkLearner() } - } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstrainedClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstrainedClassifiers.scala index cab81f68..0f604007 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstrainedClassifiers.scala +++ 
b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstrainedClassifiers.scala @@ -7,7 +7,7 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Relation, TextAnnotation } -import edu.illinois.cs.cogcomp.infer.ilp.OJalgoHook +import edu.illinois.cs.cogcomp.infer.ilp.{ GurobiHook, OJalgoHook } import edu.illinois.cs.cogcomp.saul.classifier.ConstrainedClassifier import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLClassifiers.{ argumentTypeLearner, argumentXuIdentifierGivenApredicate } import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLConstraints._ @@ -15,7 +15,8 @@ import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLConstrai /** Created by Parisa on 12/27/15. */ object SRLConstrainedClassifiers { - import SRLApps.srlDataModelObject._ + import SRLClassifiers.SRLDataModel._ + val erSolver = new OJalgoHook object argTypeConstraintClassifier extends ConstrainedClassifier[Relation, TextAnnotation](argumentTypeLearner) { diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstraints.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstraints.scala index 5a494d02..d6b6c4d2 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstraints.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLConstraints.scala @@ -12,10 +12,12 @@ import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstant, FirstOrderCons import edu.illinois.cs.cogcomp.saul.classifier.ConstrainedClassifier import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._ import edu.illinois.cs.cogcomp.saulexamples.data.XuPalmerCandidateGenerator -import 
edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLApps.srlDataModelObject._ import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLClassifiers.{ argumentTypeLearner, argumentXuIdentifierGivenApredicate, predicateClassifier } import scala.collection.JavaConversions._ + +import SRLClassifiers.SRLDataModel._ + /** Created by Parisa on 12/23/15. */ object SRLConstraints { diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLEvaluation.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLEvaluation.scala new file mode 100644 index 00000000..8222a7fe --- /dev/null +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLEvaluation.scala @@ -0,0 +1,110 @@ +/** This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling + +import java.util.Properties + +import edu.illinois.cs.cogcomp.annotation.AnnotatorServiceConfigurator +import edu.illinois.cs.cogcomp.core.datastructures.ViewNames +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView +import edu.illinois.cs.cogcomp.core.datastructures.trees.Tree +import edu.illinois.cs.cogcomp.core.experiments.ClassificationTester +import edu.illinois.cs.cogcomp.core.experiments.evaluators.PredicateArgumentEvaluator +import edu.illinois.cs.cogcomp.curator.CuratorConfigurator.RESPECT_TOKENIZATION +import edu.illinois.cs.cogcomp.nlp.utilities.ParseUtils +import edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator.{ USE_LEMMA, USE_POS, USE_SHALLOW_PARSE, USE_STANFORD_PARSE } +import 
edu.illinois.cs.cogcomp.saul.util.Logging +import edu.illinois.cs.cogcomp.saulexamples.data.SRLDataReader +import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLscalaConfigurator.{ PROPBANK_HOME, TEST_SECTION, TREEBANK_HOME } +import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory + +import scala.collection.JavaConverters._ + +/** Evaluate the SRL Annotator using PredicateArgumentEvaluator. + * This evaluation honors settings in the SRLscalaConfigurator class. + */ +object SRLEvaluation extends App with Logging { + val parseViewName = SRLscalaConfigurator.SRL_PARSE_VIEW + val predictedViewName = ViewNames.SRL_VERB + "_PREDICTED" + val annotator = new SRLAnnotator(predictedViewName) + + val testReader = new SRLDataReader(TREEBANK_HOME, PROPBANK_HOME, TEST_SECTION, TEST_SECTION) + + logger.info("Reading the dataset.") + testReader.readData() + + logger.info(s"Intializing the annotator service: USE_CURATOR = ${SRLscalaConfigurator.USE_CURATOR}") + val usePipelineCaching = true + val annotatorService = SRLscalaConfigurator.USE_CURATOR match { + case true => + val nonDefaultProps = new Properties() + TextAnnotationFactory.enableSettings(nonDefaultProps, RESPECT_TOKENIZATION) + TextAnnotationFactory.createCuratorAnnotatorService(nonDefaultProps) + case false => + val nonDefaultProps = new Properties() + TextAnnotationFactory.enableSettings(nonDefaultProps, USE_LEMMA, USE_SHALLOW_PARSE) + if (!parseViewName.equals(ViewNames.PARSE_GOLD)) { + TextAnnotationFactory.enableSettings(nonDefaultProps, USE_POS, USE_STANFORD_PARSE) + } + if (!usePipelineCaching) { + TextAnnotationFactory.enableSettings(nonDefaultProps, AnnotatorServiceConfigurator.DISABLE_CACHE) + } + TextAnnotationFactory.createPipelineAnnotatorService(nonDefaultProps) + } + + logger.info("Annotating documents with pre-requisite views") + val annotatedDocumentsPartial = testReader.textAnnotations.asScala.map({ ta => + try { + annotatorService.addView(ta, ViewNames.LEMMA) + 
annotatorService.addView(ta, ViewNames.SHALLOW_PARSE) + + if (!parseViewName.equals(ViewNames.PARSE_GOLD)) { + annotatorService.addView(ta, ViewNames.POS) + annotatorService.addView(ta, parseViewName) + } + + // Clean up the trees + val tree: Tree[String] = ta.getView(parseViewName).asInstanceOf[TreeView].getTree(0) + val parseView = new TreeView(parseViewName, ta) + parseView.setParseTree(0, ParseUtils.stripFunctionTags(ParseUtils.snipNullNodes(tree))) + ta.addView(parseViewName, parseView) + + Some(ta) + } catch { + case ex: Exception => + logger.error(s"Annotation failed for sentence ${ta.getId}; removing it from the list.", ex) + None + } + }).partition(_.isEmpty) + + logger.info(s"Annotation failures = ${annotatedDocumentsPartial._1.size}") + logger.info(s"Annotation success = ${annotatedDocumentsPartial._2.size}") + logger.info("Starting SRL Annotation and evaluation") + + val identifierTester = new ClassificationTester + identifierTester.ignoreLabelFromSummary("V") + + val evaluator = new PredicateArgumentEvaluator + + // Annotate with SRL Annotator + var srlAnnotationFailures = 0 + annotatedDocumentsPartial._2 + .flatten + .foreach({ ta => + try { + annotator.addView(ta) + evaluator.evaluate(identifierTester, ta.getView(ViewNames.SRL_VERB), ta.getView(predictedViewName)) + } catch { + case ex: Exception => + srlAnnotationFailures += 1 + logger.error(s"SRL Annotation failed for sentence ${ta.getId}.", ex) + } + }) + + logger.info(s"Documents which failed SRL Annotation = $srlAnnotationFailures") + println(identifierTester.getPerformanceTable(true).toTextTable) +} diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLMultiGraphDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLMultiGraphDataModel.scala index 3945d63d..1f1c2179 100644 --- 
a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLMultiGraphDataModel.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLMultiGraphDataModel.scala @@ -21,8 +21,8 @@ import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLConstrai import scala.collection.JavaConversions._ -class SRLMultiGraphDataModel(parseViewName: String = null, frameManager: SRLFrameManager = null) extends DataModel { - +class SRLMultiGraphDataModel(val parseViewName: String = SRLscalaConfigurator.SRL_PARSE_VIEW, val frameManager: SRLFrameManager = SRLscalaConfigurator.SRL_FRAME_MANAGER) extends DataModel { + // Nodes val predicates = node[Constituent]((x: Constituent) => x.getTextAnnotation.getCorpusId + ":" + x.getTextAnnotation.getId + ":" + x.getSpan) val arguments = node[Constituent]((x: Constituent) => x.getTextAnnotation.getCorpusId + ":" + x.getTextAnnotation.getId + ":" + x.getSpan) @@ -36,19 +36,28 @@ class SRLMultiGraphDataModel(parseViewName: String = null, frameManager: SRLFram val tokens = node[Constituent]((x: Constituent) => x.getTextAnnotation.getCorpusId + ":" + x.getTextAnnotation.getId + ":" + x.getSpan) + // Edges + val sentencesToStringTree = edge(sentences, stringTree) val sentencesToTokens = edge(sentences, tokens) val sentencesToRelations = edge(sentences, relations) val relationsToPredicates = edge(relations, predicates) val relationsToArguments = edge(relations, arguments) + // Sensors + sentencesToTokens.addSensor(CommonSensors.textAnnotationToTokens _) - sentencesToRelations.addSensor(textAnnotationToRelation _) + + // This sensor is disabled. Relations are populated manually. 
+ // sentencesToRelations.addSensor(textAnnotationToRelation _) + sentencesToRelations.addSensor(textAnnotationToRelationMatch _) relationsToArguments.addSensor(relToArgument _) relationsToPredicates.addSensor(relToPredicate _) sentencesToStringTree.addSensor(textAnnotationToStringTree _) + // Properties + /** This can be applied to both predicates and arguments */ val address = property(predicates, "add") { x: Constituent => x.getTextAnnotation.getCorpusId + ":" + x.getTextAnnotation.getId + ":" + x.getSpan @@ -172,7 +181,11 @@ class SRLMultiGraphDataModel(parseViewName: String = null, frameManager: SRLFram /** Combines clause relative position and clause coverage */ val clauseFeatures = property(relations, "clauseFeats") { rel: Relation => - val clauseViewName = if (parseViewName.equals(ViewNames.PARSE_GOLD)) "CLAUSES_GOLD" else ViewNames.CLAUSES_STANFORD + val clauseViewName = parseViewName match { + case ViewNames.PARSE_GOLD => "CLAUSES_GOLD" + case ViewNames.PARSE_STANFORD => ViewNames.CLAUSES_STANFORD + case ViewNames.PARSE_CHARNIAK => ViewNames.CLAUSES_CHARNIAK + } fexFeatureExtractor(rel.getTarget, new ClauseFeatureExtractor(parseViewName, clauseViewName)) } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLSensors.scala index 49101ca7..4125dbbf 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/SRLSensors.scala @@ -11,11 +11,12 @@ import edu.illinois.cs.cogcomp.core.datastructures.textannotation._ import edu.illinois.cs.cogcomp.core.datastructures.trees.Tree import edu.illinois.cs.cogcomp.edison.features._ import edu.illinois.cs.cogcomp.edison.features.factory.WordFeatureExtractorFactory +import 
edu.illinois.cs.cogcomp.saul.util.Logging import edu.illinois.cs.cogcomp.saulexamples.data.XuPalmerCandidateGenerator import scala.collection.JavaConversions._ -object SRLSensors { +object SRLSensors extends Logging { def sentenceToGoldPredicates(ta: TextAnnotation): List[Constituent] = { ta.getView(ViewNames.SRL_VERB).asInstanceOf[PredicateArgumentView].getPredicates.toList } @@ -29,19 +30,20 @@ object SRLSensors { def textAnnotationToTree(ta: TextAnnotation): Tree[Constituent] = { // We assume that there is only 1 sentence per TextAnnotation - val parseViewName: String = ViewNames.PARSE_GOLD + val parseViewName: String = SRLscalaConfigurator.SRL_PARSE_VIEW ta.getView(parseViewName).asInstanceOf[TreeView].getConstituentTree(0) } def textAnnotationToStringTree(ta: TextAnnotation): Tree[String] = { // We assume that there is only 1 sentence per TextAnnotation - val parseViewName: String = ViewNames.PARSE_GOLD + val parseViewName: String = SRLscalaConfigurator.SRL_PARSE_VIEW ta.getView(parseViewName).asInstanceOf[TreeView].getTree(0) } def textAnnotationToRelation(ta: TextAnnotation): List[Relation] = { ta.getView(ViewNames.SRL_VERB).getRelations.toList } + def textAnnotationToRelationMatch(ta: TextAnnotation, r: Relation): Boolean = { (ta.getCorpusId + ":" + ta.getId).matches(r.getSource.getTextAnnotation.getCorpusId + ":" + r.getSource.getTextAnnotation.getId) } @@ -74,9 +76,15 @@ object SRLSensors { } def xuPalmerCandidate(x: Constituent, y: Tree[String]): List[Relation] = { - val p = XuPalmerCandidateGenerator.generateCandidates(x, y) - val z = p.map(y => new Relation("candidate", x.cloneForNewView(x.getViewName), y.cloneForNewView(y.getViewName), 0.0)) - z.toList + try { + val p = XuPalmerCandidateGenerator.generateCandidates(x, y) + val z = p.map(y => new Relation("candidate", x.cloneForNewView(x.getViewName), y.cloneForNewView(y.getViewName), 0.0)) + z.toList + } catch { + case _: Exception => + logger.warn("Exception while populating XuPalmer Candidates") + 
List() + } } def fexFeatureExtractor(x: Constituent, fex: FeatureExtractor): String = { @@ -88,4 +96,4 @@ object SRLSensors { contextFex.addFeatureExtractor(featureExtractor) fexFeatureExtractor(x, contextFex) } -} \ No newline at end of file +} diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SpatialRoleLabeling/SpRLDataModelReader.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SpatialRoleLabeling/SpRLDataModelReader.scala index 1ca7e312..c5fe6079 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SpatialRoleLabeling/SpRLDataModelReader.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SpatialRoleLabeling/SpRLDataModelReader.scala @@ -11,7 +11,7 @@ import java.util.Properties import edu.illinois.cs.cogcomp.annotation.AnnotatorService import edu.illinois.cs.cogcomp.core.datastructures.IntPair import edu.illinois.cs.cogcomp.core.datastructures.textannotation._ -import edu.illinois.cs.cogcomp.nlp.common.PipelineConfigurator._ +import edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator._ import edu.illinois.cs.cogcomp.saul.util.Logging import edu.illinois.cs.cogcomp.saulexamples.nlp.SpatialRoleLabeling.SpRL2013.SpRL2013Document import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory @@ -26,7 +26,11 @@ object SpRLDataModelReader extends Logging { def read(path: String, version: String): List[SpRLSentence] = { val settings = new Properties() - TextAnnotationFactory.disableSettings(settings, USE_SRL_NOM, USE_NER_ONTONOTES) + // Note: We do not want to use SENTENCE_PIPELINE but the configuration property is incorrect + // in PipelineFactory. This will need to be updated when PipelineFactory is fixed. 
+ TextAnnotationFactory.enableSettings(settings, USE_POS, USE_LEMMA, USE_NER_CONLL, USE_SHALLOW_PARSE, + USE_STANFORD_DEP, USE_STANFORD_PARSE, USE_SRL_VERB, USE_SENTENCE_PIPELINE) + val as = TextAnnotationFactory.createPipelineAnnotatorService(settings) val reader = new SpRLDataReader(path, classOf[SpRL2013Document]) reader.readData() diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TextAnnotationFactory.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TextAnnotationFactory.scala index 2620b50a..d91f4512 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TextAnnotationFactory.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/TextAnnotationFactory.scala @@ -12,7 +12,8 @@ import edu.illinois.cs.cogcomp.annotation.AnnotatorService import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ TextAnnotation, TokenLabelView } import edu.illinois.cs.cogcomp.core.utilities.configuration.{ Configurator, Property, ResourceManager } import edu.illinois.cs.cogcomp.curator.{ CuratorConfigurator, CuratorFactory } -import edu.illinois.cs.cogcomp.nlp.pipeline.IllinoisPipelineFactory +import edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator +import edu.illinois.cs.cogcomp.pipeline.main.PipelineFactory /** Created by taher on 7/30/16. 
*/ @@ -36,8 +37,8 @@ object TextAnnotationFactory { as.createBasicTextAnnotation(corpusId, textId, text) def createPipelineAnnotatorService(settings: Properties): AnnotatorService = { - IllinoisPipelineFactory.buildPipeline( - new CuratorConfigurator().getConfig(new ResourceManager(settings)) + PipelineFactory.buildPipeline( + new PipelineConfigurator().getConfig(new ResourceManager(settings)) ) } diff --git a/saul-examples/src/test/java/edu/illinois/cs/cogcomp/saulexamples/data/SRLFrameManagerTest.java b/saul-examples/src/test/java/edu/illinois/cs/cogcomp/saulexamples/data/SRLFrameManagerTest.java index 58db6ea2..7b2ca7d6 100644 --- a/saul-examples/src/test/java/edu/illinois/cs/cogcomp/saulexamples/data/SRLFrameManagerTest.java +++ b/saul-examples/src/test/java/edu/illinois/cs/cogcomp/saulexamples/data/SRLFrameManagerTest.java @@ -6,11 +6,13 @@ */ package edu.illinois.cs.cogcomp.saulexamples.data; -import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager; -import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLConfigurator; +import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLscalaConfigurator; + import org.junit.Before; import org.junit.Test; +import org.scalatest.junit.JUnitSuite; + import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -18,14 +20,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class SRLFrameManagerTest { +public class SRLFrameManagerTest extends JUnitSuite { private SRLFrameManager frameManager; @Before public void setUp() throws Exception { - ResourceManager rm = new SRLConfigurator().getDefaultConfig(); - frameManager = new SRLFrameManager(rm.getString(SRLConfigurator.PROPBANK_HOME.key)); + frameManager = new SRLFrameManager(SRLscalaConfigurator.PROPBANK_HOME()); } @Test @@ -58,4 +59,4 @@ public void testGetLegalArguments() throws Exception { assertTrue(illegalArguments.contains("A4")); 
assertTrue(illegalArguments.contains("A5")); } -} \ No newline at end of file +} diff --git a/saul-examples/src/test/resources/SRLToy/propbank/frames/give.xml b/saul-examples/src/test/resources/SRLToy/propbank/frames/give.xml new file mode 100755 index 00000000..bb4d7d45 --- /dev/null +++ b/saul-examples/src/test/resources/SRLToy/propbank/frames/give.xml @@ -0,0 +1,292 @@ + +
diff --git a/saul-examples/src/test/resources/SRLToy/propbank/frames/go.xml b/saul-examples/src/test/resources/SRLToy/propbank/frames/go.xml new file mode 100755 index 00000000..be08c5ba --- /dev/null +++ b/saul-examples/src/test/resources/SRLToy/propbank/frames/go.xml @@ -0,0 +1,656 @@ + + diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/AnnotatorTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/AnnotatorTest.scala new file mode 100644 index 00000000..86cdc3a1 --- /dev/null +++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/AnnotatorTest.scala @@ -0,0 +1,39 @@ +/** This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling + +import edu.illinois.cs.cogcomp.core.datastructures.ViewNames +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ PredicateArgumentView, TextAnnotation } +import edu.illinois.cs.cogcomp.core.utilities.DummyTextAnnotationGenerator +import edu.illinois.cs.cogcomp.nlp.corpusreaders.AbstractSRLAnnotationReader +import edu.illinois.cs.cogcomp.saulexamples.HighMemoryTest +import org.scalatest.{ FlatSpec, Matchers } + +class AnnotatorTest extends FlatSpec with Matchers { + val textAnnotation: TextAnnotation = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation( + Array(ViewNames.POS, ViewNames.LEMMA, ViewNames.SHALLOW_PARSE, ViewNames.PARSE_GOLD), + false, + 1 + ) + + "SRLAnnotator" should "work" taggedAs (HighMemoryTest) in { + val annotator = new SRLAnnotator(ViewNames.SRL_VERB) + annotator.addView(textAnnotation) + + 
assert(textAnnotation.hasView(ViewNames.SRL_VERB), "SRL_VERB view should exist after annotation.") + + val srlView = textAnnotation.getView(ViewNames.SRL_VERB).asInstanceOf[PredicateArgumentView] + assert(srlView.getPredicates.size() == 1) + + val verbPredicate = srlView.getPredicates.get(0) + assert(srlView.getArguments(verbPredicate).size() >= 1) + + // Required attributes are populated. + assert(verbPredicate.hasAttribute(AbstractSRLAnnotationReader.LemmaIdentifier)) + assert(verbPredicate.hasAttribute(AbstractSRLAnnotationReader.SenseIdentifier)) + } +} diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ConstraintsTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ConstraintsTest.scala index db244ae2..85ae69ef 100644 --- a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ConstraintsTest.scala +++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ConstraintsTest.scala @@ -7,11 +7,12 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling import edu.illinois.cs.cogcomp.core.datastructures.ViewNames -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, Relation, TextAnnotation } +import edu.illinois.cs.cogcomp.core.datastructures.textannotation._ import edu.illinois.cs.cogcomp.core.datastructures.trees.Tree import edu.illinois.cs.cogcomp.core.utilities.DummyTextAnnotationGenerator import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstant, FirstOrderConstraint } import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner +import edu.illinois.cs.cogcomp.pipeline.server.ServerClientAnnotator import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable } import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._ import edu.illinois.cs.cogcomp.saul.datamodel.DataModel @@ -19,6 +20,8 @@ 
import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors._ import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLClassifiers.argumentTypeLearner import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLSensors._ import org.scalatest.{ FlatSpec, Matchers } + +import scala.collection.JavaConverters._ import scala.collection.JavaConversions._ class ConstraintsTest extends FlatSpec with Matchers { diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/DataModelTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/DataModelTest.scala index b2e69b30..c4591164 100644 --- a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/DataModelTest.scala +++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/DataModelTest.scala @@ -7,24 +7,24 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling import edu.illinois.cs.cogcomp.core.datastructures.ViewNames +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation import edu.illinois.cs.cogcomp.core.utilities.DummyTextAnnotationGenerator import org.scalatest.{ FlatSpec, Matchers } class DataModelTest extends FlatSpec with Matchers { - val rm = new SRLConfigurator().getDefaultConfig - val parseViewName = rm.getString(SRLConfigurator.SRL_PARSE_VIEW) - val SRLDataModel = new SRLMultiGraphDataModel(parseViewName) - import SRLDataModel._ + import SRLClassifiers.SRLDataModel._ + val viewsToAdd = Array( ViewNames.LEMMA, ViewNames.POS, ViewNames.SHALLOW_PARSE, ViewNames.PARSE_GOLD, ViewNames.SRL_VERB ) - val ta = { - val taTmp = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(viewsToAdd, false, 1) - // included here, in order to make sure population is done before making any queries - sentences.populate(List(taTmp)) - taTmp - } + + clearInstances() + val taTmp: 
TextAnnotation = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(viewsToAdd, false, 1) + sentences.populate(List(taTmp), train = false) + + val relationsList = SRLSensors.textAnnotationToRelation(taTmp) + relations.populate(relationsList, train = false) "graph population" should "be correct" in { sentences().size should be(1) @@ -105,5 +105,4 @@ class DataModelTest extends FlatSpec with Matchers { (relations() prop containsNEG).toSet should be(Set("", "")) (relations() prop containsMOD).toSet should be(Set("", "")) } - } diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ModelsTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ModelsTest.scala index 452878d5..18314a5a 100644 --- a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ModelsTest.scala +++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/SemanticRoleLabeling/ModelsTest.scala @@ -6,14 +6,23 @@ */ package edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling +import edu.illinois.cs.cogcomp.core.datastructures.ViewNames import edu.illinois.cs.cogcomp.saul.classifier.ClassifierUtils import edu.illinois.cs.cogcomp.saulexamples.nlp.SemanticRoleLabeling.SRLClassifiers._ import org.scalatest.{ FlatSpec, Matchers } class ModelsTest extends FlatSpec with Matchers { + val viewsToAdd = Array( + ViewNames.LEMMA, ViewNames.POS, ViewNames.SHALLOW_PARSE, + ViewNames.PARSE_GOLD, ViewNames.SRL_VERB + ) + + SRLDataModel.clearInstances() + PopulateSRLDataModel(testOnly = true, useGoldPredicate = true, useGoldArgBoundaries = true, usePipelineCaching = false) + "argument type classifier (aTr)" should "work." 
in { - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_aTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_aTr/", argumentTypeLearner) val results = argumentTypeLearner.test(exclude = "candidate") results.perLabel .filter(!_.f1.isNaN) @@ -29,7 +38,7 @@ class ModelsTest extends FlatSpec with Matchers { } "predicate identifier (dTr)" should "perform higher than 0.98." in { - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_dTr/", predicateClassifier) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_dTr/", predicateClassifier) val results = predicateClassifier.test() results.perLabel.foreach { result => @@ -37,24 +46,23 @@ class ModelsTest extends FlatSpec with Matchers { } } - "L+I argument type classifier (aTr)" should "work." in { - //TODO solve the test problem with Gurobi licencing vs. OJalgoHook inefficiency - // ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_aTr/", argumentTypeLearner) - // val scores = argTypeConstraintClassifier.test(exclude = "candidate") - // scores.foreach { - // case (label, score) => { - // label match { - // case "A0" => (score._1 >= 0.9) should be(true) - // case "A1" => (score._1 >= 0.9) should be(true) - // case "A2" => (score._1 >= 0.6) should be(true) - // case _ => "" - // } - // } - // } - } + // "L+I argument type classifier (aTr)" should "work." in { + // //TODO solve the test problem with Gurobi licencing vs. 
OJalgoHook inefficiency + // ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_aTr/", argumentTypeLearner) + // val results = argTypeConstraintClassifier.test(exclude = "candidate") + // results.perLabel.foreach { + // result => + // result.label match { + // case "A0" => (result.f1 >= 0.9) should be(true) + // case "A1" => (result.f1 >= 0.9) should be(true) + // case "A2" => (result.f1 >= 0.6) should be(true) + // case _ => "" + // } + // } + // } "argument identifier (bTr)" should "perform higher than 0.95." in { - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_bTr/", argumentXuIdentifierGivenApredicate) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_bTr/", argumentXuIdentifierGivenApredicate) val results = argumentXuIdentifierGivenApredicate.test() results.perLabel.foreach { result => @@ -63,21 +71,21 @@ class ModelsTest extends FlatSpec with Matchers { } "argument identifier (cTr) trained with XuPalmer" should "perform higher than 0.9." in { - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_cTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_cTr/", argumentTypeLearner) val results = argumentTypeLearner.test() results.perLabel.foreach { result => result.label match { - case "A0" => result.f1 should be(0.95 +- 0.03) - case "A1" => result.f1 should be(0.95 +- 0.03) - case "A2" => result.f1 should be(0.85 +- 0.03) + case "A0" => result.f1 should be(0.95 +- 0.05) + case "A1" => result.f1 should be(0.95 +- 0.05) + case "A2" => result.f1 should be(0.85 +- 0.05) case _ => "" } } } "argument identifier (fTr) trained with XuPalmer and candidate predicates" should "work." 
in { - ClassifierUtils.LoadClassifier(SRLConfigurator.SRL_JAR_MODEL_PATH.value + "/models_fTr/", argumentTypeLearner) + ClassifierUtils.LoadClassifier(SRLscalaConfigurator.SRL_JAR_MODEL_PATH + "/models_fTr/", argumentTypeLearner) val results = argumentTypeLearner.test(exclude = "candidate") results.perLabel.foreach { result => diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/package.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/package.scala index 0c5207f0..611957b7 100644 --- a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/package.scala +++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/package.scala @@ -4,7 +4,7 @@ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ -package edu.illinois.cs.cogcomp.saulexamples; +package edu.illinois.cs.cogcomp.saulexamples import org.scalatest.Tag