diff --git a/build.sbt b/build.sbt index 6ee9e804..e6fb2942 100644 --- a/build.sbt +++ b/build.sbt @@ -13,7 +13,7 @@ lazy val commonSettings = Seq( Resolver.mavenLocal, "CogcompSoftware" at "http://cogcomp.cs.illinois.edu/m2repo/" ), - javaOptions ++= List("-Xmx6g"), + javaOptions ++= List("-Xmx6g", "-XX:+UseG1GC"), libraryDependencies ++= Seq( "edu.illinois.cs.cogcomp" % "LBJava" % "1.2.16" withSources, "edu.illinois.cs.cogcomp" % "illinois-core-utilities" % cogcompNLPVersion withSources, diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala index 39d46a13..d6fac9da 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala @@ -233,7 +233,7 @@ object ConstrainedClassifier { def constraint[HEAD <: AnyRef](f: HEAD => FirstOrderConstraint)(implicit headTag: ClassTag[HEAD]): LfsConstraint[HEAD] = { val hash = f.hashCode() ConstraintManager.getOrElseUpdate(hash, new LfsConstraint[HEAD] { - override def makeConstrainDef(x: HEAD): FirstOrderConstraint = f(x) + override def makeConstraintDef(x: HEAD): FirstOrderConstraint = f(x) }).asInstanceOf[LfsConstraint[HEAD]] } } \ No newline at end of file diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala index 68b7b741..4a8685d6 100755 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala @@ -96,20 +96,21 @@ class FirstOrderConstraints(val r: FirstOrderConstraint) { } -class LHSFirstOrderEqualityWithValueLBP(cls: Learner, t: AnyRef) { +class LHSFirstOrderEqualityWithValueLBP(learner: Learner, t: 
AnyRef) { // probably we need to write here // LHSFirstOrderEqualityWithValueLBP(cls : Learner, t : AnyRef) extends ConstraintTrait - val lbjRepr = new FirstOrderVariable(cls, t) + // This is the implicit variable in the ILP + val lbjVariable = new FirstOrderVariable(learner, t) def is(v: String): FirstOrderConstraint = { - new FirstOrderEqualityWithValue(true, lbjRepr, v) + new FirstOrderEqualityWithValue(true, lbjVariable, v) } //TODO: not sure if this works correctly. Make sure it works. def is(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = { - new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr) + new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable) } def isTrue: FirstOrderConstraint = is("true") @@ -117,17 +118,17 @@ class LHSFirstOrderEqualityWithValueLBP(cls: Learner, t: AnyRef) { def isNotTrue: FirstOrderConstraint = is("false") def isNot(v: String): FirstOrderConstraint = { - new FirstOrderNegation(new FirstOrderEqualityWithValue(true, lbjRepr, v)) + new FirstOrderNegation(new FirstOrderEqualityWithValue(true, lbjVariable, v)) } def isNot(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = { - new FirstOrderNegation(new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr)) + new FirstOrderNegation(new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable)) } def in(v: Array[String]): FirstOrderConstraint = { val falseConstant = new FirstOrderDisjunction(new FirstOrderConstant(false), new FirstOrderConstant(false)) v.foldRight(falseConstant) { (value, newConstraint) => - new FirstOrderDisjunction(new FirstOrderEqualityWithValue(true, lbjRepr, value), newConstraint) + new FirstOrderDisjunction(new FirstOrderEqualityWithValue(true, lbjVariable, value), newConstraint) } } } \ No newline at end of file diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala index 
8a6c4c0d..7832b078 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala @@ -8,19 +8,19 @@ import scala.reflect.ClassTag abstract class LfsConstraint[T <: AnyRef](implicit val tag: ClassTag[T]) { - def makeConstrainDef(x: T): FirstOrderConstraint + def makeConstraintDef(x: T): FirstOrderConstraint def evalDiscreteValue(t: T): String = { - this.makeConstrainDef(t).evaluate().toString + this.makeConstraintDef(t).evaluate().toString } - def apply(t: T) = makeConstrainDef(t) + def apply(t: T) = makeConstraintDef(t) def transfer: ParameterizedConstraint = { new ParameterizedConstraint() { override def makeConstraint(__example: AnyRef): FirstOrderConstraint = { val t: T = __example.asInstanceOf[T] - makeConstrainDef(t) + makeConstraintDef(t) } override def discreteValue(__example: AnyRef): String = diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala index 9f98d97d..a923f178 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala @@ -76,7 +76,7 @@ trait DataModel { r => r.to.tag.toString.equals(tag.toString) && r.from.tag.toString.equals(headTag.toString) } if (r.isEmpty) { - throw new Exception(s"Failed to found relations between $tag to $headTag") + throw new Exception(s"Failed to find relations between $tag to $headTag") } else r flatMap (_.asInstanceOf[Edge[NEED, FROM]].backward.neighborsOf(t)) distinct } else r flatMap (_.asInstanceOf[Edge[FROM, NEED]].forward.neighborsOf(t)) distinct } diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala 
b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala index 89148ea9..6b6a3cde 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala @@ -44,7 +44,7 @@ case class DiscreteProperty[T <: AnyRef]( private def _discreteValue(__example: AnyRef): String = { val t: T = __example.asInstanceOf[T] - self.sensor(t).mkString("") + self.sensor(t) } } case _ => new ClassifierContainsInLBP { diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index 0a7f21fa..726830be 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -1,9 +1,17 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import java.io.PrintStream + +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.lbj.pos.POSBaselineLearner import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner } import edu.illinois.cs.cogcomp.saul.classifier.Learnable +import edu.illinois.cs.cogcomp.lbjava.classify.{ FeatureVector, ScoreSet } +import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstant, FirstOrderConstraint, OJalgoHook } +import edu.illinois.cs.cogcomp.lbjava.learn.{ Learner, SparseAveragedPerceptron, SparseNetworkLearner } +import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable } +import 
edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._ +import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSDataModel._ object POSClassifiers { @@ -19,6 +27,31 @@ object POSClassifiers { POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue } + object POSMixedClassifier extends Learner { + override def write(out: PrintStream): Unit = ??? + + override def scores(exampleFeatures: Array[Int], exampleValues: Array[Double]): ScoreSet = ??? + + override def classify(exampleFeatures: Array[Int], exampleValues: Array[Double]): FeatureVector = ??? + + override def learn(exampleFeatures: Array[Int], exampleValues: Array[Double], exampleLabels: Array[Int], labelValues: Array[Double]): Unit = ??? + + override def discreteValue(obj: Object): String = { + val x = obj.asInstanceOf[Constituent] + if (BaselineClassifier.classifier.observed(wordForm(x))) + POSTaggerKnown.classifier.valueOf(x, BaselineClassifier.classifier.allowableTags(wordForm(x))).getStringValue + else + POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue + } + } + + def POSClassifierScoreSet(x: Constituent): ScoreSet = { + if (BaselineClassifier.classifier.observed(wordForm(x))) + POSTaggerKnown.classifier.scores(x, BaselineClassifier.classifier.allowableTags(wordForm(x))) + else + POSTaggerUnknown.classifier.scores(x, MikheevClassifier.classifier.allowableTags(x)) + } + object POSTaggerKnown extends Learnable[Constituent](POSDataModel) { def label = POSLabel override def feature = using(wordForm, baselineTarget, labelTwoBefore, labelOneBefore, @@ -29,7 +62,6 @@ object POSClassifiers { p.thickness = 2 baseLTU = new SparseAveragedPerceptron(p) } - override val loggging = true } object POSTaggerUnknown extends Learnable[Constituent](POSDataModel) { @@ -42,20 +74,78 @@ object POSClassifiers { p.thickness = 4 baseLTU = new 
SparseAveragedPerceptron(p) } - override val loggging = true } object BaselineClassifier extends Learnable[Constituent](POSDataModel) { def label = POSLabel override def feature = using(wordForm) override lazy val classifier = new POSBaselineLearner() - override val loggging = true } object MikheevClassifier extends Learnable[Constituent](POSDataModel) { def label = POSLabel override def feature = using(wordForm) override lazy val classifier = new MikheevLearner - override val loggging = true + } + + // Pairwise classifier + object BaselineClassifierPair extends Learnable[(Constituent, Constituent)](POSDataModel) { + def label = POSLabelPair + override def feature = using(wordFormPair) + override lazy val classifier = new POSBaselineLearner() + } + + object POSTaggerPairwise extends Learnable[(Constituent, Constituent)](POSDataModel) { + def label = POSLabelPair + override def feature = using(POSBaselineScoresPair) + override lazy val classifier = new SparseNetworkLearner + } + + // def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => + // val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) + // constituents.sliding(3).toList._forall { cons: List[Constituent] => + // consecutiveLabelPairsAreConsistent(cons.head, cons(1), cons(2)) + // } + // } + + val posLabels = List("#", "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", + "JJS", "LS", "MD", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", + "UH", "UNKNOWN", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``") + + def consecutiveLabelPairsAreConsistent(c1: Constituent, c2: Constituent, c3: Constituent): FirstOrderConstraint = { + posLabels._exists { label: String => + posRightLabelIs(label, c1, c2) ==> posLeftLabelIs(label, c2, c3) + } + } + + def posRightLabelIs(label: String, c1: Constituent, c2: Constituent) = { + 
posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(l + POSTaggerSensors.labelSeparator + label) } + } + + def posLeftLabelIs(label: String, c1: Constituent, c2: Constituent) = { + posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(label + POSTaggerSensors.labelSeparator + l) } + } + + object POSConstrainedClassifier extends ConstrainedClassifier[(Constituent, Constituent), Sentence](POSDataModel, POSTaggerPairwise) { + override def subjectTo = sentenceLabelsMatch + override val solver = new OJalgoHook + override val pathToHead = Some(POSDataModel.tokenPairToSentence) + } + + def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => + // val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation, s.getSentenceId) + // val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) + // constituents.sliding(2).toList._forall { + // case c1 :: c2 :: _ => + // posLabelPairs._exists { case (l1, l2) => posClassifierLabelCompatible(c1, c2, l1, l2) } + // } + //constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } + new FirstOrderConstant(true) + } + + def posClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = { + (POSMixedClassifier on c1).is(l1) and + (POSMixedClassifier on c2).is(l2) and + (POSTaggerPairwise on (c1, c2)).is(l1 + POSTaggerSensors.labelSeparator + l2) } } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala index b3a42ccc..774370be 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala @@ -1,15 +1,28 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger 
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.lbj.pos.POSLabeledUnknownWordParser import edu.illinois.cs.cogcomp.saul.datamodel.DataModel -import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSClassifiers.{ POSTaggerUnknown, POSTaggerKnown, BaselineClassifier } +import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSClassifiers.{ MikheevClassifier, POSTaggerUnknown, POSTaggerKnown, BaselineClassifier } object POSDataModel extends DataModel { + val sentence = node[Sentence] + val tokens = node[Constituent] + val tokenPair = node[(Constituent, Constituent)] + + val tokenToTokenPair = edge(tokens, tokenPair) + tokenToTokenPair.addSensor({ x: (Constituent) => (x, POSTaggerSensors.getConstituentAfter(x)) }) + + val tokenToSentence = edge(tokens, sentence) + tokenToSentence.addSensor({ x: (Constituent) => x.getTextAnnotation.getSentence(x.getSentenceId) }) + + val tokenPairToSentence = edge(tokenPair, sentence) + tokenPairToSentence.addSensor({ x: (Constituent, Constituent) => x._1.getTextAnnotation.getSentence(x._1.getSentenceId) }) + import POSTaggerSensors._ val constituentAfter = edge(tokens, tokens) @@ -187,4 +200,35 @@ object POSDataModel extends DataModel { r + "-" + s + "-" + t } -} + + // Pairwise classifier properties + val POSLabelPair = property(tokenPair) { x: (Constituent, Constituent) => + POSLabel(x._1) + labelSeparator + POSLabel(x._2) + } + + val wordFormPair = property(tokenPair) { x: (Constituent, Constituent) => + wordForm(x._1) + wordForm(x._2) + } + + val POSBaselineScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + List(BaselineClassifier(x._1), BaselineClassifier(x._2), BaselineClassifier(x._1) + BaselineClassifier(x._2)) + } + + val POSKnownScoresPair = property(tokenPair) { x: (Constituent, 
Constituent) => + val scoreSet1 = scoreSetToList(POSTaggerKnown.classifier.scores(x._1)) + val scoreSet2 = scoreSetToList(POSTaggerKnown.classifier.scores(x._2)) + scoreSet1 ++ scoreSet2 + } + + val POSUnknownScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + val scoreSet1 = scoreSetToList(POSTaggerUnknown.classifier.scores(x._1)) + val scoreSet2 = scoreSetToList(POSTaggerUnknown.classifier.scores(x._2)) + scoreSet1 ++ scoreSet2 + } + + val POSCombinedScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + val scoreSet1 = scoreSetToList(POSClassifiers.POSClassifierScoreSet(x._1)) + val scoreSet2 = scoreSetToList(POSClassifiers.POSClassifierScoreSet(x._2)) + scoreSet1 ++ scoreSet2 + } +} \ No newline at end of file diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala index 4c56a0c5..b8ac1d5a 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala @@ -1,6 +1,6 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.core.utilities.configuration.{ Property, ResourceManager, Configurator } import edu.illinois.cs.cogcomp.lbj.pos.POSLabeledUnknownWordParser import edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete @@ -29,16 +29,19 @@ object POSConfigurator extends Configurator { object POSTaggerApp { object POSExperimentType extends Enumeration { - val TrainAndTest, TestFromModel = Value + val TrainAndTest, TestFromModel, TrainAndTestPairwise, TestFromModelPairwise, TestConstrainedClassifier = Value } def 
main(args: Array[String]): Unit = {
     /** Choose the experiment you're interested in by changing the following line */
-    val testType = POSExperimentType.TestFromModel
+    val testType = POSExperimentType.TestConstrainedClassifier
 
     testType match {
       case POSExperimentType.TrainAndTest => trainAndTest()
       case POSExperimentType.TestFromModel => testWithPretrainedModels()
+      case POSExperimentType.TrainAndTestPairwise => trainAndTestPairwise()
+      case POSExperimentType.TestFromModelPairwise => testPairwise()
+      case POSExperimentType.TestConstrainedClassifier => LplusI()
     }
   }
 
@@ -68,6 +71,43 @@ object POSTaggerApp {
     (trainData, testData)
   }
 
+  /** Trains the pairwise baseline for one iteration, evaluates it with `testPOSTaggerPairwise`, then saves it. */
+  def trainAndTestPairwise(): Unit = {
+    POSDataModel.tokens.populate(trainData)
+    POSDataModel.tokens.populate(testData, train = false)
+
+    //POSClassifiers.loadModelsFromPackage()
+    //POSTaggerPairwise.learn(10)
+    //POSTaggerPairwise.save()
+    BaselineClassifierPair.learn(1)
+    testPOSTaggerPairwise()
+    BaselineClassifierPair.save()
+  }
+
+  /** Loads the saved pairwise baseline and evaluates it with `testPOSTaggerPairwise`. */
+  def testPairwise(): Unit = {
+    POSDataModel.tokens.populate(testData, train = false)
+    BaselineClassifierPair.load()
+    testPOSTaggerPairwise()
+  }
+
+  /** Loads all models and prints the constrained classifier's label for the first test token pair. */
+  def LplusI(): Unit = {
+    // Register the test tokens as test instances (not training instances), matching testPairwise.
+    POSDataModel.tokens.populate(testData, train = false)
+    BaselineClassifierPair.load()
+    ClassifierUtils.LoadClassifier(
+      POSConfigurator.jarModelPath,
+      BaselineClassifier, MikheevClassifier, POSTaggerKnown, POSTaggerUnknown
+    )
+    println(POSConstrainedClassifier.classifier.discreteValue((testData.head, POSTaggerSensors.getConstituentAfter(testData.head))))
+
+    // println(POSDataModel.getFromRelation[(Constituent, Constituent), Sentence]((testData.head, POSTaggerSensors.getConstituentAfter(testData.head))))
+    // POSConstrainedClassifier.classifier.discreteValue(testDataPair.head)
+    //POSConstrainedClassifier.test(testDataPair.slice(0,1))
+    // println(POSConstrainedClassifier.classifier.discreteValue(testData.head))
+  }
+
   def trainAndTest(): Unit = {
     POSDataModel.tokens populate trainData
POSDataModel.tokens.populate(testData, train = false)
@@ -105,11 +145,33 @@ object POSTaggerApp {
 
     testReader.data.foreach(cons => {
       val gold = POSDataModel.POSLabel(cons)
-      val predicted = POSClassifiers.POSClassifier(cons)
+      val predicted = POSClassifiers.POSMixedClassifier.discreteValue(cons)
       tester.reportPrediction(predicted, gold)
     })
 
     tester.printPerformance(System.out)
   }
+
+  /** Evaluates `BaselineClassifierPair` on the test tokens and prints the performance.
+    *
+    * Each token is paired with the constituent returned by `getConstituentAfter`; only the first
+    * half of the paired gold and predicted labels is reported to the tester.
+    */
+  def testPOSTaggerPairwise(): Unit = {
+    val tester = new TestDiscrete
+    val testReader = new LBJIteratorParserScala[Constituent](testData)
+    testReader.reset()
+
+    testReader.data.foreach(cons => {
+      val posPair = (cons, POSTaggerSensors.getConstituentAfter(cons))
+      val gold = POSDataModel.POSLabelPair(posPair)
+      val predicted = POSClassifiers.BaselineClassifierPair(posPair)
+      val goldSplit = POSTaggerSensors.splitPaired(gold)
+      val predictionSplit = POSTaggerSensors.splitPaired(predicted)
+      tester.reportPrediction(predictionSplit._1, goldSplit._1)
+    })
+
+    tester.printPerformance(System.out)
+  }
 
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala
index b008a250..46ea6c50 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala
@@ -2,6 +2,7 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger
 
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent
+import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet
 
 import scala.collection.JavaConversions._
 
@@ -29,4 +30,27 @@ object POSTaggerSensors {
     if (consBefore.size >= 2) consBefore.sortBy(-_.getEndSpan).get(1)
     else x
   }
+
+  // pairwise
+  /** Separator placed between the two halves of a paired label. */
+  val labelSeparator = "||"
+
+  /** Splits a paired label at the first occurrence of `labelSeparator`.
+    *
+    * The separator is matched literally (not as a regular expression), so a label with an empty
+    * half such as "NN||" yields an empty string for that half. Returns ("UNKNOWN", "UNKNOWN")
+    * when the separator is absent.
+    */
+  def splitPaired(pairLabel: String): (String, String) = {
+    val separatorIndex = pairLabel.indexOf(labelSeparator)
+    if (separatorIndex >= 0) {
+      (pairLabel.substring(0, separatorIndex), pairLabel.substring(separatorIndex + labelSeparator.length))
+    } else {
+      ("UNKNOWN", "UNKNOWN")
+    }
+  }
+
+  /** Returns the `score` of every entry of `scoreSet`, in the order produced by `toArray`. */
+  def scoreSetToList(scoreSet: ScoreSet): List[Double] = {
+    scoreSet.toArray.map(_.score).toList
+  }
 }
\ No newline at end of file
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala
index 45d9cdb6..1b49301b 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala
@@ -40,6 +40,21 @@ object CommonSensors {
     x.getView(ViewNames.POS).getConstituents.toList
   }
 
+  /** Returns all constituents of the POS view of `x`. */
+  def getPOSConstituents(x: TextAnnotation): List[Constituent] = {
+    getConstituents(x, ViewNames.POS)
+  }
+
+  /** Returns the constituents of the POS view of `x` whose sentence id equals `sentenceId`. */
+  def getPOSConstituents(x: TextAnnotation, sentenceId: Int): List[Constituent] = {
+    getConstituents(x, ViewNames.POS).filter(_.getSentenceId == sentenceId)
+  }
+
+  /** Returns all constituents of the view named `view` of `x`. */
+  def getConstituents(x: TextAnnotation, view: String): List[Constituent] = {
+    x.getView(view).getConstituents.toList
+  }
+
   def textAnnotationToTokens(ta: TextAnnotation): List[Constituent] = {
     ta.getView(ViewNames.TOKENS).getConstituents.toList
   }
diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala
index ed564940..5954fe1b 100644
--- a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala
+++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala
@@ -129,7 +129,7 @@ class POSTaggerTest extends
FlatSpec with Matchers { val combinedClassifierLabelMap = Map("To" -> "TO", "or" -> "CC", "not" -> "RB", ";" -> ":", "is" -> "VBZ", "the" -> "DT", "question" -> "NN", "." -> ".") toyConstituents.forall { cons => - val predicted = POSClassifiers.POSClassifier(cons) + val predicted = POSClassifiers.POSMixedClassifier.discreteValue(cons) predicted == combinedClassifierLabelMap.getOrElse(cons.getSurfaceForm, predicted) } should be(true) }