From 42831b350d0aabec2e068d67503e7b8d7989e4aa Mon Sep 17 00:00:00 2001 From: khashab2 Date: Wed, 23 Mar 2016 13:47:29 -0500 Subject: [PATCH 1/8] pairwise pos. --- .../cogcomp/saul/constraint/Constraint.scala | 9 +-- .../nlp/POSTagger/POSClassifiers.scala | 62 +++++++++++++++++-- .../nlp/POSTagger/POSDataModel.scala | 47 +++++++++++++- .../nlp/POSTagger/POSTaggerApps.scala | 33 +++++++++- .../nlp/POSTagger/POSTaggerSensors.scala | 17 +++++ .../saulexamples/nlp/commonSensors.scala | 8 +++ 6 files changed, 161 insertions(+), 15 deletions(-) diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala index d8d84755..1b6ef53c 100755 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala @@ -102,20 +102,21 @@ class FirstOrderConstraints(val r: FirstOrderConstraint) { } -class LHSFirstOrderEqualityWithValueLBP(cls: Learner, t: AnyRef) { +class LHSFirstOrderEqualityWithValueLBP(learner: Learner, t: AnyRef) { // probably we need to write here // LHSFirstOrderEqualityWithValueLBP(cls : Learner, t : AnyRef) extends ConstraintTrait - val lbjRepr = new FirstOrderVariable(cls, t) + // This is the implicit variable in the ILP + val lbjVariable = new FirstOrderVariable(learner, t) def is(v: String): FirstOrderConstraint = { - new FirstOrderEqualityWithValue(true, lbjRepr, v) + new FirstOrderEqualityWithValue(true, lbjVariable, v) } //TODO: not sure if this works correctly. Make sure it works. def is(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = { - new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr) + new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable) } def isTrue: FirstOrderConstraint = is("true") diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index 90801cd7..a391de12 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -1,11 +1,15 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.lbj.pos.POSBaselineLearner +import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet +import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstraint, OJalgoHook } import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner } +import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable } import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._ -import edu.illinois.cs.cogcomp.saul.classifier.Learnable +import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSDataModel._ +import edu.illinois.cs.cogcomp.saulexamples.setcover.{ SetCoverSolverDataModel, Neighborhood, City } object POSClassifiers { /** After POSTaggerKnown and POSTaggerUnknown are trained, @@ -20,6 +24,13 @@ object POSClassifiers { POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue } + def POSClassifierScoreSet(x: Constituent): ScoreSet = { + if (BaselineClassifier.classifier.observed(wordForm(x))) + POSTaggerKnown.classifier.scores(x, BaselineClassifier.classifier.allowableTags(wordForm(x))) + else + POSTaggerUnknown.classifier.scores(x, MikheevClassifier.classifier.allowableTags(x)) + } + // Loads learned models from the "saul-pos-tagger-models" jar package def loadModelsFromPackage(): Unit = { val jarModelPath = "edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/models/" @@ -59,7 +70,6 @@ object POSClassifiers { p.thickness = 2 baseLTU = new SparseAveragedPerceptron(p) } - override val loggging = true } object POSTaggerUnknown extends Learnable[Constituent](POSDataModel) { @@ -72,20 +82,60 @@ object POSClassifiers { p.thickness = 4 baseLTU = new SparseAveragedPerceptron(p) } - override val loggging = true } object BaselineClassifier extends Learnable[Constituent](POSDataModel) { def label = POSLabel override def feature = using(wordForm) override lazy val classifier = new POSBaselineLearner() - override val loggging = true } object MikheevClassifier extends Learnable[Constituent](POSDataModel) { def label = POSLabel override def feature = using(wordForm) override lazy val classifier = new MikheevLearner - override val loggging = true + } + + // Pairwise classifier + object POSTaggerPairwise extends Learnable[(Constituent, Constituent)](POSDataModel) { + def label = POSLabelPair + override def feature = using(POSBaselineScoresPair) + override lazy val classifier = new SparseNetworkLearner + } + + object BaselineClassifierPair extends Learnable[(Constituent, Constituent)](POSDataModel) { + def label = POSLabelPair + override def feature = using(wordFormPair) + override lazy val classifier = new POSBaselineLearner() + } + + def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => + val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) + constituents.sliding(3).toList._forall { cons: List[Constituent] => + consecutiveLabelPairsAreConsistent(cons.head, cons(1), cons(2)) + } + } + + val posLabels = List("#", "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", + "JJS", "LS", "MD", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", + "UH", "UNKNOWN", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``") + + def consecutiveLabelPairsAreConsistent(c1: Constituent, c2: Constituent, c3: Constituent): FirstOrderConstraint = { + posLabels._exists { label: String => + posRightLabelIs(label, c1, c2) ==> posLeftLabelIs(label, c2, c3) + } + } + + def posRightLabelIs(label: String, c1: Constituent, c2: Constituent) = { + posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(l + POSTaggerSensors.labelSpliter + label) } + } + + def posLeftLabelIs(label: String, c1: Constituent, c2: Constituent) = { + posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(label + POSTaggerSensors.labelSpliter + l) } + } + + object POSConstrainedClassifier extends ConstrainedClassifier[(Constituent, Constituent), Sentence](POSDataModel, POSTaggerPairwise) { + override def subjectTo = sentenceLabelsMatch + override val solver = new OJalgoHook } } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala index b3a42ccc..0d3fa7cd 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala @@ -1,15 +1,25 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger import edu.illinois.cs.cogcomp.core.datastructures.ViewNames -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.lbj.pos.POSLabeledUnknownWordParser import edu.illinois.cs.cogcomp.saul.datamodel.DataModel -import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSClassifiers.{ POSTaggerUnknown, POSTaggerKnown, BaselineClassifier } +import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSClassifiers.{ MikheevClassifier, POSTaggerUnknown, POSTaggerKnown, BaselineClassifier } object POSDataModel extends DataModel { + val sentence = node[Sentence] + val tokens = node[Constituent] + val tokenPair = node[(Constituent, Constituent)] + + val tokenToTokenPair = edge(tokens, tokenPair) + tokenToTokenPair.addSensor({ x: (Constituent) => (x, POSTaggerSensors.getConstituentAfter(x)) }) + + val tokenToSentence = edge(tokens, sentence) + tokenToSentence.addSensor({ x: (Constituent) => x.getTextAnnotation.getSentence(x.getSentenceId) }) + import POSTaggerSensors._ val constituentAfter = edge(tokens, tokens) @@ -187,4 +197,35 @@ object POSDataModel extends DataModel { r + "-" + s + "-" + t } -} + + // Pairwise classifier properties + val POSLabelPair = property(tokenPair) { x: (Constituent, Constituent) => + POSLabel(x._1) + labelSpliter + POSLabel(x._2) + } + + val wordFormPair = property(tokenPair) { x: (Constituent, Constituent) => + wordForm(x._1) + wordForm(x._2) + } + + val POSBaselineScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + List(BaselineClassifier(x._1), BaselineClassifier(x._2), BaselineClassifier(x._1) + BaselineClassifier(x._2)) + } + + val POSKnownScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + val scoreSet1 = scoreSetToList(POSTaggerKnown.classifier.scores(x._1)) + val scoreSet2 = scoreSetToList(POSTaggerKnown.classifier.scores(x._2)) + scoreSet1 ++ scoreSet2 + } + + val POSUnknownScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + val scoreSet1 = scoreSetToList(POSTaggerUnknown.classifier.scores(x._1)) + val scoreSet2 = scoreSetToList(POSTaggerUnknown.classifier.scores(x._2)) + scoreSet1 ++ scoreSet2 + } + + val POSCombinedScoresPair = property(tokenPair) { x: (Constituent, Constituent) => + val scoreSet1 = scoreSetToList(POSClassifiers.POSClassifierScoreSet(x._1)) + val scoreSet2 = scoreSetToList(POSClassifiers.POSClassifierScoreSet(x._2)) + scoreSet1 ++ scoreSet2 + } +} \ No newline at end of file diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala index 47cfe771..6915be72 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala @@ -27,7 +27,7 @@ object POSConfigurator extends Configurator { object POSTaggerApp { object POSExperimentType extends Enumeration { - val TrainAndTest, TestFromModel = Value + val TrainAndTest, TestFromModel, TrainAndTestPairwise = Value } def main(args: Array[String]): Unit = { @@ -37,6 +37,7 @@ object POSTaggerApp { testType match { case POSExperimentType.TrainAndTest => trainAndTest() case POSExperimentType.TestFromModel => testWithPretrainedModels() + case POSExperimentType.TrainAndTestPairwise => trainAndTestPairwise() } } @@ -66,6 +67,17 @@ object POSTaggerApp { (trainData, testData) } + def trainAndTestPairwise(): Unit = { + POSDataModel.tokens.populate(trainData) + POSDataModel.tokens.populate(testData, train = false) + + POSClassifiers.loadModelsFromPackage() + POSTaggerPairwise.learn(10) + POSTaggerPairwise.save() + + testPOSTaggerPairwise() + } + def trainAndTest(): Unit = { POSDataModel.tokens populate trainData POSDataModel.tokens.populate(testData, train = false) @@ -108,5 +120,22 @@ object POSTaggerApp { tester.printPerformance(System.out) } -} + /* this test used the first prediction of of a classifier with two-output prediction. */ + def testPOSTaggerPairwise(): Unit = { + val tester = new TestDiscrete + val testReader = new LBJIteratorParserScala[Constituent](testData) + testReader.reset() + + testReader.data.foreach(cons => { + val posPair = (cons, POSTaggerSensors.getConstituentAfter(cons)) + val gold = POSDataModel.POSLabelPair(posPair) + val predicted = POSClassifiers.POSTaggerPairwise(cons, POSTaggerSensors.getConstituentAfter(cons)) + val goldSplit = POSTaggerSensors.splitPaired(gold) + val predictionSplit = POSTaggerSensors.splitPaired(predicted) + tester.reportPrediction(predictionSplit._1, goldSplit._1) + }) + + tester.printPerformance(System.out) + } +} diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala index b008a250..2f884b15 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala @@ -2,6 +2,7 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger import edu.illinois.cs.cogcomp.core.datastructures.ViewNames import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet import scala.collection.JavaConversions._ @@ -29,4 +30,20 @@ object POSTaggerSensors { if (consBefore.size >= 2) consBefore.sortBy(-_.getEndSpan).get(1) else x } + + // pairwise + val labelSpliter = "||" + def splitPaired(pairLabel: String): (String, String) = { + if (pairLabel.contains(labelSpliter)) { + val splitted = pairLabel.split("[||]") + (splitted(0), splitted(2)) + } else { + ("UNKNOWN", "UNKNOWN") + } + } + + def scoreSetToList(scoreSet: ScoreSet): List[Double] = { + println() + scoreSet.toArray.map(_.score).toList + } } \ No newline at end of file diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala index 45d9cdb6..e4405121 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala @@ -40,6 +40,14 @@ object CommonSensors { x.getView(ViewNames.POS).getConstituents.toList } + def getPOSConstituents(x: TextAnnotation): List[Constituent] = { + getConstituents(x, ViewNames.POS) + } + + def getConstituents(x: TextAnnotation, view: String): List[Constituent] = { + x.getView(view).getConstituents.toList + } + def textAnnotationToTokens(ta: TextAnnotation): List[Constituent] = { ta.getView(ViewNames.TOKENS).getConstituents.toList } From 5283821545e6364878620c829507cec13dc92713 Mon Sep 17 00:00:00 2001 From: khashab2 Date: Sat, 16 Apr 2016 18:12:06 -0500 Subject: [PATCH 2/8] changes to constrained pos classifier. --- build.sbt | 2 +- .../nlp/POSTagger/POSClassifiers.scala | 127 +++++++++++------- .../nlp/POSTagger/POSDataModel.scala | 2 +- .../nlp/POSTagger/POSTaggerApps.scala | 34 +++-- .../nlp/POSTagger/POSTaggerSensors.scala | 4 +- 5 files changed, 107 insertions(+), 62 deletions(-) diff --git a/build.sbt b/build.sbt index 7bcd99a1..73200589 100644 --- a/build.sbt +++ b/build.sbt @@ -13,7 +13,7 @@ lazy val commonSettings = Seq( Resolver.mavenLocal, "CogcompSoftware" at "http://cogcomp.cs.illinois.edu/m2repo/" ), - javaOptions ++= List("-Xmx6g"), + javaOptions ++= List("-Xmx6g", "-XX:+UseG1GC"), libraryDependencies ++= Seq( "edu.illinois.cs.cogcomp" % "LBJava" % "1.2.8", "edu.illinois.cs.cogcomp" % "illinois-core-utilities" % cogcompNLPVersion withSources, diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index a391de12..6c7986b6 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -1,10 +1,12 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger +import java.io.PrintStream + import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.lbj.pos.POSBaselineLearner -import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet +import edu.illinois.cs.cogcomp.lbjava.classify.{ FeatureVector, ScoreSet } import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstraint, OJalgoHook } -import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner } +import edu.illinois.cs.cogcomp.lbjava.learn.{ Learner, SparseAveragedPerceptron, SparseNetworkLearner } import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable } import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._ import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors @@ -17,47 +19,30 @@ object POSClassifiers { * the input word was observed during training or of POSTaggerUnknown * if it wasn't. */ - def POSClassifier(x: Constituent): String = { - if (BaselineClassifier.classifier.observed(wordForm(x))) - POSTaggerKnown.classifier.valueOf(x, BaselineClassifier.classifier.allowableTags(wordForm(x))).getStringValue - else - POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue - } - def POSClassifierScoreSet(x: Constituent): ScoreSet = { - if (BaselineClassifier.classifier.observed(wordForm(x))) - POSTaggerKnown.classifier.scores(x, BaselineClassifier.classifier.allowableTags(wordForm(x))) - else - POSTaggerUnknown.classifier.scores(x, MikheevClassifier.classifier.allowableTags(x)) - } + object POSMixedClassifier extends Learner { + override def write(out: PrintStream): Unit = ??? - // Loads learned models from the "saul-pos-tagger-models" jar package - def loadModelsFromPackage(): Unit = { - val jarModelPath = "edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/models/" + override def scores(exampleFeatures: Array[Int], exampleValues: Array[Double]): ScoreSet = ??? - def loadModel(x: Learnable[Constituent]): Unit = { - val prefix = jarModelPath + x.getClassNameForClassifier - x.load(prefix + ".lc", prefix + ".lex") - } + override def classify(exampleFeatures: Array[Int], exampleValues: Array[Double]): FeatureVector = ??? - loadModel(BaselineClassifier) - loadModel(MikheevClassifier) - loadModel(POSTaggerKnown) - loadModel(POSTaggerUnknown) - } + override def learn(exampleFeatures: Array[Int], exampleValues: Array[Double], exampleLabels: Array[Int], labelValues: Array[Double]): Unit = ??? - def loadSavedModels(): Unit = { - BaselineClassifier.load() - MikheevClassifier.load() - POSTaggerKnown.load() - POSTaggerUnknown.load() + override def discreteValue(obj: Object): String = { + val x = obj.asInstanceOf[Constituent] + if (BaselineClassifier.classifier.observed(wordForm(x))) + POSTaggerKnown.classifier.valueOf(x, BaselineClassifier.classifier.allowableTags(wordForm(x))).getStringValue + else + POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue + } } - def saveModels(): Unit = { - BaselineClassifier.save() - MikheevClassifier.save() - POSTaggerKnown.save() - POSTaggerUnknown.save() + def POSClassifierScoreSet(x: Constituent): ScoreSet = { + if (BaselineClassifier.classifier.observed(wordForm(x))) + POSTaggerKnown.classifier.scores(x, BaselineClassifier.classifier.allowableTags(wordForm(x))) + else + POSTaggerUnknown.classifier.scores(x, MikheevClassifier.classifier.allowableTags(x)) } object POSTaggerKnown extends Learnable[Constituent](POSDataModel) { @@ -97,25 +82,25 @@ object POSClassifiers { } // Pairwise classifier - object POSTaggerPairwise extends Learnable[(Constituent, Constituent)](POSDataModel) { - def label = POSLabelPair - override def feature = using(POSBaselineScoresPair) - override lazy val classifier = new SparseNetworkLearner - } - object BaselineClassifierPair extends Learnable[(Constituent, Constituent)](POSDataModel) { def label = POSLabelPair override def feature = using(wordFormPair) override lazy val classifier = new POSBaselineLearner() } - def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => - val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) - constituents.sliding(3).toList._forall { cons: List[Constituent] => - consecutiveLabelPairsAreConsistent(cons.head, cons(1), cons(2)) - } + object POSTaggerPairwise extends Learnable[(Constituent, Constituent)](POSDataModel) { + def label = POSLabelPair + override def feature = using(POSBaselineScoresPair) + override lazy val classifier = new SparseNetworkLearner } + // def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => + // val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) + // constituents.sliding(3).toList._forall { cons: List[Constituent] => + // consecutiveLabelPairsAreConsistent(cons.head, cons(1), cons(2)) + // } + // } + val posLabels = List("#", "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", "JJS", "LS", "MD", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "UNKNOWN", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``") @@ -127,15 +112,59 @@ object POSClassifiers { } def posRightLabelIs(label: String, c1: Constituent, c2: Constituent) = { - posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(l + POSTaggerSensors.labelSpliter + label) } + posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(l + POSTaggerSensors.labelSeparator + label) } } def posLeftLabelIs(label: String, c1: Constituent, c2: Constituent) = { - posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(label + POSTaggerSensors.labelSpliter + l) } + posLabels._exists { l: String => (POSTaggerPairwise on (c1, c2)).is(label + POSTaggerSensors.labelSeparator + l) } } object POSConstrainedClassifier extends ConstrainedClassifier[(Constituent, Constituent), Sentence](POSDataModel, POSTaggerPairwise) { override def subjectTo = sentenceLabelsMatch override val solver = new OJalgoHook } + + def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => + val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) + val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) + constituents.sliding(2).toList._forall { + case c1 :: c2 :: _ => + posLabelPairs._exists { case (l1, l2) => posUnaryClassifierLabelCompatible(c1, c2, l1, l2) } + } + } + + def posUnaryClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = { + (POSMixedClassifier on c1).is(l1) and + (POSMixedClassifier on c2).is(l2) and + (POSTaggerPairwise on (c1, c2)).is(l1 + POSTaggerSensors.labelSeparator + l2) + } + + // Loads learned models from the "saul-pos-tagger-models" jar package + def loadModelsFromPackage(): Unit = { + val jarModelPath = "edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/models/" + + def loadModel(x: Learnable[Constituent]): Unit = { + val prefix = jarModelPath + x.getClassNameForClassifier + x.load(prefix + ".lc", prefix + ".lex") + } + + loadModel(BaselineClassifier) + loadModel(MikheevClassifier) + loadModel(POSTaggerKnown) + loadModel(POSTaggerUnknown) + } + + def loadSavedModels(): Unit = { + BaselineClassifier.load() + MikheevClassifier.load() + POSTaggerKnown.load() + POSTaggerUnknown.load() + } + + def saveModels(): Unit = { + BaselineClassifier.save() + MikheevClassifier.save() + POSTaggerKnown.save() + POSTaggerUnknown.save() + } } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala index 0d3fa7cd..cf60a67d 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala @@ -200,7 +200,7 @@ object POSDataModel extends DataModel { // Pairwise classifier properties val POSLabelPair = property(tokenPair) { x: (Constituent, Constituent) => - POSLabel(x._1) + labelSpliter + POSLabel(x._2) + POSLabel(x._1) + labelSeparator + POSLabel(x._2) } val wordFormPair = property(tokenPair) { x: (Constituent, Constituent) => diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala index 6915be72..238e6236 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala @@ -27,17 +27,19 @@ object POSConfigurator extends Configurator { object POSTaggerApp { object POSExperimentType extends Enumeration { - val TrainAndTest, TestFromModel, TrainAndTestPairwise = Value + val TrainAndTest, TestFromModel, TrainAndTestPairwise, TestFromModelPairwise, TestConstrainedClassifier = Value } def main(args: Array[String]): Unit = { /** Choose the experiment you're interested in by changing the following line */ - val testType = POSExperimentType.TestFromModel + val testType = POSExperimentType.TestConstrainedClassifier testType match { case POSExperimentType.TrainAndTest => trainAndTest() case POSExperimentType.TestFromModel => testWithPretrainedModels() case POSExperimentType.TrainAndTestPairwise => trainAndTestPairwise() + case POSExperimentType.TestFromModelPairwise => testPairwise() + case POSExperimentType.TestConstrainedClassifier => LplusI() } } @@ -71,13 +73,29 @@ object POSTaggerApp { POSDataModel.tokens.populate(trainData) POSDataModel.tokens.populate(testData, train = false) - POSClassifiers.loadModelsFromPackage() - POSTaggerPairwise.learn(10) - POSTaggerPairwise.save() + //POSClassifiers.loadModelsFromPackage() + //POSTaggerPairwise.learn(10) + //POSTaggerPairwise.save() + BaselineClassifierPair.learn(1) + testPOSTaggerPairwise() + BaselineClassifierPair.save() + } + def testPairwise(): Unit = { + POSDataModel.tokens.populate(testData, train = false) + BaselineClassifierPair.load() testPOSTaggerPairwise() } + def LplusI(): Unit = { + POSDataModel.tokens.populate(testData, train = false) + BaselineClassifierPair.load() + POSClassifiers.loadModelsFromPackage() + val testDataPair = for{x <- testData; y <- testData} yield (x,y) + POSConstrainedClassifier.test(testDataPair.slice(0,3)) +// println(POSConstrainedClassifier.classifier.discreteValue(testData.head)) + } + def trainAndTest(): Unit = { POSDataModel.tokens populate trainData POSDataModel.tokens.populate(testData, train = false) @@ -101,9 +119,7 @@ object POSTaggerApp { /** Loading the serialized models as a dependency */ def testWithPretrainedModels(): Unit = { POSDataModel.tokens.populate(testData, train = false) - POSClassifiers.loadModelsFromPackage() - testPOSTagger() } @@ -114,7 +130,7 @@ object POSTaggerApp { testReader.data.foreach(cons => { val gold = POSDataModel.POSLabel(cons) - val predicted = POSClassifiers.POSClassifier(cons) + val predicted = POSClassifiers.POSMixedClassifier.discreteValue(cons) tester.reportPrediction(predicted, gold) }) @@ -130,7 +146,7 @@ object POSTaggerApp { testReader.data.foreach(cons => { val posPair = (cons, POSTaggerSensors.getConstituentAfter(cons)) val gold = POSDataModel.POSLabelPair(posPair) - val predicted = POSClassifiers.POSTaggerPairwise(cons, POSTaggerSensors.getConstituentAfter(cons)) + val predicted = POSClassifiers.BaselineClassifierPair(cons, POSTaggerSensors.getConstituentAfter(cons)) val goldSplit = POSTaggerSensors.splitPaired(gold) val predictionSplit = POSTaggerSensors.splitPaired(predicted) tester.reportPrediction(predictionSplit._1, goldSplit._1) diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala index 2f884b15..46ea6c50 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerSensors.scala @@ -32,9 +32,9 @@ object POSTaggerSensors { } // pairwise - val labelSpliter = "||" + val labelSeparator = "||" def splitPaired(pairLabel: String): (String, String) = { - if (pairLabel.contains(labelSpliter)) { + if (pairLabel.contains(labelSeparator)) { val splitted = pairLabel.split("[||]") (splitted(0), splitted(2)) } else { From a638000ecc62c0d390f12fa3fbfe34207a86a8fa Mon Sep 17 00:00:00 2001 From: khashab2 Date: Sun, 17 Apr 2016 18:41:04 -0500 Subject: [PATCH 3/8] adding pathToHead to constrained pos classifier. --- .../saul/classifier/ConstrainedClassifier.scala | 12 ++++-------- .../cogcomp/saul/constraint/LfsConstraint.scala | 8 ++++---- .../cs/cogcomp/saul/datamodel/DataModel.scala | 2 +- .../features/discrete/DiscreteProperty.scala | 2 +- .../nlp/POSTagger/POSClassifiers.scala | 13 ++++++++----- .../nlp/POSTagger/POSDataModel.scala | 3 +++ .../nlp/POSTagger/POSTaggerApps.scala | 17 ++++++++++++----- .../nlp/POSTagger/POSTaggerTest.scala | 2 +- 8 files changed, 34 insertions(+), 25 deletions(-) diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala index 9b489a4e..0eb125c5 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala @@ -97,13 +97,12 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo val name = String.valueOf(infer.subjectTo.hashCode()) var inference = InferenceManager.get(name, head) if (inference == null) { - inference = infer(head) if (log) - println("Inference is NULL " + name) + println("Inference is not cached previously; running the inference from scratch ") + inference = infer(head) InferenceManager.put(name, inference) } inference.valueOf(cls, t) - case None => val name = String.valueOf(infer.subjectTo.hashCode()) @@ -155,7 +154,6 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo // println(remainingIteration) val v = crTokenTest.next if (v == null) { - if (remainingIteration > 0) { crTokenTest.reset() learnAll(crTokenTest, remainingIteration - 1) @@ -166,15 +164,12 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo learnAll(crTokenTest, remainingIteration) } } - learnAll(crTokenTest, iteration) } def test(): List[(String, (Double, Double, Double))] = { - val allHeads = this.dm.getNodeWithType[HEAD].getTestingInstances // allHeads foreach( t => println(s" [HEAD] Using thie head ${t} ")) - val data: List[T] = if (tType.equals(headType)) { allHeads.map(_.asInstanceOf[T]).toList } else { @@ -211,12 +206,13 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo } } +/** The constraint object defined for each classifier */ object ConstrainedClassifier { val ConstraintManager = scala.collection.mutable.HashMap[Int, LfsConstraint[_]]() def constraint[HEAD <: AnyRef](f: HEAD => FirstOrderConstraint)(implicit headTag: ClassTag[HEAD]): LfsConstraint[HEAD] = { val hash = f.hashCode() ConstraintManager.getOrElseUpdate(hash, new LfsConstraint[HEAD] { - override def makeConstrainDef(x: HEAD): FirstOrderConstraint = f(x) + override def makeConstraintDef(x: HEAD): FirstOrderConstraint = f(x) }).asInstanceOf[LfsConstraint[HEAD]] } } \ No newline at end of file diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala index 8a6c4c0d..7832b078 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/LfsConstraint.scala @@ -8,19 +8,19 @@ import scala.reflect.ClassTag abstract class LfsConstraint[T <: AnyRef](implicit val tag: ClassTag[T]) { - def makeConstrainDef(x: T): FirstOrderConstraint + def makeConstraintDef(x: T): FirstOrderConstraint def evalDiscreteValue(t: T): String = { - this.makeConstrainDef(t).evaluate().toString + this.makeConstraintDef(t).evaluate().toString } - def apply(t: T) = makeConstrainDef(t) + def apply(t: T) = makeConstraintDef(t) def transfer: ParameterizedConstraint = { new ParameterizedConstraint() { override def makeConstraint(__example: AnyRef): FirstOrderConstraint = { val t: T = __example.asInstanceOf[T] - makeConstrainDef(t) + makeConstraintDef(t) } override def discreteValue(__example: AnyRef): String = diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala index 54d77d93..20375afd 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/DataModel.scala @@ -79,7 +79,7 @@ trait DataModel { r => r.to.tag.toString.equals(tag.toString) && r.from.tag.toString.equals(headTag.toString) } if (r.isEmpty) { - throw new Exception(s"Failed to found relations between $tag to $headTag") + throw new Exception(s"Failed to find relations between $tag to $headTag") } else r flatMap (_.asInstanceOf[Edge[NEED, FROM]].backward.neighborsOf(t)) distinct } else r flatMap (_.asInstanceOf[Edge[FROM, NEED]].forward.neighborsOf(t)) distinct } diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala index 89148ea9..6b6a3cde 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/datamodel/property/features/discrete/DiscreteProperty.scala @@ -44,7 +44,7 @@ case class DiscreteProperty[T <: AnyRef]( private def _discreteValue(__example: AnyRef): String = { val t: T = __example.asInstanceOf[T] - self.sensor(t).mkString("") + self.sensor(t) } } case _ => new ClassifierContainsInLBP { diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index 6c7986b6..a1a0a3fa 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -5,7 +5,7 @@ import java.io.PrintStream import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.lbj.pos.POSBaselineLearner import edu.illinois.cs.cogcomp.lbjava.classify.{ FeatureVector, ScoreSet } -import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstraint, OJalgoHook } +import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstant, FirstOrderConstraint, OJalgoHook } import edu.illinois.cs.cogcomp.lbjava.learn.{ Learner, SparseAveragedPerceptron, SparseNetworkLearner } import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable } import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._ @@ -122,15 +122,18 @@ object POSClassifiers { object POSConstrainedClassifier extends ConstrainedClassifier[(Constituent, Constituent), Sentence](POSDataModel, POSTaggerPairwise) { override def subjectTo = sentenceLabelsMatch override val solver = new OJalgoHook + override val pathToHead = Some(POSDataModel.tokenPairToSentence) } def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) - constituents.sliding(2).toList._forall { - case c1 :: c2 :: _ => - posLabelPairs._exists { case (l1, l2) => posUnaryClassifierLabelCompatible(c1, c2, l1, l2) } - } + // constituents.sliding(2).toList._forall { + // case c1 :: c2 :: _ => + // posLabelPairs._exists { case (l1, l2) => posUnaryClassifierLabelCompatible(c1, c2, l1, l2) } + // } + //constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } + new FirstOrderConstant(true) } def posUnaryClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = { diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala index cf60a67d..774370be 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSDataModel.scala @@ -20,6 +20,9 @@ object POSDataModel extends DataModel { val tokenToSentence = edge(tokens, sentence) tokenToSentence.addSensor({ x: (Constituent) => x.getTextAnnotation.getSentence(x.getSentenceId) }) + val tokenPairToSentence = edge(tokenPair, sentence) + tokenPairToSentence.addSensor({ x: (Constituent, Constituent) => x._1.getTextAnnotation.getSentence(x._1.getSentenceId) }) + import POSTaggerSensors._ val constituentAfter = edge(tokens, tokens) diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala index 238e6236..7dd3110b 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala @@ -1,6 +1,6 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger -import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent +import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent } import edu.illinois.cs.cogcomp.core.utilities.configuration.{ Property, ResourceManager, Configurator } import edu.illinois.cs.cogcomp.lbj.pos.POSLabeledUnknownWordParser import edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete @@ -88,12 +88,19 @@ object POSTaggerApp { } def LplusI(): Unit = { - POSDataModel.tokens.populate(testData, train = false) + POSDataModel.tokens.populate(testData) BaselineClassifierPair.load() POSClassifiers.loadModelsFromPackage() - val testDataPair = for{x <- testData; y <- testData} yield (x,y) - POSConstrainedClassifier.test(testDataPair.slice(0,3)) -// println(POSConstrainedClassifier.classifier.discreteValue(testData.head)) + println(POSConstrainedClassifier.classifier.discreteValue((testData.head, POSTaggerSensors.getConstituentAfter(testData.head)))) + + // println(POSDataModel.getFromRelation[(Constituent, Constituent), Sentence]((testData.head, POSTaggerSensors.getConstituentAfter(testData.head)))) + + // for { x <- testData; y <- testData } { + // + // } + // POSConstrainedClassifier.classifier.discreteValue(testDataPair.head) + //POSConstrainedClassifier.test(testDataPair.slice(0,1)) + // println(POSConstrainedClassifier.classifier.discreteValue(testData.head)) } def trainAndTest(): Unit = { diff --git a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala index 4f35758b..c1746820 100644 --- a/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala +++ b/saul-examples/src/test/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerTest.scala @@ -119,7 +119,7 @@ class POSTaggerTest extends FlatSpec with Matchers { val combinedClassifierLabelMap = Map("To" -> "TO", "or" -> "CC", "not" -> "RB", ";" -> ":", "is" -> "VBZ", "the" -> "DT", "question" -> "NN", "." -> ".") toyConstituents.forall { cons => - val predicted = POSClassifiers.POSClassifier(cons) + val predicted = POSClassifiers.POSMixedClassifier.discreteValue(cons) predicted == combinedClassifierLabelMap.getOrElse(cons.getSurfaceForm, predicted) } should be(true) } From 64e151fcda6e09b50d36a50ba7368412cfd5d2e0 Mon Sep 17 00:00:00 2001 From: khashab2 Date: Sun, 17 Apr 2016 19:18:30 -0500 Subject: [PATCH 4/8] minor --- .../saulexamples/nlp/POSTagger/POSClassifiers.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index a1a0a3fa..9e95aff1 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -128,11 +128,11 @@ object POSClassifiers { def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) - // constituents.sliding(2).toList._forall { - // case c1 :: c2 :: _ => - // posLabelPairs._exists { case (l1, l2) => posUnaryClassifierLabelCompatible(c1, c2, l1, l2) } - // } - //constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } +// constituents.sliding(2).toList._forall { +// case c1 :: c2 :: _ => +// posLabelPairs._exists { case (l1, l2) => posUnaryClassifierLabelCompatible(c1, c2, l1, l2) } +// } + constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } new FirstOrderConstant(true) } From 24d27497c2c6de5226f8743bb25b3a0c42092237 Mon Sep 17 00:00:00 2001 From: khashab2 Date: Mon, 18 Apr 2016 11:04:55 -0500 Subject: [PATCH 5/8] minor --- .../nlp/POSTagger/POSClassifiers.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index 9e95aff1..f59ab21d 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -128,15 +128,15 @@ object POSClassifiers { def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) -// constituents.sliding(2).toList._forall { -// case c1 :: c2 :: _ => -// posLabelPairs._exists { case (l1, l2) => posUnaryClassifierLabelCompatible(c1, c2, l1, l2) } -// } - constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } - new FirstOrderConstant(true) + constituents.sliding(2).toList._forall { + case c1 :: c2 :: _ => + posLabelPairs._exists { case (l1, l2) => posClassifierLabelCompatible(c1, c2, l1, l2) } + } + // constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } + // new FirstOrderConstant(true) } - def posUnaryClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = { + def posClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = { (POSMixedClassifier on c1).is(l1) and (POSMixedClassifier on c2).is(l2) and (POSTaggerPairwise on (c1, c2)).is(l1 + POSTaggerSensors.labelSeparator + l2) From 5dd37912d3d4c8badd5b9fa1e4e5f11568d08183 Mon Sep 17 00:00:00 2001 From: khashab2 Date: Mon, 25 Apr 2016 16:44:54 -0500 Subject: [PATCH 6/8] merge with latest. --- .../cs/cogcomp/saul/classifier/ConstrainedClassifier.scala | 4 ++-- .../illinois/cs/cogcomp/saul/constraint/Constraint.scala | 6 +++--- .../cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala index 18d570aa..d6fac9da 100644 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/classifier/ConstrainedClassifier.scala @@ -115,7 +115,7 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo var inference = InferenceManager.get(name, head) if (inference == null) { inference = infer(head) - if (log) + if (logger) println("Inference is NULL " + name) InferenceManager.put(name, inference) } @@ -233,7 +233,7 @@ object ConstrainedClassifier { def constraint[HEAD <: AnyRef](f: HEAD => FirstOrderConstraint)(implicit headTag: ClassTag[HEAD]): LfsConstraint[HEAD] = { val hash = f.hashCode() ConstraintManager.getOrElseUpdate(hash, new LfsConstraint[HEAD] { - override def makeConstrainDef(x: HEAD): FirstOrderConstraint = f(x) + override def makeConstraintDef(x: HEAD): FirstOrderConstraint = f(x) }).asInstanceOf[LfsConstraint[HEAD]] } } \ No newline at end of file diff --git a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala index 625be5c0..4a8685d6 100755 --- a/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala +++ b/saul-core/src/main/scala/edu/illinois/cs/cogcomp/saul/constraint/Constraint.scala @@ -118,17 +118,17 @@ class LHSFirstOrderEqualityWithValueLBP(learner: Learner, t: AnyRef) { def isNotTrue: FirstOrderConstraint = is("false") def isNot(v: String): FirstOrderConstraint = { - new FirstOrderNegation(new FirstOrderEqualityWithValue(true, lbjRepr, v)) + new FirstOrderNegation(new FirstOrderEqualityWithValue(true, lbjVariable, v)) } def isNot(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = { - new FirstOrderNegation(new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr)) + new FirstOrderNegation(new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable)) } def in(v: Array[String]): FirstOrderConstraint = { val falseConstant = new FirstOrderDisjunction(new FirstOrderConstant(false), new FirstOrderConstant(false)) v.foldRight(falseConstant) { (value, newConstraint) => - new FirstOrderDisjunction(new FirstOrderEqualityWithValue(true, lbjRepr, value), newConstraint) + new FirstOrderDisjunction(new FirstOrderEqualityWithValue(true, lbjVariable, value), newConstraint) } } } \ No newline at end of file diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index 6715af86..27bb38b8 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -27,7 +27,6 @@ object POSClassifiers { POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue } - object POSMixedClassifier extends Learner { override def write(out: PrintStream): Unit = ??? From bf7fa4fe72742433270dd3385f8e15300154d2d7 Mon Sep 17 00:00:00 2001 From: khashab2 Date: Mon, 25 Apr 2016 16:52:30 -0500 Subject: [PATCH 7/8] get constituents of single sentence, not the whole textAnnotation. --- .../nlp/POSTagger/POSClassifiers.scala | 16 ++++++++-------- .../cogcomp/saulexamples/nlp/commonSensors.scala | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index 27bb38b8..dc885cbe 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -133,14 +133,14 @@ object POSClassifiers { } def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence => - val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation) - val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) - constituents.sliding(2).toList._forall { - case c1 :: c2 :: _ => - posLabelPairs._exists { case (l1, l2) => posClassifierLabelCompatible(c1, c2, l1, l2) } - } - // constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } - // new FirstOrderConstant(true) + // val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation, s.getSentenceId) + // val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y) + // constituents.sliding(2).toList._forall { + // case c1 :: c2 :: _ => + // posLabelPairs._exists { case (l1, l2) => posClassifierLabelCompatible(c1, c2, l1, l2) } + // } + //constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } } + new FirstOrderConstant(true) } def posClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = { diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala index e4405121..1b49301b 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/commonSensors.scala @@ -44,6 +44,10 @@ object CommonSensors { getConstituents(x, ViewNames.POS) } + def getPOSConstituents(x: TextAnnotation, sentenceId: Int): List[Constituent] = { + getConstituents(x, ViewNames.POS).filter(_.getSentenceId == sentenceId) + } + def getConstituents(x: TextAnnotation, view: String): List[Constituent] = { x.getView(view).getConstituents.toList } From 9356ecf4b7765a6b919457be000e55cf1828b7fe Mon Sep 17 00:00:00 2001 From: khashab2 Date: Mon, 25 Apr 2016 16:55:55 -0500 Subject: [PATCH 8/8] use ClassifierUtils --- .../nlp/POSTagger/POSClassifiers.scala | 29 ------------------- .../nlp/POSTagger/POSTaggerApps.scala | 5 +++- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala index dc885cbe..726830be 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSClassifiers.scala @@ -148,33 +148,4 @@ object POSClassifiers { (POSMixedClassifier on c2).is(l2) and (POSTaggerPairwise on (c1, c2)).is(l1 + POSTaggerSensors.labelSeparator + l2) } - - // Loads learned models from the "saul-pos-tagger-models" jar package - def loadModelsFromPackage(): Unit = { - val jarModelPath = "edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/models/" - - def loadModel(x: Learnable[Constituent]): Unit = { - val prefix = jarModelPath + x.getClassNameForClassifier - x.load(prefix + ".lc", prefix + ".lex") - } - - loadModel(BaselineClassifier) - loadModel(MikheevClassifier) - loadModel(POSTaggerKnown) - loadModel(POSTaggerUnknown) - } - - def loadSavedModels(): Unit = { - BaselineClassifier.load() - MikheevClassifier.load() - POSTaggerKnown.load() - POSTaggerUnknown.load() - } - - def saveModels(): Unit = { - BaselineClassifier.save() - MikheevClassifier.save() - POSTaggerKnown.save() - POSTaggerUnknown.save() - } } diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala index 59d82fde..b8ac1d5a 100644 --- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala +++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/POSTaggerApps.scala @@ -92,7 +92,10 @@ object POSTaggerApp { def LplusI(): Unit = { POSDataModel.tokens.populate(testData) BaselineClassifierPair.load() - POSClassifiers.loadModelsFromPackage() + ClassifierUtils.LoadClassifier( + POSConfigurator.jarModelPath, + BaselineClassifier, MikheevClassifier, POSTaggerKnown, POSTaggerUnknown + ) println(POSConstrainedClassifier.classifier.discreteValue((testData.head, POSTaggerSensors.getConstituentAfter(testData.head)))) // println(POSDataModel.getFromRelation[(Constituent, Constituent), Sentence]((testData.head, POSTaggerSensors.getConstituentAfter(testData.head))))