From 82b796b6fc0dd3b08c8c9b5720854c00a10d9ad6 Mon Sep 17 00:00:00 2001
From: khashab2
Date: Mon, 8 Aug 2016 01:31:12 -0700
Subject: [PATCH] getting rid of window based properties in entity relations example

---
 .../EntityRelationClassifiers.scala           | 14 +++++-----
 .../EntityRelationDataModel.scala             | 28 +++++++++----------
 .../EntityRelationSensors.scala               | 12 ++++++++
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationClassifiers.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationClassifiers.scala
index 72c535b7..feba7cf9 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationClassifiers.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationClassifiers.scala
@@ -17,14 +17,14 @@ object EntityRelationClassifiers {
   object OrganizationClassifier extends Learnable(tokens) {
     def label: Property[ConllRawToken] = entityType is "Org"
     override lazy val classifier = new SparsePerceptron()
-    override def feature = using(word, windowWithin[ConllRawSentence](EntityRelationDataModel, -2, 2, List(pos)), phrase,
+    override def feature = using(word, tokensWithinWindowPos, phrase,
       containsSubPhraseMent, containsSubPhraseIng, wordLen)
     // The gazetteer properties are temporarily removed: containsInPersonList, containsInCityList
   }
 
   object PersonClassifier extends Learnable(tokens) {
     def label: Property[ConllRawToken] = entityType is "Peop"
-    override def feature = using(word, windowWithin[ConllRawSentence](EntityRelationDataModel, -2, 2, List(pos)), phrase,
+    override def feature = using(word, tokensWithinWindowPos, phrase,
       containsSubPhraseMent, containsSubPhraseIng, wordLen)
     override lazy val classifier = new SparsePerceptron()
     // The gazetteer properties are temporarily removed: containsInPersonList, containsInCityList
@@ -32,7 +32,7 @@ object EntityRelationClassifiers {
 
   object LocationClassifier extends Learnable(tokens) {
     def label: Property[ConllRawToken] = entityType is "Loc"
-    override def feature = using(word, windowWithin[ConllRawSentence](EntityRelationDataModel, -2, 2, List(pos)), phrase, containsSubPhraseMent,
+    override def feature = using(word, tokensWithinWindowPos, phrase, containsSubPhraseMent,
       containsSubPhraseIng, wordLen)
     override lazy val classifier = new SparsePerceptron()
     // The gazetteer properties are temporarily removed: containsInPersonList, containsInCityList
@@ -41,13 +41,13 @@ object EntityRelationClassifiers {
   /** independent relation classifiers */
   object WorksForClassifier extends Learnable(pairs) {
     def label: Property[ConllRelation] = relationType is "Work_For"
-    override def feature = using(relFeature, relPos)
+    override def feature = using(relFeature, tokensWithinWindowRelPos)
     override lazy val classifier = new SparsePerceptron()
   }
 
   object LivesInClassifier extends Learnable(pairs) {
     def label: Property[ConllRelation] = relationType is "Live_In"
-    override def feature = using(relFeature, relPos)
+    override def feature = using(relFeature, tokensWithinWindowRelPos)
     override lazy val classifier = new SparsePerceptron()
   }
 
@@ -64,13 +64,13 @@ object EntityRelationClassifiers {
   /** relation pipeline classifiers */
   object WorksForClassifierPipeline extends Learnable(pairs) {
     override def label: Property[ConllRelation] = relationType is "Work_For"
-    override def feature = using(relFeature, relPos, entityPrediction)
+    override def feature = using(relFeature, tokensWithinWindowRelPos, entityPrediction)
     override lazy val classifier = new SparsePerceptron()
   }
 
   object LivesInClassifierPipeline extends Learnable(pairs) {
     override def label: Property[ConllRelation] = relationType is "Live_In"
-    override def feature = using(relFeature, relPos, entityPrediction)
+    override def feature = using(relFeature, tokensWithinWindowRelPos, entityPrediction)
     override lazy val classifier = new SparsePerceptron()
   }
 }
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationDataModel.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationDataModel.scala
index e58005db..cf6fe8af 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationDataModel.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationDataModel.scala
@@ -8,11 +8,10 @@ package edu.illinois.cs.cogcomp.saulexamples.nlp.EntityRelation
 
 import edu.illinois.cs.cogcomp.saul.datamodel.DataModel
 import edu.illinois.cs.cogcomp.saulexamples.EntityMentionRelation.datastruct.{ ConllRawSentence, ConllRawToken, ConllRelation }
-import edu.illinois.cs.cogcomp.saulexamples.EntityMentionRelation.reader.Conll04_Reader
 import edu.illinois.cs.cogcomp.saulexamples.nlp.EntityRelation.EntityRelationClassifiers._
 import edu.illinois.cs.cogcomp.saulexamples.nlp.EntityRelation.EntityRelationSensors._
 
-import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
 
 object EntityRelationDataModel extends DataModel {
 
@@ -82,19 +81,20 @@ object EntityRelationDataModel extends DataModel {
       Nil
   }
 
-  val relPos = property(pairs) {
-    rela: ConllRelation =>
-      val e1 = rela.e1
-      val e2 = rela.e2
+  val tokensWithinWindowPos = property(tokens) { t: ConllRawToken =>
+    val allSentenceTokens = t.s.sentTokens.asScala
+    val indexOpt = allSentenceTokens.zipWithIndex.collectFirst { case (item, idx) if item == t => idx }
+    require(indexOpt.isDefined, "ERROR: the token not found in the sentence!")
+    val minInd = Math.max(indexOpt.get - 2, 0)
+    val maxInd = Math.min(indexOpt.get + 2, allSentenceTokens.length - 1)
+    val tokensInWindow = allSentenceTokens.slice(minInd, maxInd + 1) // maxInd is inclusive, slice's end bound is exclusive
+    tokensInWindow.map(pos(_)).toList
+  }
 
-      this.tokens.getWithWindow(e1, -2, 2, _.sentId).zipWithIndex.map {
-        case (Some(t), idx) => s"left-$idx-pos-${t.POS} "
-        case (None, idx) => s"left-$idx-pos-EMPTY "
-      } ++
-        this.tokens.getWithWindow(e2, -2, 2, _.sentId).zipWithIndex.map {
-          case (Some(t), idx) => s"right-$idx-pos-${t.POS} "
-          case (None, idx) => s"right-$idx-pos-EMPTY} "
-        }
+  val tokensWithinWindowRelPos = property(pairs) { rela: ConllRelation =>
+    val e1 = rela.e1
+    val e2 = rela.e2
+    tokensWithinWindowPos(e1) ++ tokensWithinWindowPos(e2)
   }
 
   val entityPrediction = property[ConllRelation](pairs) {
diff --git a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationSensors.scala b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationSensors.scala
index c79ca4d9..834cbf69 100644
--- a/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationSensors.scala
+++ b/saul-examples/src/main/scala/edu/illinois/cs/cogcomp/saulexamples/nlp/EntityRelation/EntityRelationSensors.scala
@@ -51,4 +51,16 @@ object EntityRelationSensors {
   def relationToSecondArg_MatchingSensor(r: ConllRelation, t: ConllRawToken): Boolean = {
     r.sentId.equals(t.sentId) && r.e2.wordId == t.wordId
   }
+
+  def getTokenBefore(t: ConllRawToken): ConllRawToken = {
+    val indexOpt = t.s.sentTokens.asScala.zipWithIndex.collectFirst { case (item, idx) if item == t => idx }
+    require(indexOpt.isDefined, "ERROR: the token not found in the sentence!")
+    t.s.sentTokens.get(Math.max(indexOpt.get - 1, 0)) // clamp at the first token; min(.., 0) would yield -1 or always 0
+  }
+
+  def getTokenAfter(t: ConllRawToken): ConllRawToken = {
+    val indexOpt = t.s.sentTokens.asScala.zipWithIndex.collectFirst { case (item, idx) if item == t => idx }
+    require(indexOpt.isDefined, "ERROR: the token not found in the sentence!")
+    t.s.sentTokens.get(Math.min(indexOpt.get + 1, t.s.sentTokens.size() - 1))
+  }
 }