From 593147f5bb394261f3b3ecb00ce205804b242bab Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 8 Mar 2023 23:10:42 -0700 Subject: [PATCH 1/3] Test hash code --- .../reach/assembly/TestAssemblyManager.scala | 8 ++++ .../org/clulab/reach/assembly/TestHash.scala | 30 +++++++++++++ .../clulab/reach/grounding/InMemoryKB.scala | 2 +- .../clulab/reach/utils/MentionManager.scala | 2 +- .../scala/org/clulab/reach/TestHash.scala | 45 +++++++++++++++++++ 5 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 assembly/src/test/scala/org/clulab/reach/assembly/TestHash.scala create mode 100644 main/src/test/scala/org/clulab/reach/TestHash.scala diff --git a/assembly/src/test/scala/org/clulab/reach/assembly/TestAssemblyManager.scala b/assembly/src/test/scala/org/clulab/reach/assembly/TestAssemblyManager.scala index 47b9e9b36..68de4dfd7 100644 --- a/assembly/src/test/scala/org/clulab/reach/assembly/TestAssemblyManager.scala +++ b/assembly/src/test/scala/org/clulab/reach/assembly/TestAssemblyManager.scala @@ -39,6 +39,8 @@ class TestAssemblyManager extends FlatSpec with Matchers { val se = am.getSimpleEntity(ras) se.getPTMs should have size(0) + se.equivalenceHash(ignoreMods = false) should be (-1879068248) // SimpleEntity + se.equivalenceHash(ignoreMods = true) should be (-11559465) // SimpleEntity } it should "have 3 mentions as evidence" in { @@ -87,6 +89,8 @@ class TestAssemblyManager extends FlatSpec with Matchers { val phos = am.getSimpleEvent(p) phos.evidence should have size(1) + phos.equivalenceHash(ignoreMods = true) should be (-90854158) // SimpleEvent + phos.equivalenceHash(ignoreMods = false) should be (-24500179) // SimpleEvent phos.output should have size(1) @@ -157,6 +161,8 @@ class TestAssemblyManager extends FlatSpec with Matchers { val (c1, c2) = (complexes.head, complexes.last) c1.isEquivalentTo(c2, ignoreMods = false) should be(true) c1.isEquivalentTo(c2, ignoreMods = true) should be(true) + c1.equivalenceHash(ignoreMods = false) should be (-1357391490) // Complex + c1.equivalenceHash(ignoreMods = true) should be (686730731) // Complex } it should "have 2 mentions as evidence" in { @@ -226,6 +232,8 @@ class TestAssemblyManager extends FlatSpec with Matchers { am.getRegulations should have size (1) am.distinctRegulations should have size (1) + am.getRegulations.head.equivalenceHash(ignoreMods = false) should be (832895248) // ComplexEvent + am.getRegulations.head.equivalenceHash(ignoreMods = true) should be (738272956) // ComplexEvent } val regText2 = "Akt inhibits the phosphorylation of AFT by BEF." diff --git a/assembly/src/test/scala/org/clulab/reach/assembly/TestHash.scala b/assembly/src/test/scala/org/clulab/reach/assembly/TestHash.scala new file mode 100644 index 000000000..09b0277a4 --- /dev/null +++ b/assembly/src/test/scala/org/clulab/reach/assembly/TestHash.scala @@ -0,0 +1,30 @@ +package org.clulab.reach.assembly + +import org.clulab.reach.PaperReader +import org.clulab.reach.assembly.relations.corpus.EventPair +import org.clulab.reach.mentions.CorefMention +import org.clulab.reach.utils.MentionManager +import org.scalatest.{FlatSpec, Matchers} + +class TestHash extends FlatSpec with Matchers { + val mentionManager = new MentionManager() + val testReach = PaperReader.reachSystem + val text = "Tbet Rag2 mice (Garrett et al., 2010) as well as Bacteroides spp. (Bloom et al., 2011), Helicobacter spp. (Fox et al., 2011), and Bilophila wadsworthia (Devkota et al., 2012) in Il10 have been shown to enhance intestinal inflammation.The acute dextran sulfate sodium" + val allMentions = testReach.extractFrom(text, "serialization-test", "1", None) + val sortedMentions = allMentions.sortBy { mention => (mention.startOffset, mention.endOffset) } + + val assemblyManager = AssemblyManager() + + behavior of "Hash" + + it should "compute the expected value for an EventPair" in { + val expectedHash = 316669350 + val corefMentions = sortedMentions.collect { case corefMention: CorefMention => corefMention } + val e1 = corefMentions.head + val e2 = corefMentions.last + val eventPair = new EventPair(e1, e2, "relation", 42d, "annotatorID", None) + val actualHash = eventPair.equivalenceHash + + actualHash should be (expectedHash) + } +} diff --git a/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala b/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala index 8be824e79..845f8af2a 100644 --- a/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala +++ b/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala @@ -211,7 +211,7 @@ object InMemoryKB { * Written by: Tom Hicks. 10/25/2015. * Last Modified: Limit the scope of the KBEntry class by embedding it in IMKB. */ - private class KBEntry ( + class KBEntry ( /** Text for this entry, loaded from the external KB. */ val text: String, diff --git a/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala b/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala index 8db590b9d..907520ad6 100644 --- a/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala +++ b/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala @@ -82,7 +82,7 @@ class MentionManager { // Private Methods // - private def computeHash (mention:Mention): Int = { + def computeHash(mention: Mention): Int = { // val hash = computeHash(mention, symmetricSeed) // return finalize(hash) computeHash(mention, symmetricSeed) diff --git a/main/src/test/scala/org/clulab/reach/TestHash.scala b/main/src/test/scala/org/clulab/reach/TestHash.scala new file mode 100644 index 000000000..72e548e15 --- /dev/null +++ b/main/src/test/scala/org/clulab/reach/TestHash.scala @@ -0,0 +1,45 @@ +package org.clulab.reach + +import org.clulab.reach.grounding.InMemoryKB.KBEntry +import org.clulab.reach.grounding.{InMemoryKB, KBResolution} +import org.clulab.reach.utils.MentionManager +import org.scalatest.{FlatSpec, Matchers} + +class TestHash extends FlatSpec with Matchers { + val mentionManager = new MentionManager() + val testReach = PaperReader.reachSystem + val text1 = "Mek was not phosphorylized by AKT1" + val text2 = "Mouse AKT2 phosphorylates PTHR2 in chicken adenoid." + val text3 = "Tbet Rag2 mice (Garrett et al., 2010) as well as Bacteroides spp. (Bloom et al., 2011), Helicobacter spp. (Fox et al., 2011), and Bilophila wadsworthia (Devkota et al., 2012) in Il10 have been shown to enhance intestinal inflammation.The acute dextran sulfate sodium" + val allTexts = Seq(text1, text2, text3) + val sortedMentions = allTexts.flatMap { text => + val mentions = testReach.extractFrom(text, "serialization-test", "1", None) + + mentions.sortBy { mention => (mention.startOffset, mention.endOffset) } + } + + behavior of "Hash" + + it should "compute the expected value for a Mention" in { + val expectedHashes = Array(27986141, -396507223, 1590560579, -1891512069, -914654348, 408527033, -1487373899, -2017652764, 1558808406, 2017209834, -1279750485, -37832763, 200095485, 2020390684, 1876313014, 795103862, 220919393) + val actualHashes = sortedMentions.map(mentionManager.computeHash) + + actualHashes should be (expectedHashes) + } + + it should "compute the expected value for a KBResolution" in { + val expectedHash = 1782466108 + val kbResolution = new KBResolution("text", "namespace", "id", "species") + val actualHash = kbResolution.hashCode + + actualHash should be (expectedHash) + } + + it should "compute the expected value for a KBEntry" in { + val expectedHash = 578280303 + val kbEntry = new KBEntry("text", "namespace", "id", "species") + val actualHash = kbEntry.hashCode + + actualHash should be (expectedHash) + } +} From c82871bf44cc2179dcef4506337fee542045f4f9 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 8 Mar 2023 23:32:23 -0700 Subject: [PATCH 2/3] Modify hash code --- .../assembly/relations/corpus/Corpus.scala | 21 +++++----- .../assembly/representations/Complex.scala | 25 +++++------- .../representations/ComplexEvent.scala | 39 +++++++++--------- .../representations/SimpleEntity.scala | 34 ++++++---------- .../representations/SimpleEvent.scala | 40 +++++++++---------- .../clulab/reach/grounding/InMemoryKB.scala | 19 +++++---- .../clulab/reach/grounding/KBResolution.scala | 23 ++++++----- .../clulab/reach/utils/MentionManager.scala | 29 +++++++++----- processors/build.sbt | 2 +- .../bionlp/ner/ReachStandardKbSource.scala | 3 +- 10 files changed, 110 insertions(+), 125 deletions(-) diff --git a/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/Corpus.scala b/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/Corpus.scala index 79c90473c..2ee22211f 100644 --- a/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/Corpus.scala +++ b/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/Corpus.scala @@ -9,7 +9,7 @@ import org.clulab.serialization.json.JSONSerialization import org.json4s.jackson.JsonMethods._ import org.json4s.JsonDSL._ import org.json4s._ -import scala.util.hashing.MurmurHash3._ +import org.clulab.utils.Hash import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FileUtils.forceMkdir import ai.lum.common.FileUtils._ @@ -40,20 +40,17 @@ case class EventPair( def isCrossSentence = sentenceIndices.length > 1 /** Create a unique hash to identify this training instance */ - def equivalenceHash: Int = { - // the seed (not counted in the length of finalizeHash) - val h0 = stringHash("org.clulab.assembly.TrainingInstance") + def equivalenceHash: Int = Hash.withLast( + Hash("org.clulab.assembly.TrainingInstance"), // get hashes for each event - val h1 = mix(h0, e1.equivalenceHash) - val h2 = mix(h1, e2.equivalenceHash) - // is it cross-sentence? - val h3 = mix(h2, isCrossSentence.hashCode) + e1.equivalenceHash, + e2.equivalenceHash, + isCrossSentence.hashCode, // the text of the sentences containing the two event mentions - val h4 = mix(h3, text.hashCode) + text.hashCode, // what paper did this come from? - val h5 = mixLast(h4, pmid.hashCode) - finalizeHash(h5, 5) - } + pmid.hashCode + ) def copy( before: CorefMention = this.e1, diff --git a/assembly/src/main/scala/org/clulab/reach/assembly/representations/Complex.scala b/assembly/src/main/scala/org/clulab/reach/assembly/representations/Complex.scala index dc713b32d..98608c055 100644 --- a/assembly/src/main/scala/org/clulab/reach/assembly/representations/Complex.scala +++ b/assembly/src/main/scala/org/clulab/reach/assembly/representations/Complex.scala @@ -3,7 +3,7 @@ package org.clulab.reach.assembly.representations import org.clulab.odin.Mention import org.clulab.reach.assembly.AssemblyManager import org.clulab.reach.assembly._ -import scala.util.hashing.MurmurHash3._ +import org.clulab.utils.Hash /** @@ -58,10 +58,12 @@ class Complex( * @return an Int hash based on the [[Entity.equivalenceHash]] of each member */ def membersHash(ignoreMods: Boolean): Int = { - val h0 = stringHash(s"$eerString.members") val hs = members.map(_.equivalenceHash(ignoreMods)) - val h = mixLast(h0, unorderedHash(hs)) - finalizeHash(h, members.size) + + Hash.withLast(members.size)( + Hash(s"$eerString.members"), + Hash.unordered(hs) + ) } /** @@ -69,16 +71,11 @@ class Complex( * @param ignoreMods whether or not to ignore modifications when calculating the equivalenceHash * @return a hash (Int) based primarily on the [[membersHash]] */ - def equivalenceHash(ignoreMods: Boolean): Int = { - // the seed (not counted in the length of finalizeHash) - // decided to use the class name - val h0 = stringHash(eerString) - // comprised of the equiv. hash of members - val h1 = mix(h0, membersHash(ignoreMods)) - // whether or not the representation is negated - val h2 = mixLast(h1, negated.hashCode) - finalizeHash(h2, 2) - } + def equivalenceHash(ignoreMods: Boolean): Int = Hash.withLast( + Hash(eerString), + membersHash(ignoreMods), + negated.hashCode + ) /** * Used to compare against another [[Complex]].
diff --git a/assembly/src/main/scala/org/clulab/reach/assembly/representations/ComplexEvent.scala b/assembly/src/main/scala/org/clulab/reach/assembly/representations/ComplexEvent.scala index d83d7ac4b..b485cb60c 100644 --- a/assembly/src/main/scala/org/clulab/reach/assembly/representations/ComplexEvent.scala +++ b/assembly/src/main/scala/org/clulab/reach/assembly/representations/ComplexEvent.scala @@ -2,8 +2,8 @@ package org.clulab.reach.assembly.representations import org.clulab.reach.assembly._ import org.clulab.reach.assembly.AssemblyManager +import org.clulab.utils.Hash import org.clulab.odin.Mention -import scala.util.hashing.MurmurHash3._ /** @@ -54,10 +54,12 @@ trait ComplexEvent extends Event { * @return an Int hash based on the [[EntityEventRepresentation.equivalenceHash]] of each element in the [[controller]] */ def controllerHash(ignoreMods: Boolean): Int = { - val h0 = stringHash(s"$eerString.controller") val hs = controller.map(_.equivalenceHash(ignoreMods)) - val h = mixLast(h0, unorderedHash(hs)) - finalizeHash(h, controller.size) + + Hash.withLast(controller.size)( + Hash(s"$eerString.controller"), + Hash.unordered(hs) + ) } /** @@ -67,10 +69,12 @@ trait ComplexEvent extends Event { * @return an Int hash based on the [[EntityEventRepresentation.equivalenceHash]] of each element in the [[controlled]] */ def controlledHash(ignoreMods: Boolean): Int = { - val h0 = stringHash(s"$eerString.controlled") val hs = controlled.map(_.equivalenceHash(ignoreMods)) - val h = mixLast(h0, unorderedHash(hs)) - finalizeHash(h, controlled.size) + + Hash.withLast(controlled.size)( + Hash(s"$eerString.controlled"), + Hash.unordered(hs) + ) } /** @@ -78,20 +82,13 @@ trait ComplexEvent extends Event { * @param ignoreMods whether or not to ignore modifications when calculating the controlledHash * @return an Int hash based on the [[polarity]], [[controllerHash]], [[controlledHash]], and [[negated.hashCode]] */ - def equivalenceHash(ignoreMods: Boolean): Int = { - // the seed (not counted in the length of finalizeHash) - // decided to use the class name - val h0 = stringHash(eerString) - // the polarity of the Regulation - val h1 = mix(h0, stringHash(polarity)) - // controller - val h2 = mix(h1, controllerHash(ignoreMods)) - // controlled - val h3 = mix(h2, controlledHash(ignoreMods)) - // whether or not the representation is negated - val h4 = mixLast(h3, negated.hashCode) - finalizeHash(h4, 4) - } + def equivalenceHash(ignoreMods: Boolean): Int = Hash.withLast( + Hash(eerString), + Hash(polarity), + controllerHash(ignoreMods), + controlledHash(ignoreMods), + negated.hashCode + ) /** * Used to compare against another [[ComplexEvent]].
diff --git a/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEntity.scala b/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEntity.scala index d3a9418de..af7c224a4 100644 --- a/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEntity.scala +++ b/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEntity.scala @@ -3,7 +3,7 @@ package org.clulab.reach.assembly.representations import org.clulab.reach.assembly.AssemblyManager import org.clulab.reach.assembly._ import org.clulab.odin.Mention -import scala.util.hashing.MurmurHash3._ +import org.clulab.utils.Hash /** @@ -71,24 +71,12 @@ class SimpleEntity( * @return a hash (Int) based primarily on the [[grounding]] and [[modsHash]] */ def equivalenceHash(ignoreMods: Boolean): Int = { - // the seed (not counted in the length of finalizeHash) - // decided to use the class name - val h0 = stringHash(eerString) - // a representation of the ID - val h1 = mix(h0, grounding.hashCode) - ignoreMods match { - // include the modifications - case false => - // a representation of the set of modifications - val h2 = mix(h1, modsHash) - // whether or not the representation is negated - val h3 = mixLast(h2, negated.hashCode) - finalizeHash(h3, 3) - // ignore the mods - case true => - val h2 = mixLast(h1, negated.hashCode) - finalizeHash(h2, 2) - } + val h0 = Hash(eerString) + val h1 = grounding.hashCode // a representation of the ID + val h3 = negated.hashCode + + if (ignoreMods) Hash.withLast(h0, h1, h3) + else Hash.withLast(h0, h1, modsHash, h3) } /** @@ -98,10 +86,12 @@ class SimpleEntity( * @return an Int hash based on the hashcodes of the modifications */ def modsHash: Int = { - val h0 = stringHash(s"$eerString.modifications") val hs = modifications.map(_.hashCode) - val h = mixLast(h0, unorderedHash(hs)) - finalizeHash(h, modifications.size) + + Hash.withLast(modifications.size)( + Hash(s"$eerString.modifications"), + Hash.unordered(hs) + ) } /** diff --git a/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEvent.scala b/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEvent.scala index 3958a941e..28d86e1b0 100644 --- a/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEvent.scala +++ b/assembly/src/main/scala/org/clulab/reach/assembly/representations/SimpleEvent.scala @@ -3,9 +3,8 @@ package org.clulab.reach.assembly.representations import org.clulab.reach.assembly.AssemblyManager import org.clulab.reach.assembly._ import org.clulab.odin.Mention +import org.clulab.utils.Hash import scala.collection.Map -import scala.util.hashing.MurmurHash3._ - /** * Representation for any Mention with the label SimpleEvent. Note that a Binding is represented using a [[Complex]]. @@ -65,10 +64,12 @@ class SimpleEvent( * @return an Int hash based on hashes of the keys in the [[input]] and the [[Entity.equivalenceHash]] of each element contained in the corresponding value in the [[input]] */ def inputHash(ignoreMods: Boolean): Int = { - val h0 = stringHash(s"$eerString.input") val hs = output.map(_.equivalenceHash(ignoreMods)) - val h = mixLast(h0, unorderedHash(hs)) - finalizeHash(h, input.size) + + Hash.withLast(input.size)( + Hash(s"$eerString.input"), + Hash.unordered(hs) + ) } /** @@ -78,10 +79,12 @@ class SimpleEvent( * @return an Int hash based on the [[Entity.equivalenceHash]] of each element in the [[output]] */ def outputHash(ignoreMods: Boolean): Int = { - val h0 = stringHash(s"$eerString.output") val hs = output.map(_.equivalenceHash(ignoreMods)) - val h = mixLast(h0, unorderedHash(hs)) - finalizeHash(h, output.size) + + Hash.withLast(output.size)( + Hash(s"$eerString.output"), + Hash.unordered(hs) + ) } /** @@ -89,20 +92,13 @@ class SimpleEvent( * @param ignoreMods whether or not to ignore modifications when calculating the equivalenceHash * @return an Int hash based primarily on the [[label]], [[inputHash]], and [[outputHash]] */ - def equivalenceHash(ignoreMods: Boolean): Int = { - // the seed (not counted in the length of finalizeHash) - // decided to use the class name - val h0 = stringHash(eerString) - // the label of the SimpleEvent - val h1 = mix(h0, label.hashCode) - // the input of the SimpleEvent - val h2 = mix(h1, inputHash(ignoreMods)) - // the output of the SimpleEvent - val h3 = mix(h2, outputHash(ignoreMods)) - // whether or not the representation is negated - val h4 = mixLast(h3, negated.hashCode) - finalizeHash(h4, 4) - } + def equivalenceHash(ignoreMods: Boolean): Int = Hash.withLast( + Hash(eerString), + label.hashCode, + inputHash(ignoreMods), + outputHash(ignoreMods), + negated.hashCode + ) /** * Used to compare against another [[SimpleEvent]].
diff --git a/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala b/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala index 845f8af2a..7fbe7a6d1 100644 --- a/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala +++ b/main/src/main/scala/org/clulab/reach/grounding/InMemoryKB.scala @@ -1,15 +1,15 @@ package org.clulab.reach.grounding import scala.Serializable -import scala.util.hashing.MurmurHash3._ import collection.mutable.{ HashMap, HashSet, Map, MultiMap, Set } +import org.clulab.reach.grounding.InMemoryKB._ import org.clulab.reach.grounding.ReachKBConstants._ import org.clulab.reach.grounding.ReachKBKeyTransforms._ import org.clulab.reach.grounding.ReachKBUtils._ import org.clulab.reach.grounding.Speciated._ +import org.clulab.utils.Hash -import org.clulab.reach.grounding.InMemoryKB._ /** * Class implementing an in-memory knowledge base indexed by key and species. @@ -243,14 +243,13 @@ object InMemoryKB { } /** Redefine hashCode. */ - override def hashCode: Int = { - val h0 = stringHash("org.clulab.reach.grounding.KBEntry") - val h1 = mix(h0, text.toLowerCase.hashCode) - val h2 = mix(h1, namespace.hashCode) - val h3 = mix(h2, id.hashCode) - val h4 = mixLast(h3, species.hashCode) - finalizeHash(h4, 4) - } + override def hashCode: Int = Hash.withLast( + Hash("org.clulab.reach.grounding.KBEntry"), + text.toLowerCase.hashCode, + namespace.hashCode, + id.hashCode, + species.hashCode + ) /** Tell whether this entry has an associated species or not. */ def hasSpecies: Boolean = (species != NoSpeciesValue) diff --git a/main/src/main/scala/org/clulab/reach/grounding/KBResolution.scala b/main/src/main/scala/org/clulab/reach/grounding/KBResolution.scala index 8641a2c0f..c6fd3a57f 100644 --- a/main/src/main/scala/org/clulab/reach/grounding/KBResolution.scala +++ b/main/src/main/scala/org/clulab/reach/grounding/KBResolution.scala @@ -1,10 +1,12 @@ package org.clulab.reach.grounding -import scala.Serializable -import scala.util.hashing.MurmurHash3._ - +import org.clulab.utils.Hash +import org.clulab.reach.context.BoundedPaddingContext.species import org.clulab.reach.grounding.ReachKBConstants._ import org.clulab.reach.grounding.Speciated._ +import org.clulab.utils.Hash + +import scala.Serializable /** * Class holding information about a specific resolution from the in-memory Knowledge Base. @@ -46,14 +48,13 @@ class KBResolution ( } /** Redefine hashCode. */ - override def hashCode: Int = { - val h0 = stringHash("org.clulab.reach.grounding.KBResolution") - val h1 = mix(h0, text.toLowerCase.hashCode) - val h2 = mix(h1, namespace.hashCode) - val h3 = mix(h2, id.hashCode) - val h4 = mixLast(h3, species.hashCode) - finalizeHash(h4, 4) - } + override def hashCode: Int = Hash.withLast( + Hash("org.clulab.reach.grounding.KBResolution"), + text.toLowerCase.hashCode, + namespace.hashCode, + id.hashCode, + species.hashCode + ) /** Tell whether this entry has an associated species or not. */ def hasSpecies: Boolean = (species != NoSpeciesValue) diff --git a/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala b/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala index 907520ad6..da49a8744 100644 --- a/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala +++ b/main/src/main/scala/org/clulab/reach/utils/MentionManager.scala @@ -2,11 +2,11 @@ package org.clulab.reach.utils import java.io._ import scala.collection.mutable.MutableList -import scala.util.hashing.MurmurHash3._ import org.clulab.odin._ import org.clulab.processors.Document import org.clulab.reach.context._ import org.clulab.reach.mentions._ +import org.clulab.utils.Hash /** @@ -85,26 +85,33 @@ class MentionManager { def computeHash(mention: Mention): Int = { // val hash = computeHash(mention, symmetricSeed) // return finalize(hash) - computeHash(mention, symmetricSeed) + computeHash(mention, Hash.symmetricSeed) } - private def computeHash (mention:Mention, hash:Int): Int = { + private def computeHash(mention: Mention, hash: Int): Int = { mention match { case mention: TextBoundMention => - mix(hash, stringHash("TEXT" + mention.label + mention.text)) + val h1 = Hash.mix(hash, Hash("TEXT" + mention.label + mention.text)) + h1 case mention: EventMention => - val h1 = mix(hash, stringHash("EVENT" + mention.label)) - mix(h1, unorderedHash(mention.arguments.filterNot(ignoreArg).map(computeHash(_,0)))) + val h1 = Hash.mix(hash, Hash("EVENT" + mention.label)) + // TODO: This appears to leave out the argument keys. See also Mention.argsHash(). + val h2 = mention.arguments.filterNot(ignoreArg).map(computeHash(_, 0)) + Hash.mix(h1, Hash.unordered(h2)) case mention: RelationMention => - val h1 = mix(hash, stringHash("EVENT" + mention.label)) - mix(h1, unorderedHash(mention.arguments.filterNot(ignoreArg).map(computeHash(_,0)))) + val h1 = Hash.mix(hash, Hash("EVENT" + mention.label)) + // TODO: This appears to leave out the argument keys. See also Mention.argsHash(). + val h2 = mention.arguments.filterNot(ignoreArg).map(computeHash(_, 0)) + Hash.mix(h1, Hash.unordered(h2)) case _ => 0 } } - private def computeHash (entry:Tuple2[String,Seq[Mention]], hash:Int): Int = { - mix(mix(hash, stringHash(entry._1)), // add argument name (key) to hash - orderedHash(entry._2.map(computeHash(_,0)))) // recursively add mentions of this argument + private def computeHash(entry: (String, Seq[Mention]), hash: Int): Int = { + Hash.mix( + Hash.mix(hash, Hash(entry._1)), // add argument name (key) to hash + Hash.ordered(entry._2.map(computeHash(_, 0))) // recursively add mentions of this argument + ) } /** Filter to decide which mention arguments to ignore. */ diff --git a/processors/build.sbt b/processors/build.sbt index d7f8f8e0d..80d04c258 100644 --- a/processors/build.sbt +++ b/processors/build.sbt @@ -5,7 +5,7 @@ resolvers += "clulab" at "https://artifactory.clulab.org/artifactory/sbt-release libraryDependencies ++= { - val procVer = "8.5.3" + val procVer = "8.5.4-SNAPSHOT" Seq( "com.typesafe" % "config" % "1.3.1", diff --git a/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala b/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala index faea5d422..03bcce1de 100644 --- a/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala +++ b/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala @@ -6,6 +6,7 @@ import org.clulab.processors.bionlp.ner.KBGenerator.logger import org.clulab.processors.bionlp.ner.KBGenerator.tokenizeResourceLine import org.clulab.processors.clu.tokenizer.Tokenizer import org.clulab.sequences.StandardKbSource +import org.clulab.utils.Closer.AutoCloser import org.clulab.utils.Files import org.clulab.utils.Serializer @@ -54,7 +55,7 @@ class ReachSingleStandardKbSource(kbEntry: KBEntry, caseInsensitiveMatching: Boo ) ) - Serializer.using(bufferedReader) { bufferedReader => + bufferedReader.autoClose { bufferedReader => bufferedReader.lines.forEach(consumer) } logger.info(s"Done. Read ${consumer.lineCount} lines from ${new File(kbEntry.path).getName}") From 46f81e21591d70e1ee419b92af07110896d4957c Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Thu, 20 Jul 2023 10:25:13 -0700 Subject: [PATCH 3/3] Prepare for processors update autoclose, java converters, sbt version --- .../relations/corpus/CorpusBuilder.scala | 19 ++++++-------- .../ml/DeepLearningPolarityClassifier.scala | 26 ++++++++++--------- .../bionlp/ner/ReachStandardKbSource.scala | 7 +++-- project/build.properties | 2 +- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/CorpusBuilder.scala b/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/CorpusBuilder.scala index 1e687632a..2515f0fac 100644 --- a/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/CorpusBuilder.scala +++ b/assembly/src/main/scala/org/clulab/reach/assembly/relations/corpus/CorpusBuilder.scala @@ -1,21 +1,18 @@ package org.clulab.reach.assembly.relations.corpus -import org.clulab.reach.mentions._ -import org.clulab.reach.assembly.AssemblyManager -import org.clulab.reach.assembly.sieves.Constraints -import org.clulab.odin._ import ai.lum.common.ConfigUtils._ -import ai.lum.common.RandomUtils._ -import ai.lum.common.FileUtils._ - -import collection.JavaConversions._ import com.typesafe.config.ConfigFactory import com.typesafe.scalalogging.LazyLogging - -import java.io.File +import org.clulab.odin._ +import org.clulab.reach.assembly.AssemblyManager +import org.clulab.reach.assembly.sieves.Constraints +import org.clulab.reach.mentions._ import org.clulab.reach.mentions.serialization.json.{JSONSerializer => ReachJSONSerializer} import org.clulab.utils.ThreadUtils +import java.io.File +import scala.jdk.CollectionConverters._ + /** * RELATION CORPUS REQUIREMENTS: * find events occurring in the same or neighboring sentences @@ -31,7 +28,7 @@ object CorpusBuilder extends LazyLogging { val config = ConfigFactory.load() val kWindow = config.getInt("assembly.windowSize") - val validLabels: Set[String] = config.getStringList("assembly.corpus.validLabels").toSet + val validLabels: Set[String] = config.getStringList("assembly.corpus.validLabels").asScala.toSet /** * Find mentions in sentences of interest.
diff --git a/main/src/main/scala/org/clulab/polarity/ml/DeepLearningPolarityClassifier.scala b/main/src/main/scala/org/clulab/polarity/ml/DeepLearningPolarityClassifier.scala index b303b0c15..332b30680 100644 --- a/main/src/main/scala/org/clulab/polarity/ml/DeepLearningPolarityClassifier.scala +++ b/main/src/main/scala/org/clulab/polarity/ml/DeepLearningPolarityClassifier.scala @@ -1,32 +1,34 @@ package org.clulab.polarity.ml -import java.io -import java.io.FileNotFoundException -import java.nio.charset.StandardCharsets -import java.nio.file.{Paths, Files} - - import com.typesafe.config.ConfigFactory import edu.cmu.dynet._ import edu.cmu.dynet.Expression._ import org.clulab.fatdynet.utils.BaseTextModelLoader import org.clulab.fatdynet.utils.CloseableModelSaver -import org.clulab.fatdynet.utils.Closer.AutoCloser import org.clulab.fatdynet.utils.Initializer import org.clulab.fatdynet.utils.Synchronizer import org.clulab.odin.{EventMention, Mention, RelationMention, TextBoundMention} import org.clulab.polarity.{NegativePolarity, NeutralPolarity, Polarity, PositivePolarity} import org.clulab.reach.mentions.BioEventMention +import java.io +import java.io.FileNotFoundException +import java.nio.charset.StandardCharsets import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, ListBuffer} import scala.io.BufferedSource import scala.io.Source -import scala.util.Random - - +import scala.util.{Random, Using} +import scala.util.Using.Releasable class DeepLearningPolarityClassifier() extends PolarityClassifier{ + // These can be removed after processors updates to the newest version of fatdynet. + implicit object CloseableModelSaverReleaser extends Releasable[CloseableModelSaver] { + override def release(resource: CloseableModelSaver): Unit = resource.close() + } + implicit object BaseTextModelLoaderReleaser extends Releasable[BaseTextModelLoader] { + override def release(resource: BaseTextModelLoader): Unit = resource.close() + } var IS_DYNET_INITIALIZED = false @@ -131,7 +133,7 @@ class DeepLearningPolarityClassifier() extends PolarityClassifier{ // implement this strategy internally. try { logger.info(s"Loading saved model $savedModelPath ...") - BaseTextModelLoader.newTextModelLoader(savedModelPath).autoClose { modelLoader => + Using.resource(BaseTextModelLoader.newTextModelLoader(savedModelPath)) { modelLoader => modelLoader.populateModel(pc, "/allParams") _isFitted = true } @@ -358,7 +360,7 @@ class DeepLearningPolarityClassifier() extends PolarityClassifier{ */ override def save(modelPath: String=savedModelPath, w2iPath:String = w2iPath, c2iPath:String = c2iPath): Unit = { logger.info("Saving model ...") - new CloseableModelSaver(modelPath).autoClose { modelSaver => + Using.resource(new CloseableModelSaver(modelPath)) { modelSaver => modelSaver.addModel(pc, "/allParams") } writeMap2Csv(w2i, w2iPath) diff --git a/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala b/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala index 03bcce1de..3c5d5b196 100644 --- a/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala +++ b/processors/src/main/scala/org/clulab/processors/bionlp/ner/ReachStandardKbSource.scala @@ -6,14 +6,13 @@ import org.clulab.processors.bionlp.ner.KBGenerator.logger import org.clulab.processors.bionlp.ner.KBGenerator.tokenizeResourceLine import org.clulab.processors.clu.tokenizer.Tokenizer import org.clulab.sequences.StandardKbSource -import org.clulab.utils.Closer.AutoCloser import org.clulab.utils.Files import org.clulab.utils.Serializer import java.io.File import java.util.function.Consumer -import scala.language.reflectiveCalls // required to access consumer.lineCount -import scala.util.Try +import scala.language.reflectiveCalls +import scala.util.{Try, Using} abstract class ReachStandardKbSource(caseInsensitiveMatching: Boolean) extends StandardKbSource(caseInsensitiveMatching) @@ -55,7 +54,7 @@ class ReachSingleStandardKbSource(kbEntry: KBEntry, caseInsensitiveMatching: Boo ) ) - bufferedReader.autoClose { bufferedReader => + Using.resource(bufferedReader) { bufferedReader => bufferedReader.lines.forEach(consumer) } logger.info(s"Done. Read ${consumer.lineCount} lines from ${new File(kbEntry.path).getName}") diff --git a/project/build.properties b/project/build.properties index 6db984250..563a014da 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.4.0 +sbt.version=1.7.2