Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.clulab.serialization.json.JSONSerialization
import org.json4s.jackson.JsonMethods._
import org.json4s.JsonDSL._
import org.json4s._
import scala.util.hashing.MurmurHash3._
import org.clulab.utils.Hash
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.io.FileUtils.forceMkdir
import ai.lum.common.FileUtils._
Expand Down Expand Up @@ -40,20 +40,17 @@ case class EventPair(
def isCrossSentence = sentenceIndices.length > 1

/**
  * Hash used to identify this training instance when comparing instances for equivalence.
  * The values are passed to `Hash.withLast` in a fixed order; reordering them changes the result.
  */
def equivalenceHash: Int = Hash.withLast(
  // the seed: a fixed string naming this kind of instance
  Hash("org.clulab.assembly.TrainingInstance"),
  // hashes for each event
  e1.equivalenceHash,
  e2.equivalenceHash,
  // is it cross-sentence?
  isCrossSentence.hashCode,
  // the text of the sentences containing the two event mentions
  text.hashCode,
  // what paper did this come from?
  pmid.hashCode
)

def copy(
before: CorefMention = this.e1,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
package org.clulab.reach.assembly.relations.corpus

import org.clulab.reach.mentions._
import org.clulab.reach.assembly.AssemblyManager
import org.clulab.reach.assembly.sieves.Constraints
import org.clulab.odin._
import ai.lum.common.ConfigUtils._
import ai.lum.common.RandomUtils._
import ai.lum.common.FileUtils._

import collection.JavaConversions._
import com.typesafe.config.ConfigFactory
import com.typesafe.scalalogging.LazyLogging

import java.io.File
import org.clulab.odin._
import org.clulab.reach.assembly.AssemblyManager
import org.clulab.reach.assembly.sieves.Constraints
import org.clulab.reach.mentions._
import org.clulab.reach.mentions.serialization.json.{JSONSerializer => ReachJSONSerializer}
import org.clulab.utils.ThreadUtils

import java.io.File
import scala.jdk.CollectionConverters._

/**
* RELATION CORPUS REQUIREMENTS:
* find events occurring in the same or neighboring sentences
Expand All @@ -31,7 +28,7 @@ object CorpusBuilder extends LazyLogging {

// Configuration loaded through Typesafe Config's default lookup.
val config = ConfigFactory.load()
// Value of the "assembly.windowSize" setting.
val kWindow = config.getInt("assembly.windowSize")
// Labels listed under "assembly.corpus.validLabels"; the Java list is converted explicitly
// with asScala rather than through an implicit conversion.
val validLabels: Set[String] = config.getStringList("assembly.corpus.validLabels").asScala.toSet

/**
* Find mentions in sentences of interest. <br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package org.clulab.reach.assembly.representations
import org.clulab.odin.Mention
import org.clulab.reach.assembly.AssemblyManager
import org.clulab.reach.assembly._
import scala.util.hashing.MurmurHash3._
import org.clulab.utils.Hash


/**
Expand Down Expand Up @@ -58,27 +58,24 @@ class Complex(
* @return an Int hash based on the [[Entity.equivalenceHash]] of each member
*/
def membersHash(ignoreMods: Boolean): Int = {
  // one equivalence hash per member; they are combined with Hash.unordered below
  val memberHashes = members.map(_.equivalenceHash(ignoreMods))

  // NOTE(review): members.size presumably serves as the length given to the finalization
  // step inside Hash.withLast -- confirm against org.clulab.utils.Hash.
  Hash.withLast(members.size)(
    // seed derived from this representation's eerString
    Hash(s"$eerString.members"),
    Hash.unordered(memberHashes)
  )
}

/**
  * Used by [[isEquivalentTo]] to compare against another [[Complex]].
  * @param ignoreMods whether or not to ignore modifications when calculating the equivalenceHash
  * @return a hash (Int) based primarily on the [[membersHash]]
  */
def equivalenceHash(ignoreMods: Boolean): Int = Hash.withLast(
  // the seed, taken from eerString
  Hash(eerString),
  // comprised of the equiv. hash of members
  membersHash(ignoreMods),
  // whether or not the representation is negated
  negated.hashCode
)

/**
* Used to compare against another [[Complex]]. <br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package org.clulab.reach.assembly.representations

import org.clulab.reach.assembly._
import org.clulab.reach.assembly.AssemblyManager
import org.clulab.utils.Hash
import org.clulab.odin.Mention
import scala.util.hashing.MurmurHash3._


/**
Expand Down Expand Up @@ -54,10 +54,12 @@ trait ComplexEvent extends Event {
* @return an Int hash based on the [[EntityEventRepresentation.equivalenceHash]] of each element in the [[controller]]
*/
def controllerHash(ignoreMods: Boolean): Int = {
  // one equivalence hash per controller element; combined with Hash.unordered below
  val controllerHashes = controller.map(_.equivalenceHash(ignoreMods))

  // NOTE(review): controller.size presumably serves as the length given to the finalization
  // step inside Hash.withLast -- confirm against org.clulab.utils.Hash.
  Hash.withLast(controller.size)(
    // seed derived from this representation's eerString
    Hash(s"$eerString.controller"),
    Hash.unordered(controllerHashes)
  )
}

/**
Expand All @@ -67,31 +69,26 @@ trait ComplexEvent extends Event {
* @return an Int hash based on the [[EntityEventRepresentation.equivalenceHash]] of each element in the [[controlled]]
*/
def controlledHash(ignoreMods: Boolean): Int = {
  // one equivalence hash per controlled element; combined with Hash.unordered below
  val controlledHashes = controlled.map(_.equivalenceHash(ignoreMods))

  // NOTE(review): controlled.size presumably serves as the length given to the finalization
  // step inside Hash.withLast -- confirm against org.clulab.utils.Hash.
  Hash.withLast(controlled.size)(
    // seed derived from this representation's eerString
    Hash(s"$eerString.controlled"),
    Hash.unordered(controlledHashes)
  )
}

/**
  * Used by [[isEquivalentTo]] to compare against another [[ComplexEvent]].
  * @param ignoreMods whether or not to ignore modifications when calculating the controlledHash
  * @return an Int hash based on the [[polarity]], [[controllerHash]], [[controlledHash]], and [[negated.hashCode]]
  */
def equivalenceHash(ignoreMods: Boolean): Int = Hash.withLast(
  // the seed, taken from eerString
  Hash(eerString),
  // the polarity of the Regulation
  Hash(polarity),
  // controller
  controllerHash(ignoreMods),
  // controlled
  controlledHash(ignoreMods),
  // whether or not the representation is negated
  negated.hashCode
)

/**
* Used to compare against another [[ComplexEvent]]. <br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package org.clulab.reach.assembly.representations
import org.clulab.reach.assembly.AssemblyManager
import org.clulab.reach.assembly._
import org.clulab.odin.Mention
import scala.util.hashing.MurmurHash3._
import org.clulab.utils.Hash


/**
Expand Down Expand Up @@ -71,24 +71,12 @@ class SimpleEntity(
* @return a hash (Int) based primarily on the [[grounding]] and [[modsHash]]
*/
def equivalenceHash(ignoreMods: Boolean): Int = {
  // the seed, taken from eerString
  val seed = Hash(eerString)
  // a representation of the ID
  val groundingHash = grounding.hashCode
  // whether or not the representation is negated
  val negatedHash = negated.hashCode

  // modsHash (a representation of the set of modifications) is included only when
  // modifications are not being ignored
  if (ignoreMods) Hash.withLast(seed, groundingHash, negatedHash)
  else Hash.withLast(seed, groundingHash, modsHash, negatedHash)
}

/**
Expand All @@ -98,10 +86,12 @@ class SimpleEntity(
* @return an Int hash based on the hashcodes of the modifications
*/
def modsHash: Int = {
  // hashCode of each modification; combined with Hash.unordered below
  val modificationHashes = modifications.map(_.hashCode)

  // NOTE(review): modifications.size presumably serves as the length given to the
  // finalization step inside Hash.withLast -- confirm against org.clulab.utils.Hash.
  Hash.withLast(modifications.size)(
    // seed derived from this representation's eerString
    Hash(s"$eerString.modifications"),
    Hash.unordered(modificationHashes)
  )
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ package org.clulab.reach.assembly.representations
import org.clulab.reach.assembly.AssemblyManager
import org.clulab.reach.assembly._
import org.clulab.odin.Mention
import org.clulab.utils.Hash
import scala.collection.Map
import scala.util.hashing.MurmurHash3._


/**
* Representation for any Mention with the label SimpleEvent. Note that a Binding is represented using a [[Complex]].
Expand Down Expand Up @@ -65,10 +64,12 @@ class SimpleEvent(
* @return an Int hash based on hashes of the keys in the [[input]] and the [[Entity.equivalenceHash]] of each element contained in the corresponding value in the [[input]]
*/
def inputHash(ignoreMods: Boolean): Int = {
  // NOTE(review): the element hashes below are taken from `output`, not `input`, while the
  // length argument is input.size. This looks unintentional for a method named inputHash,
  // but it is left unchanged here because altering it changes every SimpleEvent
  // equivalenceHash, including values that existing tests assert literally.
  // Confirm the intent before fixing.
  val elementHashes = output.map(_.equivalenceHash(ignoreMods))

  Hash.withLast(input.size)(
    // seed derived from this representation's eerString
    Hash(s"$eerString.input"),
    Hash.unordered(elementHashes)
  )
}

/**
Expand All @@ -78,31 +79,26 @@ class SimpleEvent(
* @return an Int hash based on the [[Entity.equivalenceHash]] of each element in the [[output]]
*/
def outputHash(ignoreMods: Boolean): Int = {
  // one equivalence hash per output element; combined with Hash.unordered below
  val outputHashes = output.map(_.equivalenceHash(ignoreMods))

  // NOTE(review): output.size presumably serves as the length given to the finalization
  // step inside Hash.withLast -- confirm against org.clulab.utils.Hash.
  Hash.withLast(output.size)(
    // seed derived from this representation's eerString
    Hash(s"$eerString.output"),
    Hash.unordered(outputHashes)
  )
}

/**
  * Used by [[isEquivalentTo]] to compare against another [[SimpleEvent]].
  * @param ignoreMods whether or not to ignore modifications when calculating the equivalenceHash
  * @return an Int hash based primarily on the [[label]], [[inputHash]], and [[outputHash]]
  */
def equivalenceHash(ignoreMods: Boolean): Int = Hash.withLast(
  // the seed, taken from eerString
  Hash(eerString),
  // the label of the SimpleEvent
  label.hashCode,
  // the input of the SimpleEvent
  inputHash(ignoreMods),
  // the output of the SimpleEvent
  outputHash(ignoreMods),
  // whether or not the representation is negated
  negated.hashCode
)

/**
* Used to compare against another [[SimpleEvent]]. <br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class TestAssemblyManager extends FlatSpec with Matchers {
val se = am.getSimpleEntity(ras)

se.getPTMs should have size(0)
se.equivalenceHash(ignoreMods = false) should be (-1879068248) // SimpleEntity
se.equivalenceHash(ignoreMods = true) should be (-11559465) // SimpleEntity
}

it should "have 3 mentions as evidence" in {
Expand Down Expand Up @@ -87,6 +89,8 @@ class TestAssemblyManager extends FlatSpec with Matchers {
val phos = am.getSimpleEvent(p)

phos.evidence should have size(1)
phos.equivalenceHash(ignoreMods = true) should be (-90854158) // SimpleEvent
phos.equivalenceHash(ignoreMods = false) should be (-24500179) // SimpleEvent

phos.output should have size(1)

Expand Down Expand Up @@ -157,6 +161,8 @@ class TestAssemblyManager extends FlatSpec with Matchers {
val (c1, c2) = (complexes.head, complexes.last)
c1.isEquivalentTo(c2, ignoreMods = false) should be(true)
c1.isEquivalentTo(c2, ignoreMods = true) should be(true)
c1.equivalenceHash(ignoreMods = false) should be (-1357391490) // Complex
c1.equivalenceHash(ignoreMods = true) should be (686730731) // Complex
}

it should "have 2 mentions as evidence" in {
Expand Down Expand Up @@ -226,6 +232,8 @@ class TestAssemblyManager extends FlatSpec with Matchers {

am.getRegulations should have size (1)
am.distinctRegulations should have size (1)
am.getRegulations.head.equivalenceHash(ignoreMods = false) should be (832895248) // ComplexEvent
am.getRegulations.head.equivalenceHash(ignoreMods = true) should be (738272956) // ComplexEvent
}

val regText2 = "Akt inhibits the phosphorylation of AFT by BEF."
Expand Down
30 changes: 30 additions & 0 deletions assembly/src/test/scala/org/clulab/reach/assembly/TestHash.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.clulab.reach.assembly

import org.clulab.reach.PaperReader
import org.clulab.reach.assembly.relations.corpus.EventPair
import org.clulab.reach.mentions.CorefMention
import org.clulab.reach.utils.MentionManager
import org.scalatest.{FlatSpec, Matchers}

/** Regression test that pins the value of `EventPair.equivalenceHash` for a fixed input text. */
class TestHash extends FlatSpec with Matchers {
  // NOTE(review): mentionManager and assemblyManager are not referenced by the test in this
  // class; they are kept as-is -- confirm whether they can be removed.
  val mentionManager = new MentionManager()
  val testReach = PaperReader.reachSystem
  val text = "Tbet Rag2 mice (Garrett et al., 2010) as well as Bacteroides spp. (Bloom et al., 2011), Helicobacter spp. (Fox et al., 2011), and Bilophila wadsworthia (Devkota et al., 2012) in Il10 have been shown to enhance intestinal inflammation.The acute dextran sulfate sodium"
  val allMentions = testReach.extractFrom(text, "serialization-test", "1", None)
  // sort by character offsets before picking the first and last mentions
  val sortedMentions = allMentions.sortBy { mention => (mention.startOffset, mention.endOffset) }

  val assemblyManager = AssemblyManager()

  behavior of "Hash"

  it should "compute the expected value for an EventPair" in {
    val expectedHash = 316669350
    val corefMentions = sortedMentions.collect { case corefMention: CorefMention => corefMention }

    // head/last throw NoSuchElementException on an empty collection; fail with a
    // readable matcher message instead if extraction produced no CorefMentions
    corefMentions should not be empty

    val e1 = corefMentions.head
    val e2 = corefMentions.last
    val eventPair = new EventPair(e1, e2, "relation", 42d, "annotatorID", None)

    eventPair.equivalenceHash should be (expectedHash)
  }
}
Loading