Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package org.apache.ctakes.ytex.kernel.metric;

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Compute the Braun-Blanquet metric:
 * <pre>
 *   sim(c1, c2) = IC(lcs(c1, c2)) / max(IC(c1), IC(c2))
 * </pre>
 *
 * Described in:
 * https://www.sciencedirect.com/science/article/pii/S1532046411000645 Tbl 3, eqn 11
 *
 * @author painter
 *
 */
public class BraunBlanquetMetric extends BaseSimilarityMetric {

    private static final Log log = LogFactory.getLog(BraunBlanquetMetric.class);

    /** true: use intrinsic IC; false: use corpus IC. */
    private boolean intrinsicIC = true;

    /** true when the similarity service exposed a non-null concept graph at construction time. */
    private boolean validCG = false;

    /**
     * Root of the concept graph. Resolved in the constructor (not in a field
     * initializer) so that a missing concept graph does not cause a
     * NullPointerException before {@link #validCG} can be evaluated. Stays
     * null when there is no concept graph; in that case
     * {@link #similarity} returns before touching it.
     */
    private String rootConcept = null;

    /**
     * @param simSvc
     *            service used to look up IC values and the concept graph
     * @param intrinsicIC
     *            true to use intrinsic IC, false to use corpus IC
     */
    public BraunBlanquetMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) {
        super(simSvc);
        this.intrinsicIC = intrinsicIC;
        this.validCG = simSvc.getConceptGraph() != null;
        // Resolve the root whenever a graph exists, regardless of the IC mode,
        // so that a later setIntrinsicIC(false) still finds rootConcept set.
        if (this.validCG) {
            this.rootConcept = simSvc.getConceptGraph().getRoot();
        }
    }

    /**
     * @return true if intrinsic IC is used, false if corpus IC is used
     */
    public boolean isIntrinsicIC() {
        return intrinsicIC;
    }

    /**
     * @param intrinsicIC
     *            true to use intrinsic IC, false to use corpus IC
     */
    public void setIntrinsicIC(boolean intrinsicIC) {
        this.intrinsicIC = intrinsicIC;
    }

    /**
     * Compute the Braun-Blanquet similarity of two concepts.
     *
     * @param concept1
     *            first concept
     * @param concept2
     *            second concept
     * @param conceptFilter
     *            passed through unchanged to initLcsIC
     * @param simInfo
     *            passed through unchanged to initLcsIC
     * @return IC(lcs) / max(IC(concept1), IC(concept2)); 0 if there is no
     *         concept graph, if corpus IC is requested but missing for a
     *         non-root concept, or if neither concept has a positive IC
     */
    @Override
    public double similarity(String concept1, String concept2,
            Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
        // don't bother if the concept graph is null
        if (!validCG) {
            return 0d;
        }

        // IC values for each concept
        double ic1 = simSvc.getIC(concept1, this.intrinsicIC);
        double ic2 = simSvc.getIC(concept2, this.intrinsicIC);

        // IC of the LCS, as computed by the inherited initLcsIC helper.
        // Called before the early returns below, exactly as before, so any
        // effect it has on simInfo is unchanged.
        double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo,
                this.intrinsicIC);

        // if the corpus IC is 0 and the concept is not the root, then we don't
        // have any IC on the concept and can't measure similarity - return 0
        if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) {
            return 0d;
        }
        if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) {
            return 0d;
        }

        // we just need one of the ICs to be greater than zero for the
        // denominator (the larger of the two) to be positive
        if (ic1 > 0 || ic2 > 0) {
            double denom = (ic2 > ic1) ? ic2 : ic1;
            return lcsIC / denom;
        }
        return 0d;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@
public interface ConceptSimilarityService {

public enum SimilarityMetricEnum {
LCH(false, false), INTRINSIC_LCH(true, false), LIN(false, true), INTRINSIC_LIN(
true, false), PATH(false, false), INTRINSIC_PATH(true, false), JACCARD(
true, false), SOKAL(true, false), RADA(false, false), INTRINSIC_RADA(
true, false), WUPALMER(false, false), PAGERANK(false, false);
LCH(false, false), INTRINSIC_LCH(true, false),
LIN(false, true), INTRINSIC_LIN(true, false), PATH(false, false),
INTRINSIC_PATH(true, false), JACCARD(true, false), SOKAL(true, false),
RADA(false, false), INTRINSIC_RADA(true, false), WUPALMER(false, false),
PAGERANK(false, false), RESNIK(false, false), INTRINSIC_RESNIK(true, false),
FAITH(false, false), INTRINSIC_FAITH(true, false), DICE(false, true),
SIMPSON(false, true), BRAUN_BLANQUET(false, true), OCHIAI(false, true);

boolean intrinsicIC = false;
boolean corpusIC = false;

Expand Down Expand Up @@ -201,4 +205,4 @@ public List<ConceptPairSimilarity> similarity(
Map<String, Double> conceptFilter, boolean lcs);

public abstract int getDepth(String concept);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,24 @@ private void initSimilarityMetricMap() {
new JaccardMetric(this));
this.similarityMetricMap.put(SimilarityMetricEnum.WUPALMER,
new WuPalmerMetric(this));
this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_RESNIK,
new ResnikMetric(this, true));
this.similarityMetricMap.put(SimilarityMetricEnum.RESNIK,
new ResnikMetric(this, false));
this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_FAITH,
new FaithMetric(this, true));
this.similarityMetricMap.put(SimilarityMetricEnum.FAITH,
new FaithMetric(this, false));
this.similarityMetricMap.put(SimilarityMetricEnum.DICE,
new DiceMetric(this, true));
this.similarityMetricMap.put(SimilarityMetricEnum.SIMPSON,
new SimpsonMetric(this, true));
this.similarityMetricMap.put(SimilarityMetricEnum.BRAUN_BLANQUET,
new BraunBlanquetMetric(this, true));
this.similarityMetricMap.put(SimilarityMetricEnum.OCHIAI,
new OchiaiMetric(this, true));


} else {
this.similarityMetricMap.put(SimilarityMetricEnum.PAGERANK,
new PageRankMetric(this, this.getPageRankService()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package org.apache.ctakes.ytex.kernel.metric;

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Compute the Dice metric:
 * <pre>
 *   sim(c1, c2) = 2 * IC(lcs(c1, c2)) / (IC(c1) + IC(c2))
 * </pre>
 *
 * Described in:
 * https://www.sciencedirect.com/science/article/pii/S1532046411000645 Tbl 3, eqn 8
 *
 * @author painter
 *
 */
public class DiceMetric extends BaseSimilarityMetric {

    private static final Log log = LogFactory.getLog(DiceMetric.class);

    /** true: use intrinsic IC; false: use corpus IC. */
    private boolean intrinsicIC = true;

    /** true when the similarity service exposed a non-null concept graph at construction time. */
    private boolean validCG = false;

    /**
     * Root of the concept graph. Resolved in the constructor (not in a field
     * initializer) so that a missing concept graph does not cause a
     * NullPointerException before {@link #validCG} can be evaluated. Stays
     * null when there is no concept graph; in that case
     * {@link #similarity} returns before touching it.
     */
    private String rootConcept = null;

    /**
     * @param simSvc
     *            service used to look up IC values and the concept graph
     * @param intrinsicIC
     *            true to use intrinsic IC, false to use corpus IC
     */
    public DiceMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) {
        super(simSvc);
        this.intrinsicIC = intrinsicIC;
        this.validCG = simSvc.getConceptGraph() != null;
        // Resolve the root whenever a graph exists, regardless of the IC mode,
        // so that a later setIntrinsicIC(false) still finds rootConcept set.
        if (this.validCG) {
            this.rootConcept = simSvc.getConceptGraph().getRoot();
        }
    }

    /**
     * @return true if intrinsic IC is used, false if corpus IC is used
     */
    public boolean isIntrinsicIC() {
        return intrinsicIC;
    }

    /**
     * @param intrinsicIC
     *            true to use intrinsic IC, false to use corpus IC
     */
    public void setIntrinsicIC(boolean intrinsicIC) {
        this.intrinsicIC = intrinsicIC;
    }

    /**
     * Compute the Dice similarity of two concepts.
     *
     * @param concept1
     *            first concept
     * @param concept2
     *            second concept
     * @param conceptFilter
     *            passed through unchanged to initLcsIC
     * @param simInfo
     *            passed through unchanged to initLcsIC
     * @return 2 * IC(lcs) / (IC(concept1) + IC(concept2)); 0 if there is no
     *         concept graph, if corpus IC is requested but missing for a
     *         non-root concept, or if neither concept has a positive IC
     */
    @Override
    public double similarity(String concept1, String concept2,
            Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
        // don't bother if the concept graph is null
        if (!validCG) {
            return 0d;
        }

        // IC values for each concept
        double ic1 = simSvc.getIC(concept1, this.intrinsicIC);
        double ic2 = simSvc.getIC(concept2, this.intrinsicIC);

        // IC of the LCS, as computed by the inherited initLcsIC helper.
        // Called before the early returns below, exactly as before, so any
        // effect it has on simInfo is unchanged.
        double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo,
                this.intrinsicIC);

        // if the corpus IC is 0 and the concept is not the root, then we don't
        // have any IC on the concept and can't measure similarity - return 0
        if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) {
            return 0d;
        }
        if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) {
            return 0d;
        }

        // Dice score; at least one IC must be positive
        if (ic1 > 0 || ic2 > 0) {
            return (2.0 * lcsIC) / (ic1 + ic2);
        }
        return 0d;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package org.apache.ctakes.ytex.kernel.metric;

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Compute the Faith score to provide functionality as found in
 * UMLS::Similarity:
 * <pre>
 *   sim(c1, c2) = IC(lcs(c1, c2)) / (IC(c1) + IC(c2) - IC(lcs(c1, c2)))
 * </pre>
 *
 * UMLS::Similarity::faith.pm
 * Module implementing the semantic relatedness measure described
 * by Pirro and Euzenat (2010)
 *
 * Described in:
 * https://inria.hal.science/hal-00793283/file/pirro2010b.pdf
 *
 * @author painter
 *
 */
public class FaithMetric extends BaseSimilarityMetric {

    private static final Log log = LogFactory.getLog(FaithMetric.class);

    /** true: use intrinsic IC; false: use corpus IC. */
    private boolean intrinsicIC = true;

    /** true when the similarity service exposed a non-null concept graph at construction time. */
    private boolean validCG = false;

    /**
     * Root of the concept graph. Resolved in the constructor (not in a field
     * initializer) so that a missing concept graph does not cause a
     * NullPointerException before {@link #validCG} can be evaluated. Stays
     * null when there is no concept graph; in that case
     * {@link #similarity} returns before touching it.
     */
    private String rootConcept = null;

    /**
     * @param simSvc
     *            service used to look up IC values and the concept graph
     * @param intrinsicIC
     *            true to use intrinsic IC, false to use corpus IC
     */
    public FaithMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) {
        super(simSvc);
        this.intrinsicIC = intrinsicIC;
        this.validCG = simSvc.getConceptGraph() != null;
        // Resolve the root whenever a graph exists, regardless of the IC mode,
        // so that a later setIntrinsicIC(false) still finds rootConcept set.
        if (this.validCG) {
            this.rootConcept = simSvc.getConceptGraph().getRoot();
        }
    }

    /**
     * @return true if intrinsic IC is used, false if corpus IC is used
     */
    public boolean isIntrinsicIC() {
        return intrinsicIC;
    }

    /**
     * @param intrinsicIC
     *            true to use intrinsic IC, false to use corpus IC
     */
    public void setIntrinsicIC(boolean intrinsicIC) {
        this.intrinsicIC = intrinsicIC;
    }

    /**
     * Compute the Faith similarity of two concepts.
     *
     * @param concept1
     *            first concept
     * @param concept2
     *            second concept
     * @param conceptFilter
     *            passed through unchanged to initLcsIC
     * @param simInfo
     *            passed through unchanged to initLcsIC
     * @return IC(lcs) / (IC(concept1) + IC(concept2) - IC(lcs)); 0 if there
     *         is no concept graph, if corpus IC is requested but missing for
     *         a non-root concept, or unless both concepts have a positive IC
     */
    @Override
    public double similarity(String concept1, String concept2,
            Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
        // don't bother if the concept graph is null
        if (!validCG) {
            return 0d;
        }

        // IC values for each concept
        double ic1 = simSvc.getIC(concept1, this.intrinsicIC);
        double ic2 = simSvc.getIC(concept2, this.intrinsicIC);

        // IC of the LCS, as computed by the inherited initLcsIC helper.
        // Called before the early returns below, exactly as before, so any
        // effect it has on simInfo is unchanged.
        double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo,
                this.intrinsicIC);

        // if the corpus IC is 0 and the concept is not the root, then we don't
        // have any IC on the concept and can't measure similarity - return 0
        if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) {
            return 0d;
        }
        if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) {
            return 0d;
        }

        // Faith score; unlike Dice / Braun-Blanquet, both ICs must be positive
        if (ic1 > 0 && ic2 > 0) {
            return lcsIC / (ic1 + ic2 - lcsIC);
        }
        return 0d;
    }

}
Loading