diff --git a/.gitignore b/.gitignore index 1e031f925..87a5d80ad 100644 --- a/.gitignore +++ b/.gitignore @@ -44,4 +44,5 @@ bin/ moocore.Rcheck/ +moocore_*.tar.gz testsuite/ diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root new file mode 120000 index 000000000..945c9b46d --- /dev/null +++ b/_codeql_detected_source_root @@ -0,0 +1 @@ +. \ No newline at end of file diff --git a/r/NAMESPACE b/r/NAMESPACE index 8aadfb1d7..8a331f2fa 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -23,6 +23,7 @@ export(is_nondominated) export(largest_eafdiff) export(normalise) export(pareto_rank) +export(r2_exact) export(rbind_datasets) export(read_datasets) export(total_whv_rect) diff --git a/r/R/r2.R b/r/R/r2.R new file mode 100644 index 000000000..ab169ae2a --- /dev/null +++ b/r/R/r2.R @@ -0,0 +1,101 @@ +#' Exact R2 indicator +#' +#' Computes the exact R2 indicator with respect to a given ideal/utopian +#' reference point assuming minimization of all objectives. +#' +#' @inherit epsilon params +#' +#' @param reference `numeric()`\cr Reference point as a vector of numerical +#' values. +#' +#' @return `numeric(1)` A single numerical value. +#' +#' @author Lennart \enc{Schäpermeier}{Schapermeier} +#' +#' @details +#' +#' The unary R2 indicator is a quality indicator for a set \eqn{A \subset +#' \mathbb{R}^m}{A in R^m} w.r.t. an ideal or utopian reference point +#' \eqn{\vec{r} \in \mathbb{R}^m}{r in R^m}. It was originally proposed by +#' \citet{HanJas1998} and is defined as the expected Tchebycheff utility under a +#' uniform distribution of weight vectors (w.l.o.g. assuming minimization): +#' +#' \deqn{R2(A) := \int_{w \in W} \min_{a \in A} \left\{ \max_{i=1,\dots,m} w_i +#' (a_i - r_i) \right\} \, dw,}{R2(A) := integral over w in W of min over a in +#' A of max over i of w_i (a_i - r_i) dw,} +#' +#' where \eqn{W} denotes the uniform distribution across weights: +#' +#' \deqn{W = \{w \in \mathbb{R}^m \mid w_i \geq 0, \sum_{i=1}^m w_i = 1\}.}{W +#' = \{w in R^m | w_i >= 0, sum_i w_i = 1\}.} +#' +#' The R2 indicator is to be minimized and has an optimal value of 0 when +#' \eqn{\vec{r} \in A}{r in A}. +#' +#' The exact R2 indicator is strongly Pareto-compliant, i.e., compatible with +#' Pareto-optimality: +#' +#' \deqn{\forall A, B \subset \mathbb{R}^m: A \prec B \Rightarrow R2(A) < +#' R2(B).}{for all A, B in R^m: A dominates B implies R2(A) < R2(B).} +#' +#' Given an ideal or utopian reference point, which is available in most +#' scenarios, all non-dominated solutions always contribute to the value of +#' the exact R2 indicator. However, it is scale-dependent and care should be +#' taken such that all objectives contribute approximately equally to the +#' indicator, e.g., by normalizing the Pareto front to the unit hypercube. +#' +#' The current implementation exclusively supports bi-objective solution sets +#' and runs in \eqn{O(n \log n)} following \citet{SchKer2025r2v2}. +#' +#' @references +#' +#' \insertAllCited{} +#' +#' @doctest +#' dat <- matrix(c(5, 5, 4, 6, 2, 7, 7, 4), ncol = 2, byrow = TRUE) +#' @expect equal(tolerance = 1e-9, 2.5941919191919194) +#' r2_exact(dat, reference = c(0, 0)) +#' +#' # This function assumes minimisation by default. We can easily specify maximisation: +#' @expect equal(tolerance = 1e-9, 2.5196969696969695) +#' r2_exact(dat, reference = c(10, 10), maximise = TRUE) +#' +#' # Merge all the sets of a dataset by removing the set number column: +#' extdata_path <- system.file(package="moocore","extdata") +#' dat <- read_datasets(file.path(extdata_path, "example1_dat"))[, 1:2] +#' @expect equal(65L) +#' nrow(dat) +#' +#' # Dominated points are ignored, so this: +#' @expect equal(tolerance = 1e-9, 3865393.493470812) +#' r2_exact(dat, reference = 0) +#' +#' # gives the same exact R2 value as this: +#' dat <- filter_dominated(dat) +#' @expect equal(9L) +#' nrow(dat) +#' @expect equal(tolerance = 1e-9, 3865393.493470812) +#' r2_exact(dat, reference = 0) +#' +#' @export +#' @concept metrics +r2_exact <- function(x, reference, maximise = FALSE) +{ + x <- as_double_matrix(x) + nobjs <- ncol(x) + if (!is.numeric(reference)) + stop("a numerical reference vector must be provided") + if (length(reference) == 1L) reference <- rep_len(reference, nobjs) + + if (any(maximise)) { + x <- transform_maximise(x, maximise) + if (all(maximise)) { + reference <- -reference + } else { + reference[maximise] <- -reference[maximise] + } + } + .Call(r2_exact_C, + t(x), + as.double(reference)) +} diff --git a/r/man/r2_exact.Rd b/r/man/r2_exact.Rd new file mode 100644 index 000000000..d1376c760 --- /dev/null +++ b/r/man/r2_exact.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/r2.R +\name{r2_exact} +\alias{r2_exact} +\title{Exact R2 indicator} +\usage{ +r2_exact(x, reference, maximise = FALSE) +} +\arguments{ +\item{x}{\code{matrix()}|\code{data.frame()}\cr Matrix or data frame of numerical +values, where each row gives the coordinates of a point.} + +\item{reference}{\code{numeric()}\cr Reference point as a vector of numerical +values.} + +\item{maximise}{\code{logical()}\cr Whether the objectives must be maximised +instead of minimised. Either a single logical value that applies to all +objectives or a vector of logical values, with one value per objective.} +} +\value{ +\code{numeric(1)} A single numerical value. +} +\description{ +Computes the exact R2 indicator with respect to a given ideal/utopian +reference point assuming minimization of all objectives. +} +\details{ +The unary R2 indicator is a quality indicator for a set \eqn{A \subset +\mathbb{R}^m}{A in R^m} w.r.t. an ideal or utopian reference point +\eqn{\vec{r} \in \mathbb{R}^m}{r in R^m}. It was originally proposed by +\citet{HanJas1998} and is defined as the expected Tchebycheff utility under a +uniform distribution of weight vectors (w.l.o.g. assuming minimization): + +\deqn{R2(A) := \int_{w \in W} \min_{a \in A} \left\{ \max_{i=1,\dots,m} w_i +(a_i - r_i) \right\} \, dw,}{R2(A) := integral over w in W of min over a in +A of max over i of w_i (a_i - r_i) dw,} + +where \eqn{W} denotes the uniform distribution across weights: + +\deqn{W = \{w \in \mathbb{R}^m \mid w_i \geq 0, \sum_{i=1}^m w_i = 1\}.}{W += \{w in R^m | w_i >= 0, sum_i w_i = 1\}.} + +The R2 indicator is to be minimized and has an optimal value of 0 when +\eqn{\vec{r} \in A}{r in A}. + +The exact R2 indicator is strongly Pareto-compliant, i.e., compatible with +Pareto-optimality: + +\deqn{\forall A, B \subset \mathbb{R}^m: A \prec B \Rightarrow R2(A) < +R2(B).}{for all A, B in R^m: A dominates B implies R2(A) < R2(B).} + +Given an ideal or utopian reference point, which is available in most +scenarios, all non-dominated solutions always contribute to the value of +the exact R2 indicator. However, it is scale-dependent and care should be +taken such that all objectives contribute approximately equally to the +indicator, e.g., by normalizing the Pareto front to the unit hypercube. + +The current implementation exclusively supports bi-objective solution sets +and runs in \eqn{O(n \log n)} following \citet{SchKer2025r2v2}. +} +\examples{ +dat <- matrix(c(5, 5, 4, 6, 2, 7, 7, 4), ncol = 2, byrow = TRUE) +r2_exact(dat, reference = c(0, 0)) + +# This function assumes minimisation by default. We can easily specify maximisation: +r2_exact(dat, reference = c(10, 10), maximise = TRUE) + +# Merge all the sets of a dataset by removing the set number column: +extdata_path <- system.file(package="moocore","extdata") +dat <- read_datasets(file.path(extdata_path, "example1_dat"))[, 1:2] +nrow(dat) + +# Dominated points are ignored, so this: +r2_exact(dat, reference = 0) + +# gives the same exact R2 value as this: +dat <- filter_dominated(dat) +nrow(dat) +r2_exact(dat, reference = 0) + +} +\references{ +\insertAllCited{} +} +\author{ +Lennart \enc{Schäpermeier}{Schapermeier} +} +\concept{metrics} diff --git a/r/src/Makevars b/r/src/Makevars index ffeb66c44..efdc89539 100644 --- a/r/src/Makevars +++ b/r/src/Makevars @@ -8,7 +8,7 @@ MOOCORE_DEBUG ?=0 PKG_CPPFLAGS=-DR_PACKAGE -DDEBUG=$(MOOCORE_DEBUG) -I./libmoocore/ PKG_CFLAGS+=$(C_VISIBILITY) -MOOCORE_SRC_FILES = hv3dplus.c hv4d.c hv_contrib.c hv.c hvapprox.c hvc3d.c pareto.c whv.c whv_hype.c avl.c eaf3d.c eaf.c io.c rng.c mt19937/mt19937.c +MOOCORE_SRC_FILES = hv3dplus.c hv4d.c hv_contrib.c hv.c hvapprox.c hvc3d.c pareto.c r2_exact.c whv.c whv_hype.c avl.c eaf3d.c eaf.c io.c rng.c mt19937/mt19937.c SOURCES = $(MOOCORE_SRC_FILES:%=libmoocore/%) init.c Rmoocore.c OBJECTS = $(SOURCES:.c=.o) diff --git a/r/src/Rmoocore.c b/r/src/Rmoocore.c index 2343f1f3c..47183af67 100644 --- a/r/src/Rmoocore.c +++ b/r/src/Rmoocore.c @@ -488,6 +488,19 @@ hv_approx_dz2019_hw_C(SEXP DATA, SEXP REFERENCE, SEXP MAXIMISE, SEXP NSAMPLES) return Rf_ScalarReal(hv); } +#include "r2_exact.h" + +SEXP +r2_exact_C(SEXP DATA, SEXP REFERENCE) +{ + /* We transpose the matrix before calling this function. */ + SEXP_2_DOUBLE_MATRIX(DATA, data, nobj, npoint); + SEXP_2_DOUBLE_VECTOR(REFERENCE, reference, reference_len); + assert(nobj == reference_len); + double r2 = r2_exact(data, npoint, nobj, reference); + return Rf_ScalarReal(r2); +} + #include "epsilon.h" #include "igd.h" #include "nondominated.h" diff --git a/r/src/init.h b/r/src/init.h index 9131634ef..61cc54d22 100644 --- a/r/src/init.h +++ b/r/src/init.h @@ -5,6 +5,7 @@ DECLARE_CALL(compute_eafdiff_polygon_C, SEXP DATA, SEXP CUMSIZES, SEXP INTERVALS DECLARE_CALL(compute_eafdiff_rectangles_C, SEXP DATA, SEXP CUMSIZES, SEXP INTERVALS) DECLARE_CALL(R_read_datasets, SEXP FILENAME) DECLARE_CALL(hypervolume_C, SEXP DATA, SEXP REFERENCE) +DECLARE_CALL(r2_exact_C, SEXP DATA, SEXP REFERENCE) DECLARE_CALL(hv_contributions_C, SEXP DATA, SEXP REFERENCE, SEXP IGNORE_DOMINATED) DECLARE_CALL(normalise_C, SEXP DATA, SEXP RANGE, SEXP LBOUND, SEXP UBOUND, SEXP MAXIMISE) DECLARE_CALL(is_nondominated_C, SEXP DATA, SEXP MAXIMISE, SEXP KEEP_WEAKLY) diff --git a/r/tests/testthat/test-doctest-r2_exact.R b/r/tests/testthat/test-doctest-r2_exact.R new file mode 100644 index 000000000..7e4797a4e --- /dev/null +++ b/r/tests/testthat/test-doctest-r2_exact.R @@ -0,0 +1,18 @@ +# Generated by doctest: do not edit by hand +# Please edit file in R/r2.R + +test_that("Doctest: r2_exact", { + # Created from @doctest for `r2_exact` + # Source file: R/r2.R + # Source line: 28 + dat <- matrix(c(5, 5, 4, 6, 2, 7, 7, 4), ncol = 2, byrow = TRUE) + expect_equal(r2_exact(dat, reference = c(0, 0)), 2.5941919191919194, tolerance = 1e-9) + expect_equal(r2_exact(dat, reference = c(10, 10), maximise = TRUE), 2.5196969696969695, tolerance = 1e-9) + extdata_path <- system.file(package = "moocore", "extdata") + dat <- read_datasets(file.path(extdata_path, "example1_dat"))[, 1:2] + expect_equal(nrow(dat), 65L) + expect_equal(r2_exact(dat, reference = 0), 3865393.493470812, tolerance = 1e-9) + dat <- filter_dominated(dat) + expect_equal(nrow(dat), 9L) + expect_equal(r2_exact(dat, reference = 0), 3865393.493470812, tolerance = 1e-9) +}) diff --git a/r/tests/testthat/test_r2.R b/r/tests/testthat/test_r2.R new file mode 100644 index 000000000..ee2cb8bd6 --- /dev/null +++ b/r/tests/testthat/test_r2.R @@ -0,0 +1,33 @@ +test_that("r2_exact", { + # perfect approximation of ideal ref yields 0.0 + expect_equal(r2_exact(matrix(c(0, 0), ncol = 2), reference = c(0, 0)), 0.0) + + # [1,1] in normalized objective space should yield r2 = 0.75 + expect_equal(r2_exact(matrix(c(2, 2), ncol = 2), reference = c(1, 1)), 0.75) + + # [[1,0],[0,1]] should yield r2 = 0.25 + expect_equal(r2_exact(matrix(c(0, 1, 1, 0), ncol = 2, byrow = TRUE), reference = c(0, 0)), 0.25) + + x <- matrix(c(0, 1, 0.2, 0.8, 0.4, 0.6, 0.6, 0.4, 0.8, 0.2, 1, 0), ncol = 2, byrow = TRUE) + expect_equal(r2_exact(x, reference = c(0, 0)), 0.1833333333333333, tolerance = 1e-10) + + # a closely sampled linear front should yield a value close to 1/6 + xvals <- seq(0, 1, length.out = 1000001) + pf <- cbind(xvals, 1 - xvals) + expect_equal(r2_exact(pf, reference = c(0, 0)), 1 / 6, tolerance = 1e-6) +}) + +test_that("r2_exact from doc", { + dat <- matrix(c(5, 5, 4, 6, 2, 7, 7, 4), ncol = 2, byrow = TRUE) + expect_equal(r2_exact(dat, reference = c(0, 0)), 2.5941919191919194, tolerance = 1e-9) + + expect_equal(r2_exact(dat, reference = c(10, 10), maximise = TRUE), 2.5196969696969695, tolerance = 1e-9) + + extdata_path <- system.file(package = "moocore", "extdata") + dat <- read_datasets(file.path(extdata_path, "example1_dat"))[, 1:2] + r2_val <- r2_exact(dat, reference = c(0, 0)) + dat_nondom <- filter_dominated(dat) + r2_val_nondom <- r2_exact(dat_nondom, reference = c(0, 0)) + # Dominated points are ignored, so both should give the same value + expect_equal(r2_val, r2_val_nondom) +})