Skip to content

Commit 8c137ef

Browse files
authored
fix(groupcomparison): Fix .countMissingPercentage for custom contrast matrices (#171)
1 parent 5e7372e commit 8c137ef

File tree

2 files changed

+142
-0
lines changed

2 files changed

+142
-0
lines changed

R/utils_groupcomparison.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ getSamplesInfo = function(summarization_output) {
406406
NumImputedFeature = sum(NumImputedFeature,
407407
na.rm = TRUE)),
408408
by = "GROUP"]
409+
counts <- counts[match(intersect(colnames(contrast_matrix), GROUP), GROUP), ]
409410

410411
empty_conditions = setdiff(samples_info$GROUP, unique(counts$GROUP))
411412
if (length(empty_conditions) !=0) {
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Test suite for .countMissingPercentage function
2+
## Test 1: Basic functionality with no missing values
# Fixture: two contrasts (Group1 vs Group2, Group2 vs Group3) over three
# groups, with two rows per group in `summarized`. The call passes FALSE as
# its last argument (has_imputed), and the checks below expect the output to
# carry MissingPercentage but no ImputationPercentage.
# NOTE(review): expecting 0 missing presumably relies on the two 50-count rows
# of each group being summed against a single per-group total of 100 --
# confirm against the body of .countMissingPercentage.
3+
contrast_matrix <- matrix(c(1, -1, 0,
4+
0, 1, -1), nrow = 2, ncol = 3, byrow = TRUE)
5+
colnames(contrast_matrix) <- c("Group1", "Group2", "Group3")
6+
summarized <- data.table::data.table(
7+
GROUP = c("Group1", "Group1", "Group2", "Group2", "Group3", "Group3"),
8+
TotalGroupMeasurements = c(100, 100, 100, 100, 100, 100),
9+
NumMeasuredFeature = c(50, 50, 50, 50, 50, 50),
10+
NumImputedFeature = c(0, 0, 0, 0, 0, 0)
11+
)
12+
# Mock comparison result with one row per contrast; the last two expectations
# verify these columns survive and that only MissingPercentage is appended.
result <- data.table::data.table(
13+
logFC = c(6.154384, 6.154384),
14+
SE = c(0.2917031, 0.2917031),
15+
Tvalue = c(21.09811, 21.09811),
16+
DF = c(4, 4),
17+
pvalue = c(0.0000381, 0.0000381),
18+
Protein = c("IDHC", "IDHC"),
19+
Label = c("Group1 - Group2", "Group2 - Group3"),
20+
issue = c(NA, NA)
21+
)
22+
samples_info <- data.table::data.table(GROUP = c("Group1", "Group2", "Group3"), NumRuns = c(2, 2, 2))
23+
output <- MSstats:::.countMissingPercentage(
24+
contrast_matrix, summarized, result, samples_info, FALSE
25+
)
26+
expect_equal(length(output$MissingPercentage), 2, info = "Basic functionality: MissingPercentage length")
27+
expect_equal(output$MissingPercentage, c(0, 0), info = "Basic functionality: No missing values")
28+
expect_true(is.null(output$ImputationPercentage), info = "Basic functionality: No imputation when has_imputed = FALSE")
29+
expect_true(all(names(result) %in% names(output)),
30+
info = "Basic functionality: Preserve existing result columns")
31+
expect_true(setequal(names(output), c(names(result), "MissingPercentage")),
32+
info = "Basic functionality: No extraneous columns added")
33+
34+
## Test 2: With imputed values
# Fixture: a single Group1-vs-Group2 contrast with one `summarized` row per
# group. The call passes TRUE as its last argument, so both MissingPercentage
# and ImputationPercentage are checked. `result` is an empty list because only
# the appended percentage fields are inspected.
35+
contrast_matrix <- matrix(c(1, -1), nrow = 1, ncol = 2)
36+
colnames(contrast_matrix) <- c("Group1", "Group2")
37+
summarized <- data.table::data.table(
38+
GROUP = c("Group1", "Group2"),
39+
TotalGroupMeasurements = c(100, 100),
40+
NumMeasuredFeature = c(80, 70),
41+
NumImputedFeature = c(10, 20)
42+
)
43+
result <- list()
44+
samples_info <- data.table::data.table(GROUP = c("Group1", "Group2"), NumRuns = c(10, 10))
45+
output <- MSstats:::.countMissingPercentage(contrast_matrix, summarized, result, samples_info, TRUE)
46+
# Expected values pool measured / imputed counts over both groups in the
# contrast and divide by the pooled total.
expected_missing <- 1 - (80 + 70) / (100 + 100) # 0.25
47+
expected_imputed <- (10 + 20) / (100 + 100) # 0.15
48+
expect_equal(output$MissingPercentage[1], expected_missing, info = "Imputed values: Missing percentage calculation")
49+
expect_equal(output$ImputationPercentage[1], expected_imputed, info = "Imputed values: Imputation percentage calculation")
50+
51+
## Test 3: With empty conditions (groups not in summarized data)
# Fixture: the contrast matrix and `samples_info` name three groups, but
# `summarized` only has a row for Group1, so Group2 and Group3 are "empty".
# Only the length and numeric type of MissingPercentage are asserted here;
# its value is not pinned.
52+
contrast_matrix <- matrix(c(1, -1, 0), nrow = 1, ncol = 3)
53+
colnames(contrast_matrix) <- c("Group1", "Group2", "Group3")
54+
summarized <- data.table::data.table(
55+
GROUP = c("Group1"),
56+
TotalGroupMeasurements = c(100),
57+
NumMeasuredFeature = c(80),
58+
NumImputedFeature = c(0)
59+
)
60+
61+
result <- list()
62+
samples_info <- data.table::data.table(GROUP = c("Group1", "Group2", "Group3"), NumRuns = c(10, 10, 10))
63+
output <- MSstats:::.countMissingPercentage(contrast_matrix, summarized, result, samples_info, FALSE)
64+
expect_equal(length(output$MissingPercentage), 1, info = "Empty conditions: MissingPercentage length")
65+
expect_true(is.numeric(output$MissingPercentage), info = "Empty conditions: Numeric output")
66+
67+
## Test 4: Multiple contrasts with different missing patterns
# Fixture: the contrast-matrix columns (Group3, Group2, Group1) and the
# `samples_info` rows are in the reverse order of the GROUP rows in
# `summarized` (Group1, Group2, Group3). Each group has distinct measured and
# imputed counts, so a misalignment between contrast columns and per-group
# counts would change the expected percentages below.
68+
contrast_matrix <- matrix(c(1, -1, 0,
69+
0, 1, -1,
70+
1, 0, -1), nrow = 3, ncol = 3, byrow = TRUE)
71+
colnames(contrast_matrix) <- c("Group3", "Group2", "Group1")
72+
summarized <- data.table::data.table(
73+
GROUP = c("Group1", "Group2", "Group3"),
74+
TotalGroupMeasurements = c(100, 100, 100),
75+
NumMeasuredFeature = c(90, 80, 70),
76+
NumImputedFeature = c(5, 10, 15)
77+
)
78+
result <- list()
79+
samples_info <- data.table::data.table(
80+
GROUP = c("Group3", "Group2", "Group1"),
81+
NumRuns = c(1, 1, 1)
82+
)
83+
output <- MSstats:::.countMissingPercentage(contrast_matrix, summarized, result, samples_info, TRUE)
84+
85+
# Contrast 1 involves Group3 and Group2, contrast 2 Group2 and Group1,
# contrast 3 Group3 and Group1 (non-zero entries of each matrix row).
expected_missing_1 <- 1 - (70 + 80) / (100 + 100) # 1 - 150/200 = 0.25
86+
expected_imputed_1 <- (15 + 10) / (100 + 100) # 25/200 = 0.125
87+
expected_missing_2 <- 1 - (80 + 90) / (100 + 100) # 1 - 170/200 = 0.15
88+
expected_imputed_2 <- (10 + 5) / (100 + 100) # 15/200 = 0.075
89+
expected_missing_3 <- 1 - (70 + 90) / (100 + 100) # 1 - 160/200 = 0.20
90+
expected_imputed_3 <- (15 + 5) / (100 + 100) # 20/200 = 0.10
91+
92+
expect_equal(length(output$MissingPercentage), 3, info = "Column ordering: MissingPercentage length")
93+
expect_equal(length(output$ImputationPercentage), 3, info = "Column ordering: ImputationPercentage length")
94+
expect_equal(output$MissingPercentage[1], expected_missing_1, info = "Column ordering: Contrast 1 missing percentage (Group3 vs Group2)")
95+
expect_equal(output$ImputationPercentage[1], expected_imputed_1, info = "Column ordering: Contrast 1 imputation percentage")
96+
expect_equal(output$MissingPercentage[2], expected_missing_2, info = "Column ordering: Contrast 2 missing percentage (Group2 vs Group1)")
97+
expect_equal(output$ImputationPercentage[2], expected_imputed_2, info = "Column ordering: Contrast 2 imputation percentage")
98+
expect_equal(output$MissingPercentage[3], expected_missing_3, info = "Column ordering: Contrast 3 missing percentage (Group3 vs Group1)")
99+
expect_equal(output$ImputationPercentage[3], expected_imputed_3, info = "Column ordering: Contrast 3 imputation percentage")
100+
101+
## Test 5: Edge case with all values missing in one group
# Fixture: Group1 has zero total, measured and imputed counts, so every count
# in the pooled ratio comes from Group2. The call passes FALSE as its last
# argument, so only MissingPercentage is checked.
102+
contrast_matrix <- matrix(c(1, -1), nrow = 1, ncol = 2)
103+
colnames(contrast_matrix) <- c("Group1", "Group2")
104+
summarized <- data.table::data.table(
105+
GROUP = c("Group1", "Group2"),
106+
TotalGroupMeasurements = c(0, 100),
107+
NumMeasuredFeature = c(0, 80),
108+
NumImputedFeature = c(0, 20)
109+
)
110+
result <- list()
111+
samples_info <- data.table::data.table(GROUP = c("Group1", "Group2"), NumRuns = c(10, 10))
112+
output <- MSstats:::.countMissingPercentage(contrast_matrix, summarized, result, samples_info, FALSE)
113+
expected_missing <- 1 - (0 + 80) / (0 + 100) # 0.2
114+
expect_equal(output$MissingPercentage[1], expected_missing, info = "Complete missing group: Missing percentage calculation")
115+
116+
## Test 6: Test with complex contrast matrix (multiple comparisons)
# Fixture: the first contrast puts fractional weights (0.5, 0.5, -1) on all
# three groups; the second compares Group1 and Group2 only (Group3 weight 0).
# Groups have different totals, so the expected values pool raw counts rather
# than averaging per-group percentages.
117+
contrast_matrix <- matrix(c(0.5, 0.5, -1, 1, -1, 0), nrow = 2, ncol = 3, byrow = TRUE)
118+
colnames(contrast_matrix) <- c("Group1", "Group2", "Group3")
119+
summarized <- data.table::data.table(
120+
GROUP = c("Group1", "Group2", "Group3"),
121+
TotalGroupMeasurements = c(200, 150, 100),
122+
NumMeasuredFeature = c(180, 120, 80),
123+
NumImputedFeature = c(10, 15, 5)
124+
)
125+
result <- list()
126+
samples_info <- data.table::data.table(GROUP = c("Group1", "Group2", "Group3"), NumRuns = c(20, 15, 10))
127+
output <- MSstats:::.countMissingPercentage(contrast_matrix, summarized, result, samples_info, TRUE)
128+
expected_missing_1 <- 1 - 380 / 450 # 1 - (180 + 120 + 80) / (200 + 150 + 100)
129+
expected_imputed_1 <- 30 / 450 # (10 + 15 + 5) / (200 + 150 + 100)
130+
expected_missing_2 <- 1 - 300 / 350 # 1 - (180 + 120) / (200 + 150) = 1 - 0.8571 = 0.1429 (approximately)
131+
expected_imputed_2 <- 25 / 350 # (10 + 15) / (200 + 150) = 0.0714 (approximately)
132+
expect_equal(length(output$MissingPercentage), 2, info = "Complex contrast: MissingPercentage length")
133+
expect_equal(length(output$ImputationPercentage), 2, info = "Complex contrast: ImputationPercentage length")
134+
expect_equal(output$MissingPercentage[1], expected_missing_1, tolerance = 1e-10,
135+
info = "Complex contrast: Contrast 1 missing percentage (0.5*Group1 + 0.5*Group2 - Group3)")
136+
expect_equal(output$ImputationPercentage[1], expected_imputed_1, tolerance = 1e-10,
137+
info = "Complex contrast: Contrast 1 imputation percentage")
138+
expect_equal(output$MissingPercentage[2], expected_missing_2, tolerance = 1e-10,
139+
info = "Complex contrast: Contrast 2 missing percentage (Group1 - Group2)")
140+
expect_equal(output$ImputationPercentage[2], expected_imputed_2, tolerance = 1e-10,
141+
info = "Complex contrast: Contrast 2 imputation percentage")

0 commit comments

Comments
 (0)