From 19689aa45e81e26830c7a94fe13fb773410695dc Mon Sep 17 00:00:00 2001 From: bczernecki Date: Mon, 2 Feb 2026 23:40:47 +0100 Subject: [PATCH 1/9] fix: meteo_imgw_daily --- DESCRIPTION | 2 +- NEWS.md | 5 ++++ R/clean_metadata_meteo.R | 63 ++++++++++++++++++++++----------------- R/meteo_imgw_daily.R | 64 +++++++++++++++++++++------------------- R/meteo_metadata_imgw.R | 1 + 5 files changed, 76 insertions(+), 59 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9491e4f..e0574ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: climate Title: Interface to Download Meteorological (and Hydrological) Datasets -Version: 1.2.8 +Version: 1.2.9.9001 Authors@R: c(person(given = "Bartosz", family = "Czernecki", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index aef53f6..8f061e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# climate 1.2.9 + +* fixes for corrupted header files in `meteo_imgw_` family of functions due to changes in the IMGW-PIB repository + + # climate 1.2.8 * speeding up selective download for given station names in `meteo_imgw_*` and `hydro_imgw_daily()` functions that simultaneously reduce use of memory diff --git a/R/clean_metadata_meteo.R b/R/clean_metadata_meteo.R index 7a5aac9..9e4e464 100644 --- a/R/clean_metadata_meteo.R +++ b/R/clean_metadata_meteo.R @@ -16,34 +16,43 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") { test_url(link = address, output = temp) a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")$V1 - a = gsub(a, pattern = "\\?", replacement = "") - a = stringi::stri_trans_general(a, 'LATIN-ASCII') + + inds = grepl("^[A-Z]{2}.{5}", a) + + code = trimws(substr(a, 1, 7))[inds] + name = trimws(substr(a, 8, nchar(a)))[inds] + a = data.frame(parameters = code, V1 = name) + a$V1 = stringi::stri_trans_general(a$V1, 'LATIN-ASCII') + return(a) + + #a = gsub(a, pattern = "\\?", replacement = "") + # additional workarounds for mac os but not only... - a = gsub(x = a, pattern = "'", replacement = "") - a = gsub(x = a, pattern = "\\^0", replacement = "") - a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE) - length_char = max(nchar(a$V1), na.rm = TRUE) - - if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly - if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily - if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly - if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly - - field = substr(a$V1, length_char - 3, length_char) - - if (rank == "synop" && interval == "monthly") { - length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2 - field = substr(a$V1, length_char - 3, length_char + 2) - } - - a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1])))) - a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2])))) - - a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3)) - a$V1 = gsub(x = a$V1, pattern = "* ", "") - - a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status - colnames(a)[1] = "parameters" + # a = gsub(x = a, pattern = "'", replacement = "") + # a = gsub(x = a, pattern = "\\^0", replacement = "") + # a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE) + # length_char = max(nchar(a$V1), na.rm = TRUE) + # + # if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly + # if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily + # if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly + # if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly + # + # field = substr(a$V1, length_char - 3, length_char) + # + # if (rank == "synop" && interval == "monthly") { + # length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2 + # field = substr(a$V1, length_char - 3, length_char + 2) + # } + # + # a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1])))) + # a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2])))) + # + # a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3)) + # a$V1 = gsub(x = a$V1, pattern = "* ", "") + # + # a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status + # colnames(a)[1] = "parameters" return(a) } diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index 3a15b46..5f3ff8e 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -148,42 +148,41 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - data1$`Nazwa stacji` = trimws(data1$`Nazwa stacji`) + data1$POST = trimws(data1$POST) file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters - data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`) + data2$POST = trimws(data2$POST) } else { data2 = head(data1, 0)[, 1:min(5, ncol(data1))] - data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`) + data2$POST = trimws(data2$POST) } unlink(c(temp, temp2)) # remove statuses if not needed: if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL - data2[grep("^Status", colnames(data2))] = NULL + data1[grep("^W", colnames(data1))] = NULL + data2[grep("^W", colnames(data2))] = NULL } ttt = merge(data1, data2, - by = c("Kod stacji", "Rok", "Miesiac", "Dzien"), + by = c("NSP", "ROK", "MC", "DZ"), all.x = TRUE ) - - ttt = ttt[order(ttt$`Nazwa stacji.x`, ttt$Rok, ttt$Miesiac, ttt$Dzien), ] + + ttt = ttt[order(ttt$POST.x, ttt$ROK, ttt$MC, ttt$DZ), ] if (!is.null(station)) { - all_data[[length(all_data) + 1]] = ttt[ttt$`Nazwa stacji.x` %in% station, ] + all_data[[length(all_data) + 1]] = ttt[ttt$POST.x %in% station, ] } else { all_data[[length(all_data) + 1]] = ttt } } # end of looping for zip archives } # end of if statement for SYNOP stations - ###################### ###### KLIMAT: ####### if (rank == "climate") { @@ -220,7 +219,7 @@ meteo_imgw_daily_bp = function(rank, if (!is.null(csv_data)) { csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters - csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`) + csv_data$POST = trimws(csv_data$POST) } return(csv_data) } @@ -230,7 +229,7 @@ meteo_imgw_daily_bp = function(rank, data1 = d colnames(data1) = meta[[1]]$parameters if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + data1[grep("^W", colnames(data1))] = NULL } } @@ -249,9 +248,9 @@ meteo_imgw_daily_bp = function(rank, # remove statuses if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + data1[grep("^W", colnames(data1))] = NULL if (file.exists(file2)) { - data2[grep("^Status", colnames(data2))] = NULL + data2[grep("^W", colnames(data2))] = NULL } } @@ -259,7 +258,7 @@ meteo_imgw_daily_bp = function(rank, if (file.exists(file2)) { all_data[[length(all_data) + 1]] = merge(data1, data2, - by = c("Kod stacji", "Rok", "Miesiac", "Dzien"), + by = c("NSP", "ROK", "MC", "DZ"), all.x = TRUE ) } else { @@ -304,7 +303,7 @@ meteo_imgw_daily_bp = function(rank, csv_data = read.table(data, header = FALSE, stringsAsFactors = FALSE, sep = ",", encoding = "CP1250") csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters - csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`) + csv_data$POST = trimws(csv_data$POST) return(csv_data) } ) @@ -313,7 +312,7 @@ meteo_imgw_daily_bp = function(rank, data1 = d colnames(data1) = meta[[1]]$parameters if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + data1[grep("^W", colnames(data1))] = NULL } } @@ -324,7 +323,7 @@ meteo_imgw_daily_bp = function(rank, colnames(data1) = meta[[1]]$parameters # remove status if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + data1[grep("^W", colnames(data1))] = NULL } } # end of corrupted zips unlink(c(temp, temp2)) @@ -336,12 +335,12 @@ meteo_imgw_daily_bp = function(rank, all_data = as.data.frame(data.table::rbindlist(all_data, fill = TRUE)) # fix order of columns if needed and entries in stations' names if more than 1 available: - col_inds = grep(pattern = "Nazwa stacji", colnames(all_data), value = TRUE) + col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE) if (length(col_inds) > 1) { - all_data$`Nazwa stacji` = apply(all_data[, col_inds], 1, function(x) na.omit(unique(x))[1]) - all_data$`Nazwa stacji.x` = NULL - all_data$`Nazwa stacji.y` = NULL - if (colnames(all_data)[ncol(all_data)] == "Nazwa stacji") { # re-order columns if needed + all_data$POST = apply(all_data[, col_inds], 1, function(x) na.omit(unique(x))[1]) + all_data$POST.x = NULL + all_data$POST.y = NULL + if (colnames(all_data)[ncol(all_data)] == "POST") { # re-order columns if needed all_data = all_data[, c(1, ncol(all_data), 2:(ncol(all_data) - 1))] } } @@ -350,7 +349,7 @@ meteo_imgw_daily_bp = function(rank, all_data = merge(climate::imgw_meteo_stations[, 1:3], all_data, by.x = "id", - by.y = "Kod stacji", + by.y = "NSP", all.y = TRUE ) } @@ -361,14 +360,15 @@ meteo_imgw_daily_bp = function(rank, climate = "KLIMATYCZNA", precip = "OPADOWA" ) + all_data = cbind(data.frame(rank_code = rank_code), all_data) - all_data = all_data[all_data$Rok %in% year, ] # clip only to selected years + all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`)))))) + inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { env$logs = c( env$logs, @@ -379,17 +379,19 @@ meteo_imgw_daily_bp = function(rank, } } } - all_data$`Nazwa stacji` = trimws(all_data$`Nazwa stacji`) + all_data$POST = trimws(all_data$POST) # sort output - if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) { - all_data = all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac, all_data$Dzien), ] + if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { + all_data = all_data[order(all_data$NSP, all_data$ROK, all_data$MC, all_data$DZ), ] } else { - all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien), ] + all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ), ] } # remove duplicates and shorten colnames - all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) + # TODO: + # turned off temporarily: + # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) rownames(all_data) = NULL # check if there any messages gathered in env$logs and if it is not empty then print them: diff --git a/R/meteo_metadata_imgw.R b/R/meteo_metadata_imgw.R index 95e0291..53c5879 100644 --- a/R/meteo_metadata_imgw.R +++ b/R/meteo_metadata_imgw.R @@ -18,6 +18,7 @@ meteo_metadata_imgw = function(interval, rank) { # interval can be: monthly, hou if (interval == "daily") { # warning! daily for climates and synop have 2 files with metadata!!! if (rank == "synop") { + b[[1]] = clean_metadata_meteo(address = paste0(base_url,"dane_meteorologiczne/dobowe/synop/s_d_format.txt"), rank = "synop", interval = "daily") b[[2]] = clean_metadata_meteo(address = paste0(base_url, "dane_meteorologiczne/dobowe/synop/s_d_t_format.txt"), From c980aec03c412bbe8519c797278bc9e2ed56890b Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 01:46:01 +0100 Subject: [PATCH 2/9] fix: imgw changes --- R/clean_metadata_meteo.R | 6 ++--- R/meteo_imgw_daily.R | 51 +++++++++++++++++++++++----------------- R/meteo_imgw_hourly.R | 26 ++++++++++---------- R/meteo_imgw_monthly.R | 20 ++++++++-------- 4 files changed, 56 insertions(+), 47 deletions(-) diff --git a/R/clean_metadata_meteo.R b/R/clean_metadata_meteo.R index 9e4e464..630b48b 100644 --- a/R/clean_metadata_meteo.R +++ b/R/clean_metadata_meteo.R @@ -16,13 +16,13 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") { test_url(link = address, output = temp) a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")$V1 - + inds = grepl("^[A-Z]{2}.{5}", a) code = trimws(substr(a, 1, 7))[inds] name = trimws(substr(a, 8, nchar(a)))[inds] - a = data.frame(parameters = code, V1 = name) - a$V1 = stringi::stri_trans_general(a$V1, 'LATIN-ASCII') + a = data.frame(parameters = code, label = name) + a$label = stringi::stri_trans_general(a$label, 'LATIN-ASCII') return(a) #a = gsub(a, pattern = "\\?", replacement = "") diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index 5f3ff8e..eeab4f2 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -148,12 +148,19 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } data1$POST = trimws(data1$POST) file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters + for (labs in seq_along(meta[[2]]$parameters)) { + attr(data2[[labs]], "label") = meta[[2]]$label[[labs]] + } + data2$POST = trimws(data2$POST) } else { data2 = head(data1, 0)[, 1:min(5, ncol(data1))] @@ -162,19 +169,17 @@ meteo_imgw_daily_bp = function(rank, unlink(c(temp, temp2)) - # remove statuses if not needed: - if (status == FALSE) { - data1[grep("^W", colnames(data1))] = NULL - data2[grep("^W", colnames(data2))] = NULL - } - - ttt = merge(data1, + data.table::setDT(data1) + data.table::setDT(data2) + + ttt = merge( + data1, data2, by = c("NSP", "ROK", "MC", "DZ"), all.x = TRUE ) - ttt = ttt[order(ttt$POST.x, ttt$ROK, ttt$MC, ttt$DZ), ] + data.table::setorder(ttt, POST.x, ROK, MC, DZ) if (!is.null(station)) { all_data[[length(all_data) + 1]] = ttt[ttt$POST.x %in% station, ] @@ -332,21 +337,26 @@ meteo_imgw_daily_bp = function(rank, } # end of if statement for climate stations } # end of looping over catalogs - all_data = as.data.frame(data.table::rbindlist(all_data, fill = TRUE)) + all_data = data.table::rbindlist(all_data, fill = TRUE) # fix order of columns if needed and entries in stations' names if more than 1 available: col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE) + if (length(col_inds) > 1) { - all_data$POST = apply(all_data[, col_inds], 1, function(x) na.omit(unique(x))[1]) + all_data$POST <- apply( + all_data[, col_inds, with = FALSE], + 1, + function(x) na.omit(unique(x))[1] + ) all_data$POST.x = NULL all_data$POST.y = NULL if (colnames(all_data)[ncol(all_data)] == "POST") { # re-order columns if needed - all_data = all_data[, c(1, ncol(all_data), 2:(ncol(all_data) - 1))] + data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1))) } } if (coords) { - all_data = merge(climate::imgw_meteo_stations[, 1:3], + all_data = merge(setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", by.y = "NSP", @@ -355,13 +365,13 @@ meteo_imgw_daily_bp = function(rank, } # add station rank: - rank_code = switch(rank, - synop = "SYNOPTYCZNA", - climate = "KLIMATYCZNA", - precip = "OPADOWA" - ) - - all_data = cbind(data.frame(rank_code = rank_code), all_data) + # rank_code = switch(rank, + # synop = "SYNOPTYCZNA", + # climate = "KLIMATYCZNA", + # precip = "OPADOWA" + # ) + # + # all_data = cbind(data.frame(rank_code = rank_code), all_data) all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years @@ -375,12 +385,11 @@ meteo_imgw_daily_bp = function(rank, paste("At least one of selected station(s) is not available in the database. Returning all available stations") ) } else { - all_data = all_data[inds, ] + all_data = all_data[inds] } } } all_data$POST = trimws(all_data$POST) - # sort output if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { all_data = all_data[order(all_data$NSP, all_data$ROK, all_data$MC, all_data$DZ), ] diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index c2a556e..25f9c1b 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -152,7 +152,7 @@ meteo_imgw_hourly_bp = function(rank, # remove statuses if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + data1[grep("^W", colnames(data1))] = NULL } unlink(c(temp, temp2)) @@ -196,7 +196,7 @@ meteo_imgw_hourly_bp = function(rank, csv_data = read.csv(data, header = FALSE, sep = ",") csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters - csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`) + csv_data$POST = trimws(csv_data$POST) return(csv_data) } ) @@ -229,7 +229,7 @@ meteo_imgw_hourly_bp = function(rank, all_data = merge(climate::imgw_meteo_stations[, 1:3], all_data, by.x = "id", - by.y = "Kod stacji", + by.y = "NSP", all.y = TRUE ) } @@ -240,12 +240,12 @@ meteo_imgw_hourly_bp = function(rank, climate = "KLIMATYCZNA" ) all_data = cbind(data.frame(rank_code = rank_code), all_data) - all_data = all_data[all_data$Rok %in% year, ] # przyciecie tylko do wybranych lat gdyby sie pobralo za duzo + all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`)))))) + inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { env$logs = c( env$logs, @@ -256,19 +256,19 @@ meteo_imgw_hourly_bp = function(rank, } } } - all_data$`Nazwa stacji` = trimws(all_data$`Nazwa stacji`) + all_data$POST = trimws(all_data$POST) # sortowanie w zaleznosci od nazw kolumn - raz jest "kod stacji", raz "id" - if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) { + if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { all_data = all_data[order( - all_data$`Kod stacji`, - all_data$Rok, - all_data$Miesiac, - all_data$Dzien, - all_data$Godzina + all_data$NSP, + all_data$ROK, + all_data$MC, + all_data$DZ, + all_data$GG ), ] } else { - all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien, all_data$Godzina), ] + all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ, all_data$GG), ] } # extra option for shortening colnames and removing duplicates diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index 6e10885..962e071 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -157,10 +157,10 @@ meteo_imgw_monthly_bp <- function(rank, # removing status if set if (status == FALSE) { - data1[grep("^Status", colnames(data1))] <- NULL + data1[grep("^W", colnames(data1))] <- NULL if (rank != "precip") { # in precipitation station only 1 file - data2[grep("^Status", colnames(data2))] <- NULL + data2[grep("^W", colnames(data2))] <- NULL } } @@ -168,7 +168,7 @@ meteo_imgw_monthly_bp <- function(rank, if (rank != "precip") { all_data[[i]] <- merge(data1, data2, - by = c("Kod stacji", "Nazwa stacji", "Rok", "Miesiac"), + by = c("NSP", "POST", "ROK", "MC"), all.x = TRUE ) } else { @@ -177,13 +177,13 @@ meteo_imgw_monthly_bp <- function(rank, } all_data <- do.call(rbind, all_data) - all_data <- all_data[all_data$Rok %in% year, ] + all_data <- all_data[all_data$ROK %in% year, ] if (coords) { all_data <- merge(climate::imgw_meteo_stations[, 1:3], all_data, by.x = "id", - by.y = "Kod stacji", + by.y = "NSP", all.y = TRUE ) } @@ -199,7 +199,7 @@ meteo_imgw_monthly_bp <- function(rank, # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds <- unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`)))))) + inds <- unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { env$logs <- c( env$logs, @@ -210,13 +210,13 @@ meteo_imgw_monthly_bp <- function(rank, } } } - all_data$`Nazwa stacji` <- trimws(all_data$`Nazwa stacji`) + all_data$POST <- trimws(all_data$POST) # sorting data accordingly to column names - (could be "kod stacji" or "id") - if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) { - all_data <- all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac), ] + if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { + all_data <- all_data[order(all_data$NSP, all_data$ROK, all_data$MC), ] } else { - all_data <- all_data[order(all_data$id, all_data$Rok, all_data$Miesiac), ] + all_data <- all_data[order(all_data$id, all_data$ROK, all_data$MC), ] } # adding option to shorten columns and removing duplicates: From 0ebfd6c1f86e3cca9c0bef6c4d7928dc62022394 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 02:16:12 +0100 Subject: [PATCH 3/9] fix: meteo hourly and monthly --- R/meteo_imgw_hourly.R | 41 +++++++++++----------------- R/meteo_imgw_monthly.R | 62 ++++++++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index 25f9c1b..0df918f 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -149,11 +149,8 @@ meteo_imgw_hourly_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/") data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - - # remove statuses - if (status == FALSE) { - data1[grep("^W", colnames(data1))] = NULL - } + data1$POST = trimws(data1$POST) + data.table::setDT(data1) unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 @@ -209,10 +206,8 @@ meteo_imgw_hourly_bp = function(rank, } colnames(data1) = meta[[1]]$parameters - # remove status - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL - } + data1$POST = trimws(data1$POST) + data.table::setDT(data1) unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 } # end of looping for zip files @@ -220,13 +215,13 @@ meteo_imgw_hourly_bp = function(rank, } # end of loop over directories if (!is.null(all_data)) { - all_data = do.call(rbind, all_data) + all_data = data.table::rbindlist(all_data, fill = TRUE) } else { stop("No data found. Quitting", call. = FALSE) } if (coords) { - all_data = merge(climate::imgw_meteo_stations[, 1:3], + all_data = merge(data.table::setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", by.y = "NSP", @@ -235,11 +230,12 @@ meteo_imgw_hourly_bp = function(rank, } # add rank - rank_code = switch(rank, - synop = "SYNOPTYCZNA", - climate = "KLIMATYCZNA" - ) - all_data = cbind(data.frame(rank_code = rank_code), all_data) + # add station rank (temporarily disabled to align with daily) + # rank_code = switch(rank, + # synop = "SYNOPTYCZNA", + # climate = "KLIMATYCZNA" + # ) + # all_data = cbind(data.frame(rank_code = rank_code), all_data) all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years # station selection and names cleaning: @@ -260,19 +256,14 @@ meteo_imgw_hourly_bp = function(rank, # sortowanie w zaleznosci od nazw kolumn - raz jest "kod stacji", raz "id" if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { - all_data = all_data[order( - all_data$NSP, - all_data$ROK, - all_data$MC, - all_data$DZ, - all_data$GG - ), ] + data.table::setorder(all_data, NSP, ROK, MC, DZ, GG) } else { - all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ, all_data$GG), ] + data.table::setorder(all_data, id, ROK, MC, DZ, GG) } # extra option for shortening colnames and removing duplicates - all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) + # TODO: turned off temporarily, consistent with daily implementation + # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) rownames(all_data) = NULL # check if there any messages gathered in env$logs and if it is not empty then print them: diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index 962e071..36df408 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -140,35 +140,33 @@ meteo_imgw_monthly_bp <- function(rank, temp <- tempfile() temp2 <- tempfile() test_url(address, temp) - # download.file(address, temp) invisible(unzip(zipfile = temp, exdir = temp2)) file1 <- paste(temp2, dir(temp2), sep = "/")[1] data1 <- imgw_read(translit, file1) - colnames(data1) <- meta[[1]]$parameters + data1$POST <- trimws(data1$POST) + data.table::setDT(data1) if (rank != "precip") { # only 1 file in precipitation stations file2 <- paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { data2 <- imgw_read(translit, file2) colnames(data2) <- meta[[2]]$parameters - } - } - - # removing status if set - if (status == FALSE) { - data1[grep("^W", colnames(data1))] <- NULL - - if (rank != "precip") { # in precipitation station only 1 file - data2[grep("^W", colnames(data2))] <- NULL + data2$POST <- trimws(data2$POST) + data.table::setDT(data2) } } unlink(c(temp, temp2)) + + if (any(is.na(colnames(data1)))) { + colnames(data1) = make.names(colnames(data1), unique = TRUE) + } if (rank != "precip") { + # merge without POST to align with daily approach; unify POST later all_data[[i]] <- merge(data1, data2, - by = c("NSP", "POST", "ROK", "MC"), + by = c("NSP", "ROK", "MC"), all.x = TRUE ) } else { @@ -176,11 +174,27 @@ meteo_imgw_monthly_bp <- function(rank, } } - all_data <- do.call(rbind, all_data) + all_data <- data.table::rbindlist(all_data, fill = TRUE) + + # fix order of columns if needed and entries in stations' names if more than 1 available: + col_inds <- grep(pattern = "POST", colnames(all_data), value = TRUE) + if (length(col_inds) > 1) { + all_data$POST <- apply( + all_data[, col_inds, with = FALSE], + 1, + function(x) na.omit(unique(x))[1] + ) + all_data$POST.x <- NULL + all_data$POST.y <- NULL + if (colnames(all_data)[ncol(all_data)] == "POST") { + data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1))) + } + } + all_data <- all_data[all_data$ROK %in% year, ] if (coords) { - all_data <- merge(climate::imgw_meteo_stations[, 1:3], + all_data <- merge(data.table::setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", by.y = "NSP", @@ -189,12 +203,13 @@ meteo_imgw_monthly_bp <- function(rank, } # add rank - rank_code <- switch(rank, - synop = "SYNOPTYCZNA", - climate = "KLIMATYCZNA", - precip = "OPADOWA" - ) - all_data <- cbind(data.frame(rank_code = rank_code), all_data) + # add station rank (temporarily disabled to align with daily) + # rank_code <- switch(rank, + # synop = "SYNOPTYCZNA", + # climate = "KLIMATYCZNA", + # precip = "OPADOWA" + # ) + # all_data <- cbind(data.frame(rank_code = rank_code), all_data) # station selection and names cleaning: if (!is.null(station)) { @@ -214,13 +229,14 @@ meteo_imgw_monthly_bp <- function(rank, # sorting data accordingly to column names - (could be "kod stacji" or "id") if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { - all_data <- all_data[order(all_data$NSP, all_data$ROK, all_data$MC), ] + data.table::setorder(all_data, NSP, ROK, MC) } else { - all_data <- all_data[order(all_data$id, all_data$ROK, all_data$MC), ] + data.table::setorder(all_data, id, ROK, MC) } # adding option to shorten columns and removing duplicates: - all_data <- meteo_shortening_imgw(all_data, col_names = col_names, ...) + # TODO: turned off temporarily, consistent with daily implementation + # all_data <- meteo_shortening_imgw(all_data, col_names = col_names, ...) rownames(all_data) <- NULL return(all_data) # clipping to selected years only From 909f9f6b7de3b82e4c2553756637c0b2535f33ec Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 09:37:49 +0100 Subject: [PATCH 4/9] fix: handle status in meteo_imgw --- R/meteo_imgw_daily.R | 18 +++---- R/meteo_imgw_hourly.R | 5 ++ R/meteo_imgw_monthly.R | 99 ++++++++++++++++++++------------------- R/utils.R | 31 ++++++++++++ R/zzz.R | 4 +- man/meteo_imgw_monthly.Rd | 2 +- 6 files changed, 99 insertions(+), 60 deletions(-) diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index eeab4f2..e0fb022 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -160,7 +160,6 @@ meteo_imgw_daily_bp = function(rank, for (labs in seq_along(meta[[2]]$parameters)) { attr(data2[[labs]], "label") = meta[[2]]$label[[labs]] } - data2$POST = trimws(data2$POST) } else { data2 = head(data1, 0)[, 1:min(5, ncol(data1))] @@ -343,7 +342,7 @@ meteo_imgw_daily_bp = function(rank, col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE) if (length(col_inds) > 1) { - all_data$POST <- apply( + all_data$POST = apply( all_data[, col_inds, with = FALSE], 1, function(x) na.omit(unique(x))[1] @@ -364,15 +363,6 @@ meteo_imgw_daily_bp = function(rank, ) } - # add station rank: - # rank_code = switch(rank, - # synop = "SYNOPTYCZNA", - # climate = "KLIMATYCZNA", - # precip = "OPADOWA" - # ) - # - # all_data = cbind(data.frame(rank_code = rank_code), all_data) - all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years # station selection and names cleaning: @@ -390,12 +380,18 @@ meteo_imgw_daily_bp = function(rank, } } all_data$POST = trimws(all_data$POST) + # sort output if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { all_data = all_data[order(all_data$NSP, all_data$ROK, all_data$MC, all_data$DZ), ] } else { all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ), ] } + + # remove status: + if (status == FALSE) { + all_data = remove_status(all_data) + } # remove duplicates and shorten colnames # TODO: diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index 0df918f..4667309 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -260,6 +260,11 @@ meteo_imgw_hourly_bp = function(rank, } else { data.table::setorder(all_data, id, ROK, MC, DZ, GG) } + + # remove status: + if (status == FALSE) { + all_data = remove_status(all_data) + } # extra option for shortening colnames and removing duplicates # TODO: turned off temporarily, consistent with daily implementation diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index 36df408..a1b6606 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -27,7 +27,7 @@ #' #' @examples #' \donttest{ -#' monthly <- meteo_imgw_monthly(rank = "climate", year = 1969) +#' monthly = meteo_imgw_monthly(rank = "climate", year = 1969) #' head(monthly) #' #' # a descriptive (long) column names: @@ -80,79 +80,79 @@ meteo_imgw_monthly = function(rank = "synop", #' @noRd #' @keywords internal -meteo_imgw_monthly_bp <- function(rank, +meteo_imgw_monthly_bp = function(rank, year, status, coords, station, col_names, ...) { - translit <- check_locale() - base_url <- "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/" - interval_pl <- "miesieczne" - meta <- meteo_metadata_imgw(interval = "monthly", rank = rank) - rank_pl <- switch(rank, + translit = check_locale() + base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/" + interval_pl = "miesieczne" + meta = meteo_metadata_imgw(interval = "monthly", rank = rank) + rank_pl = switch(rank, synop = "synop", climate = "klimat", precip = "opad" ) # checking internet connection: - temp <- tempfile() + temp = tempfile() test_url( link = paste0(base_url, "dane_meteorologiczne/", interval_pl, "/", rank_pl, "/"), output = temp ) - a <- readLines(temp, warn = FALSE) + a = readLines(temp, warn = FALSE) unlink(temp) - ind <- grep(readHTMLTable(a)[[1]]$Name, pattern = "/") - catalogs <- as.character(readHTMLTable(a)[[1]]$Name[ind]) + ind = grep(readHTMLTable(a)[[1]]$Name, pattern = "/") + catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind]) - years_in_catalogs <- strsplit(gsub(x = catalogs, pattern = "/", replacement = ""), split = "_") - years_in_catalogs <- lapply(years_in_catalogs, function(x) x[1]:x[length(x)]) - ind <- lapply(years_in_catalogs, function(x) sum(x %in% year) > 0) - catalogs <- catalogs[unlist(ind)] + years_in_catalogs = strsplit(gsub(x = catalogs, pattern = "/", replacement = ""), split = "_") + years_in_catalogs = lapply(years_in_catalogs, function(x) x[1]:x[length(x)]) + ind = lapply(years_in_catalogs, function(x) sum(x %in% year) > 0) + catalogs = catalogs[unlist(ind)] - all_data <- vector("list", length = length(catalogs)) + all_data = vector("list", length = length(catalogs)) for (i in seq_along(catalogs)) { - catalog <- gsub(catalogs[i], pattern = "/", replacement = "") + catalog = gsub(catalogs[i], pattern = "/", replacement = "") if (rank == "synop") { - address <- paste0( + address = paste0( base_url, "dane_meteorologiczne/miesieczne/synop", "/", catalog, "/", catalog, "_m_s.zip" ) } if (rank == "climate") { - address <- paste0( + address = paste0( base_url, "dane_meteorologiczne/miesieczne/klimat", "/", catalog, "/", catalog, "_m_k.zip" ) } if (rank == "precip") { - address <- paste0( + address = paste0( base_url, "dane_meteorologiczne/miesieczne/opad", "/", catalog, "/", catalog, "_m_o.zip" ) } - temp <- tempfile() - temp2 <- tempfile() + temp = tempfile() + temp2 = tempfile() test_url(address, temp) invisible(unzip(zipfile = temp, exdir = temp2)) - file1 <- paste(temp2, dir(temp2), sep = "/")[1] - data1 <- imgw_read(translit, file1) - colnames(data1) <- meta[[1]]$parameters - data1$POST <- trimws(data1$POST) + file1 = paste(temp2, dir(temp2), sep = "/")[1] + data1 = imgw_read(translit, file1) + colnames(data1) = meta[[1]]$parameters + data1$POST = trimws(data1$POST) data.table::setDT(data1) if (rank != "precip") { # only 1 file in precipitation stations - file2 <- paste(temp2, dir(temp2), sep = "/")[2] + file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { - data2 <- imgw_read(translit, file2) - colnames(data2) <- meta[[2]]$parameters - data2$POST <- trimws(data2$POST) + data2 = imgw_read(translit, file2) + colnames(data2) = meta[[2]]$parameters + data2$POST = trimws(data2$POST) data.table::setDT(data2) } } @@ -165,36 +165,36 @@ meteo_imgw_monthly_bp <- function(rank, if (rank != "precip") { # merge without POST to align with daily approach; unify POST later - all_data[[i]] <- merge(data1, data2, + all_data[[i]] = merge(data1, data2, by = c("NSP", "ROK", "MC"), all.x = TRUE ) } else { - all_data[[i]] <- data1 + all_data[[i]] = data1 } } - all_data <- data.table::rbindlist(all_data, fill = TRUE) + all_data = data.table::rbindlist(all_data, fill = TRUE) # fix order of columns if needed and entries in stations' names if more than 1 available: - col_inds <- grep(pattern = "POST", colnames(all_data), value = TRUE) + col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE) if (length(col_inds) > 1) { - all_data$POST <- apply( + all_data$POST = apply( all_data[, col_inds, with = FALSE], 1, function(x) na.omit(unique(x))[1] ) - all_data$POST.x <- NULL - all_data$POST.y <- NULL + all_data$POST.x = NULL + all_data$POST.y = NULL if (colnames(all_data)[ncol(all_data)] == "POST") { data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1))) } } - all_data <- all_data[all_data$ROK %in% year, ] + all_data = all_data[all_data$ROK %in% year, ] if (coords) { - all_data <- merge(data.table::setDT(climate::imgw_meteo_stations[, 1:3]), + all_data = merge(data.table::setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", by.y = "NSP", @@ -204,28 +204,28 @@ meteo_imgw_monthly_bp <- function(rank, # add rank # add station rank (temporarily disabled to align with daily) - # rank_code <- switch(rank, + # rank_code = switch(rank, # synop = "SYNOPTYCZNA", # climate = "KLIMATYCZNA", # precip = "OPADOWA" # ) - # all_data <- cbind(data.frame(rank_code = rank_code), all_data) + # all_data = cbind(data.frame(rank_code = rank_code), all_data) # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds <- unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) + inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { - env$logs <- c( + env$logs = c( env$logs, paste("At least one of selected station(s) is not available in the database. Returning all available stations") ) } else { - all_data <- all_data[inds, ] + all_data = all_data[inds, ] } } } - all_data$POST <- trimws(all_data$POST) + all_data$POST = trimws(all_data$POST) # sorting data accordingly to column names - (could be "kod stacji" or "id") if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { @@ -233,11 +233,16 @@ meteo_imgw_monthly_bp <- function(rank, } else { data.table::setorder(all_data, id, ROK, MC) } + + # remove status: + if (status == FALSE) { + all_data = remove_status(all_data) + } # adding option to shorten columns and removing duplicates: # TODO: turned off temporarily, consistent with daily implementation - # all_data <- meteo_shortening_imgw(all_data, col_names = col_names, ...) - rownames(all_data) <- NULL + # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) + rownames(all_data) = NULL return(all_data) # clipping to selected years only } diff --git a/R/utils.R b/R/utils.R index 9e9df92..d227012 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,3 +18,34 @@ convert_encoding = function(df) { }) return(df) } + + +#' Remove columns containing status information +#' +#' Internal function for removing columns from data frame data that contain only +#' status information and expand the created object +#' +#' @param df data frame or data.table +#' +#' @keywords internal +#' @noRd +#' +remove_status = function(df) { + + labels = sapply(df, function(x) { + lbl = attr(x, "label") + ifelse(is.null(lbl), NA, lbl) + }) + + status_cols = grepl("^Status pomiaru", labels) + + if (any(class(df) == "data.table")) { + df = df[, !..status_cols] + } else if (any(class(df) == "data.frame")) { + df = df[, !status_cols] + } else { + stop("Removing status is possible only for data.frame or data.table objects") + } + + return(df) +} diff --git a/R/zzz.R b/R/zzz.R index 3bf0207..c48fe63 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -2,4 +2,6 @@ #' @return Empty env #' @keywords internal #' @noRd -env <- new.env(parent = emptyenv()) \ No newline at end of file +env <- new.env(parent = emptyenv()) + +globalVariables(c("DZ", "GG", "MC", "NSP", "POST.x", "ROK", "id")) \ No newline at end of file diff --git a/man/meteo_imgw_monthly.Rd b/man/meteo_imgw_monthly.Rd index 82f7cd6..3ea4887 100644 --- a/man/meteo_imgw_monthly.Rd +++ b/man/meteo_imgw_monthly.Rd @@ -49,7 +49,7 @@ SYNOP / CLIMATE / PRECIP stations available in the danepubliczne.imgw.pl collect } \examples{ \donttest{ -monthly <- meteo_imgw_monthly(rank = "climate", year = 1969) +monthly = meteo_imgw_monthly(rank = "climate", year = 1969) head(monthly) # a descriptive (long) column names: From 9a3b6e0f3dfd3621f70460c9ccc7855dfd89323c Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 09:42:20 +0100 Subject: [PATCH 5/9] fix: meteo_imgw labels --- R/meteo_imgw_hourly.R | 9 +++++++++ R/meteo_imgw_monthly.R | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index 4667309..1321cbb 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -149,6 +149,9 @@ meteo_imgw_hourly_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/") data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } data1$POST = trimws(data1$POST) data.table::setDT(data1) @@ -193,6 +196,9 @@ meteo_imgw_hourly_bp = function(rank, csv_data = read.csv(data, header = FALSE, sep = ",") csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } csv_data$POST = trimws(csv_data$POST) return(csv_data) } @@ -206,6 +212,9 @@ meteo_imgw_hourly_bp = function(rank, } colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } data1$POST = trimws(data1$POST) data.table::setDT(data1) unlink(c(temp, temp2)) diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index a1b6606..cd1ccff 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -144,6 +144,9 @@ meteo_imgw_monthly_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } data1$POST = trimws(data1$POST) data.table::setDT(data1) @@ -152,6 +155,9 @@ meteo_imgw_monthly_bp = function(rank, if (file.exists(file2)) { data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } data2$POST = trimws(data2$POST) data.table::setDT(data2) } From 8ac0472851349d377fb3a91d1d37053db307b22f Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 13:08:35 +0100 Subject: [PATCH 6/9] fix: unit tests and adjust imgw changes --- R/nearest_stations_imgw.R | 10 +++++----- R/zzz.R | 3 ++- tests/testthat/test-meteo_imgw_daily.R | 21 ++++++++++---------- tests/testthat/test-meteo_metadata_imgw.R | 24 +++++++++++------------ 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/R/nearest_stations_imgw.R b/R/nearest_stations_imgw.R index f466634..4e3f902 100644 --- a/R/nearest_stations_imgw.R +++ b/R/nearest_stations_imgw.R @@ -90,16 +90,16 @@ nearest_stations_imgw_bp = function(type, stop("y should be latitude") } - if (max(year) >= as.integer(substr(Sys.Date(), 1, 4)) - 1) { - message("Data cannot be provided for this repository. Please check the available records at: \n - https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/") + if (max(year) >= as.integer(substr(Sys.Date(), 1, 4)) - 1 | length(year) == 1) { + stop("Data cannot be provided for this repository. Please check the available records at: \n + https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/ or the syntax used for the year argument. \n") Sys.sleep(2) } if (type == "meteo") { - result = unique(meteo_imgw_monthly(rank = rank, year = year, coords = TRUE)[, c(2:5)]) + result = as.data.frame(unique(meteo_imgw_monthly(rank = rank, year = year, coords = TRUE)[, c(2:5)])) } else if (type == "hydro") { - result = unique(hydro_imgw_monthly(year = year, coords = TRUE)[, c(1:4)]) + result = as.data.frame(unique(hydro_imgw_monthly(year = year, coords = TRUE)[, c(1:4)])) } else { stop("You've provided wrong type argument; please use: \"meteo\", or \"hydro\"") } diff --git a/R/zzz.R b/R/zzz.R index c48fe63..edeeeed 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -4,4 +4,5 @@ #' @noRd env <- new.env(parent = emptyenv()) -globalVariables(c("DZ", "GG", "MC", "NSP", "POST.x", "ROK", "id")) \ No newline at end of file +globalVariables(c("DZ", "GG", "MC", "NSP", "POST.x", "ROK", "id", + "..status_cols", "status_cols")) \ No newline at end of file diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index db870fe..315fcb7 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -7,7 +7,7 @@ test_that("meteo_imgw_daily", { return(invisible(NULL)) } else { y = 1900 # year not supported - expect_message(meteo_imgw_daily(rank = "synop", year = y, status = TRUE, + expect_warning(meteo_imgw_daily(rank = "synop", year = y, status = TRUE, coords = TRUE, allow_failure = TRUE)) } }) @@ -35,10 +35,13 @@ test_that("check_message_for_non_existing_station", { message("No internet connection! \n") return(invisible(NULL)) } else { - expect_message(meteo_imgw_daily(rank = "precip", + expect_true( + nrow( + meteo_imgw_daily(rank = "precip", year = 2002, coords = TRUE, - station = 9999)) + station = 9999) + ) > 160000) } }) @@ -52,17 +55,13 @@ test_that("check_encoding_in_non_synop", { message("No internet connection! \n") return(invisible(NULL)) } else { - non_synop = suppressWarnings( - suppressMessages(meteo_imgw_daily(year = 2024, + non_synop = meteo_imgw_daily(year = 2024, rank = "precip", - allow_failure = FALSE)) - ) + allow_failure = FALSE) expect_identical(nchar(non_synop$station), nchar(trimws(non_synop$station))) - non_synop = suppressWarnings( - suppressMessages(meteo_imgw_daily(year = 2024, + non_synop = meteo_imgw_daily(year = 2024, rank = "climate", - allow_failure = FALSE)) - ) + allow_failure = FALSE) expect_identical(nchar(non_synop$station), nchar(trimws(non_synop$station))) } }) diff --git a/tests/testthat/test-meteo_metadata_imgw.R b/tests/testthat/test-meteo_metadata_imgw.R index f450260..de6e125 100644 --- a/tests/testthat/test-meteo_metadata_imgw.R +++ b/tests/testthat/test-meteo_metadata_imgw.R @@ -19,18 +19,18 @@ test_that("meteo_metadata_imgw tests", { if (is.list(m_hs) && is.list(m_ds) && is.list(m_ds) && is.list(m_dc) && is.list(m_dp) && is.list(m_ms) && is.list(m_mc) && is.list(m_mp)) { - expect_equal(dim(m_hs[[1]]), c(107, 3)) - expect_equal(dim(m_hc[[1]]), c(22, 3)) - expect_equal(dim(m_ds[[1]]), c(65, 3)) - expect_equal(dim(m_ds[[2]]), c(23, 3)) - expect_equal(dim(m_dc[[1]]), c(18, 3)) - expect_equal(dim(m_dc[[2]]), c(13, 3)) - expect_equal(dim(m_dp[[1]]), c(16, 3)) - expect_equal(dim(m_ms[[1]]), c(60, 3)) - expect_equal(dim(m_ms[[2]]), c(22, 3)) - expect_equal(dim(m_mc[[1]]), c(27, 3)) - expect_equal(dim(m_mc[[2]]), c(12, 3)) - expect_equal(dim(m_mp[[1]]), c(14, 3)) + expect_equal(dim(m_hs[[1]]), c(107, 2)) + expect_equal(dim(m_hc[[1]]), c(22, 2)) + expect_equal(dim(m_ds[[1]]), c(65, 2)) + expect_equal(dim(m_ds[[2]]), c(23, 2)) + expect_equal(dim(m_dc[[1]]), c(18, 2)) + expect_equal(dim(m_dc[[2]]), c(13, 2)) + expect_equal(dim(m_dp[[1]]), c(16, 2)) + expect_equal(dim(m_ms[[1]]), c(58, 2)) + expect_equal(dim(m_ms[[2]]), c(22, 2)) + expect_equal(dim(m_mc[[1]]), c(27, 2)) + expect_equal(dim(m_mc[[2]]), c(12, 2)) + expect_equal(dim(m_mp[[1]]), c(14, 2)) } } }) From 3355f228307439afa566fa2ca6f58b753805bc03 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 17:32:00 +0100 Subject: [PATCH 7/9] climate rc 1.2.9 --- .github/workflows/test-coverage.yaml | 1 + DESCRIPTION | 2 +- R/clean_metadata_meteo.R | 31 ------------------------ R/meteo_imgw_daily.R | 33 +++++++++++++------------- R/meteo_imgw_hourly.R | 2 +- R/meteo_imgw_monthly.R | 2 +- R/nearest_stations_imgw.R | 5 ++-- tests/testthat/test-meteo_imgw_daily.R | 20 ++++++++++++++-- 8 files changed, 41 insertions(+), 55 deletions(-) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 9b432e5..61f9ae8 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -15,6 +15,7 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + NOT_CRAN: true # to enforce also tests that are skipped on cran and reduce code coverage steps: - uses: actions/checkout@v4 diff --git a/DESCRIPTION b/DESCRIPTION index e0574ba..c23b156 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: climate Title: Interface to Download Meteorological (and Hydrological) Datasets -Version: 1.2.9.9001 +Version: 1.2.9 Authors@R: c(person(given = "Bartosz", family = "Czernecki", role = c("aut", "cre"), diff --git a/R/clean_metadata_meteo.R b/R/clean_metadata_meteo.R index 630b48b..583ff40 100644 --- a/R/clean_metadata_meteo.R +++ b/R/clean_metadata_meteo.R @@ -24,35 +24,4 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") { a = data.frame(parameters = code, label = name) a$label = stringi::stri_trans_general(a$label, 'LATIN-ASCII') return(a) - - #a = gsub(a, pattern = "\\?", replacement = "") - - - # additional workarounds for mac os but not only... - # a = gsub(x = a, pattern = "'", replacement = "") - # a = gsub(x = a, pattern = "\\^0", replacement = "") - # a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE) - # length_char = max(nchar(a$V1), na.rm = TRUE) - # - # if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly - # if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily - # if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly - # if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly - # - # field = substr(a$V1, length_char - 3, length_char) - # - # if (rank == "synop" && interval == "monthly") { - # length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2 - # field = substr(a$V1, length_char - 3, length_char + 2) - # } - # - # a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1])))) - # a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2])))) - # - # a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3)) - # a$V1 = gsub(x = a$V1, pattern = "* ", "") - # - # a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status - # colnames(a)[1] = "parameters" - return(a) } diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index e0fb022..d883fa2 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -223,6 +223,9 @@ meteo_imgw_daily_bp = function(rank, if (!is.null(csv_data)) { csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]] + } csv_data$POST = trimws(csv_data$POST) } return(csv_data) @@ -232,8 +235,8 @@ meteo_imgw_daily_bp = function(rank, if (is.data.frame(d)) { data1 = d colnames(data1) = meta[[1]]$parameters - if (status == FALSE) { - data1[grep("^W", colnames(data1))] = NULL + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] } } @@ -242,19 +245,17 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters - } - } - - # remove statuses - if (status == FALSE) { - data1[grep("^W", colnames(data1))] = NULL - if (file.exists(file2)) { - data2[grep("^W", colnames(data2))] = NULL + for (labs in seq_along(meta[[2]]$parameters)) { + attr(data2[[labs]], "label") = meta[[2]]$label[[labs]] + } } } @@ -307,6 +308,9 @@ meteo_imgw_daily_bp = function(rank, csv_data = read.table(data, header = FALSE, stringsAsFactors = FALSE, sep = ",", encoding = "CP1250") csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]] + } csv_data$POST = trimws(csv_data$POST) return(csv_data) } @@ -315,8 +319,8 @@ meteo_imgw_daily_bp = function(rank, if (is.data.frame(d)) { data1 = d colnames(data1) = meta[[1]]$parameters - if (status == FALSE) { - data1[grep("^W", colnames(data1))] = NULL + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] } } @@ -325,10 +329,6 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - # remove status - if (status == FALSE) { - data1[grep("^W", colnames(data1))] = NULL - } } # end of corrupted zips unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 @@ -394,7 +394,6 @@ meteo_imgw_daily_bp = function(rank, } # remove duplicates and shorten colnames - # TODO: # turned off temporarily: # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) rownames(all_data) = NULL diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index 1321cbb..f02c8ef 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -197,7 +197,7 @@ meteo_imgw_hourly_bp = function(rank, csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters for (labs in seq_along(meta[[1]]$parameters)) { - attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]] } csv_data$POST = trimws(csv_data$POST) return(csv_data) diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index cd1ccff..7ccbe7f 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -156,7 +156,7 @@ meteo_imgw_monthly_bp = function(rank, data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters for (labs in seq_along(meta[[1]]$parameters)) { - attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + attr(data2[[labs]], "label") = meta[[1]]$label[[labs]] } data2$POST = trimws(data2$POST) data.table::setDT(data2) diff --git a/R/nearest_stations_imgw.R b/R/nearest_stations_imgw.R index 4e3f902..3432b78 100644 --- a/R/nearest_stations_imgw.R +++ b/R/nearest_stations_imgw.R @@ -90,10 +90,9 @@ nearest_stations_imgw_bp = function(type, stop("y should be latitude") } - if (max(year) >= as.integer(substr(Sys.Date(), 1, 4)) - 1 | length(year) == 1) { + if (max(year) >= as.integer(substr(Sys.Date(), 1, 4)) - 1 | length(year) > 1) { stop("Data cannot be provided for this repository. Please check the available records at: \n https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/ or the syntax used for the year argument. \n") - Sys.sleep(2) } if (type == "meteo") { @@ -109,6 +108,7 @@ nearest_stations_imgw_bp = function(type, https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/") } + # nocov start if (is.null(point)) { # workaround for different column names: if ("LON" %in% colnames(result)) @@ -183,5 +183,6 @@ nearest_stations_imgw_bp = function(type, message(paste0("Please provide only one year. For more years station's metadata", "may change (name, location or station may stop collecting data)")) } + # nocov end return(result) } diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index 315fcb7..b5c4ea4 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -1,6 +1,22 @@ context("meteo_imgw_daily") + +test_that("meteo_imgw_daily_single_station", { + if (!curl::has_internet()) { + message("No internet connection! \n") + return(invisible(NULL)) + } else { + station = c("POZNAŃ", "POZNAŃ-ŁAWICA") # year not supported + poznan = meteo_imgw_daily(rank = "synop", + year = 2024, + station = station, + status = FALSE, + coords = TRUE) + expect_true(nrow(poznan) > 360) + } +}) + test_that("meteo_imgw_daily", { if (!curl::has_internet()) { message("No internet connection! \n") @@ -58,10 +74,10 @@ test_that("check_encoding_in_non_synop", { non_synop = meteo_imgw_daily(year = 2024, rank = "precip", allow_failure = FALSE) - expect_identical(nchar(non_synop$station), nchar(trimws(non_synop$station))) + expect_identical(nchar(non_synop$NSP), nchar(trimws(non_synop$NSP))) non_synop = meteo_imgw_daily(year = 2024, rank = "climate", allow_failure = FALSE) - expect_identical(nchar(non_synop$station), nchar(trimws(non_synop$station))) + expect_identical(nchar(non_synop$NSP), nchar(trimws(non_synop$NSP))) } }) From b8483678c31da06c3abb4e5484e81275055bbf65 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 17:53:46 +0100 Subject: [PATCH 8/9] simplify single-station test --- tests/testthat/test-meteo_imgw_daily.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index b5c4ea4..253448f 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -7,13 +7,12 @@ test_that("meteo_imgw_daily_single_station", { message("No internet connection! \n") return(invisible(NULL)) } else { - station = c("POZNAŃ", "POZNAŃ-ŁAWICA") # year not supported - poznan = meteo_imgw_daily(rank = "synop", - year = 2024, - station = station, - status = FALSE, - coords = TRUE) - expect_true(nrow(poznan) > 360) + single_station = meteo_imgw_daily(rank = "synop", + year = 2024, + station = "LESZNO", + status = FALSE, + coords = TRUE) + expect_true(nrow(single_station) > 360) } }) From 5456cd84bb6cf55809fa65c0517b47e53195ae3d Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 3 Feb 2026 18:16:46 +0100 Subject: [PATCH 9/9] fix: unit test --- tests/testthat/test-meteo_imgw_daily.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index 253448f..cf277f6 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -70,13 +70,13 @@ test_that("check_encoding_in_non_synop", { message("No internet connection! \n") return(invisible(NULL)) } else { - non_synop = meteo_imgw_daily(year = 2024, + non_synop = expect_message(suppressWarnings(meteo_imgw_daily(year = 2024, rank = "precip", - allow_failure = FALSE) + allow_failure = FALSE))) expect_identical(nchar(non_synop$NSP), nchar(trimws(non_synop$NSP))) - non_synop = meteo_imgw_daily(year = 2024, + non_synop = suppressWarnings(meteo_imgw_daily(year = 2024, rank = "climate", - allow_failure = FALSE) + allow_failure = FALSE)) expect_identical(nchar(non_synop$NSP), nchar(trimws(non_synop$NSP))) } })