diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 9b432e5..61f9ae8 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -15,6 +15,7 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + NOT_CRAN: true # to enforce also tests that are skipped on cran and reduce code coverage steps: - uses: actions/checkout@v4 diff --git a/DESCRIPTION b/DESCRIPTION index 9491e4f..c23b156 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: climate Title: Interface to Download Meteorological (and Hydrological) Datasets -Version: 1.2.8 +Version: 1.2.9 Authors@R: c(person(given = "Bartosz", family = "Czernecki", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index aef53f6..8f061e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# climate 1.2.9 + +* fixes for corrupted header files in `meteo_imgw_` family of functions due to changes in the IMGW-PIB repository + + # climate 1.2.8 * speeding up selective download for given station names in `meteo_imgw_*` and `hydro_imgw_daily()` functions that simultaneously reduce use of memory diff --git a/R/clean_metadata_meteo.R b/R/clean_metadata_meteo.R index 7a5aac9..583ff40 100644 --- a/R/clean_metadata_meteo.R +++ b/R/clean_metadata_meteo.R @@ -16,34 +16,12 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") { test_url(link = address, output = temp) a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")$V1 - a = gsub(a, pattern = "\\?", replacement = "") - a = stringi::stri_trans_general(a, 'LATIN-ASCII') - # additional workarounds for mac os but not only... - a = gsub(x = a, pattern = "'", replacement = "") - a = gsub(x = a, pattern = "\\^0", replacement = "") - a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE) - length_char = max(nchar(a$V1), na.rm = TRUE) - - if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly - if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily - if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly - if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly - - field = substr(a$V1, length_char - 3, length_char) - - if (rank == "synop" && interval == "monthly") { - length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2 - field = substr(a$V1, length_char - 3, length_char + 2) - } - - a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1])))) - a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2])))) - - a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3)) - a$V1 = gsub(x = a$V1, pattern = "* ", "") - - a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status - colnames(a)[1] = "parameters" + inds = grepl("^[A-Z]{2}.{5}", a) + + code = trimws(substr(a, 1, 7))[inds] + name = trimws(substr(a, 8, nchar(a)))[inds] + a = data.frame(parameters = code, label = name) + a$label = stringi::stri_trans_general(a$label, 'LATIN-ASCII') return(a) } diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index 3a15b46..d883fa2 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -148,42 +148,45 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - data1$`Nazwa stacji` = trimws(data1$`Nazwa stacji`) + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } + data1$POST = trimws(data1$POST) file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters - data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`) + for (labs in seq_along(meta[[2]]$parameters)) { + attr(data2[[labs]], "label") = meta[[2]]$label[[labs]] + } + data2$POST = trimws(data2$POST) } else { data2 = head(data1, 0)[, 1:min(5, ncol(data1))] - data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`) + data2$POST = trimws(data2$POST) } unlink(c(temp, temp2)) - # remove statuses if not needed: - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL - data2[grep("^Status", colnames(data2))] = NULL - } - - ttt = merge(data1, + data.table::setDT(data1) + data.table::setDT(data2) + + ttt = merge( + data1, data2, - by = c("Kod stacji", "Rok", "Miesiac", "Dzien"), + by = c("NSP", "ROK", "MC", "DZ"), all.x = TRUE ) - - ttt = ttt[order(ttt$`Nazwa stacji.x`, ttt$Rok, ttt$Miesiac, ttt$Dzien), ] + + data.table::setorder(ttt, POST.x, ROK, MC, DZ) if (!is.null(station)) { - all_data[[length(all_data) + 1]] = ttt[ttt$`Nazwa stacji.x` %in% station, ] + all_data[[length(all_data) + 1]] = ttt[ttt$POST.x %in% station, ] } else { all_data[[length(all_data) + 1]] = ttt } } # end of looping for zip archives } # end of if statement for SYNOP stations - ###################### ###### KLIMAT: ####### if (rank == "climate") { @@ -220,7 +223,10 @@ meteo_imgw_daily_bp = function(rank, if (!is.null(csv_data)) { csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters - csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`) + for (labs in seq_along(meta[[1]]$parameters)) { + attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]] + } + csv_data$POST = trimws(csv_data$POST) } return(csv_data) } @@ -229,8 +235,8 @@ meteo_imgw_daily_bp = function(rank, if (is.data.frame(d)) { data1 = d colnames(data1) = meta[[1]]$parameters - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] } } @@ -239,19 +245,17 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters - } - } - - # remove statuses - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL - if (file.exists(file2)) { - data2[grep("^Status", colnames(data2))] = NULL + for (labs in seq_along(meta[[2]]$parameters)) { + attr(data2[[labs]], "label") = meta[[2]]$label[[labs]] + } } } @@ -259,7 +263,7 @@ meteo_imgw_daily_bp = function(rank, if (file.exists(file2)) { all_data[[length(all_data) + 1]] = merge(data1, data2, - by = c("Kod stacji", "Rok", "Miesiac", "Dzien"), + by = c("NSP", "ROK", "MC", "DZ"), all.x = TRUE ) } else { @@ -304,7 +308,10 @@ meteo_imgw_daily_bp = function(rank, csv_data = read.table(data, header = FALSE, stringsAsFactors = FALSE, sep = ",", encoding = "CP1250") csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters - csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`) + for (labs in seq_along(meta[[1]]$parameters)) { + attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]] + } + csv_data$POST = trimws(csv_data$POST) return(csv_data) } ) @@ -312,8 +319,8 @@ meteo_imgw_daily_bp = function(rank, if (is.data.frame(d)) { data1 = d colnames(data1) = meta[[1]]$parameters - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] } } @@ -322,10 +329,6 @@ meteo_imgw_daily_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - # remove status - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL - } } # end of corrupted zips unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 @@ -333,63 +336,66 @@ meteo_imgw_daily_bp = function(rank, } # end of if statement for climate stations } # end of looping over catalogs - all_data = as.data.frame(data.table::rbindlist(all_data, fill = TRUE)) + all_data = data.table::rbindlist(all_data, fill = TRUE) # fix order of columns if needed and entries in stations' names if more than 1 available: - col_inds = grep(pattern = "Nazwa stacji", colnames(all_data), value = TRUE) + col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE) + if (length(col_inds) > 1) { - all_data$`Nazwa stacji` = apply(all_data[, col_inds], 1, function(x) na.omit(unique(x))[1]) - all_data$`Nazwa stacji.x` = NULL - all_data$`Nazwa stacji.y` = NULL - if (colnames(all_data)[ncol(all_data)] == "Nazwa stacji") { # re-order columns if needed - all_data = all_data[, c(1, ncol(all_data), 2:(ncol(all_data) - 1))] + all_data$POST = apply( + all_data[, col_inds, with = FALSE], + 1, + function(x) na.omit(unique(x))[1] + ) + all_data$POST.x = NULL + all_data$POST.y = NULL + if (colnames(all_data)[ncol(all_data)] == "POST") { # re-order columns if needed + data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1))) } } if (coords) { - all_data = merge(climate::imgw_meteo_stations[, 1:3], + all_data = merge(setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", - by.y = "Kod stacji", + by.y = "NSP", all.y = TRUE ) } - # add station rank: - rank_code = switch(rank, - synop = "SYNOPTYCZNA", - climate = "KLIMATYCZNA", - precip = "OPADOWA" - ) - all_data = cbind(data.frame(rank_code = rank_code), all_data) - - all_data = all_data[all_data$Rok %in% year, ] # clip only to selected years + all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`)))))) + inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { env$logs = c( env$logs, paste("At least one of selected station(s) is not available in the database. Returning all available stations") ) } else { - all_data = all_data[inds, ] + all_data = all_data[inds] } } } - all_data$`Nazwa stacji` = trimws(all_data$`Nazwa stacji`) - + all_data$POST = trimws(all_data$POST) + # sort output - if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) { - all_data = all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac, all_data$Dzien), ] + if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { + all_data = all_data[order(all_data$NSP, all_data$ROK, all_data$MC, all_data$DZ), ] } else { - all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien), ] + all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ), ] + } + + # remove status: + if (status == FALSE) { + all_data = remove_status(all_data) } # remove duplicates and shorten colnames - all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) + # turned off temporarily: + # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) rownames(all_data) = NULL # check if there any messages gathered in env$logs and if it is not empty then print them: diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index c2a556e..f02c8ef 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -149,11 +149,11 @@ meteo_imgw_hourly_bp = function(rank, file1 = paste(temp2, dir(temp2), sep = "/") data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - - # remove statuses - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] } + data1$POST = trimws(data1$POST) + data.table::setDT(data1) unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 @@ -196,7 +196,10 @@ meteo_imgw_hourly_bp = function(rank, csv_data = read.csv(data, header = FALSE, sep = ",") csv_data = convert_encoding(csv_data) colnames(csv_data) = meta[[1]]$parameters - csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`) + for (labs in seq_along(meta[[1]]$parameters)) { + attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]] + } + csv_data$POST = trimws(csv_data$POST) return(csv_data) } ) @@ -209,10 +212,11 @@ meteo_imgw_hourly_bp = function(rank, } colnames(data1) = meta[[1]]$parameters - # remove status - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] = NULL + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] } + data1$POST = trimws(data1$POST) + data.table::setDT(data1) unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 } # end of looping for zip files @@ -220,32 +224,33 @@ meteo_imgw_hourly_bp = function(rank, } # end of loop over directories if (!is.null(all_data)) { - all_data = do.call(rbind, all_data) + all_data = data.table::rbindlist(all_data, fill = TRUE) } else { stop("No data found. Quitting", call. = FALSE) } if (coords) { - all_data = merge(climate::imgw_meteo_stations[, 1:3], + all_data = merge(data.table::setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", - by.y = "Kod stacji", + by.y = "NSP", all.y = TRUE ) } # add rank - rank_code = switch(rank, - synop = "SYNOPTYCZNA", - climate = "KLIMATYCZNA" - ) - all_data = cbind(data.frame(rank_code = rank_code), all_data) - all_data = all_data[all_data$Rok %in% year, ] # przyciecie tylko do wybranych lat gdyby sie pobralo za duzo + # add station rank (temporarily disabled to align with daily) + # rank_code = switch(rank, + # synop = "SYNOPTYCZNA", + # climate = "KLIMATYCZNA" + # ) + # all_data = cbind(data.frame(rank_code = rank_code), all_data) + all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`)))))) + inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { env$logs = c( env$logs, @@ -256,23 +261,23 @@ meteo_imgw_hourly_bp = function(rank, } } } - all_data$`Nazwa stacji` = trimws(all_data$`Nazwa stacji`) + all_data$POST = trimws(all_data$POST) # sortowanie w zaleznosci od nazw kolumn - raz jest "kod stacji", raz "id" - if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) { - all_data = all_data[order( - all_data$`Kod stacji`, - all_data$Rok, - all_data$Miesiac, - all_data$Dzien, - all_data$Godzina - ), ] + if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { + data.table::setorder(all_data, NSP, ROK, MC, DZ, GG) } else { - all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien, all_data$Godzina), ] + data.table::setorder(all_data, id, ROK, MC, DZ, GG) + } + + # remove status: + if (status == FALSE) { + all_data = remove_status(all_data) } # extra option for shortening colnames and removing duplicates - all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) + # TODO: turned off temporarily, consistent with daily implementation + # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) rownames(all_data) = NULL # check if there any messages gathered in env$logs and if it is not empty then print them: diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index 6e10885..7ccbe7f 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -27,7 +27,7 @@ #' #' @examples #' \donttest{ -#' monthly <- meteo_imgw_monthly(rank = "climate", year = 1969) +#' monthly = meteo_imgw_monthly(rank = "climate", year = 1969) #' head(monthly) #' #' # a descriptive (long) column names: @@ -80,148 +80,175 @@ meteo_imgw_monthly = function(rank = "synop", #' @noRd #' @keywords internal -meteo_imgw_monthly_bp <- function(rank, +meteo_imgw_monthly_bp = function(rank, year, status, coords, station, col_names, ...) { - translit <- check_locale() - base_url <- "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/" - interval_pl <- "miesieczne" - meta <- meteo_metadata_imgw(interval = "monthly", rank = rank) - rank_pl <- switch(rank, + translit = check_locale() + base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/" + interval_pl = "miesieczne" + meta = meteo_metadata_imgw(interval = "monthly", rank = rank) + rank_pl = switch(rank, synop = "synop", climate = "klimat", precip = "opad" ) # checking internet connection: - temp <- tempfile() + temp = tempfile() test_url( link = paste0(base_url, "dane_meteorologiczne/", interval_pl, "/", rank_pl, "/"), output = temp ) - a <- readLines(temp, warn = FALSE) + a = readLines(temp, warn = FALSE) unlink(temp) - ind <- grep(readHTMLTable(a)[[1]]$Name, pattern = "/") - catalogs <- as.character(readHTMLTable(a)[[1]]$Name[ind]) + ind = grep(readHTMLTable(a)[[1]]$Name, pattern = "/") + catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind]) - years_in_catalogs <- strsplit(gsub(x = catalogs, pattern = "/", replacement = ""), split = "_") - years_in_catalogs <- lapply(years_in_catalogs, function(x) x[1]:x[length(x)]) - ind <- lapply(years_in_catalogs, function(x) sum(x %in% year) > 0) - catalogs <- catalogs[unlist(ind)] + years_in_catalogs = strsplit(gsub(x = catalogs, pattern = "/", replacement = ""), split = "_") + years_in_catalogs = lapply(years_in_catalogs, function(x) x[1]:x[length(x)]) + ind = lapply(years_in_catalogs, function(x) sum(x %in% year) > 0) + catalogs = catalogs[unlist(ind)] - all_data <- vector("list", length = length(catalogs)) + all_data = vector("list", length = length(catalogs)) for (i in seq_along(catalogs)) { - catalog <- gsub(catalogs[i], pattern = "/", replacement = "") + catalog = gsub(catalogs[i], pattern = "/", replacement = "") if (rank == "synop") { - address <- paste0( + address = paste0( base_url, "dane_meteorologiczne/miesieczne/synop", "/", catalog, "/", catalog, "_m_s.zip" ) } if (rank == "climate") { - address <- paste0( + address = paste0( base_url, "dane_meteorologiczne/miesieczne/klimat", "/", catalog, "/", catalog, "_m_k.zip" ) } if (rank == "precip") { - address <- paste0( + address = paste0( base_url, "dane_meteorologiczne/miesieczne/opad", "/", catalog, "/", catalog, "_m_o.zip" ) } - temp <- tempfile() - temp2 <- tempfile() + temp = tempfile() + temp2 = tempfile() test_url(address, temp) - # download.file(address, temp) invisible(unzip(zipfile = temp, exdir = temp2)) - file1 <- paste(temp2, dir(temp2), sep = "/")[1] - data1 <- imgw_read(translit, file1) - - colnames(data1) <- meta[[1]]$parameters + file1 = paste(temp2, dir(temp2), sep = "/")[1] + data1 = imgw_read(translit, file1) + colnames(data1) = meta[[1]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data1[[labs]], "label") = meta[[1]]$label[[labs]] + } + data1$POST = trimws(data1$POST) + data.table::setDT(data1) if (rank != "precip") { # only 1 file in precipitation stations - file2 <- paste(temp2, dir(temp2), sep = "/")[2] + file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { - data2 <- imgw_read(translit, file2) - colnames(data2) <- meta[[2]]$parameters - } - } - - # removing status if set - if (status == FALSE) { - data1[grep("^Status", colnames(data1))] <- NULL - - if (rank != "precip") { # in precipitation station only 1 file - data2[grep("^Status", colnames(data2))] <- NULL + data2 = imgw_read(translit, file2) + colnames(data2) = meta[[2]]$parameters + for (labs in seq_along(meta[[1]]$parameters)) { + attr(data2[[labs]], "label") = meta[[1]]$label[[labs]] + } + data2$POST = trimws(data2$POST) + data.table::setDT(data2) } } unlink(c(temp, temp2)) + + if (any(is.na(colnames(data1)))) { + colnames(data1) = make.names(colnames(data1), unique = TRUE) + } if (rank != "precip") { - all_data[[i]] <- merge(data1, data2, - by = c("Kod stacji", "Nazwa stacji", "Rok", "Miesiac"), + # merge without POST to align with daily approach; unify POST later + all_data[[i]] = merge(data1, data2, + by = c("NSP", "ROK", "MC"), all.x = TRUE ) } else { - all_data[[i]] <- data1 + all_data[[i]] = data1 } } - all_data <- do.call(rbind, all_data) - all_data <- all_data[all_data$Rok %in% year, ] + all_data = data.table::rbindlist(all_data, fill = TRUE) + + # fix order of columns if needed and entries in stations' names if more than 1 available: + col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE) + if (length(col_inds) > 1) { + all_data$POST = apply( + all_data[, col_inds, with = FALSE], + 1, + function(x) na.omit(unique(x))[1] + ) + all_data$POST.x = NULL + all_data$POST.y = NULL + if (colnames(all_data)[ncol(all_data)] == "POST") { + data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1))) + } + } + + all_data = all_data[all_data$ROK %in% year, ] if (coords) { - all_data <- merge(climate::imgw_meteo_stations[, 1:3], + all_data = merge(data.table::setDT(climate::imgw_meteo_stations[, 1:3]), all_data, by.x = "id", - by.y = "Kod stacji", + by.y = "NSP", all.y = TRUE ) } # add rank - rank_code <- switch(rank, - synop = "SYNOPTYCZNA", - climate = "KLIMATYCZNA", - precip = "OPADOWA" - ) - all_data <- cbind(data.frame(rank_code = rank_code), all_data) + # add station rank (temporarily disabled to align with daily) + # rank_code = switch(rank, + # synop = "SYNOPTYCZNA", + # climate = "KLIMATYCZNA", + # precip = "OPADOWA" + # ) + # all_data = cbind(data.frame(rank_code = rank_code), all_data) # station selection and names cleaning: if (!is.null(station)) { if (is.character(station)) { - inds <- unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`)))))) + inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST)))))) if (any(is.na(inds)) || length(inds) == 0) { - env$logs <- c( + env$logs = c( env$logs, paste("At least one of selected station(s) is not available in the database. Returning all available stations") ) } else { - all_data <- all_data[inds, ] + all_data = all_data[inds, ] } } } - all_data$`Nazwa stacji` <- trimws(all_data$`Nazwa stacji`) + all_data$POST = trimws(all_data$POST) # sorting data accordingly to column names - (could be "kod stacji" or "id") - if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) { - all_data <- all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac), ] + if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) { + data.table::setorder(all_data, NSP, ROK, MC) } else { - all_data <- all_data[order(all_data$id, all_data$Rok, all_data$Miesiac), ] + data.table::setorder(all_data, id, ROK, MC) + } + + # remove status: + if (status == FALSE) { + all_data = remove_status(all_data) } # adding option to shorten columns and removing duplicates: - all_data <- meteo_shortening_imgw(all_data, col_names = col_names, ...) - rownames(all_data) <- NULL + # TODO: turned off temporarily, consistent with daily implementation + # all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...) + rownames(all_data) = NULL return(all_data) # clipping to selected years only } diff --git a/R/meteo_metadata_imgw.R b/R/meteo_metadata_imgw.R index 95e0291..53c5879 100644 --- a/R/meteo_metadata_imgw.R +++ b/R/meteo_metadata_imgw.R @@ -18,6 +18,7 @@ meteo_metadata_imgw = function(interval, rank) { # interval can be: monthly, hou if (interval == "daily") { # warning! daily for climates and synop have 2 files with metadata!!! if (rank == "synop") { + b[[1]] = clean_metadata_meteo(address = paste0(base_url,"dane_meteorologiczne/dobowe/synop/s_d_format.txt"), rank = "synop", interval = "daily") b[[2]] = clean_metadata_meteo(address = paste0(base_url, "dane_meteorologiczne/dobowe/synop/s_d_t_format.txt"), diff --git a/R/nearest_stations_imgw.R b/R/nearest_stations_imgw.R index f466634..3432b78 100644 --- a/R/nearest_stations_imgw.R +++ b/R/nearest_stations_imgw.R @@ -90,16 +90,15 @@ nearest_stations_imgw_bp = function(type, stop("y should be latitude") } - if (max(year) >= as.integer(substr(Sys.Date(), 1, 4)) - 1) { - message("Data cannot be provided for this repository. Please check the available records at: \n - https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/") - Sys.sleep(2) + if (max(year) >= as.integer(substr(Sys.Date(), 1, 4)) - 1 | length(year) > 1) { + stop("Data cannot be provided for this repository. Please check the available records at: \n + https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/ or the syntax used for the year argument. \n") } if (type == "meteo") { - result = unique(meteo_imgw_monthly(rank = rank, year = year, coords = TRUE)[, c(2:5)]) + result = as.data.frame(unique(meteo_imgw_monthly(rank = rank, year = year, coords = TRUE)[, c(2:5)])) } else if (type == "hydro") { - result = unique(hydro_imgw_monthly(year = year, coords = TRUE)[, c(1:4)]) + result = as.data.frame(unique(hydro_imgw_monthly(year = year, coords = TRUE)[, c(1:4)])) } else { stop("You've provided wrong type argument; please use: \"meteo\", or \"hydro\"") } @@ -109,6 +108,7 @@ nearest_stations_imgw_bp = function(type, https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/") } + # nocov start if (is.null(point)) { # workaround for different column names: if ("LON" %in% colnames(result)) @@ -183,5 +183,6 @@ nearest_stations_imgw_bp = function(type, message(paste0("Please provide only one year. For more years station's metadata", "may change (name, location or station may stop collecting data)")) } + # nocov end return(result) } diff --git a/R/utils.R b/R/utils.R index 9e9df92..d227012 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,3 +18,34 @@ convert_encoding = function(df) { }) return(df) } + + +#' Remove columns containing status information +#' +#' Internal function for removing columns from data frame data that contain only +#' status information and expand the created object +#' +#' @param df data frame or data.table +#' +#' @keywords internal +#' @noRd +#' +remove_status = function(df) { + + labels = sapply(df, function(x) { + lbl = attr(x, "label") + ifelse(is.null(lbl), NA, lbl) + }) + + status_cols = grepl("^Status pomiaru", labels) + + if (any(class(df) == "data.table")) { + df = df[, !..status_cols] + } else if (any(class(df) == "data.frame")) { + df = df[, !status_cols] + } else { + stop("Removing status is possible only for data.frame or data.table objects") + } + + return(df) +} diff --git a/R/zzz.R b/R/zzz.R index 3bf0207..edeeeed 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -2,4 +2,7 @@ #' @return Empty env #' @keywords internal #' @noRd -env <- new.env(parent = emptyenv()) \ No newline at end of file +env <- new.env(parent = emptyenv()) + +globalVariables(c("DZ", "GG", "MC", "NSP", "POST.x", "ROK", "id", + "..status_cols", "status_cols")) \ No newline at end of file diff --git a/man/meteo_imgw_monthly.Rd b/man/meteo_imgw_monthly.Rd index 82f7cd6..3ea4887 100644 --- a/man/meteo_imgw_monthly.Rd +++ b/man/meteo_imgw_monthly.Rd @@ -49,7 +49,7 @@ SYNOP / CLIMATE / PRECIP stations available in the danepubliczne.imgw.pl collect } \examples{ \donttest{ -monthly <- meteo_imgw_monthly(rank = "climate", year = 1969) +monthly = meteo_imgw_monthly(rank = "climate", year = 1969) head(monthly) # a descriptive (long) column names: diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index db870fe..cf277f6 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -1,13 +1,28 @@ context("meteo_imgw_daily") + +test_that("meteo_imgw_daily_single_station", { + if (!curl::has_internet()) { + message("No internet connection! \n") + return(invisible(NULL)) + } else { + single_station = meteo_imgw_daily(rank = "synop", + year = 2024, + station = "LESZNO", + status = FALSE, + coords = TRUE) + expect_true(nrow(single_station) > 360) + } +}) + test_that("meteo_imgw_daily", { if (!curl::has_internet()) { message("No internet connection! \n") return(invisible(NULL)) } else { y = 1900 # year not supported - expect_message(meteo_imgw_daily(rank = "synop", year = y, status = TRUE, + expect_warning(meteo_imgw_daily(rank = "synop", year = y, status = TRUE, coords = TRUE, allow_failure = TRUE)) } }) @@ -35,10 +50,13 @@ test_that("check_message_for_non_existing_station", { message("No internet connection! \n") return(invisible(NULL)) } else { - expect_message(meteo_imgw_daily(rank = "precip", + expect_true( + nrow( + meteo_imgw_daily(rank = "precip", year = 2002, coords = TRUE, - station = 9999)) + station = 9999) + ) > 160000) } }) @@ -52,17 +70,13 @@ test_that("check_encoding_in_non_synop", { message("No internet connection! \n") return(invisible(NULL)) } else { - non_synop = suppressWarnings( - suppressMessages(meteo_imgw_daily(year = 2024, + non_synop = expect_message(suppressWarnings(meteo_imgw_daily(year = 2024, rank = "precip", - allow_failure = FALSE)) - ) - expect_identical(nchar(non_synop$station), nchar(trimws(non_synop$station))) - non_synop = suppressWarnings( - suppressMessages(meteo_imgw_daily(year = 2024, + allow_failure = FALSE))) + expect_identical(nchar(non_synop$NSP), nchar(trimws(non_synop$NSP))) + non_synop = suppressWarnings(meteo_imgw_daily(year = 2024, rank = "climate", allow_failure = FALSE)) - ) - expect_identical(nchar(non_synop$station), nchar(trimws(non_synop$station))) + expect_identical(nchar(non_synop$NSP), nchar(trimws(non_synop$NSP))) } }) diff --git a/tests/testthat/test-meteo_metadata_imgw.R b/tests/testthat/test-meteo_metadata_imgw.R index f450260..de6e125 100644 --- a/tests/testthat/test-meteo_metadata_imgw.R +++ b/tests/testthat/test-meteo_metadata_imgw.R @@ -19,18 +19,18 @@ test_that("meteo_metadata_imgw tests", { if (is.list(m_hs) && is.list(m_ds) && is.list(m_ds) && is.list(m_dc) && is.list(m_dp) && is.list(m_ms) && is.list(m_mc) && is.list(m_mp)) { - expect_equal(dim(m_hs[[1]]), c(107, 3)) - expect_equal(dim(m_hc[[1]]), c(22, 3)) - expect_equal(dim(m_ds[[1]]), c(65, 3)) - expect_equal(dim(m_ds[[2]]), c(23, 3)) - expect_equal(dim(m_dc[[1]]), c(18, 3)) - expect_equal(dim(m_dc[[2]]), c(13, 3)) - expect_equal(dim(m_dp[[1]]), c(16, 3)) - expect_equal(dim(m_ms[[1]]), c(60, 3)) - expect_equal(dim(m_ms[[2]]), c(22, 3)) - expect_equal(dim(m_mc[[1]]), c(27, 3)) - expect_equal(dim(m_mc[[2]]), c(12, 3)) - expect_equal(dim(m_mp[[1]]), c(14, 3)) + expect_equal(dim(m_hs[[1]]), c(107, 2)) + expect_equal(dim(m_hc[[1]]), c(22, 2)) + expect_equal(dim(m_ds[[1]]), c(65, 2)) + expect_equal(dim(m_ds[[2]]), c(23, 2)) + expect_equal(dim(m_dc[[1]]), c(18, 2)) + expect_equal(dim(m_dc[[2]]), c(13, 2)) + expect_equal(dim(m_dp[[1]]), c(16, 2)) + expect_equal(dim(m_ms[[1]]), c(58, 2)) + expect_equal(dim(m_ms[[2]]), c(22, 2)) + expect_equal(dim(m_mc[[1]]), c(27, 2)) + expect_equal(dim(m_mc[[2]]), c(12, 2)) + expect_equal(dim(m_mp[[1]]), c(14, 2)) } } })