Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
NOT_CRAN: true # to enforce also tests that are skipped on cran and reduce code coverage

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.2.8
Version: 1.2.9
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# climate 1.2.9

* fixes for corrupted header files in `meteo_imgw_` family of functions due to changes in the IMGW-PIB repository


# climate 1.2.8

* speeding up selective download for given station names in `meteo_imgw_*` and `hydro_imgw_daily()` functions that simultaneously reduce use of memory
Expand Down
34 changes: 6 additions & 28 deletions R/clean_metadata_meteo.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,12 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
test_url(link = address, output = temp)
a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE,
fileEncoding = "CP1250")$V1
a = gsub(a, pattern = "\\?", replacement = "")
a = stringi::stri_trans_general(a, 'LATIN-ASCII')

# additional workarounds for mac os but not only...
a = gsub(x = a, pattern = "'", replacement = "")
a = gsub(x = a, pattern = "\\^0", replacement = "")
a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE)
length_char = max(nchar(a$V1), na.rm = TRUE)

if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly
if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily
if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly
if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly

field = substr(a$V1, length_char - 3, length_char)

if (rank == "synop" && interval == "monthly") {
length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2
field = substr(a$V1, length_char - 3, length_char + 2)
}

a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1]))))
a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))

a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3))
a$V1 = gsub(x = a$V1, pattern = "* ", "")

a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status
colnames(a)[1] = "parameters"
inds = grepl("^[A-Z]{2}.{5}", a)

code = trimws(substr(a, 1, 7))[inds]
name = trimws(substr(a, 8, nchar(a)))[inds]
a = data.frame(parameters = code, label = name)
a$label = stringi::stri_trans_general(a$label, 'LATIN-ASCII')
return(a)
}
126 changes: 66 additions & 60 deletions R/meteo_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,42 +148,45 @@ meteo_imgw_daily_bp = function(rank,
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = imgw_read(translit, file1)
colnames(data1) = meta[[1]]$parameters
data1$`Nazwa stacji` = trimws(data1$`Nazwa stacji`)
for (labs in seq_along(meta[[1]]$parameters)) {
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
}
data1$POST = trimws(data1$POST)

file2 = paste(temp2, dir(temp2), sep = "/")[2]
if (file.exists(file2)) {
data2 = imgw_read(translit, file2)
colnames(data2) = meta[[2]]$parameters
data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`)
for (labs in seq_along(meta[[2]]$parameters)) {
attr(data2[[labs]], "label") = meta[[2]]$label[[labs]]
}
data2$POST = trimws(data2$POST)
} else {
data2 = head(data1, 0)[, 1:min(5, ncol(data1))]
data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`)
data2$POST = trimws(data2$POST)
}

unlink(c(temp, temp2))

# remove statuses if not needed:
if (status == FALSE) {
data1[grep("^Status", colnames(data1))] = NULL
data2[grep("^Status", colnames(data2))] = NULL
}

ttt = merge(data1,
data.table::setDT(data1)
data.table::setDT(data2)

ttt = merge(
data1,
data2,
by = c("Kod stacji", "Rok", "Miesiac", "Dzien"),
by = c("NSP", "ROK", "MC", "DZ"),
all.x = TRUE
)

ttt = ttt[order(ttt$`Nazwa stacji.x`, ttt$Rok, ttt$Miesiac, ttt$Dzien), ]
data.table::setorder(ttt, POST.x, ROK, MC, DZ)

if (!is.null(station)) {
all_data[[length(all_data) + 1]] = ttt[ttt$`Nazwa stacji.x` %in% station, ]
all_data[[length(all_data) + 1]] = ttt[ttt$POST.x %in% station, ]
} else {
all_data[[length(all_data) + 1]] = ttt
}
} # end of looping for zip archives
} # end of if statement for SYNOP stations

######################
###### KLIMAT: #######
if (rank == "climate") {
Expand Down Expand Up @@ -220,7 +223,10 @@ meteo_imgw_daily_bp = function(rank,
if (!is.null(csv_data)) {
csv_data = convert_encoding(csv_data)
colnames(csv_data) = meta[[1]]$parameters
csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`)
for (labs in seq_along(meta[[1]]$parameters)) {
attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]]
}
csv_data$POST = trimws(csv_data$POST)
}
return(csv_data)
}
Expand All @@ -229,8 +235,8 @@ meteo_imgw_daily_bp = function(rank,
if (is.data.frame(d)) {
data1 = d
colnames(data1) = meta[[1]]$parameters
if (status == FALSE) {
data1[grep("^Status", colnames(data1))] = NULL
for (labs in seq_along(meta[[1]]$parameters)) {
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
}
}

Expand All @@ -239,27 +245,25 @@ meteo_imgw_daily_bp = function(rank,
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = imgw_read(translit, file1)
colnames(data1) = meta[[1]]$parameters
for (labs in seq_along(meta[[1]]$parameters)) {
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
}

file2 = paste(temp2, dir(temp2), sep = "/")[2]
if (file.exists(file2)) {
data2 = imgw_read(translit, file2)
colnames(data2) = meta[[2]]$parameters
}
}

# remove statuses
if (status == FALSE) {
data1[grep("^Status", colnames(data1))] = NULL
if (file.exists(file2)) {
data2[grep("^Status", colnames(data2))] = NULL
for (labs in seq_along(meta[[2]]$parameters)) {
attr(data2[[labs]], "label") = meta[[2]]$label[[labs]]
}
}
}

unlink(c(temp, temp2))
if (file.exists(file2)) {
all_data[[length(all_data) + 1]] = merge(data1,
data2,
by = c("Kod stacji", "Rok", "Miesiac", "Dzien"),
by = c("NSP", "ROK", "MC", "DZ"),
all.x = TRUE
)
} else {
Expand Down Expand Up @@ -304,16 +308,19 @@ meteo_imgw_daily_bp = function(rank,
csv_data = read.table(data, header = FALSE, stringsAsFactors = FALSE, sep = ",", encoding = "CP1250")
csv_data = convert_encoding(csv_data)
colnames(csv_data) = meta[[1]]$parameters
csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`)
for (labs in seq_along(meta[[1]]$parameters)) {
attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]]
}
csv_data$POST = trimws(csv_data$POST)
return(csv_data)
}
)

if (is.data.frame(d)) {
data1 = d
colnames(data1) = meta[[1]]$parameters
if (status == FALSE) {
data1[grep("^Status", colnames(data1))] = NULL
for (labs in seq_along(meta[[1]]$parameters)) {
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
}
}

Expand All @@ -322,74 +329,73 @@ meteo_imgw_daily_bp = function(rank,
file1 = paste(temp2, dir(temp2), sep = "/")[1]
data1 = imgw_read(translit, file1)
colnames(data1) = meta[[1]]$parameters
# remove status
if (status == FALSE) {
data1[grep("^Status", colnames(data1))] = NULL
}
} # end of corrupted zips
unlink(c(temp, temp2))
all_data[[length(all_data) + 1]] = data1
} # end of loop for zip files
} # end of if statement for climate stations
} # end of looping over catalogs

all_data = as.data.frame(data.table::rbindlist(all_data, fill = TRUE))
all_data = data.table::rbindlist(all_data, fill = TRUE)

# fix order of columns if needed and entries in stations' names if more than 1 available:
col_inds = grep(pattern = "Nazwa stacji", colnames(all_data), value = TRUE)
col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE)

if (length(col_inds) > 1) {
all_data$`Nazwa stacji` = apply(all_data[, col_inds], 1, function(x) na.omit(unique(x))[1])
all_data$`Nazwa stacji.x` = NULL
all_data$`Nazwa stacji.y` = NULL
if (colnames(all_data)[ncol(all_data)] == "Nazwa stacji") { # re-order columns if needed
all_data = all_data[, c(1, ncol(all_data), 2:(ncol(all_data) - 1))]
all_data$POST = apply(
all_data[, col_inds, with = FALSE],
1,
function(x) na.omit(unique(x))[1]
)
all_data$POST.x = NULL
all_data$POST.y = NULL
if (colnames(all_data)[ncol(all_data)] == "POST") { # re-order columns if needed
data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1)))
}
}

if (coords) {
all_data = merge(climate::imgw_meteo_stations[, 1:3],
all_data = merge(setDT(climate::imgw_meteo_stations[, 1:3]),
all_data,
by.x = "id",
by.y = "Kod stacji",
by.y = "NSP",
all.y = TRUE
)
}

# add station rank:
rank_code = switch(rank,
synop = "SYNOPTYCZNA",
climate = "KLIMATYCZNA",
precip = "OPADOWA"
)
all_data = cbind(data.frame(rank_code = rank_code), all_data)

all_data = all_data[all_data$Rok %in% year, ] # clip only to selected years
all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years

# station selection and names cleaning:
if (!is.null(station)) {
if (is.character(station)) {
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`))))))
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST))))))
if (any(is.na(inds)) || length(inds) == 0) {
env$logs = c(
env$logs,
paste("At least one of selected station(s) is not available in the database. Returning all available stations")
)
} else {
all_data = all_data[inds, ]
all_data = all_data[inds]
}
}
}
all_data$`Nazwa stacji` = trimws(all_data$`Nazwa stacji`)

all_data$POST = trimws(all_data$POST)
# sort output
if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) {
all_data = all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac, all_data$Dzien), ]
if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) {
all_data = all_data[order(all_data$NSP, all_data$ROK, all_data$MC, all_data$DZ), ]
} else {
all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien), ]
all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ), ]
}

# remove status:
if (status == FALSE) {
all_data = remove_status(all_data)
}

# remove duplicates and shorten colnames
all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...)
# turned off temporarily:
# all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...)
rownames(all_data) = NULL

# check if there any messages gathered in env$logs and if it is not empty then print them:
Expand Down
Loading