# Patch summary (hfhub 0.1.1.9000 -> 0.1.1.9001)
#
# * DESCRIPTION: bumps Version.
# * NEWS.md: adds the entry for the Windows symlink fix (#9).
# * R/hub_download.R:
#     - hub_download() no longer calls fs::link_create() / file.symlink()
#       directly; both the "blob already exists" path and the "blob just
#       downloaded" path now go through link_or_copy().
#     - supports_symlinks(storage_folder) probes symlink support by writing a
#       test file under <storage_folder>/.symlink_test and calling
#       file.symlink() on it; the result is cached per storage folder in the
#       symlink_support_cache environment. A warning is emitted when the probe
#       fails and HF_HUB_DISABLE_SYMLINKS_WARNING is empty/unset.
#     - link_or_copy(blob_path, pointer_path, owned, storage_folder) creates a
#       symlink when supported; otherwise it moves the blob (owned = TRUE) or
#       copies it with overwrite (owned = FALSE).
# * tests/testthat/test-hub_download.R: sets HF_HUB_DISABLE_SYMLINKS_WARNING
#   for the test and asserts that exactly one snapshots/*/config.json matches.
# * tests/testthat/_snaps/hub_snapshot.md: touches the "Message" line.
#
# NOTE(review): the newlines of this patch appear to have been collapsed into
#   spaces; the @@ hunk line counts no longer correspond to physical lines.
#   Confirm against the original patch before trying to apply it.
# NOTE(review): the "- Message + Message" change in hub_snapshot.md looks
#   whitespace-only; the exact difference cannot be seen in this copy.
# NOTE(review): the cli::cli_warn() call passes one sentence as five separate
#   unnamed vector elements; presumably a single string (e.g. via paste0())
#   was intended -- verify how cli renders unnamed elements.
# NOTE(review): link_or_copy() does not check the value returned by
#   file.symlink(), and in the non-symlink path with owned = TRUE the blob is
#   moved, so blob_path no longer exists afterwards -- confirm that callers
#   (and later cache lookups on blob_path) tolerate this.
diff --git a/DESCRIPTION b/DESCRIPTION index 6e7d0a2..ff112f6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hfhub Title: Hugging Face Hub Interface -Version: 0.1.1.9000 +Version: 0.1.1.9001 Authors@R: c( person("Daniel", "Falbel", , "daniel@posit.co", role = c("aut", "cre")), person("Regouby", "Christophe", , "christophe.regouby@free.fr", c("ctb")), diff --git a/NEWS.md b/NEWS.md index 872af05..a887592 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # hfhub (development version) * Added FR translation of the R messages. (#8 @cregouby) +* Fixed symlink issues on Windows that caused model snapshots to be empty. (#9) # hfhub 0.1.1 diff --git a/R/hub_download.R b/R/hub_download.R index 714c2a5..0d99c71 100644 --- a/R/hub_download.R +++ b/R/hub_download.R @@ -129,10 +129,12 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = } if (fs::file_exists(blob_path) && !force_download) { - fs::link_create(blob_path, pointer_path) + # Blob already exists, we'll link/copy it + link_or_copy(blob_path, pointer_path, FALSE, storage_folder) return(pointer_path) } + # Download the blob withr::with_tempfile("tmp", { lock <- filelock::lock(paste0(blob_path, ".lock")) on.exit({filelock::unlock(lock)}) @@ -156,12 +158,11 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) }) fs::file_move(tmp, blob_path) - - # fs::link_create doesn't work for linking files on windows. - try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings - file.symlink(blob_path, pointer_path) }) + # Create pointer file (symlink, move, or copy depending on symlink support) + link_or_copy(blob_path, pointer_path, TRUE, storage_folder) + pointer_path } @@ -288,5 +289,84 @@ reqst <- function(method, url, ..., follow_relative_redirects = FALSE) { method(url, ...) 
} +# Cache for symlink support detection (per storage folder) +symlink_support_cache <- new.env(parent = emptyenv()) + +#' Check if symlinks are supported in the given directory +#' +#' Tests whether file.symlink() works in the storage folder. +#' Caches the result per folder to avoid repeated tests. +#' Matches Python's huggingface_hub behavior. +#' +#' @param storage_folder Path to storage folder +#' @return TRUE if symlinks work, FALSE otherwise +#' @noRd +supports_symlinks <- function(storage_folder) { + # Check cache first + cache_key <- as.character(storage_folder) + if (exists(cache_key, envir = symlink_support_cache)) { + return(get(cache_key, envir = symlink_support_cache)) + } + + # Test symlink support + test_dir <- fs::path(storage_folder, ".symlink_test") + fs::dir_create(test_dir) + on.exit(fs::dir_delete(test_dir), add = TRUE) + + test_file <- fs::path(test_dir, "test.txt") + test_link <- fs::path(test_dir, "test_link.txt") + + writeLines("test", test_file) + result <- suppressWarnings(file.symlink(test_file, test_link)) + + # Cache the result + assign(cache_key, result, envir = symlink_support_cache) + + # Show warning if symlinks aren't supported (matches Python's behavior) + if (!result && !isTRUE(Sys.getenv("HF_HUB_DISABLE_SYMLINKS_WARNING") != "")) { + cli::cli_warn(c( + "{.pkg hfhub} cache-system uses symlinks by default to efficiently store ", + "duplicated files but your machine does not support them in {.path {storage_folder}}. ", + "Caching files will still work but in a degraded version that might require ", + "more space on your disk. This warning can be disabled by setting the ", + "{.envvar HF_HUB_DISABLE_SYMLINKS_WARNING} environment variable.", + "i" = "For more details, see {.url https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations}", + "i" = "To support symlinks on Windows, you either need to activate Developer Mode or run R as administrator." 
+ )) + } + + result +} + +#' Link, move, or copy blob to pointer path based on symlink support +#' +#' Helper function that handles creating the final pointer file. +#' - If symlinks supported: creates symlink +#' - If symlinks not supported and blob just downloaded: moves file +#' - If symlinks not supported and blob already existed: copies file +#' +#' @param blob_path Path to the blob file (source) +#' @param pointer_path Path to the pointer file (destination) +#' @param owned Whether the blob is safe to delete if symlinks are not supported +#' @param storage_folder Path to storage folder (for symlink check) +#' @noRd +link_or_copy <- function(blob_path, pointer_path, owned, storage_folder) { + use_symlinks <- supports_symlinks(storage_folder) + + if (use_symlinks) { + # Original behavior: create symlink + # fs::link_create doesn't work for linking files on windows. + try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings + file.symlink(blob_path, pointer_path) + } else { + # Degraded mode: move if just downloaded, copy if already existed + if (owned) { + fs::file_move(blob_path, pointer_path) + } else { + fs::file_copy(blob_path, pointer_path, overwrite = TRUE) + } + } +} + utils::globalVariables("tmp") diff --git a/tests/testthat/_snaps/hub_snapshot.md b/tests/testthat/_snaps/hub_snapshot.md index fd11065..46825a2 100644 --- a/tests/testthat/_snaps/hub_snapshot.md +++ b/tests/testthat/_snaps/hub_snapshot.md @@ -3,7 +3,7 @@ Code p <- hub_snapshot("dfalbel/cran-packages", repo_type = "dataset", allow_patterns = "\\.R") - Message + Message i Snapshotting files 0/4 v Snapshotting files 4/4 [0ms] diff --git a/tests/testthat/test-hub_download.R b/tests/testthat/test-hub_download.R index 0486bb6..c9a4b6d 100644 --- a/tests/testthat/test-hub_download.R +++ b/tests/testthat/test-hub_download.R @@ -1,6 +1,8 @@ skip_on_cran() test_that("hub_download", { + withr::local_envvar(list(HF_HUB_DISABLE_SYMLINKS_WARNING = "1")) + file <- 
hub_download("gpt2", filename = "config.json") expect_equal( @@ -26,6 +28,11 @@ test_that("hub_download", { file <- hub_download("gpt2", filename = "config.json") }) expect_equal(list.files(tmp), "models--gpt2") + # Make sure the config.json exists (detect broken symlink support in Windows) + expect_length( + Sys.glob(file.path(tmp, "models--gpt2", "snapshots", "*", "config.json")), + 1 + ) }) test_that("can download from private repo", {