From 3b52e1ba1740547e19a004d58d0978d237f543db Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Fri, 21 Nov 2025 22:50:13 -0800 Subject: [PATCH 1/6] Fall back to file copy when symlinks fail on Windows --- R/hub_download.R | 8 +++++++- tests/testthat/_snaps/hub_snapshot.md | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/hub_download.R b/R/hub_download.R index 714c2a5..b7bc9d8 100644 --- a/R/hub_download.R +++ b/R/hub_download.R @@ -159,7 +159,13 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = # fs::link_create doesn't work for linking files on windows. try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings - file.symlink(blob_path, pointer_path) + symlink_success <- suppressWarnings(file.symlink(blob_path, pointer_path)) + + # On Windows without admin/developer mode, symlinks fail silently + # Fall back to copying the file instead + if (!symlink_success && !file.exists(pointer_path)) { + file.copy(blob_path, pointer_path) + } }) pointer_path diff --git a/tests/testthat/_snaps/hub_snapshot.md b/tests/testthat/_snaps/hub_snapshot.md index fd11065..46825a2 100644 --- a/tests/testthat/_snaps/hub_snapshot.md +++ b/tests/testthat/_snaps/hub_snapshot.md @@ -3,7 +3,7 @@ Code p <- hub_snapshot("dfalbel/cran-packages", repo_type = "dataset", allow_patterns = "\\.R") - Message + Message i Snapshotting files 0/4 v Snapshotting files 4/4 [0ms] From 899fc78097e502f4197ee5fd5422c35d7225dc12 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Fri, 21 Nov 2025 23:46:00 -0800 Subject: [PATCH 2/6] Don't create blob folders in degraded mode Creating both blob and snapshot folders when symlinks aren't supported is just taking double the space for no reason. This change follows the Python package's logic for handling this case--just don't have a blobs folder. --- R/hub_download.R | 154 +++++++++++++++++++++-------- tests/testthat/test-hub_download.R | 7 ++ 2 files changed, 122 insertions(+), 39 deletions(-) diff --git a/R/hub_download.R b/R/hub_download.R index b7bc9d8..5e77ce4 100644 --- a/R/hub_download.R +++ b/R/hub_download.R @@ -107,10 +107,7 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = if (is.null(etag)) cli::cli_abort(gettext("etag must have been retrieved from server")) if (is.null(commit_hash)) cli::cli_abort(gettext("commit_hash must have been retrieved from server")) - blob_path <- fs::path(storage_folder, "blobs", etag) pointer_path <- get_pointer_path(storage_folder, commit_hash, filename) - - fs::dir_create(fs::path_dir(blob_path)) fs::dir_create(fs::path_dir(pointer_path)) # if passed revision is not identical to commit_hash @@ -128,45 +125,75 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = return(pointer_path) } - if (fs::file_exists(blob_path) && !force_download) { - fs::link_create(blob_path, pointer_path) - return(pointer_path) - } + # Check if symlinks are supported (matches Python's behavior) + use_symlinks <- supports_symlinks(storage_folder) - withr::with_tempfile("tmp", { - lock <- filelock::lock(paste0(blob_path, ".lock")) - on.exit({filelock::unlock(lock)}) - tryCatch({ - bar_id <- cli::cli_progress_bar( - name = filename, - total = if (is.numeric(expected_size)) expected_size else NA, - type = "download", - ) - progress <- function(down, up) { - if (down[1] != 0) { - cli::cli_progress_update(total = down[1], set = down[2], id = bar_id) - } - TRUE - } - handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress) - curl::handle_setheaders(handle, .list = hub_headers()) - curl::curl_download(url, tmp, handle = handle, quiet = FALSE) - cli::cli_progress_done(id = bar_id) - }, error = function(err) { - cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) - }) - fs::file_move(tmp, blob_path) + if (use_symlinks) { + # Use blob storage with symlinks (efficient deduplication) + blob_path <- fs::path(storage_folder, "blobs", etag) + fs::dir_create(fs::path_dir(blob_path)) - # fs::link_create doesn't work for linking files on windows. - try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings - symlink_success <- suppressWarnings(file.symlink(blob_path, pointer_path)) - - # On Windows without admin/developer mode, symlinks fail silently - # Fall back to copying the file instead - if (!symlink_success && !file.exists(pointer_path)) { - file.copy(blob_path, pointer_path) + if (fs::file_exists(blob_path) && !force_download) { + fs::link_create(blob_path, pointer_path) + return(pointer_path) } - }) + + withr::with_tempfile("tmp", { + lock <- filelock::lock(paste0(blob_path, ".lock")) + on.exit({filelock::unlock(lock)}) + tryCatch({ + bar_id <- cli::cli_progress_bar( + name = filename, + total = if (is.numeric(expected_size)) expected_size else NA, + type = "download", + ) + progress <- function(down, up) { + if (down[1] != 0) { + cli::cli_progress_update(total = down[1], set = down[2], id = bar_id) + } + TRUE + } + handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress) + curl::handle_setheaders(handle, .list = hub_headers()) + curl::curl_download(url, tmp, handle = handle, quiet = FALSE) + cli::cli_progress_done(id = bar_id) + }, error = function(err) { + cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) + }) + fs::file_move(tmp, blob_path) + + # fs::link_create doesn't work for linking files on windows. + try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings + file.symlink(blob_path, pointer_path) + }) + } else { + # Degraded mode: download directly to pointer_path (no symlinks) + # This matches Python's huggingface_hub behavior on Windows + withr::with_tempfile("tmp", { + lock <- filelock::lock(paste0(pointer_path, ".lock")) + on.exit({filelock::unlock(lock)}) + tryCatch({ + bar_id <- cli::cli_progress_bar( + name = filename, + total = if (is.numeric(expected_size)) expected_size else NA, + type = "download", + ) + progress <- function(down, up) { + if (down[1] != 0) { + cli::cli_progress_update(total = down[1], set = down[2], id = bar_id) + } + TRUE + } + handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress) + curl::handle_setheaders(handle, .list = hub_headers()) + curl::curl_download(url, tmp, handle = handle, quiet = FALSE) + cli::cli_progress_done(id = bar_id) + }, error = function(err) { + cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) + }) + fs::file_move(tmp, pointer_path) + }) + } pointer_path } @@ -294,5 +321,54 @@ reqst <- function(method, url, ..., follow_relative_redirects = FALSE) { method(url, ...) } +# Cache for symlink support detection (per storage folder) +symlink_support_cache <- new.env(parent = emptyenv()) + +#' Check if symlinks are supported in the given directory +#' +#' Tests whether file.symlink() works in the storage folder. +#' Caches the result per folder to avoid repeated tests. +#' Matches Python's huggingface_hub behavior. +#' +#' @param storage_folder Path to storage folder +#' @return TRUE if symlinks work, FALSE otherwise +#' @keywords internal +supports_symlinks <- function(storage_folder) { + # Check cache first + cache_key <- as.character(storage_folder) + if (exists(cache_key, envir = symlink_support_cache)) { + return(get(cache_key, envir = symlink_support_cache)) + } + + # Test symlink support + test_dir <- fs::path(storage_folder, ".symlink_test") + fs::dir_create(test_dir) + on.exit(fs::dir_delete(test_dir), add = TRUE) + + test_file <- fs::path(test_dir, "test.txt") + test_link <- fs::path(test_dir, "test_link.txt") + + writeLines("test", test_file) + result <- suppressWarnings(file.symlink(test_file, test_link)) + + # Cache the result + assign(cache_key, result, envir = symlink_support_cache) + + # Show warning if symlinks aren't supported (matches Python's behavior) + if (!result && !isTRUE(Sys.getenv("HF_HUB_DISABLE_SYMLINKS_WARNING") != "")) { + cli::cli_warn(c( + "{.pkg hfhub} cache-system uses symlinks by default to efficiently store ", + "duplicated files but your machine does not support them in {.path {storage_folder}}. ", + "Caching files will still work but in a degraded version that might require ", + "more space on your disk. This warning can be disabled by setting the ", + "{.envvar HF_HUB_DISABLE_SYMLINKS_WARNING} environment variable.", + "i" = "For more details, see {.url https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations}", + "i" = "To support symlinks on Windows, you either need to activate Developer Mode or run R as administrator." + )) + } + + result +} + utils::globalVariables("tmp") diff --git a/tests/testthat/test-hub_download.R b/tests/testthat/test-hub_download.R index 0486bb6..c9a4b6d 100644 --- a/tests/testthat/test-hub_download.R +++ b/tests/testthat/test-hub_download.R @@ -1,6 +1,8 @@ skip_on_cran() test_that("hub_download", { + withr::local_envvar(list(HF_HUB_DISABLE_SYMLINKS_WARNING = "1")) + file <- hub_download("gpt2", filename = "config.json") expect_equal( @@ -26,6 +28,11 @@ test_that("hub_download", { file <- hub_download("gpt2", filename = "config.json") }) expect_equal(list.files(tmp), "models--gpt2") + # Make sure the config.json exists (detect broken symlink support in Windows) + expect_length( + Sys.glob(file.path(tmp, "models--gpt2", "snapshots", "*", "config.json")), + 1 + ) }) test_that("can download from private repo", { From 180805a03858526268d9c67c5248f4cd68470918 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Sat, 22 Nov 2025 00:05:25 -0800 Subject: [PATCH 3/6] Cleaner implementation --- R/hub_download.R | 84 ++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/R/hub_download.R b/R/hub_download.R index 5e77ce4..5811b12 100644 --- a/R/hub_download.R +++ b/R/hub_download.R @@ -125,19 +125,16 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = return(pointer_path) } - # Check if symlinks are supported (matches Python's behavior) - use_symlinks <- supports_symlinks(storage_folder) - - if (use_symlinks) { - # Use blob storage with symlinks (efficient deduplication) - blob_path <- fs::path(storage_folder, "blobs", etag) - fs::dir_create(fs::path_dir(blob_path)) + blob_path <- fs::path(storage_folder, "blobs", etag) + fs::dir_create(fs::path_dir(blob_path)) - if (fs::file_exists(blob_path) && !force_download) { - fs::link_create(blob_path, pointer_path) - return(pointer_path) - } + blob_just_downloaded <- FALSE + if (fs::file_exists(blob_path) && !force_download) { + # Blob already exists, we'll link/copy it + blob_just_downloaded <- FALSE + } else { + # Download the blob withr::with_tempfile("tmp", { lock <- filelock::lock(paste0(blob_path, ".lock")) on.exit({filelock::unlock(lock)}) @@ -161,40 +158,13 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) }) fs::file_move(tmp, blob_path) - - # fs::link_create doesn't work for linking files on windows. - try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings - file.symlink(blob_path, pointer_path) - }) - } else { - # Degraded mode: download directly to pointer_path (no symlinks) - # This matches Python's huggingface_hub behavior on Windows - withr::with_tempfile("tmp", { - lock <- filelock::lock(paste0(pointer_path, ".lock")) - on.exit({filelock::unlock(lock)}) - tryCatch({ - bar_id <- cli::cli_progress_bar( - name = filename, - total = if (is.numeric(expected_size)) expected_size else NA, - type = "download", - ) - progress <- function(down, up) { - if (down[1] != 0) { - cli::cli_progress_update(total = down[1], set = down[2], id = bar_id) - } - TRUE - } - handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress) - curl::handle_setheaders(handle, .list = hub_headers()) - curl::curl_download(url, tmp, handle = handle, quiet = FALSE) - cli::cli_progress_done(id = bar_id) - }, error = function(err) { - cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) - }) - fs::file_move(tmp, pointer_path) }) + blob_just_downloaded <- TRUE } + # Create pointer file (symlink, move, or copy depending on symlink support) + link_or_copy(blob_path, pointer_path, blob_just_downloaded, storage_folder) + pointer_path } @@ -370,5 +340,35 @@ supports_symlinks <- function(storage_folder) { result } +#' Link, move, or copy blob to pointer path based on symlink support +#' +#' Helper function that handles creating the final pointer file. +#' - If symlinks supported: creates symlink +#' - If symlinks not supported and blob just downloaded: moves file +#' - If symlinks not supported and blob already existed: copies file +#' +#' @param blob_path Path to the blob file (source) +#' @param pointer_path Path to the pointer file (destination) +#' @param blob_just_downloaded Whether the blob was just downloaded +#' @param storage_folder Path to storage folder (for symlink check) +#' @keywords internal +link_or_copy <- function(blob_path, pointer_path, blob_just_downloaded, storage_folder) { + use_symlinks <- supports_symlinks(storage_folder) + + if (use_symlinks) { + # Original behavior: create symlink + # fs::link_create doesn't work for linking files on windows. + try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings + file.symlink(blob_path, pointer_path) + } else { + # Degraded mode: move if just downloaded, copy if already existed + if (blob_just_downloaded) { + fs::file_move(blob_path, pointer_path) + } else { + fs::file_copy(blob_path, pointer_path, overwrite = TRUE) + } + } +} + utils::globalVariables("tmp") From 9ffc748b11c1ac9216e1b9a7bb9f85909742b473 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Sat, 22 Nov 2025 00:11:18 -0800 Subject: [PATCH 4/6] Cleaner diff versus main branch --- R/hub_download.R | 68 +++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/R/hub_download.R b/R/hub_download.R index 5811b12..3a15a81 100644 --- a/R/hub_download.R +++ b/R/hub_download.R @@ -107,7 +107,10 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = if (is.null(etag)) cli::cli_abort(gettext("etag must have been retrieved from server")) if (is.null(commit_hash)) cli::cli_abort(gettext("commit_hash must have been retrieved from server")) + blob_path <- fs::path(storage_folder, "blobs", etag) pointer_path <- get_pointer_path(storage_folder, commit_hash, filename) + + fs::dir_create(fs::path_dir(blob_path)) fs::dir_create(fs::path_dir(pointer_path)) # if passed revision is not identical to commit_hash @@ -125,45 +128,40 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type = return(pointer_path) } - blob_path <- fs::path(storage_folder, "blobs", etag) - fs::dir_create(fs::path_dir(blob_path)) - - blob_just_downloaded <- FALSE - if (fs::file_exists(blob_path) && !force_download) { # Blob already exists, we'll link/copy it - blob_just_downloaded <- FALSE - } else { - # Download the blob - withr::with_tempfile("tmp", { - lock <- filelock::lock(paste0(blob_path, ".lock")) - on.exit({filelock::unlock(lock)}) - tryCatch({ - bar_id <- cli::cli_progress_bar( - name = filename, - total = if (is.numeric(expected_size)) expected_size else NA, - type = "download", - ) - progress <- function(down, up) { - if (down[1] != 0) { - cli::cli_progress_update(total = down[1], set = down[2], id = bar_id) - } - TRUE + link_or_copy(blob_path, pointer_path, FALSE, storage_folder) + return(pointer_path) + } + + # Download the blob + withr::with_tempfile("tmp", { + lock <- filelock::lock(paste0(blob_path, ".lock")) + on.exit({filelock::unlock(lock)}) + tryCatch({ + bar_id <- cli::cli_progress_bar( + name = filename, + total = if (is.numeric(expected_size)) expected_size else NA, + type = "download", + ) + progress <- function(down, up) { + if (down[1] != 0) { + cli::cli_progress_update(total = down[1], set = down[2], id = bar_id) } - handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress) - curl::handle_setheaders(handle, .list = hub_headers()) - curl::curl_download(url, tmp, handle = handle, quiet = FALSE) - cli::cli_progress_done(id = bar_id) - }, error = function(err) { - cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) - }) - fs::file_move(tmp, blob_path) + TRUE + } + handle <- curl::new_handle(noprogress = FALSE, progressfunction = progress) + curl::handle_setheaders(handle, .list = hub_headers()) + curl::curl_download(url, tmp, handle = handle, quiet = FALSE) + cli::cli_progress_done(id = bar_id) + }, error = function(err) { + cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err) }) - blob_just_downloaded <- TRUE - } + fs::file_move(tmp, blob_path) + }) # Create pointer file (symlink, move, or copy depending on symlink support) - link_or_copy(blob_path, pointer_path, blob_just_downloaded, storage_folder) + link_or_copy(blob_path, pointer_path, TRUE, storage_folder) pointer_path } @@ -302,7 +300,7 @@ symlink_support_cache <- new.env(parent = emptyenv()) #' #' @param storage_folder Path to storage folder #' @return TRUE if symlinks work, FALSE otherwise -#' @keywords internal +#' @noRd supports_symlinks <- function(storage_folder) { # Check cache first cache_key <- as.character(storage_folder) @@ -351,7 +349,7 @@ supports_symlinks <- function(storage_folder) { #' @param pointer_path Path to the pointer file (destination) #' @param blob_just_downloaded Whether the blob was just downloaded #' @param storage_folder Path to storage folder (for symlink check) -#' @keywords internal +#' @noRd link_or_copy <- function(blob_path, pointer_path, blob_just_downloaded, storage_folder) { use_symlinks <- supports_symlinks(storage_folder) From 1aa4122f8ec27b5476ee97f81d9719fa4cd393cb Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Sat, 22 Nov 2025 00:14:37 -0800 Subject: [PATCH 5/6] Clearer parameter name --- R/hub_download.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/hub_download.R b/R/hub_download.R index 3a15a81..0d99c71 100644 --- a/R/hub_download.R +++ b/R/hub_download.R @@ -347,10 +347,10 @@ supports_symlinks <- function(storage_folder) { #' #' @param blob_path Path to the blob file (source) #' @param pointer_path Path to the pointer file (destination) -#' @param blob_just_downloaded Whether the blob was just downloaded +#' @param owned Whether the blob is safe to delete if symlinks are not supported #' @param storage_folder Path to storage folder (for symlink check) #' @noRd -link_or_copy <- function(blob_path, pointer_path, blob_just_downloaded, storage_folder) { +link_or_copy <- function(blob_path, pointer_path, owned, storage_folder) { use_symlinks <- supports_symlinks(storage_folder) if (use_symlinks) { @@ -360,7 +360,7 @@ link_or_copy <- function(blob_path, pointer_path, blob_just_downloaded, storage_ file.symlink(blob_path, pointer_path) } else { # Degraded mode: move if just downloaded, copy if already existed - if (blob_just_downloaded) { + if (owned) { fs::file_move(blob_path, pointer_path) } else { fs::file_copy(blob_path, pointer_path, overwrite = TRUE) From 8203ce3ca267284fd6624ef4b8ad1396bfed9b64 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Sat, 22 Nov 2025 00:35:57 -0800 Subject: [PATCH 6/6] Update news, bump dev version --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e7d0a2..ff112f6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hfhub Title: Hugging Face Hub Interface -Version: 0.1.1.9000 +Version: 0.1.1.9001 Authors@R: c( person("Daniel", "Falbel", , "daniel@posit.co", role = c("aut", "cre")), person("Regouby", "Christophe", , "christophe.regouby@free.fr", c("ctb")), diff --git a/NEWS.md b/NEWS.md index 872af05..a887592 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # hfhub (development version) * Added FR translation of the R messages. (#8 @cregouby) +* Fixed symlink issues on Windows that caused model snapshots to be empty. (#9) # hfhub 0.1.1