Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hfhub
Title: Hugging Face Hub Interface
Version: 0.1.1.9000
Version: 0.1.1.9001
Authors@R: c(
person("Daniel", "Falbel", , "daniel@posit.co", role = c("aut", "cre")),
person("Regouby", "Christophe", , "christophe.regouby@free.fr", c("ctb")),
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# hfhub (development version)

* Added FR translation of the R messages. (#8 @cregouby)
* Fixed symlink issues on Windows that caused model snapshots to be empty. (#9)

# hfhub 0.1.1

Expand Down
90 changes: 85 additions & 5 deletions R/hub_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,12 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type =
}

if (fs::file_exists(blob_path) && !force_download) {
fs::link_create(blob_path, pointer_path)
# Blob already exists, we'll link/copy it
link_or_copy(blob_path, pointer_path, FALSE, storage_folder)
return(pointer_path)
}

# Download the blob
withr::with_tempfile("tmp", {
lock <- filelock::lock(paste0(blob_path, ".lock"))
on.exit({filelock::unlock(lock)})
Expand All @@ -156,12 +158,11 @@ hub_download <- function(repo_id, filename, ..., revision = "main", repo_type =
cli::cli_abort(gettext("Error downloading from {.url {url}}"), parent = err)
})
fs::file_move(tmp, blob_path)

# fs::link_create doesn't work for linking files on windows.
try(fs::file_delete(pointer_path), silent = TRUE) # delete the link to avoid warnings
file.symlink(blob_path, pointer_path)
})

# Create pointer file (symlink, move, or copy depending on symlink support)
link_or_copy(blob_path, pointer_path, TRUE, storage_folder)

pointer_path
}

Expand Down Expand Up @@ -288,5 +289,84 @@ reqst <- function(method, url, ..., follow_relative_redirects = FALSE) {
method(url, ...)
}

# In-memory cache of symlink-support probe results, keyed by the storage
# folder path (one logical entry per folder). The parent is the empty
# environment, so lookups cannot fall through to unrelated bindings.
symlink_support_cache <- new.env(parent = emptyenv())

#' Check if symlinks are supported in the given directory
#'
#' Probes `storage_folder` by writing a throw-away file into a uniquely named
#' sub-directory and trying to link to it with `file.symlink()`. The outcome is
#' cached per folder in `symlink_support_cache`, so the probe (and the warning
#' it may emit) runs at most once per folder while the cache lives.
#' Matches Python's huggingface_hub behavior.
#'
#' A warning is emitted when symlinks are not supported, unless the
#' `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable is set to a
#' non-empty value.
#'
#' @param storage_folder Path to storage folder
#' @return `TRUE` if symlinks work, `FALSE` otherwise
#' @noRd
supports_symlinks <- function(storage_folder) {
  # Check cache first. `inherits = FALSE` keeps the lookup strictly inside
  # the cache environment.
  cache_key <- as.character(storage_folder)
  if (exists(cache_key, envir = symlink_support_cache, inherits = FALSE)) {
    return(get(cache_key, envir = symlink_support_cache, inherits = FALSE))
  }

  # Probe inside a uniquely named directory: a fixed name could collide with
  # a leftover from an interrupted run (or with a concurrent process), which
  # would make file.symlink() fail and wrongly report "unsupported".
  test_dir <- tempfile(pattern = ".symlink_test_", tmpdir = cache_key)
  fs::dir_create(test_dir)
  on.exit(fs::dir_delete(test_dir), add = TRUE)

  test_file <- fs::path(test_dir, "test.txt")
  test_link <- fs::path(test_dir, "test_link.txt")

  writeLines("test", test_file)
  # file.symlink() signals failure with a warning plus a FALSE return value;
  # only the return value matters for the probe.
  result <- isTRUE(suppressWarnings(file.symlink(test_file, test_link)))

  # Cache the result
  assign(cache_key, result, envir = symlink_support_cache)

  # Show warning if symlinks aren't supported (matches Python's behavior)
  warning_disabled <- nzchar(Sys.getenv("HF_HUB_DISABLE_SYMLINKS_WARNING"))
  if (!result && !warning_disabled) {
    # The main message must be a single string: separate unnamed elements
    # would each be rendered on their own line, splitting the sentence.
    cli::cli_warn(c(
      paste0(
        "{.pkg hfhub} cache-system uses symlinks by default to efficiently store ",
        "duplicated files but your machine does not support them in {.path {storage_folder}}. ",
        "Caching files will still work but in a degraded version that might require ",
        "more space on your disk. This warning can be disabled by setting the ",
        "{.envvar HF_HUB_DISABLE_SYMLINKS_WARNING} environment variable."
      ),
      "i" = "For more details, see {.url https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations}",
      "i" = "To support symlinks on Windows, you either need to activate Developer Mode or run R as administrator."
    ))
  }

  result
}

#' Link, move, or copy blob to pointer path based on symlink support
#'
#' Helper function that handles creating the final pointer file.
#' - If symlinks supported: creates symlink; if creating this particular
#'   link fails, falls back to copying the blob so the pointer always exists
#' - If symlinks not supported and blob just downloaded: moves file
#' - If symlinks not supported and blob already existed: copies file
#'
#' @param blob_path Path to the blob file (source)
#' @param pointer_path Path to the pointer file (destination)
#' @param owned Whether the blob is safe to delete if symlinks are not supported
#' @param storage_folder Path to storage folder (for symlink check)
#' @return `pointer_path`, invisibly.
#' @noRd
link_or_copy <- function(blob_path, pointer_path, owned, storage_folder) {
  if (supports_symlinks(storage_folder)) {
    # fs::link_create doesn't work for linking files on windows, so base R's
    # file.symlink() is used instead.
    # Delete any existing pointer first so the link can be (re)created.
    try(fs::file_delete(pointer_path), silent = TRUE)
    linked <- isTRUE(suppressWarnings(file.symlink(blob_path, pointer_path)))
    if (!linked) {
      # The folder-level probe passed but this link could not be created.
      # Copy rather than move: the blob may already be the target of other
      # links, so it must stay where it is.
      fs::file_copy(blob_path, pointer_path, overwrite = TRUE)
    }
  } else if (owned) {
    # Degraded mode, blob just downloaded: move it to avoid a duplicate.
    fs::file_move(blob_path, pointer_path)
  } else {
    # Degraded mode, blob already existed: keep it and copy.
    fs::file_copy(blob_path, pointer_path, overwrite = TRUE)
  }

  invisible(pointer_path)
}

# `tmp` is bound by withr::with_tempfile("tmp", ...) rather than by a regular
# assignment; declare it as a known global so code checks do not flag it as
# an undefined variable.
utils::globalVariables("tmp")

2 changes: 1 addition & 1 deletion tests/testthat/_snaps/hub_snapshot.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Code
p <- hub_snapshot("dfalbel/cran-packages", repo_type = "dataset",
allow_patterns = "\\.R")
Message <cliMessage>
Message
i Snapshotting files 0/4
v Snapshotting files 4/4 [0ms]

Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-hub_download.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
skip_on_cran()

test_that("hub_download", {
withr::local_envvar(list(HF_HUB_DISABLE_SYMLINKS_WARNING = "1"))

file <- hub_download("gpt2", filename = "config.json")

expect_equal(
Expand All @@ -26,6 +28,11 @@ test_that("hub_download", {
file <- hub_download("gpt2", filename = "config.json")
})
expect_equal(list.files(tmp), "models--gpt2")
# Make sure the config.json exists (detect broken symlink support in Windows)
expect_length(
Sys.glob(file.path(tmp, "models--gpt2", "snapshots", "*", "config.json")),
1
)
})

test_that("can download from private repo", {
Expand Down