diff --git a/package.json b/package.json index 0596df4..8ea6011 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "dev": "vite dev", "build": "vite build", "preview": "vite preview", - "check": "npm run check:backend && npm run check:backend", + "check": "npm run check:backend && npm run check:frontend", "check:frontend": "npm run check:biome -- ./src/ ./tests/ && npm run check:svelte -- ./src/ ./tests/", "check:svelte": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", "check:biome": "npx biome check --no-errors-on-unmatched --files-ignore-unknown=true", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 06c7e54..10fa071 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -43,3 +43,4 @@ tile-grid = "0.6" geozero = { version = "0.13", features = ["with-mvt"] } pointy = "0.4" url = "2.5.7" +filetime = "0.2" diff --git a/src-tauri/src/dwca/archive.rs b/src-tauri/src/dwca/archive.rs index b58ab65..10dbd3b 100644 --- a/src-tauri/src/dwca/archive.rs +++ b/src-tauri/src/dwca/archive.rs @@ -88,10 +88,6 @@ impl Archive { let db_path = storage_dir.join(format!("{db_name}.db")); let db = Database::create_from_core_files(&core_files, &extensions, &db_path, &core_id_column)?; - // Initialize photo cache table - let photo_cache = crate::photo_cache::PhotoCache::new(db.connection()); - photo_cache.create_table()?; - Ok(Self { // archive_path: archive_path.to_path_buf(), storage_dir, @@ -337,44 +333,35 @@ impl Archive { /// Gets a photo from the cache or extracts it from the archive /// Returns the absolute path to the cached photo file pub fn get_photo(&self, photo_path: &str) -> Result { - // Create photo cache - let photo_cache = crate::photo_cache::PhotoCache::new(self.db.connection()); + // Create photo cache directory + let cache_dir = self.storage_dir.join("photo_cache"); + std::fs::create_dir_all(&cache_dir).map_err(|e| ChuckError::DirectoryCreate { + path: cache_dir.clone(), + source: e, + })?; + + let photo_cache = crate::photo_cache::PhotoCache::new(&cache_dir); // Check if photo is already cached if let Some(cached_path) = photo_cache.get_cached_photo(photo_path)? { // Update access time and return cached path - photo_cache.update_access_time(photo_path)?; - return Ok(cached_path); + photo_cache.touch_file(&cached_path)?; + return Ok(cached_path.to_string_lossy().to_string()); } // Photo not cached - extract it from the archive let archive_zip_path = self.storage_dir.join("archive.zip"); - let cache_dir = self.storage_dir.join("photo_cache"); - std::fs::create_dir_all(&cache_dir).map_err(|e| ChuckError::DirectoryCreate { - path: cache_dir.clone(), - source: e, - })?; - - // Create a unique filename based on the photo path to avoid conflicts - let safe_filename = photo_path.replace(['/', '\\'], "_"); - let cached_file_path = cache_dir.join(&safe_filename); + let cached_file_path = photo_cache.get_cache_path(photo_path); // Extract the photo from the ZIP using the path from the multimedia table - let bytes_written = extract_single_file( + extract_single_file( &archive_zip_path, photo_path, &cached_file_path, )?; - // Add to cache - photo_cache.add_photo( - photo_path, - cached_file_path.to_str().ok_or_else(|| ChuckError::InvalidFileName(cached_file_path.clone()))?, - bytes_written as i64, - )?; - // Evict LRU photos if cache is too large (2GB default) - const MAX_CACHE_SIZE: i64 = 2 * 1024 * 1024 * 1024; // 2GB + const MAX_CACHE_SIZE: u64 = 2 * 1024 * 1024 * 1024; // 2GB photo_cache.evict_lru(MAX_CACHE_SIZE)?; Ok(cached_file_path.to_string_lossy().to_string()) @@ -1399,8 +1386,9 @@ mod tests { let archive_zip = archive.storage_dir.join("archive.zip"); assert!(archive_zip.exists(), "archive.zip hard link should exist"); - // Verify photo cache table exists - let photo_cache = crate::photo_cache::PhotoCache::new(archive.db.connection()); + // Verify cache is empty initially + let cache_dir = archive.storage_dir.join("photo_cache"); + let photo_cache = crate::photo_cache::PhotoCache::new(&cache_dir); let cache_size = photo_cache.get_cache_size().unwrap(); assert_eq!(cache_size, 0, "Cache should be empty initially"); @@ -1513,5 +1501,83 @@ obs789,34.0522,-118.2437,Pinus coulteri assert_eq!(first_point.2, -122.4194); // longitude assert_eq!(first_point.3, Some("Quercus agrifolia".to_string())); // scientific name } + + #[test] + fn test_get_photo_works_after_reopening_archive() { + use std::io::Write; + + // Create a test archive with photos + let test_name = "get_photo_after_reopen"; + let temp_dir = std::env::temp_dir().join(format!("chuck_test_{test_name}")); + std::fs::remove_dir_all(&temp_dir).ok(); + std::fs::create_dir_all(&temp_dir).unwrap(); + + // Create a simple ZIP archive with photos + let archive_path = temp_dir.join("test.zip"); + let mut zip = zip::ZipWriter::new(std::fs::File::create(&archive_path).unwrap()); + let options: zip::write::FileOptions<()> = zip::write::FileOptions::default() + .compression_method(zip::CompressionMethod::Stored) + .unix_permissions(0o644); + + // Add meta.xml + let meta_xml = br#" + + + + occurrence.csv + + + + + + + multimedia.csv + + + + + +"#; + zip.start_file("meta.xml", options).unwrap(); + zip.write_all(meta_xml).unwrap(); + + // Add occurrence.csv + let occurrence_csv = b"occurrenceID\n1\n"; + zip.start_file("occurrence.csv", options).unwrap(); + zip.write_all(occurrence_csv).unwrap(); + + // Add multimedia.csv with photo reference + let multimedia_csv = b"occurrenceID,identifier\n1,media/photo.jpg\n"; + zip.start_file("multimedia.csv", options).unwrap(); + zip.write_all(multimedia_csv).unwrap(); + + // Add a photo file + let photo_data = b"fake jpeg data"; + zip.start_file("media/photo.jpg", options).unwrap(); + zip.write_all(photo_data).unwrap(); + + zip.finish().unwrap(); + + // Open the archive initially (this works because db is read-write) + let base_dir = temp_dir.join("storage"); + let archive = Archive::open(&archive_path, &base_dir, |_| {}).unwrap(); + + // Drop the archive to release the database connection + drop(archive); + + // Re-open using Archive::current() (simulates app restart) + let archive = Archive::current(&base_dir).unwrap(); + + // This should work - but currently fails with read-only database error + let cached_path = archive.get_photo("media/photo.jpg").unwrap(); + assert!(std::path::Path::new(&cached_path).exists(), "Photo should be extracted to cache"); + + // Verify the photo content + let content = std::fs::read(&cached_path).unwrap(); + assert_eq!(content, photo_data, "Cached photo content should match original"); + + // Clean up + std::fs::remove_dir_all(&temp_dir).ok(); + } } diff --git a/src-tauri/src/photo_cache.rs b/src-tauri/src/photo_cache.rs index 1e3a821..d400455 100644 --- a/src-tauri/src/photo_cache.rs +++ b/src-tauri/src/photo_cache.rs @@ -1,147 +1,124 @@ use crate::error::Result; +use std::path::{Path, PathBuf}; -/// Manages lazy-loaded photo caching using DuckDB -pub struct PhotoCache<'a> { - conn: &'a duckdb::Connection, +/// Manages lazy-loaded photo caching using the filesystem +/// Uses file modification times for LRU eviction +pub struct PhotoCache { + cache_dir: PathBuf, } -impl<'a> PhotoCache<'a> { - /// Creates a new PhotoCache with a connection reference - pub fn new(conn: &'a duckdb::Connection) -> Self { - Self { conn } +impl PhotoCache { + /// Creates a new PhotoCache with a cache directory path + pub fn new(cache_dir: &Path) -> Self { + Self { + cache_dir: cache_dir.to_path_buf(), + } } - /// Creates the photo cache table if it doesn't exist - pub fn create_table(&self) -> Result<()> { - let sql = " - CREATE TABLE IF NOT EXISTS photo_cache ( - photo_path VARCHAR PRIMARY KEY, - cached_file_path VARCHAR NOT NULL, - file_size BIGINT NOT NULL, - last_accessed TIMESTAMP NOT NULL, - extracted_at TIMESTAMP NOT NULL - ) - "; - - self.conn.execute(sql, []).map_err(|e| { - log::error!("Failed to create photo_cache table: {e}"); - e - })?; - - // Create index on last_accessed for LRU eviction queries - self.conn.execute( - "CREATE INDEX IF NOT EXISTS idx_last_accessed ON photo_cache(last_accessed)", - [], - )?; - - log::info!("Photo cache table created successfully"); - Ok(()) + /// Gets a cached photo path if it exists + pub fn get_cached_photo(&self, photo_path: &str) -> Result> { + let safe_filename = photo_path.replace(['/', '\\'], "_"); + let cached_file_path = self.cache_dir.join(&safe_filename); + + if cached_file_path.exists() { + Ok(Some(cached_file_path)) + } else { + Ok(None) + } } - /// Adds or updates a photo in the cache - pub fn add_photo( - &self, - photo_path: &str, - cached_file_path: &str, - file_size: i64, - ) -> Result<()> { - let now = chrono::Utc::now().to_rfc3339(); - - self.conn.execute( - "INSERT OR REPLACE INTO photo_cache - (photo_path, cached_file_path, file_size, last_accessed, extracted_at) - VALUES (?, ?, ?, ?, ?)", - duckdb::params![photo_path, cached_file_path, file_size, &now, &now], - )?; - + /// Updates the file modification time to mark it as recently accessed (for LRU) + pub fn touch_file(&self, cached_path: &Path) -> Result<()> { + let now = filetime::FileTime::now(); + filetime::set_file_mtime(cached_path, now).map_err(|e| { + log::warn!("Failed to update mtime for {}: {}", cached_path.display(), e); + crate::error::ChuckError::FileRead { + path: cached_path.to_path_buf(), + source: e, + } + })?; Ok(()) } - /// Updates the last accessed time for a photo - pub fn update_access_time(&self, photo_path: &str) -> Result<()> { - let now = chrono::Utc::now().to_rfc3339(); - - self.conn.execute( - "UPDATE photo_cache SET last_accessed = ? WHERE photo_path = ?", - duckdb::params![&now, photo_path], - )?; - - Ok(()) + /// Returns the path where a photo should be cached + /// The caller is responsible for actually writing the file + pub fn get_cache_path(&self, photo_path: &str) -> PathBuf { + let safe_filename = photo_path.replace(['/', '\\'], "_"); + self.cache_dir.join(&safe_filename) } - /// Gets a cached photo path if it exists - pub fn get_cached_photo(&self, photo_path: &str) -> Result> { - let result = self.conn.query_row( - "SELECT cached_file_path FROM photo_cache WHERE photo_path = ?", - [photo_path], - |row| row.get(0), - ); - - match result { - Ok(path) => Ok(Some(path)), - Err(duckdb::Error::QueryReturnedNoRows) => Ok(None), - Err(e) => Err(e.into()), + /// Gets the total size of cached photos in bytes by scanning the cache directory + pub fn get_cache_size(&self) -> Result { + if !self.cache_dir.exists() { + return Ok(0); } - } - /// Gets the total size of cached photos in bytes - pub fn get_cache_size(&self) -> Result { - let size: Option = self.conn.query_row( - "SELECT COALESCE(SUM(file_size), 0) FROM photo_cache", - [], - |row| row.get(0), - )?; + let mut total_size = 0u64; + for entry in std::fs::read_dir(&self.cache_dir) + .map_err(|e| crate::error::ChuckError::FileRead { + path: self.cache_dir.clone(), + source: e, + })? + .flatten() + { + if let Ok(metadata) = entry.metadata() { + if metadata.is_file() { + total_size += metadata.len(); + } + } + } - Ok(size.unwrap_or(0)) + Ok(total_size) } /// Evicts least recently used photos until cache is under the size limit /// Returns the number of photos evicted - pub fn evict_lru(&self, max_cache_size: i64) -> Result { - let current_size = self.get_cache_size()?; + pub fn evict_lru(&self, max_cache_size: u64) -> Result { + if !self.cache_dir.exists() { + return Ok(0); + } + let current_size = self.get_cache_size()?; if current_size <= max_cache_size { return Ok(0); } - // Get photos ordered by least recently used - let mut stmt = self.conn.prepare( - "SELECT photo_path, cached_file_path, file_size - FROM photo_cache - ORDER BY last_accessed ASC" - )?; - - let photos: Vec<(String, String, i64)> = stmt - .query_map([], |row| { - Ok(( - row.get(0)?, - row.get(1)?, - row.get(2)?, - )) + // Collect files with their modification times and sizes + let mut files: Vec<(PathBuf, std::time::SystemTime, u64)> = Vec::new(); + + for entry in std::fs::read_dir(&self.cache_dir) + .map_err(|e| crate::error::ChuckError::FileRead { + path: self.cache_dir.clone(), + source: e, })? - .collect::, _>>()?; + .flatten() + { + let path = entry.path(); + if let Ok(metadata) = entry.metadata() { + if metadata.is_file() { + let mtime = metadata.modified().unwrap_or(std::time::UNIX_EPOCH); + files.push((path, mtime, metadata.len())); + } + } + } - let mut size_to_free = current_size - max_cache_size; + // Sort by modification time (oldest first) + files.sort_by_key(|(_, mtime, _)| *mtime); + + let mut size_to_free = current_size.saturating_sub(max_cache_size); let mut evicted = 0; - for (photo_path, cached_file_path, file_size) in photos { - if size_to_free <= 0 { + for (path, _, file_size) in files { + if size_to_free == 0 { break; } - // Delete the cached file - if let Err(e) = std::fs::remove_file(&cached_file_path) { - log::warn!("Failed to delete cached photo {cached_file_path}: {e}"); + if let Err(e) = std::fs::remove_file(&path) { + log::warn!("Failed to delete cached photo {}: {}", path.display(), e); + } else { + size_to_free = size_to_free.saturating_sub(file_size); + evicted += 1; } - - // Remove from database - self.conn.execute( - "DELETE FROM photo_cache WHERE photo_path = ?", - [&photo_path], - )?; - - size_to_free -= file_size; - evicted += 1; } log::info!("Evicted {evicted} photos from cache"); diff --git a/tests/app.spec.ts b/tests/app.spec.ts index a732e92..8a2bfd9 100644 --- a/tests/app.spec.ts +++ b/tests/app.spec.ts @@ -517,8 +517,13 @@ test.describe('Frontend', () => { await expect(cardContent).toBeVisible(); }); - test('should render cards with the correct height', async ({ page }, testInfo) => { - test.skip(testInfo.project.name === 'integration-windows', 'Card height calculation differs in Edge'); + test('should render cards with the correct height', async ({ + page, + }, testInfo) => { + test.skip( + testInfo.project.name === 'integration-windows', + 'Card height calculation differs in Edge', + ); // Open the archive await openArchive(page); diff --git a/tests/bounding-box.spec.ts b/tests/bounding-box.spec.ts index 6a4580a..e73c41a 100644 --- a/tests/bounding-box.spec.ts +++ b/tests/bounding-box.spec.ts @@ -14,7 +14,7 @@ test.describe('Bounding Box Filtering', () => { // TODO figure out why playwright doesn't render the map correctly on windows test.skip( testInfo.project.name === 'integration-windows', - 'Map rendering in playwright on Windows not quite working' + 'Map rendering in playwright on Windows not quite working', ); // Open archive await openArchive(page);