diff --git a/Cargo.lock b/Cargo.lock index 552e0aa..54fd0b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -428,6 +428,7 @@ dependencies = [ "spellbook", "streaming-iterator", "tempfile", + "thiserror 2.0.17", "tree-sitter", "tree-sitter-bash", "tree-sitter-c", diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index d7bb976..bf8cd60 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -2,6 +2,7 @@ mod helpers; mod settings; mod watched_file; use crate::settings::ConfigSettings; +pub use crate::settings::CustomDictionariesEntry; use crate::watched_file::WatchedFile; use log::debug; use log::info; @@ -14,9 +15,9 @@ use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; -static CACHE_DIR: &str = "codebook"; -static GLOBAL_CONFIG_FILE: &str = "codebook.toml"; -static USER_CONFIG_FILES: [&str; 2] = ["codebook.toml", ".codebook.toml"]; +const CACHE_DIR: &str = "codebook"; +const GLOBAL_CONFIG_FILE: &str = "codebook.toml"; +const USER_CONFIG_FILES: [&str; 2] = ["codebook.toml", ".codebook.toml"]; /// The main trait for Codebook configuration. pub trait CodebookConfig: Sync + Send + Debug { @@ -24,6 +25,7 @@ pub trait CodebookConfig: Sync + Send + Debug { fn add_word_global(&self, word: &str) -> Result; fn add_ignore(&self, file: &str) -> Result; fn get_dictionary_ids(&self) -> Vec; + fn get_custom_dictionaries_definitions(&self) -> Vec; fn should_ignore_path(&self, path: &Path) -> bool; fn is_allowed_word(&self, word: &str) -> bool; fn should_flag_word(&self, word: &str) -> bool; @@ -198,8 +200,11 @@ impl CodebookConfigFile { let path = path.as_ref(); let content = fs::read_to_string(path)?; - match toml::from_str(&content) { - Ok(settings) => Ok(settings), + match toml::from_str::(&content) { + Ok(mut settings) => { + settings.set_config_file_paths(path); + Ok(settings) + } Err(e) => { let err = io::Error::new( ErrorKind::InvalidData, @@ -223,6 +228,7 @@ impl CodebookConfigFile { if project.use_global { if let Some(global) = global_config.content() { let mut effective = global.clone(); + effective.merge(project); effective } else { @@ -496,6 +502,11 @@ impl CodebookConfig for CodebookConfigFile { fn cache_dir(&self) -> &Path { &self.cache_dir } + + fn get_custom_dictionaries_definitions(&self) -> Vec { + let snapshot = self.snapshot(); + snapshot.custom_dictionaries_definitions.clone() + } } #[derive(Debug)] @@ -520,6 +531,18 @@ impl CodebookConfigMemory { cache_dir: env::temp_dir().join(CACHE_DIR), } } + + pub fn add_dict_id(&self, id: &str) { + let mut settings = self.settings.write().unwrap(); + settings.dictionaries.push(id.into()); + settings.sort_and_dedup(); + } + + pub fn add_custom_dict(&self, custom_dict: CustomDictionariesEntry) { + let mut settings = self.settings.write().unwrap(); + settings.custom_dictionaries_definitions.push(custom_dict); + settings.sort_and_dedup(); + } } impl CodebookConfigMemory { @@ -576,6 +599,11 @@ impl CodebookConfig for CodebookConfigMemory { fn cache_dir(&self) -> &Path { &self.cache_dir } + + fn get_custom_dictionaries_definitions(&self) -> Vec { + let snapshot = self.snapshot(); + snapshot.custom_dictionaries_definitions.clone() + } } #[cfg(test)] @@ -1066,4 +1094,53 @@ mod tests { Ok(()) } + + #[test] + fn test_normalization_of_custom_dict_paths() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = Arc::from(temp_dir.path().join("codebook.toml").as_path()); + let relative_custom_dict_path = temp_dir.path().join("custom_rel.txt"); + let absolute_custom_dict_path = temp_dir.path().join("custom_abs.txt"); + let mut file = File::create(&config_path)?; + File::create(&relative_custom_dict_path)?; + File::create(&absolute_custom_dict_path)?; + + let expected = vec![ + CustomDictionariesEntry { + name: "absolute".to_owned(), + path: absolute_custom_dict_path.to_str().unwrap().to_string(), + allow_add_words: true, + config_file_path: Some(config_path.clone()), + }, + CustomDictionariesEntry { + name: "relative".to_owned(), + path: relative_custom_dict_path.to_str().unwrap().to_string(), + allow_add_words: false, + config_file_path: Some(config_path.clone()), + }, + ]; + + let a = format!( + r#" + [[custom_dictionaries_definitions]] + name = "absolute" + path = "{}" + allow_add_words = true + + [[custom_dictionaries_definitions]] + name = "relative" + path = "{}" + allow_add_words = false + "#, + absolute_custom_dict_path.display(), + relative_custom_dict_path.display(), + ); + file.write_all(a.as_bytes())?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + let custom_dicts = config.snapshot().custom_dictionaries_definitions.clone(); + assert_eq!(expected, custom_dicts); + + Ok(()) + } } diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 9173b98..ea81d99 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,10 +1,55 @@ +use std::{ + io, + path::{self, Path, PathBuf}, + sync::Arc, +}; + use serde::{Deserialize, Serialize}; + +#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)] +pub struct CustomDictionariesEntry { + /// The name of the custom dictionary + #[serde(default)] + pub name: String, + + /// An absolute or relative path to the custom dictionary + #[serde(default)] + pub path: String, + + /// Allow adding words to this dictionary + #[serde(default)] + pub allow_add_words: bool, + + /// For internal use to track the coodbook.toml that originated this entry + #[serde(skip)] + pub config_file_path: Option>, +} + +impl CustomDictionariesEntry { + pub fn resolve_full_path(&self) -> Result { + let full_path = if let Some(config_file_path) = &self.config_file_path { + config_file_path + .parent() + .ok_or(io::Error::from(io::ErrorKind::NotFound))? + .join(Path::new(&self.path)) + } else { + PathBuf::from(&self.path) + }; + + path::absolute(&full_path) + } +} + #[derive(Debug, Serialize, Clone, PartialEq)] pub struct ConfigSettings { /// List of dictionaries to use for spell checking #[serde(default, skip_serializing_if = "Vec::is_empty")] pub dictionaries: Vec, + /// List of custom dictionaries to use for spell checking + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub custom_dictionaries_definitions: Vec, + /// Custom allowlist of words #[serde(default, skip_serializing_if = "Vec::is_empty")] pub words: Vec, @@ -56,6 +101,7 @@ impl Default for ConfigSettings { fn default() -> Self { Self { dictionaries: vec![], + custom_dictionaries_definitions: vec![], words: Vec::new(), flag_words: Vec::new(), ignore_paths: Vec::new(), @@ -79,6 +125,8 @@ impl<'de> Deserialize<'de> for ConfigSettings { #[serde(default)] dictionaries: Vec, #[serde(default)] + custom_dictionaries_definitions: Vec, + #[serde(default)] words: Vec, #[serde(default)] flag_words: Vec, @@ -95,6 +143,14 @@ impl<'de> Deserialize<'de> for ConfigSettings { let helper = Helper::deserialize(deserializer)?; Ok(ConfigSettings { dictionaries: to_lowercase_vec(helper.dictionaries), + custom_dictionaries_definitions: helper + .custom_dictionaries_definitions + .into_iter() + .map(|mut c| { + c.name.make_ascii_lowercase(); + c + }) + .collect(), words: to_lowercase_vec(helper.words), flag_words: to_lowercase_vec(helper.flag_words), ignore_paths: helper.ignore_paths, @@ -106,10 +162,12 @@ impl<'de> Deserialize<'de> for ConfigSettings { } impl ConfigSettings { - /// Merge another config settings into this one, sorting and deduplicating all collections + /// Merge another config settings into this one, sorting and deduplicating all collections, prioritizing self when possible pub fn merge(&mut self, other: ConfigSettings) { // Add items from the other config self.dictionaries.extend(other.dictionaries); + self.custom_dictionaries_definitions + .extend(other.custom_dictionaries_definitions); self.words.extend(other.words); self.flag_words.extend(other.flag_words); self.ignore_paths.extend(other.ignore_paths); @@ -131,11 +189,21 @@ impl ConfigSettings { pub fn sort_and_dedup(&mut self) { // Sort and deduplicate each Vec sort_and_dedup(&mut self.dictionaries); + sort_and_dedup_by(&mut self.custom_dictionaries_definitions, |d1, d2| { + d1.name.cmp(&d2.name) + }); sort_and_dedup(&mut self.words); sort_and_dedup(&mut self.flag_words); sort_and_dedup(&mut self.ignore_paths); sort_and_dedup(&mut self.ignore_patterns); } + + pub fn set_config_file_paths(&mut self, config_path: &Path) { + let config_path: Arc = Arc::from(config_path); + for custom_directory in &mut self.custom_dictionaries_definitions { + custom_directory.config_file_path = Some(config_path.clone()); + } + } } /// Helper function to sort and deduplicate a Vec of strings @@ -144,10 +212,26 @@ fn sort_and_dedup(vec: &mut Vec) { vec.dedup(); } +pub fn sort_and_dedup_by(vec: &mut Vec, f: F) +where + F: Fn(&T, &T) -> std::cmp::Ordering, +{ + vec.sort_by(&f); + vec.dedup_by(|d1, d2| f(d1, d2) == std::cmp::Ordering::Equal); +} + #[cfg(test)] mod tests { use super::*; + fn build_fake_custom_dict(name: &str) -> CustomDictionariesEntry { + CustomDictionariesEntry { + name: name.into(), + path: name.into(), + ..Default::default() + } + } + #[test] fn test_default() { let config = ConfigSettings::default(); @@ -221,8 +305,14 @@ mod tests { #[test] fn test_merge() { + let mut duplicate_custom_dict = build_fake_custom_dict("duplicate"); + let mut base = ConfigSettings { dictionaries: vec!["en_us".to_string()], + custom_dictionaries_definitions: vec![ + build_fake_custom_dict("base_unique"), + duplicate_custom_dict.clone(), + ], words: vec!["codebook".to_string()], flag_words: vec!["todo".to_string()], ignore_paths: vec!["**/*.md".to_string()], @@ -231,8 +321,15 @@ mod tests { min_word_length: 3, }; + // flip allow_add_words to true, to create a disparity between the dictionaries + duplicate_custom_dict.allow_add_words = !duplicate_custom_dict.allow_add_words; + let other = ConfigSettings { dictionaries: vec!["en_gb".to_string(), "en_us".to_string()], + custom_dictionaries_definitions: vec![ + duplicate_custom_dict.clone(), + build_fake_custom_dict("other_unique"), + ], words: vec!["rust".to_string()], flag_words: vec!["fixme".to_string()], ignore_paths: vec!["target/".to_string()], @@ -245,6 +342,13 @@ mod tests { // After merging and deduplicating, we should have combined items assert_eq!(base.dictionaries, vec!["en_gb", "en_us"]); + assert_eq!( + base.custom_dictionaries_definitions + .iter() + .map(|d| d.name.clone()) + .collect::>(), + vec!["base_unique", "duplicate", "other_unique"] + ); assert_eq!(base.words, vec!["codebook", "rust"]); assert_eq!(base.flag_words, vec!["fixme", "todo"]); assert_eq!(base.ignore_paths, vec!["**/*.md", "target/"]); @@ -258,6 +362,12 @@ mod tests { assert!(base.use_global); // min_word_length from other should override base (since it's non-default) assert_eq!(base.min_word_length, 2); + + // Assert that base custom_dictionaries_definitions took priority + assert_ne!( + base.custom_dictionaries_definitions.iter().find(|d| d.name == "duplicate").expect("custom_dictionaries_definitions duplicate must be present if set in ether of the merged dictionaries").allow_add_words + ,duplicate_custom_dict.allow_add_words + ); } #[test] @@ -288,6 +398,11 @@ mod tests { "en_us".to_string(), "en_gb".to_string(), ], + custom_dictionaries_definitions: vec![ + build_fake_custom_dict("custom_1"), + build_fake_custom_dict("custom_2"), + build_fake_custom_dict("custom_1"), + ], words: vec![ "rust".to_string(), "codebook".to_string(), @@ -311,6 +426,14 @@ mod tests { config.sort_and_dedup(); assert_eq!(config.dictionaries, vec!["en_gb", "en_us"]); + assert_eq!( + config + .custom_dictionaries_definitions + .iter() + .map(|d| d.name.clone()) + .collect::>(), + vec!["custom_1", "custom_2"] + ); assert_eq!(config.words, vec!["codebook", "rust"]); assert_eq!(config.flag_words, vec!["fixme", "todo"]); assert_eq!(config.ignore_paths, vec!["**/*.md", "target/"]); diff --git a/crates/codebook-lsp/src/lsp.rs b/crates/codebook-lsp/src/lsp.rs index c07f343..a2cdfe6 100644 --- a/crates/codebook-lsp/src/lsp.rs +++ b/crates/codebook-lsp/src/lsp.rs @@ -1,8 +1,10 @@ use std::collections::HashMap; +use std::collections::HashSet; use std::path::Path; use std::str::FromStr as _; use std::sync::Arc; +use codebook::errors::DictModificationError; use codebook::parser::get_word_from_string; use codebook::queries::LanguageType; use string_offsets::AllConfig; @@ -13,6 +15,8 @@ use log::LevelFilter; use log::error; use serde_json::Value; use tokio::task; +use tower_lsp::jsonrpc::Error as RpcError; +use tower_lsp::jsonrpc::ErrorCode; use tower_lsp::jsonrpc::Result as RpcResult; use tower_lsp::lsp_types::*; use tower_lsp::{Client, LanguageServer}; @@ -37,6 +41,7 @@ pub struct Backend { enum CodebookCommand { AddWord, AddWordGlobal, + AddWordDict, Unknown, } @@ -45,6 +50,7 @@ impl From<&str> for CodebookCommand { match command { "codebook.addWord" => CodebookCommand::AddWord, "codebook.addWordGlobal" => CodebookCommand::AddWordGlobal, + "codebook.addWordDict" => CodebookCommand::AddWordDict, _ => CodebookCommand::Unknown, } } @@ -55,6 +61,7 @@ impl From for String { match command { CodebookCommand::AddWord => "codebook.addWord".to_string(), CodebookCommand::AddWordGlobal => "codebook.addWordGlobal".to_string(), + CodebookCommand::AddWordDict => "codebook.addWordDict".to_string(), CodebookCommand::Unknown => "codebook.unknown".to_string(), } } @@ -95,6 +102,7 @@ impl LanguageServer for Backend { commands: vec![ CodebookCommand::AddWord.into(), CodebookCommand::AddWordGlobal.into(), + CodebookCommand::AddWordDict.into(), ], work_done_progress_options: Default::default(), }), @@ -255,6 +263,35 @@ impl LanguageServer for Backend { disabled: None, data: None, })); + + let active_dict_ids = self + .config + .get_dictionary_ids() + .into_iter() + .collect::>(); + + for custom_dict in self.config.get_custom_dictionaries_definitions() { + if !custom_dict.allow_add_words || !active_dict_ids.contains(&custom_dict.name) { + continue; + } + + let custom_dict_name = custom_dict.name; + let title = format!("Add '{word}' to '{custom_dict_name}' dictionary"); + actions.push(CodeActionOrCommand::CodeAction(CodeAction { + title: title.clone(), + kind: Some(CodeActionKind::QUICKFIX), + diagnostics: None, + edit: None, + command: Some(Command { + title: title, + command: CodebookCommand::AddWordDict.into(), + arguments: Some(vec![custom_dict_name.into(), word.to_string().into()]), + }), + is_preferred: None, + disabled: None, + data: None, + })); + } } match actions.is_empty() { true => Ok(None), @@ -292,6 +329,28 @@ impl LanguageServer for Backend { } Ok(None) } + CodebookCommand::AddWordDict => { + let dict_id = params + .arguments + .first() + .and_then(|arg| arg.as_str()) + .ok_or(RpcError::new(ErrorCode::InvalidParams))? + .to_string(); + + let words = params + .arguments + .iter() + .skip(1) + .filter_map(|arg| arg.as_str().map(|s| s.to_string())); + + let updated = self.add_words_to_dict(&dict_id, words); + if updated { + self.codebook.refresh_custom_dictionary(&dict_id); + + self.recheck_all().await; + } + Ok(None) + } CodebookCommand::Unknown => Ok(None), } } @@ -369,6 +428,17 @@ impl Backend { } should_save } + fn add_words_to_dict(&self, dict_id: &str, words: impl Iterator) -> bool { + let mut should_save = false; + for word in words { + match self.codebook.add_word_to_custom_dictionary(&word, dict_id) { + Ok(_) => should_save = true, + Err(e @ DictModificationError::WordAlreadyExists(_)) => info!("{e}"), + Err(e) => error!("{e}"), + }; + } + should_save + } fn make_suggestion(&self, suggestion: &str, range: &Range, uri: &Url) -> CodeAction { let title = format!("Replace with '{suggestion}'"); diff --git a/crates/codebook/Cargo.toml b/crates/codebook/Cargo.toml index 29d94c7..7422bd2 100644 --- a/crates/codebook/Cargo.toml +++ b/crates/codebook/Cargo.toml @@ -52,6 +52,7 @@ tree-sitter-zig.workspace = true tree-sitter-c-sharp.workspace = true tree-sitter.workspace = true unicode-segmentation.workspace = true +thiserror.workspace = true codebook_config.workspace = true codebook_downloader.workspace = true diff --git a/crates/codebook/src/dictionaries/dictionary.rs b/crates/codebook/src/dictionaries/dictionary.rs index 0feccba..3daff00 100644 --- a/crates/codebook/src/dictionaries/dictionary.rs +++ b/crates/codebook/src/dictionaries/dictionary.rs @@ -2,6 +2,7 @@ use lru::LruCache; use std::{ collections::HashSet, + io, num::NonZeroUsize, path::PathBuf, sync::{Arc, RwLock}, @@ -158,11 +159,6 @@ impl TextDictionary { .collect(); Self { words } } - pub fn new_from_path(path: &PathBuf) -> Self { - let word_list = std::fs::read_to_string(path) - .unwrap_or_else(|_| panic!("Failed to read dictionary file: {}", path.display())); - Self::new(&word_list) - } /// Get a reference to the internal HashSet for batch operations pub fn word_set(&self) -> &HashSet { @@ -170,6 +166,15 @@ impl TextDictionary { } } +impl TryFrom<&PathBuf> for TextDictionary { + type Error = io::Error; + + fn try_from(value: &PathBuf) -> Result { + let word_list = std::fs::read_to_string(value)?; + Ok(Self::new(&word_list)) + } +} + /// Integration helper to use any Dictionary trait with optimized batch processing pub fn find_locations_with_dictionary_batch( text: &str, diff --git a/crates/codebook/src/dictionaries/manager.rs b/crates/codebook/src/dictionaries/manager.rs index f6a4728..77a8b2d 100644 --- a/crates/codebook/src/dictionaries/manager.rs +++ b/crates/codebook/src/dictionaries/manager.rs @@ -4,10 +4,13 @@ use std::{ sync::{Arc, RwLock}, }; +use crate::dictionaries::repo::TextRepoLocation; + use super::{ dictionary::{self, TextDictionary}, repo::{DictionaryRepo, HunspellRepo, TextRepo, get_repo}, }; +use codebook_config::CustomDictionariesEntry; use codebook_downloader::Downloader; use dictionary::{Dictionary, HunspellDictionary}; use log::{debug, error}; @@ -25,19 +28,39 @@ impl DictionaryManager { } } - pub fn get_dictionary(&self, id: &str) -> Option> { + pub fn invalidate_cache_entry(&self, id: &str) { + let mut cache = self.dictionary_cache.write().unwrap(); + cache.remove(id); + } + + pub fn get_dictionary( + &self, + id: &str, + custom_dicts_defs: &[CustomDictionariesEntry], + ) -> Option> { { let cache = self.dictionary_cache.read().unwrap(); if let Some(dictionary) = cache.get(id) { return Some(dictionary.clone()); } } - let repo = match get_repo(id) { - Some(r) => r, - None => { + + let repo = if let Some(custom_dict) = custom_dicts_defs.iter().find(|d| d.name == id) { + DictionaryRepo::Text(TextRepo { + name: custom_dict.name.clone(), + text_location: TextRepoLocation::LocalFile( + custom_dict + .resolve_full_path() + .inspect_err(|e| error!("Failed to build local text repo due to: {e}")) + .ok()?, + ), + }) + } else { + let repo = get_repo(id); + if repo.is_none() { debug!("Failed to get repo for dictionary, skipping: {id}"); - return None; } + repo? }; let dictionary: Option> = match repo { @@ -46,13 +69,12 @@ impl DictionaryManager { }; let mut cache = self.dictionary_cache.write().unwrap(); - match dictionary { - Some(d) => { - cache.insert(id.to_string(), d.clone()); - Some(d) - } - None => None, + + if let Some(dictionary) = &dictionary { + cache.insert(id.to_string(), dictionary.clone()); } + + dictionary } fn get_hunspell_dictionary(&self, repo: HunspellRepo) -> Option> { @@ -82,17 +104,26 @@ impl DictionaryManager { } fn get_text_dictionary(&self, repo: TextRepo) -> Option> { - if repo.text.is_some() { - return Some(Arc::new(TextDictionary::new(repo.text.unwrap()))); - } - let text_path = match self.downloader.get(&repo.url.unwrap()) { - Ok(path) => path, - Err(e) => { - error!("Error: {e:?}"); - return None; + const FAILED_TO_READ_DICT_ERR: &'static str = "Failed to read dictionary file"; + + let dict = match repo.text_location { + super::repo::TextRepoLocation::Url(url) => { + let text_path = self + .downloader + .get(&url) + .inspect_err(|e| error!("Error: {e:?}")) + .ok()?; + + TextDictionary::try_from(&text_path) + .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, text_path.display())) + .ok()? } + super::repo::TextRepoLocation::LocalFile(path) => TextDictionary::try_from(&path) + .inspect_err(|_| error!("{}: {}", FAILED_TO_READ_DICT_ERR, path.display())) + .ok()?, + super::repo::TextRepoLocation::Text(text) => TextDictionary::new(text), }; - let dict = TextDictionary::new_from_path(&text_path); + Some(Arc::new(dict)) } } diff --git a/crates/codebook/src/dictionaries/repo.rs b/crates/codebook/src/dictionaries/repo.rs index dd4c2b9..2f25aec 100644 --- a/crates/codebook/src/dictionaries/repo.rs +++ b/crates/codebook/src/dictionaries/repo.rs @@ -1,4 +1,4 @@ -use std::sync::LazyLock; +use std::{path::PathBuf, sync::LazyLock}; static CODEBOOK_DICTIONARY: &str = include_str!("./combined.gen.txt"); @@ -19,18 +19,23 @@ impl HunspellRepo { } } +#[derive(Clone, Debug)] +pub enum TextRepoLocation { + Url(String), + Text(&'static str), + LocalFile(PathBuf), +} + #[derive(Clone, Debug)] pub struct TextRepo { - pub url: Option, - pub text: Option<&'static str>, + pub text_location: TextRepoLocation, pub name: String, } impl TextRepo { - pub fn new(name: &str, url: &str) -> Self { + pub fn new_url_repo(name: &str, url: &str) -> Self { Self { - url: Some(url.to_string()), - text: None, + text_location: TextRepoLocation::Url(url.to_string()), name: name.to_string(), } } @@ -119,24 +124,23 @@ static HUNSPELL_DICTIONARIES: LazyLock> = LazyLock::new(|| { static TEXT_DICTIONARIES: LazyLock> = LazyLock::new(|| { vec![ - TextRepo::new( + TextRepo::new_url_repo( "rust", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/rust/dict/rust.txt", ), - TextRepo::new( + TextRepo::new_url_repo( "software_terms", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/software-terms/dict/softwareTerms.txt", ), - TextRepo::new( + TextRepo::new_url_repo( "computing_acronyms", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/software-terms/dict/computing-acronyms.txt", ), TextRepo { name: "codebook".to_string(), - text: Some(CODEBOOK_DICTIONARY), - url: None, + text_location: TextRepoLocation::Text(CODEBOOK_DICTIONARY), }, - TextRepo::new( + TextRepo::new_url_repo( "csharp", "https://raw.githubusercontent.com/streetsidesoftware/cspell-dicts/refs/heads/main/dictionaries/csharp/csharp.txt", ), diff --git a/crates/codebook/src/errors.rs b/crates/codebook/src/errors.rs new file mode 100644 index 0000000..e2d9c91 --- /dev/null +++ b/crates/codebook/src/errors.rs @@ -0,0 +1,12 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum DictModificationError { + #[error("Failed to write data due to: {0}")] + WriteFailed(#[from] std::io::Error), + #[error("The word '{0}' already exists in the given dictionary")] + WordAlreadyExists(String), + + #[error("The '{0}' dict ID is not present in the configuration")] + UnknownDictID(String), +} diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 2fd2120..e950a14 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -1,13 +1,16 @@ pub mod dictionaries; +pub mod errors; mod logging; pub mod parser; pub mod queries; pub mod regexes; mod splitter; +use crate::errors::DictModificationError; use crate::regexes::get_default_skip_patterns; -use std::path::Path; +use std::io::Write; use std::sync::Arc; +use std::{fs::File, path::Path}; use codebook_config::CodebookConfig; use dictionaries::{dictionary, manager::DictionaryManager}; @@ -103,7 +106,10 @@ impl Codebook { let mut dictionaries = Vec::with_capacity(dictionary_ids.len()); debug!("Checking text with dictionaries: {dictionary_ids:?}"); for dictionary_id in dictionary_ids { - let dictionary = self.manager.get_dictionary(&dictionary_id); + let dictionary = self.manager.get_dictionary( + &dictionary_id, + &self.config.get_custom_dictionaries_definitions(), + ); if let Some(d) = dictionary { dictionaries.push(d); } @@ -138,6 +144,57 @@ impl Codebook { } Some(collect_round_robin(&suggestions, max_results)) } + + pub fn add_word_to_custom_dictionary( + &self, + word: &str, + dict_id: &str, + ) -> Result<(), DictModificationError> { + let custom_dicts_defs = &self.config.get_custom_dictionaries_definitions(); + + let dict = self.manager.get_dictionary(dict_id, custom_dicts_defs); + + if dict.is_none() { + return Err(DictModificationError::UnknownDictID(dict_id.to_string())); + } + let dict = dict.unwrap(); + + if dict.check(word) { + return Err(DictModificationError::WordAlreadyExists(word.to_string())); + } + + if let Some(custom_dict) = custom_dicts_defs + .iter() + .find(|d| d.allow_add_words && d.name == dict_id) + { + // FIXME: I am still unsure where to maintain an dict_id to WatchedFile map, for now I + // am just going to use simple file operations to write to the end of the file. + // Also we should make sure we are writing to a text file, a simple solution would be to + // filter out dict paths based on extensions, so if the path isn't ending with .dict + // or .txt we just toss the update away. + let mut file = File::options() + .read(true) + .append(true) + .create(false) + .open(&custom_dict.resolve_full_path()?)?; + + // FIXME: we should check if the last byte of the dict is a new line and only prepend + // newlines if its missing, I have bigger fish to fry right now + if file.metadata()?.len() == 0 { + write!(file, "{}", word)?; + } else { + write!(file, "\n{}", word)?; + } + } + + Ok(()) + } + + pub fn refresh_custom_dictionary(&self, dict_id: &str) { + self.manager.invalidate_cache_entry(dict_id); + self.manager + .get_dictionary(dict_id, &self.config.get_custom_dictionaries_definitions()); + } } fn collect_round_robin(sources: &[Vec], max_count: usize) -> Vec { diff --git a/crates/codebook/tests/examples/custom_dict.txt b/crates/codebook/tests/examples/custom_dict.txt new file mode 100644 index 0000000..ce7739a --- /dev/null +++ b/crates/codebook/tests/examples/custom_dict.txt @@ -0,0 +1 @@ +mycustomcorrectword diff --git a/crates/codebook/tests/test_custom_dicts.rs b/crates/codebook/tests/test_custom_dicts.rs new file mode 100644 index 0000000..17748c6 --- /dev/null +++ b/crates/codebook/tests/test_custom_dicts.rs @@ -0,0 +1,49 @@ +use codebook::{Codebook, queries::LanguageType}; +use codebook_config::{CodebookConfigMemory, CustomDictionariesEntry}; +use std::sync::Arc; + +mod utils; +use crate::utils::example_file_path; + +const CUSTOM_WORD: &'static str = "mycustomcorrectword"; + +pub fn get_processor(enable_custom_dict: bool) -> Codebook { + let config = Arc::new(CodebookConfigMemory::default()); + + let custom_dict_name = "my_dict"; + let custom_dict_path = example_file_path("custom_dict.txt"); + let custom_dict = CustomDictionariesEntry { + name: custom_dict_name.to_owned(), + path: custom_dict_path, + ..Default::default() + }; + config.add_custom_dict(custom_dict); + + if enable_custom_dict { + config.add_dict_id(&custom_dict_name); + } + + Codebook::new(config.clone()).unwrap() +} + +#[test] +fn test_custom_dict_unused_if_not_added_to_dicts() { + let processor = get_processor(false); + let misspelled = processor + .spell_check(CUSTOM_WORD, Some(LanguageType::Text), None) + .to_vec(); + + assert_eq!(misspelled[0].word, CUSTOM_WORD); +} + +#[test] +fn test_custom_dict_used_if_added_to_dicts() { + let processor = get_processor(true); + + let misspelled = processor + .spell_check(CUSTOM_WORD, Some(LanguageType::Text), None) + .to_vec(); + + // active custom dict + assert!(misspelled.is_empty()); +} diff --git a/crates/codebook/tests/test_files.rs b/crates/codebook/tests/test_files.rs index 8ac409f..ec4174c 100644 --- a/crates/codebook/tests/test_files.rs +++ b/crates/codebook/tests/test_files.rs @@ -3,12 +3,9 @@ use codebook::{ queries::LanguageType, }; -mod utils; +use crate::utils::example_file_path; -fn example_file_path(file: &str) -> String { - // get root of the project through CARGO_MANIFEST_DIR - format!("tests/examples/{file}") -} +mod utils; #[test] fn test_ignore_file() { diff --git a/crates/codebook/tests/utils/mod.rs b/crates/codebook/tests/utils/mod.rs index d86efce..359ae83 100644 --- a/crates/codebook/tests/utils/mod.rs +++ b/crates/codebook/tests/utils/mod.rs @@ -15,3 +15,8 @@ pub fn get_processor() -> Codebook { pub fn init_logging() { let _ = env_logger::builder().is_test(true).try_init(); } + +pub fn example_file_path(file: &str) -> String { + // get root of the project through CARGO_MANIFEST_DIR + format!("tests/examples/{file}") +}