From 4f7e36cb261a3b244f63678743c59d57818fc3a4 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Wed, 19 Nov 2025 23:58:57 +0800 Subject: [PATCH 01/12] refactor(cli): unify storage configuration for export command - Utilize ObjectStoreConfig to unify storage configuration for export command - Support export command for Fs, S3, OSS, GCS and Azblob - Fix the Display implementation for SecretString always returned the string "SecretString([REDACTED])" even when the internal secret was empty. Signed-off-by: McKnight22 --- Cargo.lock | 1 + src/cli/Cargo.toml | 1 + src/cli/src/common.rs | 5 +- src/cli/src/common/object_store.rs | 231 +++++++++-- src/cli/src/data.rs | 1 + src/cli/src/data/export.rs | 609 +++++++++++++---------------- src/cli/src/data/storage_export.rs | 489 +++++++++++++++++++++++ src/cli/src/error.rs | 12 +- src/common/base/src/secrets.rs | 6 +- 9 files changed, 966 insertions(+), 389 deletions(-) create mode 100644 src/cli/src/data/storage_export.rs diff --git a/Cargo.lock b/Cargo.lock index bdf431f7ada9..dc411ecca3f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1786,6 +1786,7 @@ dependencies = [ "common-recordbatch", "common-runtime", "common-telemetry", + "common-test-util", "common-time", "common-version", "common-wal", diff --git a/src/cli/Cargo.toml b/src/cli/Cargo.toml index 3c3e91c403a8..802010a8d49a 100644 --- a/src/cli/Cargo.toml +++ b/src/cli/Cargo.toml @@ -67,6 +67,7 @@ tracing-appender.workspace = true [dev-dependencies] common-meta = { workspace = true, features = ["testing"] } +common-test-util.workspace = true common-version.workspace = true serde.workspace = true tempfile.workspace = true diff --git a/src/cli/src/common.rs b/src/cli/src/common.rs index 1ad80db942e9..d6dc461b8986 100644 --- a/src/cli/src/common.rs +++ b/src/cli/src/common.rs @@ -15,5 +15,8 @@ mod object_store; mod store; -pub use object_store::{ObjectStoreConfig, new_fs_object_store}; +pub use object_store::{ + ObjectStoreConfig, PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, + PrefixedS3Connection, new_fs_object_store, +}; pub use store::StoreConfig; diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index 601d17145e3b..0ab3dfa89668 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -70,6 +70,38 @@ wrap_with_clap_prefix! { } } +impl PrefixedAzblobConnection { + /// Get the container name. + pub fn container(&self) -> &str { + &self.azblob_container + } + + /// Get the root path. + pub fn root(&self) -> &str { + &self.azblob_root + } + + /// Get the account name. + pub fn account_name(&self) -> &SecretString { + &self.azblob_account_name + } + + /// Get the account key. + pub fn account_key(&self) -> &SecretString { + &self.azblob_account_key + } + + /// Get the endpoint. + pub fn endpoint(&self) -> &str { + &self.azblob_endpoint + } + + /// Get the SAS token. + pub fn sas_token(&self) -> Option<&String> { + self.azblob_sas_token.as_ref() + } +} + wrap_with_clap_prefix! { PrefixedS3Connection, "s3-", @@ -92,6 +124,43 @@ wrap_with_clap_prefix! { } } +impl PrefixedS3Connection { + /// Get the bucket name. + pub fn bucket(&self) -> &str { + &self.s3_bucket + } + + /// Get the root path. + pub fn root(&self) -> &str { + &self.s3_root + } + + /// Get the access key ID. + pub fn access_key_id(&self) -> &SecretString { + &self.s3_access_key_id + } + + /// Get the secret access key. + pub fn secret_access_key(&self) -> &SecretString { + &self.s3_secret_access_key + } + + /// Get the endpoint. 
+ pub fn endpoint(&self) -> Option<&String> { + self.s3_endpoint.as_ref() + } + + /// Get the region. + pub fn region(&self) -> Option<&String> { + self.s3_region.as_ref() + } + + /// Check if virtual host style is enabled. + pub fn enable_virtual_host_style(&self) -> bool { + self.s3_enable_virtual_host_style + } +} + wrap_with_clap_prefix! { PrefixedOssConnection, "oss-", @@ -110,6 +179,33 @@ wrap_with_clap_prefix! { } } +impl PrefixedOssConnection { + /// Get the bucket name. + pub fn bucket(&self) -> &str { + &self.oss_bucket + } + + /// Get the root path. + pub fn root(&self) -> &str { + &self.oss_root + } + + /// Get the access key ID. + pub fn access_key_id(&self) -> &SecretString { + &self.oss_access_key_id + } + + /// Get the access key secret. + pub fn access_key_secret(&self) -> &SecretString { + &self.oss_access_key_secret + } + + /// Get the endpoint. + pub fn endpoint(&self) -> &str { + &self.oss_endpoint + } +} + wrap_with_clap_prefix! { PrefixedGcsConnection, "gcs-", @@ -130,6 +226,38 @@ wrap_with_clap_prefix! { } } +impl PrefixedGcsConnection { + /// Get the bucket name. + pub fn bucket(&self) -> &str { + &self.gcs_bucket + } + + /// Get the root path. + pub fn root(&self) -> &str { + &self.gcs_root + } + + /// Get the scope. + pub fn scope(&self) -> &str { + &self.gcs_scope + } + + /// Get the credential path. + pub fn credential_path(&self) -> &SecretString { + &self.gcs_credential_path + } + + /// Get the credential. + pub fn credential(&self) -> &SecretString { + &self.gcs_credential + } + + /// Get the endpoint. + pub fn endpoint(&self) -> &str { + &self.gcs_endpoint + } +} + /// common config for object store. #[derive(clap::Parser, Debug, Clone, PartialEq, Default)] pub struct ObjectStoreConfig { @@ -174,51 +302,74 @@ pub fn new_fs_object_store(root: &str) -> std::result::Result Result { + let s3 = S3Connection::from(self.s3.clone()); + common_telemetry::info!("Building object store with s3: {:?}", s3); + let object_store = ObjectStore::new(S3::from(&s3)) + .context(error::InitBackendSnafu) + .map_err(BoxedError::new)? + .finish(); + Ok(with_instrument_layers( + with_retry_layers(object_store), + false, + )) + } + + /// Builds the object store with OSS. + pub fn build_oss(&self) -> Result { + let oss = OssConnection::from(self.oss.clone()); + common_telemetry::info!("Building object store with oss: {:?}", oss); + let object_store = ObjectStore::new(Oss::from(&oss)) + .context(error::InitBackendSnafu) + .map_err(BoxedError::new)? + .finish(); + Ok(with_instrument_layers( + with_retry_layers(object_store), + false, + )) + } + + /// Builds the object store with GCS. + pub fn build_gcs(&self) -> Result { + let gcs = GcsConnection::from(self.gcs.clone()); + common_telemetry::info!("Building object store with gcs: {:?}", gcs); + let object_store = ObjectStore::new(Gcs::from(&gcs)) + .context(error::InitBackendSnafu) + .map_err(BoxedError::new)? + .finish(); + Ok(with_instrument_layers( + with_retry_layers(object_store), + false, + )) + } + + /// Builds the object store with Azure Blob. + pub fn build_azblob(&self) -> Result { + let azblob = AzblobConnection::from(self.azblob.clone()); + common_telemetry::info!("Building object store with azblob: {:?}", azblob); + let object_store = ObjectStore::new(Azblob::from(&azblob)) + .context(error::InitBackendSnafu) + .map_err(BoxedError::new)? + .finish(); + Ok(with_instrument_layers( + with_retry_layers(object_store), + false, + )) + } + /// Builds the object store from the config. 
pub fn build(&self) -> Result, BoxedError> { - let object_store = if self.enable_s3 { - let s3 = S3Connection::from(self.s3.clone()); - common_telemetry::info!("Building object store with s3: {:?}", s3); - Some( - ObjectStore::new(S3::from(&s3)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(), - ) + if self.enable_s3 { + self.build_s3().map(Some) } else if self.enable_oss { - let oss = OssConnection::from(self.oss.clone()); - common_telemetry::info!("Building object store with oss: {:?}", oss); - Some( - ObjectStore::new(Oss::from(&oss)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(), - ) + self.build_oss().map(Some) } else if self.enable_gcs { - let gcs = GcsConnection::from(self.gcs.clone()); - common_telemetry::info!("Building object store with gcs: {:?}", gcs); - Some( - ObjectStore::new(Gcs::from(&gcs)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(), - ) + self.build_gcs().map(Some) } else if self.enable_azblob { - let azblob = AzblobConnection::from(self.azblob.clone()); - common_telemetry::info!("Building object store with azblob: {:?}", azblob); - Some( - ObjectStore::new(Azblob::from(&azblob)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(), - ) + self.build_azblob().map(Some) } else { - None - }; - - let object_store = object_store - .map(|object_store| with_instrument_layers(with_retry_layers(object_store), false)); - - Ok(object_store) + Ok(None) + } } } diff --git a/src/cli/src/data.rs b/src/cli/src/data.rs index be623f63a202..5966040a3ba1 100644 --- a/src/cli/src/data.rs +++ b/src/cli/src/data.rs @@ -14,6 +14,7 @@ mod export; mod import; +mod storage_export; use clap::Subcommand; use client::DEFAULT_CATALOG_NAME; diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index 007f8aa67c3c..00ecbb18ce42 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -13,28 +13,27 @@ // limitations under the License. use std::collections::HashSet; -use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; use clap::{Parser, ValueEnum}; -use common_base::secrets::{ExposeSecret, SecretString}; use common_error::ext::BoxedError; use common_telemetry::{debug, error, info}; -use object_store::layers::LoggingLayer; -use object_store::services::Oss; -use object_store::{ObjectStore, services}; +use object_store::ObjectStore; use serde_json::Value; use snafu::{OptionExt, ResultExt}; use tokio::sync::Semaphore; use tokio::time::Instant; +use crate::common::{ObjectStoreConfig, new_fs_object_store}; +use crate::data::storage_export::{ + AzblobBackend, FsBackend, GcsBackend, OssBackend, S3Backend, StorageType, +}; use crate::data::{COPY_PATH_PLACEHOLDER, default_database}; use crate::database::{DatabaseClient, parse_proxy_opts}; use crate::error::{ - EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu, - SchemaNotFoundSnafu, + EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, SchemaNotFoundSnafu, }; use crate::{Tool, database}; @@ -118,11 +117,7 @@ pub struct ExportCommand { #[clap(long)] no_proxy: bool, - /// if export data to s3 - #[clap(long)] - s3: bool, - - /// if both `ddl_local_dir` and remote storage (s3/oss) are set, `ddl_local_dir` will be only used for + /// if both `ddl_local_dir` and remote storage are set, `ddl_local_dir` will be only used for /// exported SQL files, and the data will be exported to remote storage. 
/// /// Note that `ddl_local_dir` export sql files to **LOCAL** file system, this is useful if export client don't have @@ -132,75 +127,42 @@ pub struct ExportCommand { #[clap(long)] ddl_local_dir: Option, - /// The s3 bucket name - /// if s3 is set, this is required - #[clap(long)] - s3_bucket: Option, - - // The s3 root path - /// if s3 is set, this is required - #[clap(long)] - s3_root: Option, - - /// The s3 endpoint - /// if s3 is set, this is required - #[clap(long)] - s3_endpoint: Option, - - /// The s3 access key - /// if s3 is set, this is required - #[clap(long)] - s3_access_key: Option, - - /// The s3 secret key - /// if s3 is set, this is required - #[clap(long)] - s3_secret_key: Option, - - /// The s3 region - /// if s3 is set, this is required - #[clap(long)] - s3_region: Option, - - /// if export data to oss - #[clap(long)] - oss: bool, - - /// The oss bucket name - /// if oss is set, this is required - #[clap(long)] - oss_bucket: Option, - - /// The oss endpoint - /// if oss is set, this is required - #[clap(long)] - oss_endpoint: Option, - - /// The oss access key id - /// if oss is set, this is required - #[clap(long)] - oss_access_key_id: Option, - - /// The oss access key secret - /// if oss is set, this is required - #[clap(long)] - oss_access_key_secret: Option, + #[clap(flatten)] + storage: ObjectStoreConfig, } impl ExportCommand { pub async fn build(&self) -> std::result::Result, BoxedError> { - if self.s3 - && (self.s3_bucket.is_none() - || self.s3_endpoint.is_none() - || self.s3_access_key.is_none() - || self.s3_secret_key.is_none() - || self.s3_region.is_none()) - { - return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build())); - } - if !self.s3 && !self.oss && self.output_dir.is_none() { + // Determine storage type + let (storage_type, operator) = if self.storage.enable_s3 { + ( + StorageType::S3(S3Backend::new(self.storage.s3.clone())?), + self.storage.build_s3()?, + ) + } else if self.storage.enable_oss { + ( + StorageType::Oss(OssBackend::new(self.storage.oss.clone())?), + self.storage.build_oss()?, + ) + } else if self.storage.enable_gcs { + ( + StorageType::Gcs(GcsBackend::new(self.storage.gcs.clone())?), + self.storage.build_gcs()?, + ) + } else if self.storage.enable_azblob { + ( + StorageType::Azblob(AzblobBackend::new(self.storage.azblob.clone())?), + self.storage.build_azblob()?, + ) + } else if let Some(output_dir) = &self.output_dir { + ( + StorageType::Fs(FsBackend::new(output_dir.clone())), + new_fs_object_store(output_dir)?, + ) + } else { return Err(BoxedError::new(OutputDirNotSetSnafu {}.build())); - } + }; + let (catalog, schema) = database::split_database(&self.database).map_err(BoxedError::new)?; let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?; @@ -217,39 +179,14 @@ impl ExportCommand { catalog, schema, database_client, - output_dir: self.output_dir.clone(), export_jobs: self.db_parallelism, target: self.target.clone(), start_time: self.start_time.clone(), end_time: self.end_time.clone(), parallelism: self.table_parallelism, - s3: self.s3, + storage_type, ddl_local_dir: self.ddl_local_dir.clone(), - s3_bucket: self.s3_bucket.clone(), - s3_root: self.s3_root.clone(), - s3_endpoint: self.s3_endpoint.clone(), - // Wrap sensitive values in SecretString - s3_access_key: self - .s3_access_key - .as_ref() - .map(|k| SecretString::from(k.clone())), - s3_secret_key: self - .s3_secret_key - .as_ref() - .map(|k| SecretString::from(k.clone())), - s3_region: self.s3_region.clone(), - oss: self.oss, - oss_bucket: self.oss_bucket.clone(), - 
oss_endpoint: self.oss_endpoint.clone(), - // Wrap sensitive values in SecretString - oss_access_key_id: self - .oss_access_key_id - .as_ref() - .map(|k| SecretString::from(k.clone())), - oss_access_key_secret: self - .oss_access_key_secret - .as_ref() - .map(|k| SecretString::from(k.clone())), + operator, })) } } @@ -259,40 +196,17 @@ pub struct Export { catalog: String, schema: Option, database_client: DatabaseClient, - output_dir: Option, export_jobs: usize, target: ExportTarget, start_time: Option, end_time: Option, parallelism: usize, - s3: bool, + storage_type: StorageType, ddl_local_dir: Option, - s3_bucket: Option, - s3_root: Option, - s3_endpoint: Option, - // Changed to SecretString for sensitive data - s3_access_key: Option, - s3_secret_key: Option, - s3_region: Option, - oss: bool, - oss_bucket: Option, - oss_endpoint: Option, - // Changed to SecretString for sensitive data - oss_access_key_id: Option, - oss_access_key_secret: Option, + operator: ObjectStore, } impl Export { - fn catalog_path(&self) -> PathBuf { - if self.s3 || self.oss { - PathBuf::from(&self.catalog) - } else if let Some(dir) = &self.output_dir { - PathBuf::from(dir).join(&self.catalog) - } else { - unreachable!("catalog_path: output_dir must be set when not using remote storage") - } - } - async fn get_db_names(&self) -> Result> { let db_names = self.all_db_names().await?; let Some(schema) = &self.schema else { @@ -462,7 +376,8 @@ impl Export { "Exported {}.{} database creation SQL to {}", self.catalog, schema, - self.format_output_path(&file_path) + self.storage_type + .format_output_path(&self.catalog, &file_path) ); } @@ -491,7 +406,7 @@ impl Export { .await?; // Create directory if needed for file system storage - if !export_self.s3 && !export_self.oss { + if !export_self.storage_type.is_remote_storage() { let db_dir = format!("{}/{}/", export_self.catalog, schema); operator.create_dir(&db_dir).await.context(OpenDalSnafu)?; } @@ -520,7 +435,9 @@ impl Export { "Finished exporting {}.{schema} with {} table schemas to path: {}", export_self.catalog, metric_physical_tables.len() + remaining_tables.len() + views.len(), - export_self.format_output_path(&file_path) + export_self + .storage_type + .format_output_path(&export_self.catalog, &file_path) ); Ok::<(), Error>(()) @@ -535,102 +452,21 @@ impl Export { } async fn build_operator(&self) -> Result { - if self.s3 { - self.build_s3_operator().await - } else if self.oss { - self.build_oss_operator().await - } else { - self.build_fs_operator().await - } + Ok(self.operator.clone()) } /// build operator with preference for file system async fn build_prefer_fs_operator(&self) -> Result { - if (self.s3 || self.oss) && self.ddl_local_dir.is_some() { + if self.storage_type.is_remote_storage() && self.ddl_local_dir.is_some() { let root = self.ddl_local_dir.as_ref().unwrap().clone(); - let op = ObjectStore::new(services::Fs::default().root(&root)) - .context(OpenDalSnafu)? 
- .layer(LoggingLayer::default()) - .finish(); + let op = new_fs_object_store(&root).map_err(|e| Error::Other { + source: e, + location: snafu::location!(), + })?; Ok(op) - } else if self.s3 { - self.build_s3_operator().await - } else if self.oss { - self.build_oss_operator().await } else { - self.build_fs_operator().await - } - } - - async fn build_s3_operator(&self) -> Result { - let mut builder = services::S3::default().bucket( - self.s3_bucket - .as_ref() - .expect("s3_bucket must be provided when s3 is enabled"), - ); - - if let Some(root) = self.s3_root.as_ref() { - builder = builder.root(root); + Ok(self.operator.clone()) } - - if let Some(endpoint) = self.s3_endpoint.as_ref() { - builder = builder.endpoint(endpoint); - } - - if let Some(region) = self.s3_region.as_ref() { - builder = builder.region(region); - } - - if let Some(key_id) = self.s3_access_key.as_ref() { - builder = builder.access_key_id(key_id.expose_secret()); - } - - if let Some(secret_key) = self.s3_secret_key.as_ref() { - builder = builder.secret_access_key(secret_key.expose_secret()); - } - - let op = ObjectStore::new(builder) - .context(OpenDalSnafu)? - .layer(LoggingLayer::default()) - .finish(); - Ok(op) - } - - async fn build_oss_operator(&self) -> Result { - let mut builder = Oss::default() - .bucket(self.oss_bucket.as_ref().expect("oss_bucket must be set")) - .endpoint( - self.oss_endpoint - .as_ref() - .expect("oss_endpoint must be set"), - ); - - // Use expose_secret() to access the actual secret value - if let Some(key_id) = self.oss_access_key_id.as_ref() { - builder = builder.access_key_id(key_id.expose_secret()); - } - if let Some(secret_key) = self.oss_access_key_secret.as_ref() { - builder = builder.access_key_secret(secret_key.expose_secret()); - } - - let op = ObjectStore::new(builder) - .context(OpenDalSnafu)? - .layer(LoggingLayer::default()) - .finish(); - Ok(op) - } - - async fn build_fs_operator(&self) -> Result { - let root = self - .output_dir - .as_ref() - .context(OutputDirNotSetSnafu)? - .clone(); - let op = ObjectStore::new(services::Fs::default().root(&root)) - .context(OpenDalSnafu)? 
- .layer(LoggingLayer::default()) - .finish(); - Ok(op) } async fn export_database_data(&self) -> Result<()> { @@ -654,12 +490,14 @@ impl Export { let _permit = semaphore_moved.acquire().await.unwrap(); // Create directory if not using remote storage - if !export_self.s3 && !export_self.oss { + if !export_self.storage_type.is_remote_storage() { let db_dir = format!("{}/{}/", export_self.catalog, schema); operator.create_dir(&db_dir).await.context(OpenDalSnafu)?; } - let (path, connection_part) = export_self.get_storage_params(&schema); + let (path, connection_part) = export_self + .storage_type + .get_storage_path(&export_self.catalog, &schema); // Execute COPY DATABASE TO command let sql = format!( @@ -668,7 +506,7 @@ impl Export { ); // Log SQL command but mask sensitive information - let safe_sql = export_self.mask_sensitive_sql(&sql); + let safe_sql = export_self.storage_type.mask_sensitive_info(&sql); info!("Executing sql: {}", safe_sql); export_self.database_client.sql_in_public(&sql).await?; @@ -712,7 +550,9 @@ impl Export { "Finished exporting {}.{} copy_from.sql to {}", export_self.catalog, schema, - export_self.format_output_path(©_from_path) + export_self + .storage_type + .format_output_path(&export_self.catalog, ©_from_path) ); Ok::<(), Error>(()) @@ -726,61 +566,10 @@ impl Export { Ok(()) } - /// Mask sensitive information in SQL commands for safe logging - fn mask_sensitive_sql(&self, sql: &str) -> String { - let mut masked_sql = sql.to_string(); - - // Mask S3 credentials - if let Some(access_key) = &self.s3_access_key { - masked_sql = masked_sql.replace(access_key.expose_secret(), "[REDACTED]"); - } - if let Some(secret_key) = &self.s3_secret_key { - masked_sql = masked_sql.replace(secret_key.expose_secret(), "[REDACTED]"); - } - - // Mask OSS credentials - if let Some(access_key_id) = &self.oss_access_key_id { - masked_sql = masked_sql.replace(access_key_id.expose_secret(), "[REDACTED]"); - } - if let Some(access_key_secret) = &self.oss_access_key_secret { - masked_sql = masked_sql.replace(access_key_secret.expose_secret(), "[REDACTED]"); - } - - masked_sql - } - fn get_file_path(&self, schema: &str, file_name: &str) -> String { format!("{}/{}/{}", self.catalog, schema, file_name) } - fn format_output_path(&self, file_path: &str) -> String { - if self.s3 { - format!( - "s3://{}{}/{}", - self.s3_bucket.as_ref().unwrap_or(&String::new()), - if let Some(root) = &self.s3_root { - format!("/{}", root) - } else { - String::new() - }, - file_path - ) - } else if self.oss { - format!( - "oss://{}/{}/{}", - self.oss_bucket.as_ref().unwrap_or(&String::new()), - self.catalog, - file_path - ) - } else { - format!( - "{}/{}", - self.output_dir.as_ref().unwrap_or(&String::new()), - file_path - ) - } - } - async fn write_to_storage( &self, op: &ObjectStore, @@ -793,70 +582,6 @@ impl Export { .map(|_| ()) } - fn get_storage_params(&self, schema: &str) -> (String, String) { - if self.s3 { - let s3_path = format!( - "s3://{}{}/{}/{}/", - // Safety: s3_bucket is required when s3 is enabled - self.s3_bucket.as_ref().unwrap(), - if let Some(root) = &self.s3_root { - format!("/{}", root) - } else { - String::new() - }, - self.catalog, - schema - ); - - // endpoint is optional - let endpoint_option = if let Some(endpoint) = self.s3_endpoint.as_ref() { - format!(", ENDPOINT='{}'", endpoint) - } else { - String::new() - }; - - // Safety: All s3 options are required - // Use expose_secret() to access the actual secret values - let connection_options = format!( - "ACCESS_KEY_ID='{}', 
SECRET_ACCESS_KEY='{}', REGION='{}'{}", - self.s3_access_key.as_ref().unwrap().expose_secret(), - self.s3_secret_key.as_ref().unwrap().expose_secret(), - self.s3_region.as_ref().unwrap(), - endpoint_option - ); - - (s3_path, format!(" CONNECTION ({})", connection_options)) - } else if self.oss { - let oss_path = format!( - "oss://{}/{}/{}/", - self.oss_bucket.as_ref().unwrap(), - self.catalog, - schema - ); - let endpoint_option = if let Some(endpoint) = self.oss_endpoint.as_ref() { - format!(", ENDPOINT='{}'", endpoint) - } else { - String::new() - }; - - let connection_options = format!( - "ACCESS_KEY_ID='{}', ACCESS_KEY_SECRET='{}'{}", - self.oss_access_key_id.as_ref().unwrap().expose_secret(), - self.oss_access_key_secret.as_ref().unwrap().expose_secret(), - endpoint_option - ); - (oss_path, format!(" CONNECTION ({})", connection_options)) - } else { - ( - self.catalog_path() - .join(format!("{schema}/")) - .to_string_lossy() - .to_string(), - String::new(), - ) - } - } - async fn execute_tasks( &self, tasks: Vec>>, @@ -913,3 +638,211 @@ fn build_with_options( options.push(format!("parallelism = {}", parallelism)); options.join(", ") } + +#[cfg(test)] +mod tests { + use clap::Parser; + use common_test_util::temp_dir::create_temp_dir; + + use super::*; + + #[tokio::test] + async fn test_export_command_build_with_local_fs() { + let temp_dir = create_temp_dir("test_export_local_fs"); + let output_dir = temp_dir.path().to_str().unwrap(); + + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--output-dir", + output_dir, + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_s3_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--s3", + "--s3-bucket", + "test-bucket", + "--s3-access-key-id", + "test-key", + "--s3-secret-access-key", + "test-secret", + "--s3-region", + "us-west-2", + "--s3-endpoint", + "https://s3.amazonaws.com", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_s3_missing_config() { + // Missing bucket + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--s3", + "--s3-access-key-id", + "test-key", + "--s3-secret-access-key", + "test-secret", + "--s3-region", + "us-west-2", + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + match result { + Ok(_) => panic!("Expected error"), + Err(e) => assert!(e.to_string().contains("S3 bucket must be set")), + } + } + + #[tokio::test] + async fn test_export_command_build_with_oss_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--oss", + "--oss-bucket", + "test-bucket", + "--oss-access-key-id", + "test-key-id", + "--oss-access-key-secret", + "test-secret", + "--oss-endpoint", + "https://oss.example.com", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_oss_missing_config() { + // Missing endpoint + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--oss", + "--oss-bucket", + "test-bucket", + "--oss-access-key-id", + "test-key-id", + "--oss-access-key-secret", + "test-secret", + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + match result { + Ok(_) => panic!("Expected error"), + Err(e) => assert!(e.to_string().contains("OSS endpoint must be set")), + } + } 
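
    // A minimal sketch of the fallback path: with no storage backend flag and
    // no `--output-dir`, `build` should fall through every storage branch and
    // fail. Asserting on "Output directory not set" assumes the snafu display
    // string of the `OutputDirNotSet` error.
    #[tokio::test]
    async fn test_export_command_build_without_storage_or_output_dir() {
        let cmd = ExportCommand::parse_from(["export", "--addr", "127.0.0.1:4000"]);

        let result = cmd.build().await;
        assert!(result.is_err());
        match result {
            Ok(_) => panic!("Expected error"),
            Err(e) => assert!(e.to_string().contains("Output directory not set")),
        }
    }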
+ + #[tokio::test] + async fn test_export_command_build_with_gcs_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs", + "--gcs-bucket", + "test-bucket", + "--gcs-credential-path", + "/path/to/credential", + "--gcs-endpoint", + "https://storage.googleapis.com", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_gcs_missing_config() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs", + "--gcs-bucket", + "test-bucket", + // Missing credentials + ]); + + let result = cmd.build().await; + let err = match result { + Ok(_) => panic!("Expected error but got Ok"), + Err(e) => e, + }; + assert!( + err.to_string() + .contains("GCS credential path or credential must be set"), + "Unexpected error message: {}", + err + ); + } + + #[tokio::test] + async fn test_export_command_build_with_azblob_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-account-name", + "test-account", + "--azblob-account-key", + "test-key", + "--azblob-endpoint", + "https://account.blob.core.windows.net", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_azblob_missing_config() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-account-name", + "test-account", + // Missing account key + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + match result { + Ok(_) => panic!("Expected error"), + Err(e) => assert!( + e.to_string().contains("Azure Blob account key must be set"), + "Unexpected error message: {}", + e + ), + } + } +} diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs new file mode 100644 index 000000000000..e982881bf3c6 --- /dev/null +++ b/src/cli/src/data/storage_export.rs @@ -0,0 +1,489 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::path::PathBuf; + +use async_trait::async_trait; +use common_base::secrets::ExposeSecret; +use common_error::ext::BoxedError; + +use crate::common::{ + PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection, +}; +use crate::error; + +/// Trait for storage backends that can be used for data export. +#[async_trait] +pub trait StorageExport: Send + Sync { + /// Generate the storage path for COPY DATABASE command. + /// Returns (path, connection_string) where connection_string includes CONNECTION clause. + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String); + + /// Format the output path for logging purposes. + fn format_output_path(&self, catalog: &str, file_path: &str) -> String; + + /// Mask sensitive information in SQL commands for safe logging. 
+ fn mask_sensitive_info(&self, sql: &str) -> String; +} + +/// Local file system storage backend. +#[derive(Clone)] +pub struct FsBackend { + output_dir: String, +} + +impl FsBackend { + pub fn new(output_dir: String) -> Self { + Self { output_dir } + } +} + +#[async_trait] +impl StorageExport for FsBackend { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + if self.output_dir.is_empty() { + unreachable!("output_dir must be set when not using remote storage") + } + let path = PathBuf::from(&self.output_dir) + .join(catalog) + .join(format!("{schema}/")) + .to_string_lossy() + .to_string(); + (path, String::new()) + } + + fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { + format!("{}/{}", self.output_dir, file_path) + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + sql.to_string() + } +} + +/// S3 storage backend. +#[derive(Clone)] +pub struct S3Backend { + config: PrefixedS3Connection, +} + +impl S3Backend { + pub fn new(config: PrefixedS3Connection) -> Result { + // Validate required fields + if config.bucket().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "S3 bucket must be set when --s3 is enabled", + } + .build(), + )); + } + if config.access_key_id().expose_secret().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "S3 access key ID must be set when --s3 is enabled", + } + .build(), + )); + } + if config.secret_access_key().expose_secret().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "S3 secret access key must be set when --s3 is enabled", + } + .build(), + )); + } + Ok(Self { config }) + } +} + +#[async_trait] +impl StorageExport for S3Backend { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + let bucket = self.config.bucket(); + let root = if self.config.root().is_empty() { + String::new() + } else { + format!("/{}", self.config.root()) + }; + + let s3_path = format!("s3://{}{}/{}/{}/", bucket, root, catalog, schema); + + let mut connection_options = vec![ + format!( + "ACCESS_KEY_ID='{}'", + self.config.access_key_id().expose_secret() + ), + format!( + "SECRET_ACCESS_KEY='{}'", + self.config.secret_access_key().expose_secret() + ), + ]; + + if let Some(region) = self.config.region() { + connection_options.push(format!("REGION='{}'", region)); + } + + if let Some(endpoint) = self.config.endpoint() { + connection_options.push(format!("ENDPOINT='{}'", endpoint)); + } + + let connection_str = format!(" CONNECTION ({})", connection_options.join(", ")); + (s3_path, connection_str) + } + + fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { + let bucket = self.config.bucket(); + let root = if self.config.root().is_empty() { + String::new() + } else { + format!("/{}", self.config.root()) + }; + format!("s3://{}{}/{}", bucket, root, file_path) + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + let mut masked = sql.to_string(); + masked = masked.replace(self.config.access_key_id().expose_secret(), "[REDACTED]"); + masked = masked.replace( + self.config.secret_access_key().expose_secret(), + "[REDACTED]", + ); + masked + } +} + +/// OSS storage backend. 
+#[derive(Clone)] +pub struct OssBackend { + config: PrefixedOssConnection, +} + +impl OssBackend { + pub fn new(config: PrefixedOssConnection) -> Result { + // Validate required fields + if config.bucket().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "OSS bucket must be set when --oss is enabled", + } + .build(), + )); + } + if config.endpoint().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "OSS endpoint must be set when --oss is enabled", + } + .build(), + )); + } + if config.access_key_id().expose_secret().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "OSS access key ID must be set when --oss is enabled", + } + .build(), + )); + } + if config.access_key_secret().expose_secret().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "OSS access key secret must be set when --oss is enabled", + } + .build(), + )); + } + Ok(Self { config }) + } +} + +#[async_trait] +impl StorageExport for OssBackend { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + let bucket = self.config.bucket(); + let oss_path = format!("oss://{}/{}/{}/", bucket, catalog, schema); + + let mut connection_options = vec![ + format!( + "ACCESS_KEY_ID='{}'", + self.config.access_key_id().expose_secret() + ), + format!( + "ACCESS_KEY_SECRET='{}'", + self.config.access_key_secret().expose_secret() + ), + ]; + + if !self.config.endpoint().is_empty() { + connection_options.push(format!("ENDPOINT='{}'", self.config.endpoint())); + } + + let connection_str = format!(" CONNECTION ({})", connection_options.join(", ")); + (oss_path, connection_str) + } + + fn format_output_path(&self, catalog: &str, file_path: &str) -> String { + let bucket = self.config.bucket(); + format!("oss://{}/{}/{}", bucket, catalog, file_path) + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + let mut masked = sql.to_string(); + masked = masked.replace(self.config.access_key_id().expose_secret(), "[REDACTED]"); + masked = masked.replace( + self.config.access_key_secret().expose_secret(), + "[REDACTED]", + ); + masked + } +} + +/// GCS storage backend. 
+#[derive(Clone)] +pub struct GcsBackend { + config: PrefixedGcsConnection, +} + +impl GcsBackend { + pub fn new(config: PrefixedGcsConnection) -> Result { + // Validate required fields + if config.bucket().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "GCS bucket must be set when --gcs is enabled", + } + .build(), + )); + } + // At least one of credential_path or credential must be set + if config.credential_path().expose_secret().is_empty() + && config.credential().expose_secret().is_empty() + { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "GCS credential path or credential must be set when --gcs is enabled", + } + .build(), + )); + } + Ok(Self { config }) + } +} + +#[async_trait] +impl StorageExport for GcsBackend { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + let bucket = self.config.bucket(); + let root = if self.config.root().is_empty() { + String::new() + } else { + format!("/{}", self.config.root()) + }; + + let gcs_path = format!("gcs://{}{}/{}/{}/", bucket, root, catalog, schema); + + let mut connection_options = Vec::new(); + + if !self.config.credential_path().expose_secret().is_empty() { + connection_options.push(format!( + "CREDENTIAL_PATH='{}'", + self.config.credential_path().expose_secret() + )); + } + + if !self.config.credential().expose_secret().is_empty() { + connection_options.push(format!( + "CREDENTIAL='{}'", + self.config.credential().expose_secret() + )); + } + + if !self.config.endpoint().is_empty() { + connection_options.push(format!("ENDPOINT='{}'", self.config.endpoint())); + } + + let connection_str = if connection_options.is_empty() { + String::new() + } else { + format!(" CONNECTION ({})", connection_options.join(", ")) + }; + + (gcs_path, connection_str) + } + + fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { + let bucket = self.config.bucket(); + let root = if self.config.root().is_empty() { + String::new() + } else { + format!("/{}", self.config.root()) + }; + format!("gcs://{}{}/{}", bucket, root, file_path) + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + let mut masked = sql.to_string(); + if !self.config.credential_path().expose_secret().is_empty() { + masked = masked.replace(self.config.credential_path().expose_secret(), "[REDACTED]"); + } + if !self.config.credential().expose_secret().is_empty() { + masked = masked.replace(self.config.credential().expose_secret(), "[REDACTED]"); + } + masked + } +} + +/// Azure Blob storage backend. 
+#[derive(Clone)] +pub struct AzblobBackend { + config: PrefixedAzblobConnection, +} + +impl AzblobBackend { + pub fn new(config: PrefixedAzblobConnection) -> Result { + // Validate required fields + if config.container().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "Azure Blob container must be set when --azblob is enabled", + } + .build(), + )); + } + if config.account_name().expose_secret().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "Azure Blob account name must be set when --azblob is enabled", + } + .build(), + )); + } + if config.account_key().expose_secret().is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: "Azure Blob account key must be set when --azblob is enabled", + } + .build(), + )); + } + Ok(Self { config }) + } +} + +#[async_trait] +impl StorageExport for AzblobBackend { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + let container = self.config.container(); + let root = if self.config.root().is_empty() { + String::new() + } else { + format!("/{}", self.config.root()) + }; + + let azblob_path = format!("azblob://{}{}/{}/{}/", container, root, catalog, schema); + + let mut connection_options = vec![ + format!( + "ACCOUNT_NAME='{}'", + self.config.account_name().expose_secret() + ), + format!( + "ACCOUNT_KEY='{}'", + self.config.account_key().expose_secret() + ), + ]; + + if !self.config.endpoint().is_empty() { + connection_options.push(format!("ENDPOINT='{}'", self.config.endpoint())); + } + + if let Some(sas_token) = self.config.sas_token() { + connection_options.push(format!("SAS_TOKEN='{}'", sas_token)); + } + + let connection_str = format!(" CONNECTION ({})", connection_options.join(", ")); + (azblob_path, connection_str) + } + + fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { + let container = self.config.container(); + let root = if self.config.root().is_empty() { + String::new() + } else { + format!("/{}", self.config.root()) + }; + format!("azblob://{}{}/{}", container, root, file_path) + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + let mut masked = sql.to_string(); + masked = masked.replace(self.config.account_name().expose_secret(), "[REDACTED]"); + masked = masked.replace(self.config.account_key().expose_secret(), "[REDACTED]"); + if let Some(sas_token) = self.config.sas_token() { + masked = masked.replace(sas_token, "[REDACTED]"); + } + masked + } +} + +/// Enum to represent different storage backend types. +#[derive(Clone)] +pub enum StorageType { + Fs(FsBackend), + S3(S3Backend), + Oss(OssBackend), + Gcs(GcsBackend), + Azblob(AzblobBackend), +} + +impl StorageType { + /// Get storage path and connection string. + pub fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + match self { + StorageType::Fs(backend) => backend.get_storage_path(catalog, schema), + StorageType::S3(backend) => backend.get_storage_path(catalog, schema), + StorageType::Oss(backend) => backend.get_storage_path(catalog, schema), + StorageType::Gcs(backend) => backend.get_storage_path(catalog, schema), + StorageType::Azblob(backend) => backend.get_storage_path(catalog, schema), + } + } + + /// Format output path for logging. 
+ pub fn format_output_path(&self, catalog: &str, file_path: &str) -> String { + match self { + StorageType::Fs(backend) => backend.format_output_path(catalog, file_path), + StorageType::S3(backend) => backend.format_output_path(catalog, file_path), + StorageType::Oss(backend) => backend.format_output_path(catalog, file_path), + StorageType::Gcs(backend) => backend.format_output_path(catalog, file_path), + StorageType::Azblob(backend) => backend.format_output_path(catalog, file_path), + } + } + + pub fn is_remote_storage(&self) -> bool { + !matches!(self, StorageType::Fs(_)) + } + + /// Mask sensitive information in SQL commands. + pub fn mask_sensitive_info(&self, sql: &str) -> String { + match self { + StorageType::Fs(backend) => backend.mask_sensitive_info(sql), + StorageType::S3(backend) => backend.mask_sensitive_info(sql), + StorageType::Oss(backend) => backend.mask_sensitive_info(sql), + StorageType::Gcs(backend) => backend.mask_sensitive_info(sql), + StorageType::Azblob(backend) => backend.mask_sensitive_info(sql), + } + } +} diff --git a/src/cli/src/error.rs b/src/cli/src/error.rs index 91a6d35b5c19..aca3e6e29c29 100644 --- a/src/cli/src/error.rs +++ b/src/cli/src/error.rs @@ -253,12 +253,6 @@ pub enum Error { error: ObjectStoreError, }, - #[snafu(display("S3 config need be set"))] - S3ConfigNotSet { - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Output directory not set"))] OutputDirNotSet { #[snafu(implicit)] @@ -364,9 +358,9 @@ impl ErrorExt for Error { Error::Other { source, .. } => source.status_code(), Error::OpenDal { .. } | Error::InitBackend { .. } => StatusCode::Internal, - Error::S3ConfigNotSet { .. } - | Error::OutputDirNotSet { .. } - | Error::EmptyStoreAddrs { .. } => StatusCode::InvalidArguments, + Error::OutputDirNotSet { .. } | Error::EmptyStoreAddrs { .. } => { + StatusCode::InvalidArguments + } Error::BuildRuntime { source, .. } => source.status_code(), diff --git a/src/common/base/src/secrets.rs b/src/common/base/src/secrets.rs index 55f569fe23ca..be1f39e05fa7 100644 --- a/src/common/base/src/secrets.rs +++ b/src/common/base/src/secrets.rs @@ -48,7 +48,11 @@ impl From for SecretString { impl Display for SecretString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SecretString([REDACTED])") + if self.expose_secret().is_empty() { + write!(f, "") + } else { + write!(f, "SecretString([REDACTED])") + } } } From f8bdf7ca78890fcb6d4d7dd02322207030a46b61 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Mon, 24 Nov 2025 22:39:29 +0800 Subject: [PATCH 02/12] refactor(cli): unify storage configuration for export command - Change the encapsulation permissions of each configuration options for every storage backend to public access. Signed-off-by: McKnight22 Co-authored-by: WenyXu --- src/cli/src/common/object_store.rs | 130 +--------------------------- src/cli/src/data/storage_export.rs | 132 +++++++++++++++-------------- 2 files changed, 70 insertions(+), 192 deletions(-) diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index 0ab3dfa89668..5dae4eefe95c 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -35,7 +35,7 @@ macro_rules! wrap_with_clap_prefix { $( #[doc = $doc] )? $( #[clap(alias = $alias)] )? #[clap(long $(, default_value_t = $default )? )] - [<$prefix $field>]: $type, + pub [<$prefix $field>]: $type, )* } @@ -70,38 +70,6 @@ wrap_with_clap_prefix! { } } -impl PrefixedAzblobConnection { - /// Get the container name. 
- pub fn container(&self) -> &str { - &self.azblob_container - } - - /// Get the root path. - pub fn root(&self) -> &str { - &self.azblob_root - } - - /// Get the account name. - pub fn account_name(&self) -> &SecretString { - &self.azblob_account_name - } - - /// Get the account key. - pub fn account_key(&self) -> &SecretString { - &self.azblob_account_key - } - - /// Get the endpoint. - pub fn endpoint(&self) -> &str { - &self.azblob_endpoint - } - - /// Get the SAS token. - pub fn sas_token(&self) -> Option<&String> { - self.azblob_sas_token.as_ref() - } -} - wrap_with_clap_prefix! { PrefixedS3Connection, "s3-", @@ -124,43 +92,6 @@ wrap_with_clap_prefix! { } } -impl PrefixedS3Connection { - /// Get the bucket name. - pub fn bucket(&self) -> &str { - &self.s3_bucket - } - - /// Get the root path. - pub fn root(&self) -> &str { - &self.s3_root - } - - /// Get the access key ID. - pub fn access_key_id(&self) -> &SecretString { - &self.s3_access_key_id - } - - /// Get the secret access key. - pub fn secret_access_key(&self) -> &SecretString { - &self.s3_secret_access_key - } - - /// Get the endpoint. - pub fn endpoint(&self) -> Option<&String> { - self.s3_endpoint.as_ref() - } - - /// Get the region. - pub fn region(&self) -> Option<&String> { - self.s3_region.as_ref() - } - - /// Check if virtual host style is enabled. - pub fn enable_virtual_host_style(&self) -> bool { - self.s3_enable_virtual_host_style - } -} - wrap_with_clap_prefix! { PrefixedOssConnection, "oss-", @@ -179,33 +110,6 @@ wrap_with_clap_prefix! { } } -impl PrefixedOssConnection { - /// Get the bucket name. - pub fn bucket(&self) -> &str { - &self.oss_bucket - } - - /// Get the root path. - pub fn root(&self) -> &str { - &self.oss_root - } - - /// Get the access key ID. - pub fn access_key_id(&self) -> &SecretString { - &self.oss_access_key_id - } - - /// Get the access key secret. - pub fn access_key_secret(&self) -> &SecretString { - &self.oss_access_key_secret - } - - /// Get the endpoint. - pub fn endpoint(&self) -> &str { - &self.oss_endpoint - } -} - wrap_with_clap_prefix! { PrefixedGcsConnection, "gcs-", @@ -226,38 +130,6 @@ wrap_with_clap_prefix! { } } -impl PrefixedGcsConnection { - /// Get the bucket name. - pub fn bucket(&self) -> &str { - &self.gcs_bucket - } - - /// Get the root path. - pub fn root(&self) -> &str { - &self.gcs_root - } - - /// Get the scope. - pub fn scope(&self) -> &str { - &self.gcs_scope - } - - /// Get the credential path. - pub fn credential_path(&self) -> &SecretString { - &self.gcs_credential_path - } - - /// Get the credential. - pub fn credential(&self) -> &SecretString { - &self.gcs_credential - } - - /// Get the endpoint. - pub fn endpoint(&self) -> &str { - &self.gcs_endpoint - } -} - /// common config for object store. 
#[derive(clap::Parser, Debug, Clone, PartialEq, Default)] pub struct ObjectStoreConfig { diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index e982881bf3c6..9d7dc41a94f7 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -81,7 +81,7 @@ pub struct S3Backend { impl S3Backend { pub fn new(config: PrefixedS3Connection) -> Result { // Validate required fields - if config.bucket().is_empty() { + if config.s3_bucket.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "S3 bucket must be set when --s3 is enabled", @@ -89,7 +89,7 @@ impl S3Backend { .build(), )); } - if config.access_key_id().expose_secret().is_empty() { + if config.s3_access_key_id.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "S3 access key ID must be set when --s3 is enabled", @@ -97,7 +97,7 @@ impl S3Backend { .build(), )); } - if config.secret_access_key().expose_secret().is_empty() { + if config.s3_secret_access_key.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "S3 secret access key must be set when --s3 is enabled", @@ -112,11 +112,11 @@ impl S3Backend { #[async_trait] impl StorageExport for S3Backend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let bucket = self.config.bucket(); - let root = if self.config.root().is_empty() { + let bucket = &self.config.s3_bucket; + let root = if self.config.s3_root.is_empty() { String::new() } else { - format!("/{}", self.config.root()) + format!("/{}", self.config.s3_root) }; let s3_path = format!("s3://{}{}/{}/{}/", bucket, root, catalog, schema); @@ -124,19 +124,19 @@ impl StorageExport for S3Backend { let mut connection_options = vec![ format!( "ACCESS_KEY_ID='{}'", - self.config.access_key_id().expose_secret() + self.config.s3_access_key_id.expose_secret() ), format!( "SECRET_ACCESS_KEY='{}'", - self.config.secret_access_key().expose_secret() + self.config.s3_secret_access_key.expose_secret() ), ]; - if let Some(region) = self.config.region() { + if let Some(region) = &self.config.s3_region { connection_options.push(format!("REGION='{}'", region)); } - if let Some(endpoint) = self.config.endpoint() { + if let Some(endpoint) = &self.config.s3_endpoint { connection_options.push(format!("ENDPOINT='{}'", endpoint)); } @@ -145,20 +145,20 @@ impl StorageExport for S3Backend { } fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { - let bucket = self.config.bucket(); - let root = if self.config.root().is_empty() { + let bucket = &self.config.s3_bucket; + let root = if self.config.s3_root.is_empty() { String::new() } else { - format!("/{}", self.config.root()) + format!("/{}", self.config.s3_root) }; format!("s3://{}{}/{}", bucket, root, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { let mut masked = sql.to_string(); - masked = masked.replace(self.config.access_key_id().expose_secret(), "[REDACTED]"); + masked = masked.replace(self.config.s3_access_key_id.expose_secret(), "[REDACTED]"); masked = masked.replace( - self.config.secret_access_key().expose_secret(), + self.config.s3_secret_access_key.expose_secret(), "[REDACTED]", ); masked @@ -174,7 +174,7 @@ pub struct OssBackend { impl OssBackend { pub fn new(config: PrefixedOssConnection) -> Result { // Validate required fields - if config.bucket().is_empty() { + if config.oss_bucket.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "OSS bucket must be set when 
--oss is enabled", @@ -182,7 +182,7 @@ impl OssBackend { .build(), )); } - if config.endpoint().is_empty() { + if config.oss_endpoint.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "OSS endpoint must be set when --oss is enabled", @@ -190,7 +190,7 @@ impl OssBackend { .build(), )); } - if config.access_key_id().expose_secret().is_empty() { + if config.oss_access_key_id.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "OSS access key ID must be set when --oss is enabled", @@ -198,7 +198,7 @@ impl OssBackend { .build(), )); } - if config.access_key_secret().expose_secret().is_empty() { + if config.oss_access_key_secret.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "OSS access key secret must be set when --oss is enabled", @@ -213,22 +213,22 @@ impl OssBackend { #[async_trait] impl StorageExport for OssBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let bucket = self.config.bucket(); + let bucket = &self.config.oss_bucket; let oss_path = format!("oss://{}/{}/{}/", bucket, catalog, schema); let mut connection_options = vec![ format!( "ACCESS_KEY_ID='{}'", - self.config.access_key_id().expose_secret() + self.config.oss_access_key_id.expose_secret() ), format!( "ACCESS_KEY_SECRET='{}'", - self.config.access_key_secret().expose_secret() + self.config.oss_access_key_secret.expose_secret() ), ]; - if !self.config.endpoint().is_empty() { - connection_options.push(format!("ENDPOINT='{}'", self.config.endpoint())); + if !self.config.oss_endpoint.is_empty() { + connection_options.push(format!("ENDPOINT='{}'", self.config.oss_endpoint)); } let connection_str = format!(" CONNECTION ({})", connection_options.join(", ")); @@ -236,15 +236,15 @@ impl StorageExport for OssBackend { } fn format_output_path(&self, catalog: &str, file_path: &str) -> String { - let bucket = self.config.bucket(); + let bucket = &self.config.oss_bucket; format!("oss://{}/{}/{}", bucket, catalog, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { let mut masked = sql.to_string(); - masked = masked.replace(self.config.access_key_id().expose_secret(), "[REDACTED]"); + masked = masked.replace(self.config.oss_access_key_id.expose_secret(), "[REDACTED]"); masked = masked.replace( - self.config.access_key_secret().expose_secret(), + self.config.oss_access_key_secret.expose_secret(), "[REDACTED]", ); masked @@ -260,7 +260,7 @@ pub struct GcsBackend { impl GcsBackend { pub fn new(config: PrefixedGcsConnection) -> Result { // Validate required fields - if config.bucket().is_empty() { + if config.gcs_bucket.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "GCS bucket must be set when --gcs is enabled", @@ -269,8 +269,8 @@ impl GcsBackend { )); } // At least one of credential_path or credential must be set - if config.credential_path().expose_secret().is_empty() - && config.credential().expose_secret().is_empty() + if config.gcs_credential_path.expose_secret().is_empty() + && config.gcs_credential.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { @@ -286,33 +286,33 @@ impl GcsBackend { #[async_trait] impl StorageExport for GcsBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let bucket = self.config.bucket(); - let root = if self.config.root().is_empty() { + let bucket = &self.config.gcs_bucket; + let root = if self.config.gcs_root.is_empty() { String::new() } else { - 
format!("/{}", self.config.root()) + format!("/{}", self.config.gcs_root) }; let gcs_path = format!("gcs://{}{}/{}/{}/", bucket, root, catalog, schema); let mut connection_options = Vec::new(); - if !self.config.credential_path().expose_secret().is_empty() { + if !self.config.gcs_credential_path.expose_secret().is_empty() { connection_options.push(format!( "CREDENTIAL_PATH='{}'", - self.config.credential_path().expose_secret() + self.config.gcs_credential_path.expose_secret() )); } - if !self.config.credential().expose_secret().is_empty() { + if !self.config.gcs_credential.expose_secret().is_empty() { connection_options.push(format!( "CREDENTIAL='{}'", - self.config.credential().expose_secret() + self.config.gcs_credential.expose_secret() )); } - if !self.config.endpoint().is_empty() { - connection_options.push(format!("ENDPOINT='{}'", self.config.endpoint())); + if !self.config.gcs_endpoint.is_empty() { + connection_options.push(format!("ENDPOINT='{}'", self.config.gcs_endpoint)); } let connection_str = if connection_options.is_empty() { @@ -325,22 +325,25 @@ impl StorageExport for GcsBackend { } fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { - let bucket = self.config.bucket(); - let root = if self.config.root().is_empty() { + let bucket = &self.config.gcs_bucket; + let root = if self.config.gcs_root.is_empty() { String::new() } else { - format!("/{}", self.config.root()) + format!("/{}", self.config.gcs_root) }; format!("gcs://{}{}/{}", bucket, root, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { let mut masked = sql.to_string(); - if !self.config.credential_path().expose_secret().is_empty() { - masked = masked.replace(self.config.credential_path().expose_secret(), "[REDACTED]"); + if !self.config.gcs_credential_path.expose_secret().is_empty() { + masked = masked.replace( + self.config.gcs_credential_path.expose_secret(), + "[REDACTED]", + ); } - if !self.config.credential().expose_secret().is_empty() { - masked = masked.replace(self.config.credential().expose_secret(), "[REDACTED]"); + if !self.config.gcs_credential.expose_secret().is_empty() { + masked = masked.replace(self.config.gcs_credential.expose_secret(), "[REDACTED]"); } masked } @@ -355,7 +358,7 @@ pub struct AzblobBackend { impl AzblobBackend { pub fn new(config: PrefixedAzblobConnection) -> Result { // Validate required fields - if config.container().is_empty() { + if config.azblob_container.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "Azure Blob container must be set when --azblob is enabled", @@ -363,7 +366,7 @@ impl AzblobBackend { .build(), )); } - if config.account_name().expose_secret().is_empty() { + if config.azblob_account_name.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "Azure Blob account name must be set when --azblob is enabled", @@ -371,7 +374,7 @@ impl AzblobBackend { .build(), )); } - if config.account_key().expose_secret().is_empty() { + if config.azblob_account_key.expose_secret().is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: "Azure Blob account key must be set when --azblob is enabled", @@ -386,11 +389,11 @@ impl AzblobBackend { #[async_trait] impl StorageExport for AzblobBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let container = self.config.container(); - let root = if self.config.root().is_empty() { + let container = &self.config.azblob_container; + let root = if self.config.azblob_root.is_empty() { 
String::new() } else { - format!("/{}", self.config.root()) + format!("/{}", self.config.azblob_root) }; let azblob_path = format!("azblob://{}{}/{}/{}/", container, root, catalog, schema); @@ -398,19 +401,19 @@ impl StorageExport for AzblobBackend { let mut connection_options = vec![ format!( "ACCOUNT_NAME='{}'", - self.config.account_name().expose_secret() + self.config.azblob_account_name.expose_secret() ), format!( "ACCOUNT_KEY='{}'", - self.config.account_key().expose_secret() + self.config.azblob_account_key.expose_secret() ), ]; - if !self.config.endpoint().is_empty() { - connection_options.push(format!("ENDPOINT='{}'", self.config.endpoint())); + if !self.config.azblob_endpoint.is_empty() { + connection_options.push(format!("ENDPOINT='{}'", self.config.azblob_endpoint)); } - if let Some(sas_token) = self.config.sas_token() { + if let Some(sas_token) = &self.config.azblob_sas_token { connection_options.push(format!("SAS_TOKEN='{}'", sas_token)); } @@ -419,20 +422,23 @@ impl StorageExport for AzblobBackend { } fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { - let container = self.config.container(); - let root = if self.config.root().is_empty() { + let container = &self.config.azblob_container; + let root = if self.config.azblob_root.is_empty() { String::new() } else { - format!("/{}", self.config.root()) + format!("/{}", self.config.azblob_root) }; format!("azblob://{}{}/{}", container, root, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { let mut masked = sql.to_string(); - masked = masked.replace(self.config.account_name().expose_secret(), "[REDACTED]"); - masked = masked.replace(self.config.account_key().expose_secret(), "[REDACTED]"); - if let Some(sas_token) = self.config.sas_token() { + masked = masked.replace( + self.config.azblob_account_name.expose_secret(), + "[REDACTED]", + ); + masked = masked.replace(self.config.azblob_account_key.expose_secret(), "[REDACTED]"); + if let Some(sas_token) = &self.config.azblob_sas_token { masked = masked.replace(sas_token, "[REDACTED]"); } masked From 569ee449b5b2ef7bcadc684f06286f225d9e34e0 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Tue, 25 Nov 2025 09:51:29 +0800 Subject: [PATCH 03/12] refactor(cli): unify storage configuration for export command - Update the implementation of ObjectStoreConfig::build_xxx() using macro solutions Signed-off-by: McKnight22 Co-authored-by: WenyXu --- src/cli/src/common/object_store.rs | 77 ++++++++++-------------------- 1 file changed, 25 insertions(+), 52 deletions(-) diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index 5dae4eefe95c..ea174aa41fbe 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -173,62 +173,35 @@ pub fn new_fs_object_store(root: &str) -> std::result::Result { + pub fn $method(&self) -> Result { + let config = <$conn_type>::from(self.$field.clone()); + common_telemetry::info!( + "Building object store with {}: {:?}", + stringify!($field), + config + ); + let object_store = ObjectStore::new(<$service_type>::from(&config)) + .context(error::InitBackendSnafu) + .map_err(BoxedError::new)? + .finish(); + Ok(with_instrument_layers( + with_retry_layers(object_store), + false, + )) + } + }; +} + impl ObjectStoreConfig { - /// Builds the object store with S3. 
- pub fn build_s3(&self) -> Result { - let s3 = S3Connection::from(self.s3.clone()); - common_telemetry::info!("Building object store with s3: {:?}", s3); - let object_store = ObjectStore::new(S3::from(&s3)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(); - Ok(with_instrument_layers( - with_retry_layers(object_store), - false, - )) - } + gen_object_store_builder!(build_s3, s3, S3Connection, S3); - /// Builds the object store with OSS. - pub fn build_oss(&self) -> Result { - let oss = OssConnection::from(self.oss.clone()); - common_telemetry::info!("Building object store with oss: {:?}", oss); - let object_store = ObjectStore::new(Oss::from(&oss)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(); - Ok(with_instrument_layers( - with_retry_layers(object_store), - false, - )) - } + gen_object_store_builder!(build_oss, oss, OssConnection, Oss); - /// Builds the object store with GCS. - pub fn build_gcs(&self) -> Result { - let gcs = GcsConnection::from(self.gcs.clone()); - common_telemetry::info!("Building object store with gcs: {:?}", gcs); - let object_store = ObjectStore::new(Gcs::from(&gcs)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(); - Ok(with_instrument_layers( - with_retry_layers(object_store), - false, - )) - } + gen_object_store_builder!(build_gcs, gcs, GcsConnection, Gcs); - /// Builds the object store with Azure Blob. - pub fn build_azblob(&self) -> Result { - let azblob = AzblobConnection::from(self.azblob.clone()); - common_telemetry::info!("Building object store with azblob: {:?}", azblob); - let object_store = ObjectStore::new(Azblob::from(&azblob)) - .context(error::InitBackendSnafu) - .map_err(BoxedError::new)? - .finish(); - Ok(with_instrument_layers( - with_retry_layers(object_store), - false, - )) - } + gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob); /// Builds the object store from the config. pub fn build(&self) -> Result, BoxedError> { From 02219cfbd7d8a35a0a6c12a649dd34a502b7da21 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Wed, 26 Nov 2025 11:21:56 +0800 Subject: [PATCH 04/12] refactor(cli): unify storage configuration for export command - Introduce config validation for each storage type Signed-off-by: McKnight22 --- src/cli/src/common/object_store.rs | 176 ++++++++++++++++++++++++++++- src/cli/src/data/export.rs | 108 +++++++++++++----- src/cli/src/data/storage_export.rs | 121 ++------------------ 3 files changed, 256 insertions(+), 149 deletions(-) diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index ea174aa41fbe..636d548d411d 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_base::secrets::SecretString; +use common_base::secrets::{ExposeSecret, SecretString}; use common_error::ext::BoxedError; use object_store::services::{Azblob, Fs, Gcs, Oss, S3}; use object_store::util::{with_instrument_layers, with_retry_layers}; @@ -132,30 +132,31 @@ wrap_with_clap_prefix! { /// common config for object store. #[derive(clap::Parser, Debug, Clone, PartialEq, Default)] +#[clap(group(clap::ArgGroup::new("storage_backend").required(false).multiple(false)))] pub struct ObjectStoreConfig { /// Whether to use S3 object store. 
- #[clap(long, alias = "s3")] + #[clap(long, alias = "s3", group = "storage_backend")] pub enable_s3: bool, #[clap(flatten)] pub s3: PrefixedS3Connection, /// Whether to use OSS. - #[clap(long, alias = "oss")] + #[clap(long, alias = "oss", group = "storage_backend")] pub enable_oss: bool, #[clap(flatten)] pub oss: PrefixedOssConnection, /// Whether to use GCS. - #[clap(long, alias = "gcs")] + #[clap(long, alias = "gcs", group = "storage_backend")] pub enable_gcs: bool, #[clap(flatten)] pub gcs: PrefixedGcsConnection, /// Whether to use Azure Blob. - #[clap(long, alias = "azblob")] + #[clap(long, alias = "azblob", group = "storage_backend")] pub enable_azblob: bool, #[clap(flatten)] @@ -203,8 +204,173 @@ impl ObjectStoreConfig { gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob); + /// Helper function to validate storage backend configuration. + /// + /// This function enforces two validation rules: + /// 1. When a backend is enabled, all required fields must be non-empty. + /// 2. When a backend is disabled, no configuration fields should be set. + /// + /// # Arguments + /// + /// * `enable` - Whether this storage backend is enabled (via --s3, --oss, etc.) + /// * `name` - Human-readable name of the backend (e.g., "S3", "OSS") + /// * `required` - List of (field_value, field_name) tuples for required fields + /// * `extra_check` - Additional boolean check for optional fields (e.g., endpoint.is_some()) + /// + /// # Returns + /// + /// * `Ok(())` if validation passes + /// * `Err(MissingConfigSnafu)` if enabled but required fields are missing + /// * `Err(InvalidArgumentsSnafu)` if disabled but configuration is provided + fn check_config( + enable: bool, + name: &str, + required: &[(&str, &str)], + extra_check: bool, + ) -> Result<(), BoxedError> { + if enable { + // When enabled, check that all required fields are non-empty + let mut missing = Vec::new(); + for (val, name) in required { + if val.is_empty() { + missing.push(*name); + } + } + if !missing.is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: format!( + "{} {} must be set when --{} is enabled.", + name, + missing.join(", "), + name.to_lowercase() + ), + } + .build(), + )); + } + } else { + // When disabled, check that no configuration is provided + let mut is_set = false; + for (val, _) in required { + if !val.is_empty() { + is_set = true; + break; + } + } + if is_set || extra_check { + return Err(BoxedError::new( + error::InvalidArgumentsSnafu { + msg: format!( + "{} configuration is set but --{} is not enabled.", + name, + name.to_lowercase() + ), + } + .build(), + )); + } + } + Ok(()) + } + + pub fn validate_s3(&self) -> Result<(), BoxedError> { + let s3 = &self.s3; + Self::check_config( + self.enable_s3, + "S3", + &[ + (s3.s3_bucket.as_str(), "bucket"), + (s3.s3_root.as_str(), "root"), + ( + s3.s3_access_key_id.expose_secret().as_str(), + "access key ID", + ), + ( + s3.s3_secret_access_key.expose_secret().as_str(), + "secret access key", + ), + ], + s3.s3_endpoint.is_some() || s3.s3_region.is_some() || s3.s3_enable_virtual_host_style, + ) + } + + pub fn validate_oss(&self) -> Result<(), BoxedError> { + let oss = &self.oss; + Self::check_config( + self.enable_oss, + "OSS", + &[ + (oss.oss_bucket.as_str(), "bucket"), + (oss.oss_root.as_str(), "root"), + ( + oss.oss_access_key_id.expose_secret().as_str(), + "access key ID", + ), + ( + oss.oss_access_key_secret.expose_secret().as_str(), + "access key secret", + ), + (oss.oss_endpoint.as_str(), "endpoint"), + ], + false, + ) + } 
+ + pub fn validate_gcs(&self) -> Result<(), BoxedError> { + let gcs = &self.gcs; + Self::check_config( + self.enable_gcs, + "GCS", + &[ + (gcs.gcs_bucket.as_str(), "bucket"), + (gcs.gcs_root.as_str(), "root"), + (gcs.gcs_scope.as_str(), "scope"), + ( + gcs.gcs_credential_path.expose_secret().as_str(), + "credential path", + ), + (gcs.gcs_credential.expose_secret().as_str(), "credential"), + (gcs.gcs_endpoint.as_str(), "endpoint"), + ], + false, + ) + } + + pub fn validate_azblob(&self) -> Result<(), BoxedError> { + let azblob = &self.azblob; + Self::check_config( + self.enable_azblob, + "Azure Blob", + &[ + (azblob.azblob_container.as_str(), "container"), + (azblob.azblob_root.as_str(), "root"), + ( + azblob.azblob_account_name.expose_secret().as_str(), + "account name", + ), + ( + azblob.azblob_account_key.expose_secret().as_str(), + "account key", + ), + (azblob.azblob_endpoint.as_str(), "endpoint"), + ], + azblob.azblob_sas_token.is_some(), + ) + } + + pub fn validate(&self) -> Result<(), BoxedError> { + self.validate_s3()?; + self.validate_oss()?; + self.validate_gcs()?; + self.validate_azblob()?; + Ok(()) + } + /// Builds the object store from the config. pub fn build(&self) -> Result, BoxedError> { + self.validate()?; + if self.enable_s3 { self.build_s3().map(Some) } else if self.enable_oss { diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index 00ecbb18ce42..d2d18a4b04e0 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -135,23 +135,27 @@ impl ExportCommand { pub async fn build(&self) -> std::result::Result, BoxedError> { // Determine storage type let (storage_type, operator) = if self.storage.enable_s3 { + self.storage.validate_s3()?; ( - StorageType::S3(S3Backend::new(self.storage.s3.clone())?), + StorageType::S3(S3Backend::new(self.storage.s3.clone())), self.storage.build_s3()?, ) } else if self.storage.enable_oss { + self.storage.validate_oss()?; ( - StorageType::Oss(OssBackend::new(self.storage.oss.clone())?), + StorageType::Oss(OssBackend::new(self.storage.oss.clone())), self.storage.build_oss()?, ) } else if self.storage.enable_gcs { + self.storage.validate_gcs()?; ( - StorageType::Gcs(GcsBackend::new(self.storage.gcs.clone())?), + StorageType::Gcs(GcsBackend::new(self.storage.gcs.clone())), self.storage.build_gcs()?, ) } else if self.storage.enable_azblob { + self.storage.validate_azblob()?; ( - StorageType::Azblob(AzblobBackend::new(self.storage.azblob.clone())?), + StorageType::Azblob(AzblobBackend::new(self.storage.azblob.clone())), self.storage.build_azblob()?, ) } else if let Some(output_dir) = &self.output_dir { @@ -672,10 +676,13 @@ mod tests { "--s3", "--s3-bucket", "test-bucket", + "--s3-root", + "test-root", "--s3-access-key-id", "test-key", "--s3-secret-access-key", "test-secret", + // Optional fields "--s3-region", "us-west-2", "--s3-endpoint", @@ -694,19 +701,22 @@ mod tests { "--addr", "127.0.0.1:4000", "--s3", + "--s3-root", + "test-root", "--s3-access-key-id", "test-key", "--s3-secret-access-key", "test-secret", - "--s3-region", - "us-west-2", ]); let result = cmd.build().await; assert!(result.is_err()); - match result { - Ok(_) => panic!("Expected error"), - Err(e) => assert!(e.to_string().contains("S3 bucket must be set")), + if let Err(err) = result { + assert!( + err.to_string().contains("S3 bucket must be set"), + "Actual error: {}", + err + ); } } @@ -719,6 +729,8 @@ mod tests { "--oss", "--oss-bucket", "test-bucket", + "--oss-root", + "test-root", "--oss-access-key-id", "test-key-id", 
"--oss-access-key-secret", @@ -741,6 +753,8 @@ mod tests { "--oss", "--oss-bucket", "test-bucket", + "--oss-root", + "test-root", "--oss-access-key-id", "test-key-id", "--oss-access-key-secret", @@ -749,9 +763,12 @@ mod tests { let result = cmd.build().await; assert!(result.is_err()); - match result { - Ok(_) => panic!("Expected error"), - Err(e) => assert!(e.to_string().contains("OSS endpoint must be set")), + if let Err(err) = result { + assert!( + err.to_string().contains("OSS endpoint must be set"), + "Actual error: {}", + err + ); } } @@ -764,8 +781,14 @@ mod tests { "--gcs", "--gcs-bucket", "test-bucket", + "--gcs-root", + "test-root", + "--gcs-scope", + "test-scope", "--gcs-credential-path", "/path/to/credential", + "--gcs-credential", + "test-credential-content", "--gcs-endpoint", "https://storage.googleapis.com", ]); @@ -776,6 +799,7 @@ mod tests { #[tokio::test] async fn test_export_command_build_with_gcs_missing_config() { + // Missing credentials let cmd = ExportCommand::parse_from([ "export", "--addr", @@ -783,20 +807,24 @@ mod tests { "--gcs", "--gcs-bucket", "test-bucket", - // Missing credentials + "--gcs-root", + "test-root", + "--gcs-scope", + "test-scope", + "--gcs-endpoint", + "https://storage.googleapis.com", ]); let result = cmd.build().await; - let err = match result { - Ok(_) => panic!("Expected error but got Ok"), - Err(e) => e, - }; - assert!( - err.to_string() - .contains("GCS credential path or credential must be set"), - "Unexpected error message: {}", - err - ); + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string() + .contains("GCS credential path, credential must be set"), + "Actual error: {}", + err + ); + } } #[tokio::test] @@ -808,6 +836,8 @@ mod tests { "--azblob", "--azblob-container", "test-container", + "--azblob-root", + "test-root", "--azblob-account-name", "test-account", "--azblob-account-key", @@ -822,6 +852,7 @@ mod tests { #[tokio::test] async fn test_export_command_build_with_azblob_missing_config() { + // Missing account key let cmd = ExportCommand::parse_from([ "export", "--addr", @@ -829,20 +860,35 @@ mod tests { "--azblob", "--azblob-container", "test-container", + "--azblob-root", + "test-root", "--azblob-account-name", "test-account", - // Missing account key + "--azblob-endpoint", + "https://account.blob.core.windows.net", ]); let result = cmd.build().await; assert!(result.is_err()); - match result { - Ok(_) => panic!("Expected error"), - Err(e) => assert!( - e.to_string().contains("Azure Blob account key must be set"), - "Unexpected error message: {}", - e - ), + if let Err(err) = result { + assert!( + err.to_string() + .contains("Azure Blob account key must be set"), + "Actual error: {}", + err + ); } } + + #[test] + fn test_export_command_conflict() { + // Try to enable both S3 and OSS + let result = + ExportCommand::try_parse_from(["export", "--addr", "127.0.0.1:4000", "--s3", "--oss"]); + + assert!(result.is_err()); + let err = result.unwrap_err(); + // clap error for conflicting arguments + assert!(err.kind() == clap::error::ErrorKind::ArgumentConflict); + } } diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index 9d7dc41a94f7..658757e22c48 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -16,12 +16,10 @@ use std::path::PathBuf; use async_trait::async_trait; use common_base::secrets::ExposeSecret; -use common_error::ext::BoxedError; use crate::common::{ PrefixedAzblobConnection, PrefixedGcsConnection, 
PrefixedOssConnection, PrefixedS3Connection, }; -use crate::error; /// Trait for storage backends that can be used for data export. #[async_trait] @@ -79,33 +77,8 @@ pub struct S3Backend { } impl S3Backend { - pub fn new(config: PrefixedS3Connection) -> Result { - // Validate required fields - if config.s3_bucket.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "S3 bucket must be set when --s3 is enabled", - } - .build(), - )); - } - if config.s3_access_key_id.expose_secret().is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "S3 access key ID must be set when --s3 is enabled", - } - .build(), - )); - } - if config.s3_secret_access_key.expose_secret().is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "S3 secret access key must be set when --s3 is enabled", - } - .build(), - )); - } - Ok(Self { config }) + pub fn new(config: PrefixedS3Connection) -> Self { + Self { config } } } @@ -172,41 +145,8 @@ pub struct OssBackend { } impl OssBackend { - pub fn new(config: PrefixedOssConnection) -> Result { - // Validate required fields - if config.oss_bucket.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "OSS bucket must be set when --oss is enabled", - } - .build(), - )); - } - if config.oss_endpoint.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "OSS endpoint must be set when --oss is enabled", - } - .build(), - )); - } - if config.oss_access_key_id.expose_secret().is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "OSS access key ID must be set when --oss is enabled", - } - .build(), - )); - } - if config.oss_access_key_secret.expose_secret().is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "OSS access key secret must be set when --oss is enabled", - } - .build(), - )); - } - Ok(Self { config }) + pub fn new(config: PrefixedOssConnection) -> Self { + Self { config } } } @@ -258,28 +198,8 @@ pub struct GcsBackend { } impl GcsBackend { - pub fn new(config: PrefixedGcsConnection) -> Result { - // Validate required fields - if config.gcs_bucket.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "GCS bucket must be set when --gcs is enabled", - } - .build(), - )); - } - // At least one of credential_path or credential must be set - if config.gcs_credential_path.expose_secret().is_empty() - && config.gcs_credential.expose_secret().is_empty() - { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "GCS credential path or credential must be set when --gcs is enabled", - } - .build(), - )); - } - Ok(Self { config }) + pub fn new(config: PrefixedGcsConnection) -> Self { + Self { config } } } @@ -356,33 +276,8 @@ pub struct AzblobBackend { } impl AzblobBackend { - pub fn new(config: PrefixedAzblobConnection) -> Result { - // Validate required fields - if config.azblob_container.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "Azure Blob container must be set when --azblob is enabled", - } - .build(), - )); - } - if config.azblob_account_name.expose_secret().is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "Azure Blob account name must be set when --azblob is enabled", - } - .build(), - )); - } - if config.azblob_account_key.expose_secret().is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: "Azure Blob account key must be set when --azblob is enabled", - } - 
.build(), - )); - } - Ok(Self { config }) + pub fn new(config: PrefixedAzblobConnection) -> Self { + Self { config } } } From 25ce12362e793b0ac7f0830050ad6366c2ad83a6 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Wed, 26 Nov 2025 15:29:26 +0800 Subject: [PATCH 05/12] refactor(cli): unify storage configuration for export command - Enable trait-based polymorphism for storage type handling (from inherent impl to trait impl) - Extract helper functions to reduce code duplication Signed-off-by: McKnight22 --- src/cli/src/data/export.rs | 2 +- src/cli/src/data/storage_export.rs | 139 +++++++++++++---------------- 2 files changed, 65 insertions(+), 76 deletions(-) diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index d2d18a4b04e0..11c4be766ebb 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -28,7 +28,7 @@ use tokio::time::Instant; use crate::common::{ObjectStoreConfig, new_fs_object_store}; use crate::data::storage_export::{ - AzblobBackend, FsBackend, GcsBackend, OssBackend, S3Backend, StorageType, + AzblobBackend, FsBackend, GcsBackend, OssBackend, S3Backend, StorageExport, StorageType, }; use crate::data::{COPY_PATH_PLACEHOLDER, default_database}; use crate::database::{DatabaseClient, parse_proxy_opts}; diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index 658757e22c48..41ef3aa64d60 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -21,6 +21,25 @@ use crate::common::{ PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection, }; +/// Helper function to format root path with leading slash if non-empty. +fn format_root_path(root: &str) -> String { + if root.is_empty() { + String::new() + } else { + format!("/{}", root) + } +} + +/// Helper function to mask multiple secrets in a string. +fn mask_secrets(mut sql: String, secrets: &[&str]) -> String { + for secret in secrets { + if !secret.is_empty() { + sql = sql.replace(secret, "[REDACTED]"); + } + } + sql +} + /// Trait for storage backends that can be used for data export. 
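///
/// A minimal usage sketch (illustrative only: `s3_config` is an assumed
/// variable, and the surrounding `COPY DATABASE` statement is simplified):
///
/// ```ignore
/// let backend = StorageType::S3(S3Backend::new(s3_config));
/// // The object store URI plus the trailing ` CONNECTION (...)` clause.
/// let (path, connection) = backend.get_storage_path("greptime", "public");
/// let sql = format!("COPY DATABASE \"greptime\".\"public\" TO '{path}'{connection}");
/// // Secrets are replaced with [REDACTED] before the statement is logged.
/// common_telemetry::info!("{}", backend.mask_sensitive_info(&sql));
/// ```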
#[async_trait] pub trait StorageExport: Send + Sync { @@ -86,11 +105,7 @@ impl S3Backend { impl StorageExport for S3Backend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let bucket = &self.config.s3_bucket; - let root = if self.config.s3_root.is_empty() { - String::new() - } else { - format!("/{}", self.config.s3_root) - }; + let root = format_root_path(&self.config.s3_root); let s3_path = format!("s3://{}{}/{}/{}/", bucket, root, catalog, schema); @@ -119,22 +134,18 @@ impl StorageExport for S3Backend { fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { let bucket = &self.config.s3_bucket; - let root = if self.config.s3_root.is_empty() { - String::new() - } else { - format!("/{}", self.config.s3_root) - }; + let root = format_root_path(&self.config.s3_root); format!("s3://{}{}/{}", bucket, root, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { - let mut masked = sql.to_string(); - masked = masked.replace(self.config.s3_access_key_id.expose_secret(), "[REDACTED]"); - masked = masked.replace( - self.config.s3_secret_access_key.expose_secret(), - "[REDACTED]", - ); - masked + mask_secrets( + sql.to_string(), + &[ + self.config.s3_access_key_id.expose_secret(), + self.config.s3_secret_access_key.expose_secret(), + ], + ) } } @@ -181,13 +192,13 @@ impl StorageExport for OssBackend { } fn mask_sensitive_info(&self, sql: &str) -> String { - let mut masked = sql.to_string(); - masked = masked.replace(self.config.oss_access_key_id.expose_secret(), "[REDACTED]"); - masked = masked.replace( - self.config.oss_access_key_secret.expose_secret(), - "[REDACTED]", - ); - masked + mask_secrets( + sql.to_string(), + &[ + self.config.oss_access_key_id.expose_secret(), + self.config.oss_access_key_secret.expose_secret(), + ], + ) } } @@ -207,11 +218,7 @@ impl GcsBackend { impl StorageExport for GcsBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let bucket = &self.config.gcs_bucket; - let root = if self.config.gcs_root.is_empty() { - String::new() - } else { - format!("/{}", self.config.gcs_root) - }; + let root = format_root_path(&self.config.gcs_root); let gcs_path = format!("gcs://{}{}/{}/{}/", bucket, root, catalog, schema); @@ -246,26 +253,18 @@ impl StorageExport for GcsBackend { fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { let bucket = &self.config.gcs_bucket; - let root = if self.config.gcs_root.is_empty() { - String::new() - } else { - format!("/{}", self.config.gcs_root) - }; + let root = format_root_path(&self.config.gcs_root); format!("gcs://{}{}/{}", bucket, root, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { - let mut masked = sql.to_string(); - if !self.config.gcs_credential_path.expose_secret().is_empty() { - masked = masked.replace( + mask_secrets( + sql.to_string(), + &[ self.config.gcs_credential_path.expose_secret(), - "[REDACTED]", - ); - } - if !self.config.gcs_credential.expose_secret().is_empty() { - masked = masked.replace(self.config.gcs_credential.expose_secret(), "[REDACTED]"); - } - masked + self.config.gcs_credential.expose_secret(), + ], + ) } } @@ -285,11 +284,7 @@ impl AzblobBackend { impl StorageExport for AzblobBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let container = &self.config.azblob_container; - let root = if self.config.azblob_root.is_empty() { - String::new() - } else { - format!("/{}", self.config.azblob_root) - }; + let root = 
format_root_path(&self.config.azblob_root); let azblob_path = format!("azblob://{}{}/{}/{}/", container, root, catalog, schema); @@ -318,25 +313,18 @@ impl StorageExport for AzblobBackend { fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { let container = &self.config.azblob_container; - let root = if self.config.azblob_root.is_empty() { - String::new() - } else { - format!("/{}", self.config.azblob_root) - }; + let root = format_root_path(&self.config.azblob_root); format!("azblob://{}{}/{}", container, root, file_path) } fn mask_sensitive_info(&self, sql: &str) -> String { - let mut masked = sql.to_string(); - masked = masked.replace( - self.config.azblob_account_name.expose_secret(), - "[REDACTED]", - ); - masked = masked.replace(self.config.azblob_account_key.expose_secret(), "[REDACTED]"); - if let Some(sas_token) = &self.config.azblob_sas_token { - masked = masked.replace(sas_token, "[REDACTED]"); - } - masked + mask_secrets( + sql.to_string(), + &[ + self.config.azblob_account_name.expose_secret(), + self.config.azblob_account_key.expose_secret(), + ], + ) } } @@ -350,9 +338,9 @@ pub enum StorageType { Azblob(AzblobBackend), } -impl StorageType { - /// Get storage path and connection string. - pub fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { +#[async_trait] +impl StorageExport for StorageType { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { match self { StorageType::Fs(backend) => backend.get_storage_path(catalog, schema), StorageType::S3(backend) => backend.get_storage_path(catalog, schema), @@ -362,8 +350,7 @@ impl StorageType { } } - /// Format output path for logging. - pub fn format_output_path(&self, catalog: &str, file_path: &str) -> String { + fn format_output_path(&self, catalog: &str, file_path: &str) -> String { match self { StorageType::Fs(backend) => backend.format_output_path(catalog, file_path), StorageType::S3(backend) => backend.format_output_path(catalog, file_path), @@ -373,12 +360,7 @@ impl StorageType { } } - pub fn is_remote_storage(&self) -> bool { - !matches!(self, StorageType::Fs(_)) - } - - /// Mask sensitive information in SQL commands. - pub fn mask_sensitive_info(&self, sql: &str) -> String { + fn mask_sensitive_info(&self, sql: &str) -> String { match self { StorageType::Fs(backend) => backend.mask_sensitive_info(sql), StorageType::S3(backend) => backend.mask_sensitive_info(sql), @@ -388,3 +370,10 @@ impl StorageType { } } } + +impl StorageType { + /// Returns true if the storage backend is remote (not local filesystem). 
+ pub fn is_remote_storage(&self) -> bool { + !matches!(self, StorageType::Fs(_)) + } +} From c289db0930ef84a30b84b36966f38ffbb65e267a Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Thu, 27 Nov 2025 22:02:57 +0800 Subject: [PATCH 06/12] refactor(cli): unify storage configuration for export command - Improve SecretString handling and validation (Distinguishing between "not provided" and "empty string") - Add validation when using filesystem storage Signed-off-by: McKnight22 --- src/cli/src/common.rs | 2 +- src/cli/src/common/object_store.rs | 395 +++++++++++++++++++---------- src/cli/src/data/export.rs | 199 ++++++++++++++- src/cli/src/data/storage_export.rs | 55 ++-- src/common/base/src/secrets.rs | 6 +- 5 files changed, 489 insertions(+), 168 deletions(-) diff --git a/src/cli/src/common.rs b/src/cli/src/common.rs index d6dc461b8986..b213c61e4c81 100644 --- a/src/cli/src/common.rs +++ b/src/cli/src/common.rs @@ -17,6 +17,6 @@ mod store; pub use object_store::{ ObjectStoreConfig, PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, - PrefixedS3Connection, new_fs_object_store, + PrefixedS3Connection, new_fs_object_store, validate_fs, }; pub use store::StoreConfig; diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index 636d548d411d..bc76aa214aeb 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -22,6 +22,41 @@ use snafu::ResultExt; use crate::error::{self}; +/// Trait to convert CLI field types to target struct field types. +/// This enables `Option` (CLI) -> `SecretString` (target) conversions, +/// allowing us to distinguish "not provided" from "provided but empty". +trait IntoField { + fn into_field(self) -> T; +} + +/// Identity conversion for types that are the same. +impl IntoField for T { + fn into_field(self) -> T { + self + } +} + +/// Convert `Option` to `SecretString`, using default for None. +impl IntoField for Option { + fn into_field(self) -> SecretString { + self.unwrap_or_default() + } +} + +/// Check if an `Option` is effectively empty. +/// Returns `true` if: +/// - `None` (user didn't provide the argument) +/// - `Some("")` (user provided an empty string) +fn is_secret_empty(secret: &Option) -> bool { + secret.as_ref().is_none_or(|s| s.expose_secret().is_empty()) +} + +/// Check if an `Option` is effectively non-empty. +/// Returns `true` only if user provided a non-empty secret value. +fn is_secret_provided(secret: &Option) -> bool { + !is_secret_empty(secret) +} + macro_rules! wrap_with_clap_prefix { ( $new_name:ident, $prefix:literal, $base:ty, { @@ -42,7 +77,8 @@ macro_rules! wrap_with_clap_prefix { impl From<$new_name> for $base { fn from(w: $new_name) -> Self { Self { - $( $field: w.[<$prefix $field>] ),* + // Use into_field() to handle Option -> SecretString conversion + $( $field: w.[<$prefix $field>].into_field() ),* } } } @@ -60,9 +96,9 @@ wrap_with_clap_prefix! { #[doc = "The root of the object store."] root: String = Default::default(), #[doc = "The account name of the object store."] - account_name: SecretString = Default::default(), + account_name: Option, #[doc = "The account key of the object store."] - account_key: SecretString = Default::default(), + account_key: Option, #[doc = "The endpoint of the object store."] endpoint: String = Default::default(), #[doc = "The SAS token of the object store."] @@ -80,9 +116,9 @@ wrap_with_clap_prefix! 
{ #[doc = "The root of the object store."] root: String = Default::default(), #[doc = "The access key ID of the object store."] - access_key_id: SecretString = Default::default(), + access_key_id: Option, #[doc = "The secret access key of the object store."] - secret_access_key: SecretString = Default::default(), + secret_access_key: Option, #[doc = "The endpoint of the object store."] endpoint: Option, #[doc = "The region of the object store."] @@ -102,9 +138,9 @@ wrap_with_clap_prefix! { #[doc = "The root of the object store."] root: String = Default::default(), #[doc = "The access key ID of the object store."] - access_key_id: SecretString = Default::default(), + access_key_id: Option, #[doc = "The access key secret of the object store."] - access_key_secret: SecretString = Default::default(), + access_key_secret: Option, #[doc = "The endpoint of the object store."] endpoint: String = Default::default(), } @@ -122,9 +158,9 @@ wrap_with_clap_prefix! { #[doc = "The scope of the object store."] scope: String = Default::default(), #[doc = "The credential path of the object store."] - credential_path: SecretString = Default::default(), + credential_path: Option, #[doc = "The credential of the object store."] - credential: SecretString = Default::default(), + credential: Option, #[doc = "The endpoint of the object store."] endpoint: String = Default::default(), } @@ -163,6 +199,47 @@ pub struct ObjectStoreConfig { pub azblob: PrefixedAzblobConnection, } +/// This function is called when the user chooses to use local filesystem storage +/// (via `--output-dir`) instead of a remote storage backend. It ensures that the user +/// hasn't accidentally or incorrectly provided configuration for remote storage backends +/// (S3, OSS, GCS, or Azure Blob) without enabling them. +/// +/// # Validation Rules +/// +/// For each storage backend (S3, OSS, GCS, Azblob), this function validates: +/// 1. **When backend is enabled** (e.g., `--s3`): All required fields must be non-empty +/// 2. **When backend is disabled**: No configuration fields should be provided +/// +/// The second rule is critical for filesystem usage: if a user provides something like +/// `--s3-bucket my-bucket` without `--s3`, it likely indicates a configuration error +/// that should be caught early. +/// +/// # Examples +/// +/// Valid usage with local filesystem: +/// ```bash +/// # No remote storage config provided - OK +/// export --output-dir /tmp/data --addr localhost:4000 +/// ``` +/// +/// Invalid usage (caught by this function): +/// ```bash +/// # ERROR: S3 config provided but --s3 not enabled +/// export --output-dir /tmp/data --s3-bucket my-bucket --addr localhost:4000 +/// ``` +/// +/// # Errors +/// +/// Returns an error if any remote storage configuration is detected without the +/// corresponding enable flag (--s3, --oss, --gcs, or --azblob). +pub fn validate_fs(config: &ObjectStoreConfig) -> std::result::Result<(), BoxedError> { + config.validate_s3()?; + config.validate_oss()?; + config.validate_gcs()?; + config.validate_azblob()?; + Ok(()) +} + /// Creates a new file system object store. pub fn new_fs_object_store(root: &str) -> std::result::Result { let builder = Fs::default().root(root); @@ -204,159 +281,219 @@ impl ObjectStoreConfig { gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob); - /// Helper function to validate storage backend configuration. - /// - /// This function enforces two validation rules: - /// 1. When a backend is enabled, all required fields must be non-empty. - /// 2. 
When a backend is disabled, no configuration fields should be set. - /// - /// # Arguments - /// - /// * `enable` - Whether this storage backend is enabled (via --s3, --oss, etc.) - /// * `name` - Human-readable name of the backend (e.g., "S3", "OSS") - /// * `required` - List of (field_value, field_name) tuples for required fields - /// * `extra_check` - Additional boolean check for optional fields (e.g., endpoint.is_some()) - /// - /// # Returns - /// - /// * `Ok(())` if validation passes - /// * `Err(MissingConfigSnafu)` if enabled but required fields are missing - /// * `Err(InvalidArgumentsSnafu)` if disabled but configuration is provided - fn check_config( - enable: bool, - name: &str, - required: &[(&str, &str)], - extra_check: bool, - ) -> Result<(), BoxedError> { - if enable { - // When enabled, check that all required fields are non-empty + pub fn validate_s3(&self) -> Result<(), BoxedError> { + let s3 = &self.s3; + + if self.enable_s3 { + // Check required fields (root is optional for S3) let mut missing = Vec::new(); - for (val, name) in required { - if val.is_empty() { - missing.push(*name); - } + if s3.s3_bucket.is_empty() { + missing.push("bucket"); } + if is_secret_empty(&s3.s3_access_key_id) { + missing.push("access key ID"); + } + if is_secret_empty(&s3.s3_secret_access_key) { + missing.push("secret access key"); + } + if !missing.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: format!( - "{} {} must be set when --{} is enabled.", - name, + "S3 {} must be set when --s3 is enabled.", missing.join(", "), - name.to_lowercase() ), } .build(), )); } } else { - // When disabled, check that no configuration is provided - let mut is_set = false; - for (val, _) in required { - if !val.is_empty() { - is_set = true; - break; - } - } - if is_set || extra_check { + // When disabled, check that no meaningful configuration is provided + if !s3.s3_bucket.is_empty() + || !s3.s3_root.is_empty() + || s3.s3_endpoint.is_some() + || s3.s3_region.is_some() + || s3.s3_enable_virtual_host_style + || is_secret_provided(&s3.s3_access_key_id) + || is_secret_provided(&s3.s3_secret_access_key) + { return Err(BoxedError::new( error::InvalidArgumentsSnafu { - msg: format!( - "{} configuration is set but --{} is not enabled.", - name, - name.to_lowercase() - ), + msg: "S3 configuration is set but --s3 is not enabled.".to_string(), } .build(), )); } } - Ok(()) - } - pub fn validate_s3(&self) -> Result<(), BoxedError> { - let s3 = &self.s3; - Self::check_config( - self.enable_s3, - "S3", - &[ - (s3.s3_bucket.as_str(), "bucket"), - (s3.s3_root.as_str(), "root"), - ( - s3.s3_access_key_id.expose_secret().as_str(), - "access key ID", - ), - ( - s3.s3_secret_access_key.expose_secret().as_str(), - "secret access key", - ), - ], - s3.s3_endpoint.is_some() || s3.s3_region.is_some() || s3.s3_enable_virtual_host_style, - ) + Ok(()) } pub fn validate_oss(&self) -> Result<(), BoxedError> { let oss = &self.oss; - Self::check_config( - self.enable_oss, - "OSS", - &[ - (oss.oss_bucket.as_str(), "bucket"), - (oss.oss_root.as_str(), "root"), - ( - oss.oss_access_key_id.expose_secret().as_str(), - "access key ID", - ), - ( - oss.oss_access_key_secret.expose_secret().as_str(), - "access key secret", - ), - (oss.oss_endpoint.as_str(), "endpoint"), - ], - false, - ) + + if self.enable_oss { + // Check required fields + let mut missing = Vec::new(); + if oss.oss_bucket.is_empty() { + missing.push("bucket"); + } + if oss.oss_root.is_empty() { + missing.push("root"); + } + if 
is_secret_empty(&oss.oss_access_key_id) { + missing.push("access key ID"); + } + if is_secret_empty(&oss.oss_access_key_secret) { + missing.push("access key secret"); + } + if oss.oss_endpoint.is_empty() { + missing.push("endpoint"); + } + + if !missing.is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: format!( + "OSS {} must be set when --oss is enabled.", + missing.join(", "), + ), + } + .build(), + )); + } + } else { + // When disabled, check that no meaningful configuration is provided + if !oss.oss_bucket.is_empty() + || !oss.oss_root.is_empty() + || !oss.oss_endpoint.is_empty() + || is_secret_provided(&oss.oss_access_key_id) + || is_secret_provided(&oss.oss_access_key_secret) + { + return Err(BoxedError::new( + error::InvalidArgumentsSnafu { + msg: "OSS configuration is set but --oss is not enabled.".to_string(), + } + .build(), + )); + } + } + + Ok(()) } pub fn validate_gcs(&self) -> Result<(), BoxedError> { let gcs = &self.gcs; - Self::check_config( - self.enable_gcs, - "GCS", - &[ - (gcs.gcs_bucket.as_str(), "bucket"), - (gcs.gcs_root.as_str(), "root"), - (gcs.gcs_scope.as_str(), "scope"), - ( - gcs.gcs_credential_path.expose_secret().as_str(), - "credential path", - ), - (gcs.gcs_credential.expose_secret().as_str(), "credential"), - (gcs.gcs_endpoint.as_str(), "endpoint"), - ], - false, - ) + + if self.enable_gcs { + // Check required fields (excluding credentials) + let mut missing = Vec::new(); + if gcs.gcs_bucket.is_empty() { + missing.push("bucket"); + } + if gcs.gcs_root.is_empty() { + missing.push("root"); + } + if gcs.gcs_scope.is_empty() { + missing.push("scope"); + } + if gcs.gcs_endpoint.is_empty() { + missing.push("endpoint"); + } + + // At least one of credential_path or credential must be provided (non-empty) + if is_secret_empty(&gcs.gcs_credential_path) && is_secret_empty(&gcs.gcs_credential) { + missing.push("credential path, credential"); + } + + if !missing.is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: format!( + "GCS {} must be set when --gcs is enabled.", + missing.join(", "), + ), + } + .build(), + )); + } + } else { + // When disabled, check that no meaningful configuration is provided + if !gcs.gcs_bucket.is_empty() + || !gcs.gcs_root.is_empty() + || !gcs.gcs_scope.is_empty() + || !gcs.gcs_endpoint.is_empty() + || is_secret_provided(&gcs.gcs_credential_path) + || is_secret_provided(&gcs.gcs_credential) + { + return Err(BoxedError::new( + error::InvalidArgumentsSnafu { + msg: "GCS configuration is set but --gcs is not enabled.".to_string(), + } + .build(), + )); + } + } + + Ok(()) } pub fn validate_azblob(&self) -> Result<(), BoxedError> { let azblob = &self.azblob; - Self::check_config( - self.enable_azblob, - "Azure Blob", - &[ - (azblob.azblob_container.as_str(), "container"), - (azblob.azblob_root.as_str(), "root"), - ( - azblob.azblob_account_name.expose_secret().as_str(), - "account name", - ), - ( - azblob.azblob_account_key.expose_secret().as_str(), - "account key", - ), - (azblob.azblob_endpoint.as_str(), "endpoint"), - ], - azblob.azblob_sas_token.is_some(), - ) + + if self.enable_azblob { + // Check required fields + let mut missing = Vec::new(); + if azblob.azblob_container.is_empty() { + missing.push("container"); + } + if azblob.azblob_root.is_empty() { + missing.push("root"); + } + if is_secret_empty(&azblob.azblob_account_name) { + missing.push("account name"); + } + if azblob.azblob_endpoint.is_empty() { + missing.push("endpoint"); + } + + // account_key is only required if 
sas_token is not provided + if azblob.azblob_sas_token.is_none() && is_secret_empty(&azblob.azblob_account_key) { + missing.push("account key"); + } + + if !missing.is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: format!( + "Azure Blob {} must be set when --azblob is enabled.", + missing.join(", "), + ), + } + .build(), + )); + } + } else { + // When disabled, check that no meaningful configuration is provided + if !azblob.azblob_container.is_empty() + || !azblob.azblob_root.is_empty() + || !azblob.azblob_endpoint.is_empty() + || azblob.azblob_sas_token.is_some() + || is_secret_provided(&azblob.azblob_account_name) + || is_secret_provided(&azblob.azblob_account_key) + { + return Err(BoxedError::new( + error::InvalidArgumentsSnafu { + msg: "Azure Blob configuration is set but --azblob is not enabled." + .to_string(), + } + .build(), + )); + } + } + + Ok(()) } pub fn validate(&self) -> Result<(), BoxedError> { diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index 11c4be766ebb..0288f6f2bd0f 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -26,7 +26,7 @@ use snafu::{OptionExt, ResultExt}; use tokio::sync::Semaphore; use tokio::time::Instant; -use crate::common::{ObjectStoreConfig, new_fs_object_store}; +use crate::common::{ObjectStoreConfig, new_fs_object_store, validate_fs}; use crate::data::storage_export::{ AzblobBackend, FsBackend, GcsBackend, OssBackend, S3Backend, StorageExport, StorageType, }; @@ -159,6 +159,7 @@ impl ExportCommand { self.storage.build_azblob()?, ) } else if let Some(output_dir) = &self.output_dir { + validate_fs(&self.storage)?; ( StorageType::Fs(FsBackend::new(output_dir.clone())), new_fs_object_store(output_dir)?, @@ -694,26 +695,118 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_s3_missing_config() { - // Missing bucket + async fn test_export_command_build_with_s3_empty_access_key() { + // Test S3 with empty access key ID (empty string, not missing) let cmd = ExportCommand::parse_from([ "export", "--addr", "127.0.0.1:4000", "--s3", + "--s3-bucket", + "test-bucket", + "--s3-root", + "test-root", + "--s3-access-key-id", + "", // Empty string + "--s3-secret-access-key", + "test-secret", + "--s3-region", + "us-west-2", + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string().contains("S3 access key ID must be set"), + "Actual error: {}", + err + ); + } + } + + #[tokio::test] + async fn test_export_command_build_with_s3_missing_secret_key() { + // Test S3 with empty secret access key + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--s3", + "--s3-bucket", + "test-bucket", "--s3-root", "test-root", "--s3-access-key-id", "test-key", + // Missing --s3-secret-access-key + "--s3-region", + "us-west-2", + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string().contains("S3 secret access key must be set"), + "Actual error: {}", + err + ); + } + } + + #[tokio::test] + async fn test_export_command_build_with_s3_empty_root() { + // Empty root should be allowed (it's optional path component) + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--s3", + "--s3-bucket", + "test-bucket", + "--s3-root", + "", // Empty root is OK + "--s3-access-key-id", + "test-key", "--s3-secret-access-key", "test-secret", + "--s3-region", + "us-west-2", + ]); + + let result = 
cmd.build().await; + // Should succeed because root is not a required field + assert!( + result.is_ok(), + "Expected success but got: {:?}", + result.err() + ); + } + + #[tokio::test] + async fn test_export_command_build_with_s3_no_enable_flag() { + // Test that providing S3 config without --s3 flag fails + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + // Note: no --s3 flag + "--s3-bucket", + "test-bucket", + "--s3-access-key-id", + "test-key", + "--output-dir", + "/tmp/test", ]); let result = cmd.build().await; + // Should fail because S3 config is provided but --s3 is not enabled assert!(result.is_err()); if let Err(err) = result { assert!( - err.to_string().contains("S3 bucket must be set"), + err.to_string() + .contains("S3 configuration is set but --s3 is not enabled"), "Actual error: {}", err ); @@ -744,7 +837,7 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_oss_missing_config() { + async fn test_export_command_build_with_oss_missing_endpoint() { // Missing endpoint let cmd = ExportCommand::parse_from([ "export", @@ -772,6 +865,37 @@ mod tests { } } + #[tokio::test] + async fn test_export_command_build_with_oss_multiple_missing_fields() { + // Test OSS with multiple missing required fields + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--oss", + "--oss-bucket", + "test-bucket", + // Missing: root, access_key_id, access_key_secret, endpoint + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + if let Err(err) = result { + let err_str = err.to_string(); + // Should mention multiple missing fields + assert!( + err_str.contains("OSS"), + "Error should mention OSS: {}", + err_str + ); + assert!( + err_str.contains("must be set"), + "Error should mention required fields: {}", + err_str + ); + } + } + #[tokio::test] async fn test_export_command_build_with_gcs_success() { let cmd = ExportCommand::parse_from([ @@ -798,7 +922,7 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_gcs_missing_config() { + async fn test_export_command_build_with_gcs_missing_credential() { // Missing credentials let cmd = ExportCommand::parse_from([ "export", @@ -827,6 +951,39 @@ mod tests { } } + #[tokio::test] + async fn test_export_command_build_with_gcs_empty_root() { + // Test GCS when root is missing (should fail as it's required) + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs", + "--gcs-bucket", + "test-bucket", + "--gcs-root", + "", // Empty root + "--gcs-scope", + "test-scope", + "--gcs-credential-path", + "/path/to/credential", + "--gcs-credential", + "test-credential", + "--gcs-endpoint", + "https://storage.googleapis.com", + ]); + + let result = cmd.build().await; + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string().contains("GCS root must be set"), + "Actual error: {}", + err + ); + } + } + #[tokio::test] async fn test_export_command_build_with_azblob_success() { let cmd = ExportCommand::parse_from([ @@ -851,7 +1008,7 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_azblob_missing_config() { + async fn test_export_command_build_with_azblob_missing_account_key() { // Missing account key let cmd = ExportCommand::parse_from([ "export", @@ -880,8 +1037,34 @@ mod tests { } } + #[tokio::test] + async fn test_export_command_build_with_azblob_with_sas_token() { + // Test Azure Blob with SAS token + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + 
"127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-root", + "test-root", + "--azblob-account-name", + "test-account", + "--azblob-account-key", + "test-key", + "--azblob-endpoint", + "https://account.blob.core.windows.net", + "--azblob-sas-token", + "test-sas-token", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + #[test] - fn test_export_command_conflict() { + fn test_export_command_build_with_conflict() { // Try to enable both S3 and OSS let result = ExportCommand::try_parse_from(["export", "--addr", "127.0.0.1:4000", "--s3", "--oss"]); diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index 41ef3aa64d60..78b7a8f78899 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -15,12 +15,21 @@ use std::path::PathBuf; use async_trait::async_trait; -use common_base::secrets::ExposeSecret; +use common_base::secrets::{ExposeSecret, SecretString}; use crate::common::{ PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection, }; +/// Helper function to extract secret string from Option. +/// Returns empty string if None. +fn expose_optional_secret(secret: &Option) -> &str { + secret + .as_ref() + .map(|s| s.expose_secret().as_str()) + .unwrap_or("") +} + /// Helper function to format root path with leading slash if non-empty. fn format_root_path(root: &str) -> String { if root.is_empty() { @@ -112,11 +121,11 @@ impl StorageExport for S3Backend { let mut connection_options = vec![ format!( "ACCESS_KEY_ID='{}'", - self.config.s3_access_key_id.expose_secret() + expose_optional_secret(&self.config.s3_access_key_id) ), format!( "SECRET_ACCESS_KEY='{}'", - self.config.s3_secret_access_key.expose_secret() + expose_optional_secret(&self.config.s3_secret_access_key) ), ]; @@ -142,8 +151,8 @@ impl StorageExport for S3Backend { mask_secrets( sql.to_string(), &[ - self.config.s3_access_key_id.expose_secret(), - self.config.s3_secret_access_key.expose_secret(), + expose_optional_secret(&self.config.s3_access_key_id), + expose_optional_secret(&self.config.s3_secret_access_key), ], ) } @@ -170,11 +179,11 @@ impl StorageExport for OssBackend { let mut connection_options = vec![ format!( "ACCESS_KEY_ID='{}'", - self.config.oss_access_key_id.expose_secret() + expose_optional_secret(&self.config.oss_access_key_id) ), format!( "ACCESS_KEY_SECRET='{}'", - self.config.oss_access_key_secret.expose_secret() + expose_optional_secret(&self.config.oss_access_key_secret) ), ]; @@ -195,8 +204,8 @@ impl StorageExport for OssBackend { mask_secrets( sql.to_string(), &[ - self.config.oss_access_key_id.expose_secret(), - self.config.oss_access_key_secret.expose_secret(), + expose_optional_secret(&self.config.oss_access_key_id), + expose_optional_secret(&self.config.oss_access_key_secret), ], ) } @@ -224,18 +233,14 @@ impl StorageExport for GcsBackend { let mut connection_options = Vec::new(); - if !self.config.gcs_credential_path.expose_secret().is_empty() { - connection_options.push(format!( - "CREDENTIAL_PATH='{}'", - self.config.gcs_credential_path.expose_secret() - )); + let credential_path = expose_optional_secret(&self.config.gcs_credential_path); + if !credential_path.is_empty() { + connection_options.push(format!("CREDENTIAL_PATH='{}'", credential_path)); } - if !self.config.gcs_credential.expose_secret().is_empty() { - connection_options.push(format!( - "CREDENTIAL='{}'", - self.config.gcs_credential.expose_secret() - )); + let credential = 
expose_optional_secret(&self.config.gcs_credential); + if !credential.is_empty() { + connection_options.push(format!("CREDENTIAL='{}'", credential)); } if !self.config.gcs_endpoint.is_empty() { @@ -261,8 +266,8 @@ impl StorageExport for GcsBackend { mask_secrets( sql.to_string(), &[ - self.config.gcs_credential_path.expose_secret(), - self.config.gcs_credential.expose_secret(), + expose_optional_secret(&self.config.gcs_credential_path), + expose_optional_secret(&self.config.gcs_credential), ], ) } @@ -291,11 +296,11 @@ impl StorageExport for AzblobBackend { let mut connection_options = vec![ format!( "ACCOUNT_NAME='{}'", - self.config.azblob_account_name.expose_secret() + expose_optional_secret(&self.config.azblob_account_name) ), format!( "ACCOUNT_KEY='{}'", - self.config.azblob_account_key.expose_secret() + expose_optional_secret(&self.config.azblob_account_key) ), ]; @@ -321,8 +326,8 @@ impl StorageExport for AzblobBackend { mask_secrets( sql.to_string(), &[ - self.config.azblob_account_name.expose_secret(), - self.config.azblob_account_key.expose_secret(), + expose_optional_secret(&self.config.azblob_account_name), + expose_optional_secret(&self.config.azblob_account_key), ], ) } diff --git a/src/common/base/src/secrets.rs b/src/common/base/src/secrets.rs index be1f39e05fa7..55f569fe23ca 100644 --- a/src/common/base/src/secrets.rs +++ b/src/common/base/src/secrets.rs @@ -48,11 +48,7 @@ impl From for SecretString { impl Display for SecretString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.expose_secret().is_empty() { - write!(f, "") - } else { - write!(f, "SecretString([REDACTED])") - } + write!(f, "SecretString([REDACTED])") } } From 309ddeb208ee3c5c4f399a86085259870eac7c17 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Fri, 28 Nov 2025 11:44:06 +0800 Subject: [PATCH 07/12] refactor(cli): unify storage configuration for export command - Refactor storage field validation with macro Signed-off-by: McKnight22 --- src/cli/src/common/object_store.rs | 449 +++++++++++++++-------------- src/cli/src/data/export.rs | 7 +- 2 files changed, 242 insertions(+), 214 deletions(-) diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index bc76aa214aeb..f5db5ed8cc4c 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -43,18 +43,74 @@ impl IntoField for Option { } } -/// Check if an `Option` is effectively empty. -/// Returns `true` if: -/// - `None` (user didn't provide the argument) -/// - `Some("")` (user provided an empty string) -fn is_secret_empty(secret: &Option) -> bool { - secret.as_ref().is_none_or(|s| s.expose_secret().is_empty()) +/// Trait for checking if a field is effectively empty or provided. +/// +/// This trait provides a unified interface for validation across different field types. +/// It defines two key semantic checks: +/// +/// 1. **`is_empty()`**: Checks if the field has no meaningful value +/// - Used when backend is enabled to validate required fields +/// - `None`, `Some("")`, `false`, or `""` are considered empty +/// +/// 2. **`is_provided()`**: Checks if user explicitly provided the field +/// - Used when backend is disabled to detect configuration errors +/// - Different semantics for different types (see implementations) +trait FieldValidator { + /// Check if the field is empty (has no meaningful value). + fn is_empty(&self) -> bool; + + /// Check if the field was explicitly provided by the user. 
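+    ///
+    /// Note: the two checks are not strict complements. For `Option<String>`,
+    /// `Some("")` counts as provided (the flag was typed) yet is also empty;
+    /// for `Option<SecretString>`, an empty secret counts as not provided.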
+    fn is_provided(&self) -> bool;
 }
 
-/// Check if an `Option<SecretString>` is effectively non-empty.
-/// Returns `true` only if user provided a non-empty secret value.
-fn is_secret_provided(secret: &Option<SecretString>) -> bool {
-    !is_secret_empty(secret)
+/// String fields: empty if the string is empty
+impl FieldValidator for String {
+    fn is_empty(&self) -> bool {
+        self.is_empty()
+    }
+
+    fn is_provided(&self) -> bool {
+        !self.is_empty()
+    }
+}
+
+/// Bool fields: false is considered "empty", true is "provided"
+impl FieldValidator for bool {
+    fn is_empty(&self) -> bool {
+        !self
+    }
+
+    fn is_provided(&self) -> bool {
+        *self
+    }
+}
+
+/// Option<String> fields: None or empty content is empty
+/// IMPORTANT: is_provided() returns true for Some(_) regardless of content,
+/// to detect when user provided the argument (even if value is empty)
+impl FieldValidator for Option<String> {
+    fn is_empty(&self) -> bool {
+        self.as_ref().is_none_or(|s| s.is_empty())
+    }
+
+    fn is_provided(&self) -> bool {
+        // Key difference: Some("") is considered "provided" (user typed the flag)
+        // even though is_empty() would return true for it
+        self.is_some()
+    }
+}
+
+/// Option<SecretString> fields: None or empty secret is empty
+/// For secrets, Some("") is treated as "not provided" for both checks
+impl FieldValidator for Option<SecretString> {
+    fn is_empty(&self) -> bool {
+        self.as_ref().is_none_or(|s| s.expose_secret().is_empty())
+    }
+
+    fn is_provided(&self) -> bool {
+        // For secrets, empty string is equivalent to None
+        !self.is_empty()
+    }
 }
 
 macro_rules! wrap_with_clap_prefix {
@@ -204,16 +260,6 @@ pub struct ObjectStoreConfig {
 /// hasn't accidentally or incorrectly provided configuration for remote storage backends
 /// (S3, OSS, GCS, or Azure Blob) without enabling them.
 ///
-/// # Validation Rules
-///
-/// For each storage backend (S3, OSS, GCS, Azblob), this function validates:
-/// 1. **When backend is enabled** (e.g., `--s3`): All required fields must be non-empty
-/// 2. **When backend is disabled**: No configuration fields should be provided
-///
-/// The second rule is critical for filesystem usage: if a user provides something like
-/// `--s3-bucket my-bucket` without `--s3`, it likely indicates a configuration error
-/// that should be caught early.
-///
 /// # Examples
 ///
 /// Valid usage with local filesystem:
@@ -233,10 +279,7 @@ pub struct ObjectStoreConfig {
 /// Returns an error if any remote storage configuration is detected without the
 /// corresponding enable flag (--s3, --oss, --gcs, or --azblob).
 pub fn validate_fs(config: &ObjectStoreConfig) -> std::result::Result<(), BoxedError> {
-    config.validate_s3()?;
-    config.validate_oss()?;
-    config.validate_gcs()?;
-    config.validate_azblob()?;
+    config.validate()?;
     Ok(())
 }
 
@@ -272,228 +315,212 @@ macro_rules! gen_object_store_builder {
     };
 }
 
-impl ObjectStoreConfig {
-    gen_object_store_builder!(build_s3, s3, S3Connection, S3);
-
-    gen_object_store_builder!(build_oss, oss, OssConnection, Oss);
-
-    gen_object_store_builder!(build_gcs, gcs, GcsConnection, Gcs);
-
-    gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob);
-
-    pub fn validate_s3(&self) -> Result<(), BoxedError> {
-        let s3 = &self.s3;
-
-        if self.enable_s3 {
-            // Check required fields (root is optional for S3)
+/// Macro for declarative backend validation.
+///
+/// # Validation Rules
+///
+/// For each storage backend (S3, OSS, GCS, Azblob), this function validates:
+/// 1. **When backend is enabled** (e.g., `--s3`): All required fields must be non-empty
+/// 2. 
**When backend is disabled**: No configuration fields should be provided +/// +/// The second rule is critical for filesystem usage: if a user provides something like +/// `--s3-bucket my-bucket` without `--s3`, it likely indicates a configuration error +/// that should be caught early. +/// +/// This macro generates validation logic for storage backends with two main checks: +/// 1. When enabled: verify all required fields are non-empty +/// 2. When disabled: verify no configuration fields are provided +/// +/// # Syntax +/// +/// ```ignore +/// validate_backend!( +/// enable: self.enable_s3, +/// name: "S3", +/// required: [field1, field2, ...], +/// optional: [field3, field4, ...], +/// custom_validator: |missing| { ... } // optional +/// ) +/// ``` +/// +/// # Arguments +/// +/// - `enable`: Boolean expression indicating if backend is enabled +/// - `name`: Human-readable backend name for error messages +/// - `required`: Array of (field_ref, field_name) tuples for required fields +/// - `optional`: Array of field references that are optional (only checked when disabled) +/// - `custom_validator`: Optional closure for complex validation logic +/// +/// # Example +/// +/// ```ignore +/// validate_backend!( +/// enable: self.enable_s3, +/// name: "S3", +/// required: [ +/// (&self.s3.s3_bucket, "bucket"), +/// (&self.s3.s3_access_key_id, "access key ID"), +/// ], +/// optional: [ +/// &self.s3.s3_root, +/// &self.s3.s3_endpoint, +/// ] +/// ) +/// ``` +macro_rules! validate_backend { + ( + enable: $enable:expr, + name: $backend_name:expr, + required: [ $( ($field:expr, $field_name:expr) ),* $(,)? ], + optional: [ $( $opt_field:expr ),* $(,)? ] + $(, custom_validator: $custom_validator:expr)? + ) => {{ + if $enable { + // Check required fields when backend is enabled let mut missing = Vec::new(); - if s3.s3_bucket.is_empty() { - missing.push("bucket"); - } - if is_secret_empty(&s3.s3_access_key_id) { - missing.push("access key ID"); - } - if is_secret_empty(&s3.s3_secret_access_key) { - missing.push("secret access key"); - } + $( + if FieldValidator::is_empty($field) { + missing.push($field_name); + } + )* + + // Run custom validation if provided + $( + $custom_validator(&mut missing); + )? 
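+            // At this point `missing` lists every required field that was
+            // empty, so a single aggregated error can name them all below.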
if !missing.is_empty() { return Err(BoxedError::new( error::MissingConfigSnafu { msg: format!( - "S3 {} must be set when --s3 is enabled.", + "{} {} must be set when --{} is enabled.", + $backend_name, missing.join(", "), + $backend_name.to_lowercase().replace(" ", "") ), } .build(), )); } } else { - // When disabled, check that no meaningful configuration is provided - if !s3.s3_bucket.is_empty() - || !s3.s3_root.is_empty() - || s3.s3_endpoint.is_some() - || s3.s3_region.is_some() - || s3.s3_enable_virtual_host_style - || is_secret_provided(&s3.s3_access_key_id) - || is_secret_provided(&s3.s3_secret_access_key) - { + // Check that no configuration is provided when backend is disabled + #[allow(unused_assignments)] + let mut has_config = false; + $( + has_config = has_config || FieldValidator::is_provided($field); + )* + $( + has_config = has_config || FieldValidator::is_provided($opt_field); + )* + + if has_config { return Err(BoxedError::new( error::InvalidArgumentsSnafu { - msg: "S3 configuration is set but --s3 is not enabled.".to_string(), - } - .build(), - )); - } - } - - Ok(()) - } - - pub fn validate_oss(&self) -> Result<(), BoxedError> { - let oss = &self.oss; - - if self.enable_oss { - // Check required fields - let mut missing = Vec::new(); - if oss.oss_bucket.is_empty() { - missing.push("bucket"); - } - if oss.oss_root.is_empty() { - missing.push("root"); - } - if is_secret_empty(&oss.oss_access_key_id) { - missing.push("access key ID"); - } - if is_secret_empty(&oss.oss_access_key_secret) { - missing.push("access key secret"); - } - if oss.oss_endpoint.is_empty() { - missing.push("endpoint"); - } - - if !missing.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { msg: format!( - "OSS {} must be set when --oss is enabled.", - missing.join(", "), + "{} configuration is set but --{} is not enabled.", + $backend_name, + $backend_name.to_lowercase().replace(" ", "") ), } .build(), )); } - } else { - // When disabled, check that no meaningful configuration is provided - if !oss.oss_bucket.is_empty() - || !oss.oss_root.is_empty() - || !oss.oss_endpoint.is_empty() - || is_secret_provided(&oss.oss_access_key_id) - || is_secret_provided(&oss.oss_access_key_secret) - { - return Err(BoxedError::new( - error::InvalidArgumentsSnafu { - msg: "OSS configuration is set but --oss is not enabled.".to_string(), - } - .build(), - )); - } } Ok(()) - } + }}; +} - pub fn validate_gcs(&self) -> Result<(), BoxedError> { - let gcs = &self.gcs; +impl ObjectStoreConfig { + gen_object_store_builder!(build_s3, s3, S3Connection, S3); - if self.enable_gcs { - // Check required fields (excluding credentials) - let mut missing = Vec::new(); - if gcs.gcs_bucket.is_empty() { - missing.push("bucket"); - } - if gcs.gcs_root.is_empty() { - missing.push("root"); - } - if gcs.gcs_scope.is_empty() { - missing.push("scope"); - } - if gcs.gcs_endpoint.is_empty() { - missing.push("endpoint"); - } + gen_object_store_builder!(build_oss, oss, OssConnection, Oss); - // At least one of credential_path or credential must be provided (non-empty) - if is_secret_empty(&gcs.gcs_credential_path) && is_secret_empty(&gcs.gcs_credential) { - missing.push("credential path, credential"); - } + gen_object_store_builder!(build_gcs, gcs, GcsConnection, Gcs); - if !missing.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: format!( - "GCS {} must be set when --gcs is enabled.", - missing.join(", "), - ), - } - .build(), - )); - } - } else { - // When disabled, check that no meaningful 
configuration is provided - if !gcs.gcs_bucket.is_empty() - || !gcs.gcs_root.is_empty() - || !gcs.gcs_scope.is_empty() - || !gcs.gcs_endpoint.is_empty() - || is_secret_provided(&gcs.gcs_credential_path) - || is_secret_provided(&gcs.gcs_credential) - { - return Err(BoxedError::new( - error::InvalidArgumentsSnafu { - msg: "GCS configuration is set but --gcs is not enabled.".to_string(), - } - .build(), - )); - } - } + gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob); - Ok(()) + pub fn validate_s3(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: self.enable_s3, + name: "S3", + required: [ + (&self.s3.s3_bucket, "bucket"), + (&self.s3.s3_access_key_id, "access key ID"), + (&self.s3.s3_secret_access_key, "secret access key"), + ], + optional: [ + &self.s3.s3_root, + &self.s3.s3_endpoint, + &self.s3.s3_region, + &self.s3.s3_enable_virtual_host_style + ] + ) } - pub fn validate_azblob(&self) -> Result<(), BoxedError> { - let azblob = &self.azblob; - - if self.enable_azblob { - // Check required fields - let mut missing = Vec::new(); - if azblob.azblob_container.is_empty() { - missing.push("container"); - } - if azblob.azblob_root.is_empty() { - missing.push("root"); - } - if is_secret_empty(&azblob.azblob_account_name) { - missing.push("account name"); - } - if azblob.azblob_endpoint.is_empty() { - missing.push("endpoint"); - } + pub fn validate_oss(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: self.enable_oss, + name: "OSS", + required: [ + (&self.oss.oss_bucket, "bucket"), + (&self.oss.oss_root, "root"), + (&self.oss.oss_access_key_id, "access key ID"), + (&self.oss.oss_access_key_secret, "access key secret"), + (&self.oss.oss_endpoint, "endpoint"), + ], + optional: [] + ) + } - // account_key is only required if sas_token is not provided - if azblob.azblob_sas_token.is_none() && is_secret_empty(&azblob.azblob_account_key) { - missing.push("account key"); + pub fn validate_gcs(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: self.enable_gcs, + name: "GCS", + required: [ + (&self.gcs.gcs_bucket, "bucket"), + (&self.gcs.gcs_root, "root"), + (&self.gcs.gcs_scope, "scope"), + (&self.gcs.gcs_endpoint, "endpoint"), + ], + optional: [ + &self.gcs.gcs_credential_path, + &self.gcs.gcs_credential + ], + custom_validator: |missing: &mut Vec<&str>| { + // At least one of credential_path or credential must be provided + if self.gcs.gcs_credential_path.is_empty() + && self.gcs.gcs_credential.is_empty() + { + missing.push("credential path or credential"); + } } + ) + } - if !missing.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: format!( - "Azure Blob {} must be set when --azblob is enabled.", - missing.join(", "), - ), - } - .build(), - )); - } - } else { - // When disabled, check that no meaningful configuration is provided - if !azblob.azblob_container.is_empty() - || !azblob.azblob_root.is_empty() - || !azblob.azblob_endpoint.is_empty() - || azblob.azblob_sas_token.is_some() - || is_secret_provided(&azblob.azblob_account_name) - || is_secret_provided(&azblob.azblob_account_key) - { - return Err(BoxedError::new( - error::InvalidArgumentsSnafu { - msg: "Azure Blob configuration is set but --azblob is not enabled." 
-                            .to_string(),
-                    }
-                    .build(),
-                ));
+    pub fn validate_azblob(&self) -> Result<(), BoxedError> {
+        validate_backend!(
+            enable: self.enable_azblob,
+            name: "Azure Blob",
+            required: [
+                (&self.azblob.azblob_container, "container"),
+                (&self.azblob.azblob_root, "root"),
+                (&self.azblob.azblob_account_name, "account name"),
+                (&self.azblob.azblob_endpoint, "endpoint"),
+            ],
+            optional: [
+                &self.azblob.azblob_account_key,
+                &self.azblob.azblob_sas_token
+            ],
+            custom_validator: |missing: &mut Vec<&str>| {
+                // account_key is only required if sas_token is not provided
+                if self.azblob.azblob_sas_token.is_none()
+                    && self.azblob.azblob_account_key.is_empty()
+                {
+                    missing.push("account key (when sas_token is not provided)");
+                }
             }
-        }
-
-        Ok(())
+        )
     }
 
     pub fn validate(&self) -> Result<(), BoxedError> {
diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs
index 0288f6f2bd0f..c2921635c12c 100644
--- a/src/cli/src/data/export.rs
+++ b/src/cli/src/data/export.rs
@@ -944,7 +944,7 @@ mod tests {
         if let Err(err) = result {
             assert!(
                 err.to_string()
-                    .contains("GCS credential path, credential must be set"),
+                    .contains("GCS credential path or credential must be set"),
                 "Actual error: {}",
                 err
             );
@@ -1029,8 +1029,9 @@ mod tests {
         assert!(result.is_err());
         if let Err(err) = result {
             assert!(
-                err.to_string()
-                    .contains("Azure Blob account key must be set"),
+                err.to_string().contains(
+                    "Azure Blob account key (when sas_token is not provided) must be set"
+                ),
                 "Actual error: {}",
                 err
             );

From 3f810bb92b65958fe8ad7cbc124ef26d5f7b8357 Mon Sep 17 00:00:00 2001
From: McKnight22
Date: Fri, 28 Nov 2025 13:28:23 +0800
Subject: [PATCH 08/12] refactor(cli): unify storage configuration for export
 command

- Support GCS Application Default Credentials (ADC) in export, e.g. on GKE,
  Cloud Run, or in local development: enable ADC without requiring
  credential_path or credential to be present, and make endpoint optional in
  GCS validation (it defaults to https://storage.googleapis.com)

Signed-off-by: McKnight22
---
 src/cli/src/common/object_store.rs | 15 +++++----------
 src/cli/src/data/export.rs         | 18 +++++------------
 2 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs
index f5db5ed8cc4c..500181ba2c8c 100644
--- a/src/cli/src/common/object_store.rs
+++ b/src/cli/src/common/object_store.rs
@@ -481,20 +481,15 @@ impl ObjectStoreConfig {
             (&self.gcs.gcs_bucket, "bucket"),
             (&self.gcs.gcs_root, "root"),
             (&self.gcs.gcs_scope, "scope"),
-                (&self.gcs.gcs_endpoint, "endpoint"),
             ],
             optional: [
+                &self.gcs.gcs_endpoint,
                 &self.gcs.gcs_credential_path,
                 &self.gcs.gcs_credential
-            ],
-            custom_validator: |missing: &mut Vec<&str>| {
-                // At least one of credential_path or credential must be provided
-                if self.gcs.gcs_credential_path.is_empty()
-                    && self.gcs.gcs_credential.is_empty()
-                {
-                    missing.push("credential path or credential");
-                }
-            }
+            ]
+            // No custom_validator needed: GCS supports Application Default Credentials (ADC)
+            // where neither credential_path nor credential is required.
+            // Endpoint is also optional (defaults to https://storage.googleapis.com).
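+            // (ADC resolves credentials from the environment, e.g. the
+            // GOOGLE_APPLICATION_CREDENTIALS variable or the GCE/GKE metadata
+            // server, which covers the GKE/Cloud Run cases above.)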
) } diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index c2921635c12c..7c15cfbc1d21 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -922,8 +922,8 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_gcs_missing_credential() { - // Missing credentials + async fn test_export_command_build_with_gcs_adc_success() { + // Test GCS with Application Default Credentials (no explicit credentials provided) let cmd = ExportCommand::parse_from([ "export", "--addr", @@ -935,20 +935,12 @@ mod tests { "test-root", "--gcs-scope", "test-scope", - "--gcs-endpoint", - "https://storage.googleapis.com", + // No credential_path or credential + // No endpoint (optional) ]); let result = cmd.build().await; - assert!(result.is_err()); - if let Err(err) = result { - assert!( - err.to_string() - .contains("GCS credential path or credential must be set"), - "Actual error: {}", - err - ); - } + assert!(result.is_ok()); } #[tokio::test] From 7dbd4927680c421cbdc5eb511e19d634824b302d Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Sat, 29 Nov 2025 21:06:57 +0800 Subject: [PATCH 09/12] refactor(cli): unify storage configuration for export command This commit refactors the validation logic for object store configurations in the CLI to leverage clap features and reduce boilerplate. Key changes: - Update wrap_with_clap_prefix macro to use clap's requires attribute. This ensures that storage-specific options (e.g., --s3-bucket) are only accepted when the corresponding backend is enabled (e.g., --s3). - Simplify FieldValidator trait by removing the is_provided method, as dependency checks are now handled by clap. - Introduce validate_backend! macro to standardize the validation of required fields for enabled backends. - Refactor ExportCommand to remove explicit validation calls (validate_s3, etc.) and rely on the validation within backend constructors. - Add integration tests for ExportCommand to verify build success with S3, OSS, GCS, and Azblob configurations. Signed-off-by: McKnight22 --- src/cli/src/common.rs | 2 +- src/cli/src/common/object_store.rs | 459 +++++++++++-------------- src/cli/src/data/export.rs | 520 ++++++++++++++++++++++++----- src/cli/src/data/storage_export.rs | 31 +- 4 files changed, 636 insertions(+), 376 deletions(-) diff --git a/src/cli/src/common.rs b/src/cli/src/common.rs index b213c61e4c81..d6dc461b8986 100644 --- a/src/cli/src/common.rs +++ b/src/cli/src/common.rs @@ -17,6 +17,6 @@ mod store; pub use object_store::{ ObjectStoreConfig, PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, - PrefixedS3Connection, new_fs_object_store, validate_fs, + PrefixedS3Connection, new_fs_object_store, }; pub use store::StoreConfig; diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs index 500181ba2c8c..4e746dbeb346 100644 --- a/src/cli/src/common/object_store.rs +++ b/src/cli/src/common/object_store.rs @@ -43,24 +43,14 @@ impl IntoField for Option { } } -/// Trait for checking if a field is effectively empty or provided. +/// Trait for checking if a field is effectively empty. /// -/// This trait provides a unified interface for validation across different field types. -/// It defines two key semantic checks: -/// -/// 1. **`is_empty()`**: Checks if the field has no meaningful value -/// - Used when backend is enabled to validate required fields -/// - `None`, `Some("")`, `false`, or `""` are considered empty -/// -/// 2. 
**`is_provided()`**: Checks if user explicitly provided the field -/// - Used when backend is disabled to detect configuration errors -/// - Different semantics for different types (see implementations) +/// **`is_empty()`**: Checks if the field has no meaningful value +/// - Used when backend is enabled to validate required fields +/// - `None`, `Some("")`, `false`, or `""` are considered empty trait FieldValidator { /// Check if the field is empty (has no meaningful value). fn is_empty(&self) -> bool; - - /// Check if the field was explicitly provided by the user. - fn is_provided(&self) -> bool; } /// String fields: empty if the string is empty @@ -68,10 +58,6 @@ impl FieldValidator for String { fn is_empty(&self) -> bool { self.is_empty() } - - fn is_provided(&self) -> bool { - !self.is_empty() - } } /// Bool fields: false is considered "empty", true is "provided" @@ -79,25 +65,13 @@ impl FieldValidator for bool { fn is_empty(&self) -> bool { !self } - - fn is_provided(&self) -> bool { - *self - } } /// Option fields: None or empty content is empty -/// IMPORTANT: is_provided() returns true for Some(_) regardless of content, -/// to detect when user provided the argument (even if value is empty) impl FieldValidator for Option { fn is_empty(&self) -> bool { self.as_ref().is_none_or(|s| s.is_empty()) } - - fn is_provided(&self) -> bool { - // Key difference: Some("") is considered "provided" (user typed the flag) - // even though is_empty() would return true for it - self.is_some() - } } /// Option fields: None or empty secret is empty @@ -106,16 +80,11 @@ impl FieldValidator for Option { fn is_empty(&self) -> bool { self.as_ref().is_none_or(|s| s.expose_secret().is_empty()) } - - fn is_provided(&self) -> bool { - // For secrets, empty string is equivalent to None - !self.is_empty() - } } macro_rules! wrap_with_clap_prefix { ( - $new_name:ident, $prefix:literal, $base:ty, { + $new_name:ident, $prefix:literal, $enable_flag:literal, $base:ty, { $( $( #[doc = $doc:expr] )? $( #[alias = $alias:literal] )? $field:ident : $type:ty $( = $default:expr )? ),* $(,)? } ) => { @@ -125,7 +94,7 @@ macro_rules! wrap_with_clap_prefix { $( $( #[doc = $doc] )? $( #[clap(alias = $alias)] )? - #[clap(long $(, default_value_t = $default )? )] + #[clap(long, requires = $enable_flag $(, default_value_t = $default )? )] pub [<$prefix $field>]: $type, )* } @@ -142,9 +111,90 @@ macro_rules! wrap_with_clap_prefix { }; } +/// Macro for declarative backend validation. +/// +/// # Validation Rules +/// +/// For each storage backend (S3, OSS, GCS, Azblob), this function validates: +/// **When backend is enabled** (e.g., `--s3`): All required fields must be non-empty +/// +/// Note: When backend is disabled, clap's `requires` attribute ensures no configuration +/// fields can be provided at parse time. +/// +/// # Syntax +/// +/// ```ignore +/// validate_backend!( +/// enable: self.enable_s3, +/// name: "S3", +/// required: [(field1, "name1"), (field2, "name2"), ...], +/// custom_validator: |missing| { ... 
} // optional +/// ) +/// ``` +/// +/// # Arguments +/// +/// - `enable`: Boolean expression indicating if backend is enabled +/// - `name`: Human-readable backend name for error messages +/// - `required`: Array of (field_ref, field_name) tuples for required fields +/// - `custom_validator`: Optional closure for complex validation logic +/// +/// # Example +/// +/// ```ignore +/// validate_backend!( +/// enable: self.enable_s3, +/// name: "S3", +/// required: [ +/// (&self.s3.s3_bucket, "bucket"), +/// (&self.s3.s3_access_key_id, "access key ID"), +/// ] +/// ) +/// ``` +macro_rules! validate_backend { + ( + enable: $enable:expr, + name: $backend_name:expr, + required: [ $( ($field:expr, $field_name:expr) ),* $(,)? ] + $(, custom_validator: $custom_validator:expr)? + ) => {{ + if $enable { + // Check required fields when backend is enabled + let mut missing = Vec::new(); + $( + if FieldValidator::is_empty($field) { + missing.push($field_name); + } + )* + + // Run custom validation if provided + $( + $custom_validator(&mut missing); + )? + + if !missing.is_empty() { + return Err(BoxedError::new( + error::MissingConfigSnafu { + msg: format!( + "{} {} must be set when --{} is enabled.", + $backend_name, + missing.join(", "), + $backend_name.to_lowercase().replace(" ", "") + ), + } + .build(), + )); + } + } + + Ok(()) + }}; +} + wrap_with_clap_prefix! { PrefixedAzblobConnection, "azblob-", + "enable_azblob", AzblobConnection, { #[doc = "The container of the object store."] @@ -162,9 +212,33 @@ wrap_with_clap_prefix! { } } +impl PrefixedAzblobConnection { + pub fn validate(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: true, + name: "Azure Blob", + required: [ + (&self.azblob_container, "container"), + (&self.azblob_root, "root"), + (&self.azblob_account_name, "account name"), + (&self.azblob_endpoint, "endpoint"), + ], + custom_validator: |missing: &mut Vec<&str>| { + // account_key is only required if sas_token is not provided + if self.azblob_sas_token.is_none() + && self.azblob_account_key.is_empty() + { + missing.push("account key (when sas_token is not provided)"); + } + } + ) + } +} + wrap_with_clap_prefix! { PrefixedS3Connection, "s3-", + "enable_s3", S3Connection, { #[doc = "The bucket of the object store."] @@ -184,9 +258,25 @@ wrap_with_clap_prefix! { } } +impl PrefixedS3Connection { + pub fn validate(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: true, + name: "S3", + required: [ + (&self.s3_bucket, "bucket"), + (&self.s3_access_key_id, "access key ID"), + (&self.s3_secret_access_key, "secret access key"), + (&self.s3_region, "region"), + ] + ) + } +} + wrap_with_clap_prefix! { PrefixedOssConnection, "oss-", + "enable_oss", OssConnection, { #[doc = "The bucket of the object store."] @@ -202,9 +292,25 @@ wrap_with_clap_prefix! { } } +impl PrefixedOssConnection { + pub fn validate(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: true, + name: "OSS", + required: [ + (&self.oss_bucket, "bucket"), + (&self.oss_access_key_id, "access key ID"), + (&self.oss_access_key_secret, "access key secret"), + (&self.oss_endpoint, "endpoint"), + ] + ) + } +} + wrap_with_clap_prefix! { PrefixedGcsConnection, "gcs-", + "enable_gcs", GcsConnection, { #[doc = "The root of the object store."] @@ -222,67 +328,70 @@ wrap_with_clap_prefix! { } } -/// common config for object store. 
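A minimal, self-contained sketch (names are illustrative, not from this patch) of the parse-time dependency that `requires = $enable_flag` generates for every prefixed field:

```rust
use clap::Parser;

#[derive(Parser, Debug)]
struct Demo {
    /// Enables the S3 backend.
    #[clap(long = "s3")]
    enable_s3: bool,

    /// Only accepted together with --s3, via clap's `requires` attribute.
    #[clap(long, requires = "enable_s3")]
    s3_bucket: Option<String>,
}

fn main() {
    // Rejected at parse time: --s3-bucket without --s3.
    let err = Demo::try_parse_from(["demo", "--s3-bucket", "b"]).unwrap_err();
    assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument);

    // Accepted once the enable flag is present.
    let ok = Demo::try_parse_from(["demo", "--s3", "--s3-bucket", "b"]).unwrap();
    assert!(ok.enable_s3);
}
```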
+impl PrefixedGcsConnection { + pub fn validate(&self) -> Result<(), BoxedError> { + validate_backend!( + enable: true, + name: "GCS", + required: [ + (&self.gcs_bucket, "bucket"), + (&self.gcs_root, "root"), + (&self.gcs_scope, "scope"), + ] + // No custom_validator needed: GCS supports Application Default Credentials (ADC) + // where neither credential_path nor credential is required. + // Endpoint is also optional (defaults to https://storage.googleapis.com). + ) + } +} + +/// Common config for object store. +/// +/// # Dependency Enforcement +/// +/// Each backend's configuration fields (e.g., `--s3-bucket`) requires its corresponding +/// enable flag (e.g., `--s3`) to be present. This is enforced by `clap` at parse time +/// using the `requires` attribute. +/// +/// For example, attempting to use `--s3-bucket my-bucket` without `--s3` will result in: +/// ```text +/// error: The argument '--s3-bucket ' requires '--s3' +/// ``` +/// +/// This ensures that users cannot accidentally provide backend-specific configuration +/// without explicitly enabling that backend. #[derive(clap::Parser, Debug, Clone, PartialEq, Default)] #[clap(group(clap::ArgGroup::new("storage_backend").required(false).multiple(false)))] pub struct ObjectStoreConfig { /// Whether to use S3 object store. - #[clap(long, alias = "s3", group = "storage_backend")] + #[clap(long = "s3", group = "storage_backend")] pub enable_s3: bool, #[clap(flatten)] pub s3: PrefixedS3Connection, /// Whether to use OSS. - #[clap(long, alias = "oss", group = "storage_backend")] + #[clap(long = "oss", group = "storage_backend")] pub enable_oss: bool, #[clap(flatten)] pub oss: PrefixedOssConnection, /// Whether to use GCS. - #[clap(long, alias = "gcs", group = "storage_backend")] + #[clap(long = "gcs", group = "storage_backend")] pub enable_gcs: bool, #[clap(flatten)] pub gcs: PrefixedGcsConnection, /// Whether to use Azure Blob. - #[clap(long, alias = "azblob", group = "storage_backend")] + #[clap(long = "azblob", group = "storage_backend")] pub enable_azblob: bool, #[clap(flatten)] pub azblob: PrefixedAzblobConnection, } -/// This function is called when the user chooses to use local filesystem storage -/// (via `--output-dir`) instead of a remote storage backend. It ensures that the user -/// hasn't accidentally or incorrectly provided configuration for remote storage backends -/// (S3, OSS, GCS, or Azure Blob) without enabling them. -/// -/// # Examples -/// -/// Valid usage with local filesystem: -/// ```bash -/// # No remote storage config provided - OK -/// export --output-dir /tmp/data --addr localhost:4000 -/// ``` -/// -/// Invalid usage (caught by this function): -/// ```bash -/// # ERROR: S3 config provided but --s3 not enabled -/// export --output-dir /tmp/data --s3-bucket my-bucket --addr localhost:4000 -/// ``` -/// -/// # Errors -/// -/// Returns an error if any remote storage configuration is detected without the -/// corresponding enable flag (--s3, --oss, --gcs, or --azblob). -pub fn validate_fs(config: &ObjectStoreConfig) -> std::result::Result<(), BoxedError> { - config.validate()?; - Ok(()) -} - /// Creates a new file system object store. pub fn new_fs_object_store(root: &str) -> std::result::Result { let builder = Fs::default().root(root); @@ -315,122 +424,6 @@ macro_rules! gen_object_store_builder { }; } -/// Macro for declarative backend validation. -/// -/// # Validation Rules -/// -/// For each storage backend (S3, OSS, GCS, Azblob), this function validates: -/// 1. 
**When backend is enabled** (e.g., `--s3`): All required fields must be non-empty -/// 2. **When backend is disabled**: No configuration fields should be provided -/// -/// The second rule is critical for filesystem usage: if a user provides something like -/// `--s3-bucket my-bucket` without `--s3`, it likely indicates a configuration error -/// that should be caught early. -/// -/// This macro generates validation logic for storage backends with two main checks: -/// 1. When enabled: verify all required fields are non-empty -/// 2. When disabled: verify no configuration fields are provided -/// -/// # Syntax -/// -/// ```ignore -/// validate_backend!( -/// enable: self.enable_s3, -/// name: "S3", -/// required: [field1, field2, ...], -/// optional: [field3, field4, ...], -/// custom_validator: |missing| { ... } // optional -/// ) -/// ``` -/// -/// # Arguments -/// -/// - `enable`: Boolean expression indicating if backend is enabled -/// - `name`: Human-readable backend name for error messages -/// - `required`: Array of (field_ref, field_name) tuples for required fields -/// - `optional`: Array of field references that are optional (only checked when disabled) -/// - `custom_validator`: Optional closure for complex validation logic -/// -/// # Example -/// -/// ```ignore -/// validate_backend!( -/// enable: self.enable_s3, -/// name: "S3", -/// required: [ -/// (&self.s3.s3_bucket, "bucket"), -/// (&self.s3.s3_access_key_id, "access key ID"), -/// ], -/// optional: [ -/// &self.s3.s3_root, -/// &self.s3.s3_endpoint, -/// ] -/// ) -/// ``` -macro_rules! validate_backend { - ( - enable: $enable:expr, - name: $backend_name:expr, - required: [ $( ($field:expr, $field_name:expr) ),* $(,)? ], - optional: [ $( $opt_field:expr ),* $(,)? ] - $(, custom_validator: $custom_validator:expr)? - ) => {{ - if $enable { - // Check required fields when backend is enabled - let mut missing = Vec::new(); - $( - if FieldValidator::is_empty($field) { - missing.push($field_name); - } - )* - - // Run custom validation if provided - $( - $custom_validator(&mut missing); - )? 
- - if !missing.is_empty() { - return Err(BoxedError::new( - error::MissingConfigSnafu { - msg: format!( - "{} {} must be set when --{} is enabled.", - $backend_name, - missing.join(", "), - $backend_name.to_lowercase().replace(" ", "") - ), - } - .build(), - )); - } - } else { - // Check that no configuration is provided when backend is disabled - #[allow(unused_assignments)] - let mut has_config = false; - $( - has_config = has_config || FieldValidator::is_provided($field); - )* - $( - has_config = has_config || FieldValidator::is_provided($opt_field); - )* - - if has_config { - return Err(BoxedError::new( - error::InvalidArgumentsSnafu { - msg: format!( - "{} configuration is set but --{} is not enabled.", - $backend_name, - $backend_name.to_lowercase().replace(" ", "") - ), - } - .build(), - )); - } - } - - Ok(()) - }}; -} - impl ObjectStoreConfig { gen_object_store_builder!(build_s3, s3, S3Connection, S3); @@ -440,89 +433,19 @@ impl ObjectStoreConfig { gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob); - pub fn validate_s3(&self) -> Result<(), BoxedError> { - validate_backend!( - enable: self.enable_s3, - name: "S3", - required: [ - (&self.s3.s3_bucket, "bucket"), - (&self.s3.s3_access_key_id, "access key ID"), - (&self.s3.s3_secret_access_key, "secret access key"), - ], - optional: [ - &self.s3.s3_root, - &self.s3.s3_endpoint, - &self.s3.s3_region, - &self.s3.s3_enable_virtual_host_style - ] - ) - } - - pub fn validate_oss(&self) -> Result<(), BoxedError> { - validate_backend!( - enable: self.enable_oss, - name: "OSS", - required: [ - (&self.oss.oss_bucket, "bucket"), - (&self.oss.oss_root, "root"), - (&self.oss.oss_access_key_id, "access key ID"), - (&self.oss.oss_access_key_secret, "access key secret"), - (&self.oss.oss_endpoint, "endpoint"), - ], - optional: [] - ) - } - - pub fn validate_gcs(&self) -> Result<(), BoxedError> { - validate_backend!( - enable: self.enable_gcs, - name: "GCS", - required: [ - (&self.gcs.gcs_bucket, "bucket"), - (&self.gcs.gcs_root, "root"), - (&self.gcs.gcs_scope, "scope"), - ], - optional: [ - &self.gcs.gcs_endpoint, - &self.gcs.gcs_credential_path, - &self.gcs.gcs_credential - ] - // No custom_validator needed: GCS supports Application Default Credentials (ADC) - // where neither credential_path nor credential is required. - // Endpoint is also optional (defaults to https://storage.googleapis.com). 
- ) - } - - pub fn validate_azblob(&self) -> Result<(), BoxedError> { - validate_backend!( - enable: self.enable_azblob, - name: "Azure Blob", - required: [ - (&self.azblob.azblob_container, "container"), - (&self.azblob.azblob_root, "root"), - (&self.azblob.azblob_account_name, "account name"), - (&self.azblob.azblob_endpoint, "endpoint"), - ], - optional: [ - &self.azblob.azblob_account_key, - &self.azblob.azblob_sas_token - ], - custom_validator: |missing: &mut Vec<&str>| { - // account_key is only required if sas_token is not provided - if self.azblob.azblob_sas_token.is_none() - && self.azblob.azblob_account_key.is_empty() - { - missing.push("account key (when sas_token is not provided)"); - } - } - ) - } - pub fn validate(&self) -> Result<(), BoxedError> { - self.validate_s3()?; - self.validate_oss()?; - self.validate_gcs()?; - self.validate_azblob()?; + if self.enable_s3 { + self.s3.validate()?; + } + if self.enable_oss { + self.oss.validate()?; + } + if self.enable_gcs { + self.gcs.validate()?; + } + if self.enable_azblob { + self.azblob.validate()?; + } Ok(()) } diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index 7c15cfbc1d21..f5c030aa1121 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -26,7 +26,7 @@ use snafu::{OptionExt, ResultExt}; use tokio::sync::Semaphore; use tokio::time::Instant; -use crate::common::{ObjectStoreConfig, new_fs_object_store, validate_fs}; +use crate::common::{ObjectStoreConfig, new_fs_object_store}; use crate::data::storage_export::{ AzblobBackend, FsBackend, GcsBackend, OssBackend, S3Backend, StorageExport, StorageType, }; @@ -135,31 +135,26 @@ impl ExportCommand { pub async fn build(&self) -> std::result::Result, BoxedError> { // Determine storage type let (storage_type, operator) = if self.storage.enable_s3 { - self.storage.validate_s3()?; ( - StorageType::S3(S3Backend::new(self.storage.s3.clone())), + StorageType::S3(S3Backend::new(self.storage.s3.clone())?), self.storage.build_s3()?, ) } else if self.storage.enable_oss { - self.storage.validate_oss()?; ( - StorageType::Oss(OssBackend::new(self.storage.oss.clone())), + StorageType::Oss(OssBackend::new(self.storage.oss.clone())?), self.storage.build_oss()?, ) } else if self.storage.enable_gcs { - self.storage.validate_gcs()?; ( - StorageType::Gcs(GcsBackend::new(self.storage.gcs.clone())), + StorageType::Gcs(GcsBackend::new(self.storage.gcs.clone())?), self.storage.build_gcs()?, ) } else if self.storage.enable_azblob { - self.storage.validate_azblob()?; ( - StorageType::Azblob(AzblobBackend::new(self.storage.azblob.clone())), + StorageType::Azblob(AzblobBackend::new(self.storage.azblob.clone())?), self.storage.build_azblob()?, ) } else if let Some(output_dir) = &self.output_dir { - validate_fs(&self.storage)?; ( StorageType::Fs(FsBackend::new(output_dir.clone())), new_fs_object_store(output_dir)?, @@ -651,6 +646,8 @@ mod tests { use super::*; + // ==================== Basic Success Cases ==================== + #[tokio::test] async fn test_export_command_build_with_local_fs() { let temp_dir = create_temp_dir("test_export_local_fs"); @@ -694,6 +691,220 @@ mod tests { assert!(result.is_ok()); } + #[tokio::test] + async fn test_export_command_build_with_oss_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--oss", + "--oss-bucket", + "test-bucket", + "--oss-root", + "test-root", + "--oss-access-key-id", + "test-key-id", + "--oss-access-key-secret", + "test-secret", + "--oss-endpoint", + 
"https://oss.example.com", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_gcs_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs", + "--gcs-bucket", + "test-bucket", + "--gcs-root", + "test-root", + "--gcs-scope", + "test-scope", + "--gcs-credential-path", + "/path/to/credential", + "--gcs-credential", + "test-credential-content", + "--gcs-endpoint", + "https://storage.googleapis.com", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_gcs_adc_success() { + // Test GCS with Application Default Credentials (no explicit credentials provided) + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs", + "--gcs-bucket", + "test-bucket", + "--gcs-root", + "test-root", + "--gcs-scope", + "test-scope", + // No credential_path or credential + // No endpoint (optional) + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_azblob_success() { + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-root", + "test-root", + "--azblob-account-name", + "test-account", + "--azblob-account-key", + "test-key", + "--azblob-endpoint", + "https://account.blob.core.windows.net", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_export_command_build_with_azblob_with_sas_token() { + // Test Azure Blob with SAS token + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-root", + "test-root", + "--azblob-account-name", + "test-account", + "--azblob-account-key", + "test-key", + "--azblob-endpoint", + "https://account.blob.core.windows.net", + "--azblob-sas-token", + "test-sas-token", + ]); + + let result = cmd.build().await; + assert!(result.is_ok()); + } + + // ==================== Gap 1: Parse-time dependency checks ==================== + + #[test] + fn test_export_command_build_with_conflict() { + // Try to enable both S3 and OSS + let result = + ExportCommand::try_parse_from(["export", "--addr", "127.0.0.1:4000", "--s3", "--oss"]); + + assert!(result.is_err()); + let err = result.unwrap_err(); + // clap error for conflicting arguments + assert!(err.kind() == clap::error::ErrorKind::ArgumentConflict); + } + + #[tokio::test] + async fn test_export_command_build_with_s3_no_enable_flag() { + // Test that providing S3 config without --s3 flag fails + let result = ExportCommand::try_parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + // Note: no --s3 flag + "--s3-bucket", + "test-bucket", + "--s3-access-key-id", + "test-key", + "--output-dir", + "/tmp/test", + ]); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument); + assert!(err.to_string().contains("--s3")); + } + + #[tokio::test] + async fn test_export_command_build_with_oss_no_enable_flag() { + // Test that providing OSS config without --oss flag fails at parse time + let result = ExportCommand::try_parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--oss-bucket", + "test-bucket", + "--output-dir", + "/tmp/test", + ]); + + assert!(result.is_err()); + let err = result.unwrap_err(); 
+ assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument); + assert!(err.to_string().contains("--oss")); + } + + #[tokio::test] + async fn test_export_command_build_with_gcs_no_enable_flag() { + // Test that providing GCS config without --gcs flag fails at parse time + let result = ExportCommand::try_parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs-bucket", + "test-bucket", + "--output-dir", + "/tmp/test", + ]); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument); + assert!(err.to_string().contains("--gcs")); + } + + #[tokio::test] + async fn test_export_command_build_with_azblob_no_enable_flag() { + // Test that providing Azure Blob config without --azblob flag fails at parse time + let result = ExportCommand::try_parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob-container", + "test-container", + "--output-dir", + "/tmp/test", + ]); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument); + assert!(err.to_string().contains("--azblob")); + } + + // ==================== Gap 2: Empty string vs missing tests ==================== + #[tokio::test] async fn test_export_command_build_with_s3_empty_access_key() { // Test S3 with empty access key ID (empty string, not missing) @@ -785,36 +996,8 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_s3_no_enable_flag() { - // Test that providing S3 config without --s3 flag fails - let cmd = ExportCommand::parse_from([ - "export", - "--addr", - "127.0.0.1:4000", - // Note: no --s3 flag - "--s3-bucket", - "test-bucket", - "--s3-access-key-id", - "test-key", - "--output-dir", - "/tmp/test", - ]); - - let result = cmd.build().await; - // Should fail because S3 config is provided but --s3 is not enabled - assert!(result.is_err()); - if let Err(err) = result { - assert!( - err.to_string() - .contains("S3 configuration is set but --s3 is not enabled"), - "Actual error: {}", - err - ); - } - } - - #[tokio::test] - async fn test_export_command_build_with_oss_success() { + async fn test_export_command_build_with_oss_empty_access_key_id() { + // Test OSS with empty access_key_id (empty string, not missing) let cmd = ExportCommand::parse_from([ "export", "--addr", @@ -822,10 +1005,8 @@ mod tests { "--oss", "--oss-bucket", "test-bucket", - "--oss-root", - "test-root", "--oss-access-key-id", - "test-key-id", + "", // Empty string "--oss-access-key-secret", "test-secret", "--oss-endpoint", @@ -833,7 +1014,14 @@ mod tests { ]); let result = cmd.build().await; - assert!(result.is_ok()); + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string().contains("OSS access key ID must be set"), + "Actual error: {}", + err + ); + } } #[tokio::test] @@ -897,50 +1085,30 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_gcs_success() { + async fn test_export_command_build_with_gcs_empty_bucket() { + // Test GCS with empty bucket let cmd = ExportCommand::parse_from([ "export", "--addr", "127.0.0.1:4000", "--gcs", "--gcs-bucket", - "test-bucket", + "", // Empty bucket "--gcs-root", "test-root", "--gcs-scope", "test-scope", - "--gcs-credential-path", - "/path/to/credential", - "--gcs-credential", - "test-credential-content", - "--gcs-endpoint", - "https://storage.googleapis.com", ]); let result = cmd.build().await; - assert!(result.is_ok()); - } - - #[tokio::test] - async fn 
test_export_command_build_with_gcs_adc_success() { - // Test GCS with Application Default Credentials (no explicit credentials provided) - let cmd = ExportCommand::parse_from([ - "export", - "--addr", - "127.0.0.1:4000", - "--gcs", - "--gcs-bucket", - "test-bucket", - "--gcs-root", - "test-root", - "--gcs-scope", - "test-scope", - // No credential_path or credential - // No endpoint (optional) - ]); - - let result = cmd.build().await; - assert!(result.is_ok()); + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string().contains("GCS bucket must be set"), + "Actual error: {}", + err + ); + } } #[tokio::test] @@ -977,7 +1145,8 @@ mod tests { } #[tokio::test] - async fn test_export_command_build_with_azblob_success() { + async fn test_export_command_build_with_azblob_empty_account_name() { + // Test Azure Blob with empty account_name let cmd = ExportCommand::parse_from([ "export", "--addr", @@ -988,7 +1157,7 @@ mod tests { "--azblob-root", "test-root", "--azblob-account-name", - "test-account", + "", // Empty account name "--azblob-account-key", "test-key", "--azblob-endpoint", @@ -996,7 +1165,15 @@ mod tests { ]); let result = cmd.build().await; - assert!(result.is_ok()); + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string() + .contains("Azure Blob account name must be set"), + "Actual error: {}", + err + ); + } } #[tokio::test] @@ -1030,9 +1207,94 @@ mod tests { } } + // ==================== Gap 3: Boundary cases ==================== + #[tokio::test] - async fn test_export_command_build_with_azblob_with_sas_token() { - // Test Azure Blob with SAS token + async fn test_export_command_build_with_no_storage() { + // No output-dir and no backend - should fail + let cmd = ExportCommand::parse_from(["export", "--addr", "127.0.0.1:4000"]); + + let result = cmd.build().await; + assert!(result.is_err()); + if let Err(err) = result { + assert!( + err.to_string().contains("Output directory not set"), + "Actual error: {}", + err + ); + } + } + + #[tokio::test] + async fn test_export_command_build_with_s3_minimal_config() { + // S3 with only required fields (no optional fields) + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--s3", + "--s3-bucket", + "test-bucket", + "--s3-access-key-id", + "test-key", + "--s3-secret-access-key", + "test-secret", + "--s3-region", + "us-west-2", + // No root, endpoint, or enable_virtual_host_style + ]); + + let result = cmd.build().await; + assert!(result.is_ok(), "Minimal S3 config should succeed"); + } + + #[tokio::test] + async fn test_export_command_build_with_oss_minimal_config() { + // OSS with only required fields + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--oss", + "--oss-bucket", + "test-bucket", + "--oss-access-key-id", + "test-key-id", + "--oss-access-key-secret", + "test-secret", + "--oss-endpoint", + "https://oss.example.com", + // No root + ]); + + let result = cmd.build().await; + assert!(result.is_ok(), "Minimal OSS config should succeed"); + } + + #[tokio::test] + async fn test_export_command_build_with_gcs_minimal_config() { + // GCS with only required fields (using ADC) + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--gcs", + "--gcs-bucket", + "test-bucket", + "--gcs-root", + "test-root", + "--gcs-scope", + "test-scope", + // No credential_path, credential, or endpoint + ]); + + let result = cmd.build().await; + assert!(result.is_ok(), "Minimal GCS config should succeed"); 
+ } + + #[tokio::test] + async fn test_export_command_build_with_azblob_minimal_config() { + // Azure Blob with only required fields let cmd = ExportCommand::parse_from([ "export", "--addr", @@ -1048,23 +1310,101 @@ mod tests { "test-key", "--azblob-endpoint", "https://account.blob.core.windows.net", + // No sas_token + ]); + + let result = cmd.build().await; + assert!(result.is_ok(), "Minimal Azure Blob config should succeed"); + } + + #[tokio::test] + async fn test_export_command_build_with_local_and_s3() { + // Both output-dir and S3 - S3 should take precedence + let temp_dir = create_temp_dir("test_export_local_and_s3"); + let output_dir = temp_dir.path().to_str().unwrap(); + + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--output-dir", + output_dir, + "--s3", + "--s3-bucket", + "test-bucket", + "--s3-access-key-id", + "test-key", + "--s3-secret-access-key", + "test-secret", + "--s3-region", + "us-west-2", + ]); + + let result = cmd.build().await; + assert!( + result.is_ok(), + "S3 should be selected when both are provided" + ); + } + + // ==================== Gap 4: Custom validation (Azure Blob) ==================== + + #[tokio::test] + async fn test_export_command_build_with_azblob_only_sas_token() { + // Azure Blob with sas_token but no account_key - should succeed + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-root", + "test-root", + "--azblob-account-name", + "test-account", + "--azblob-endpoint", + "https://account.blob.core.windows.net", "--azblob-sas-token", "test-sas-token", + // No account_key ]); let result = cmd.build().await; - assert!(result.is_ok()); + assert!( + result.is_ok(), + "Azure Blob with only sas_token should succeed: {:?}", + result.err() + ); } - #[test] - fn test_export_command_build_with_conflict() { - // Try to enable both S3 and OSS - let result = - ExportCommand::try_parse_from(["export", "--addr", "127.0.0.1:4000", "--s3", "--oss"]); + #[tokio::test] + async fn test_export_command_build_with_azblob_empty_account_key_with_sas() { + // Azure Blob with empty account_key but valid sas_token - should succeed + let cmd = ExportCommand::parse_from([ + "export", + "--addr", + "127.0.0.1:4000", + "--azblob", + "--azblob-container", + "test-container", + "--azblob-root", + "test-root", + "--azblob-account-name", + "test-account", + "--azblob-account-key", + "", // Empty account_key is OK if sas_token is provided + "--azblob-endpoint", + "https://account.blob.core.windows.net", + "--azblob-sas-token", + "test-sas-token", + ]); - assert!(result.is_err()); - let err = result.unwrap_err(); - // clap error for conflicting arguments - assert!(err.kind() == clap::error::ErrorKind::ArgumentConflict); + let result = cmd.build().await; + assert!( + result.is_ok(), + "Azure Blob with empty account_key but sas_token should succeed: {:?}", + result.err() + ); } } diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index 78b7a8f78899..c7456bc8d17e 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -16,6 +16,7 @@ use std::path::PathBuf; use async_trait::async_trait; use common_base::secrets::{ExposeSecret, SecretString}; +use common_error::ext::BoxedError; use crate::common::{ PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection, @@ -105,8 +106,9 @@ pub struct S3Backend { } impl S3Backend { - pub fn new(config: 
PrefixedS3Connection) -> Self { - Self { config } + pub fn new(config: PrefixedS3Connection) -> Result { + config.validate()?; + Ok(Self { config }) } } @@ -165,8 +167,9 @@ pub struct OssBackend { } impl OssBackend { - pub fn new(config: PrefixedOssConnection) -> Self { - Self { config } + pub fn new(config: PrefixedOssConnection) -> Result { + config.validate()?; + Ok(Self { config }) } } @@ -176,7 +179,7 @@ impl StorageExport for OssBackend { let bucket = &self.config.oss_bucket; let oss_path = format!("oss://{}/{}/{}/", bucket, catalog, schema); - let mut connection_options = vec![ + let connection_options = [ format!( "ACCESS_KEY_ID='{}'", expose_optional_secret(&self.config.oss_access_key_id) @@ -187,10 +190,6 @@ impl StorageExport for OssBackend { ), ]; - if !self.config.oss_endpoint.is_empty() { - connection_options.push(format!("ENDPOINT='{}'", self.config.oss_endpoint)); - } - let connection_str = format!(" CONNECTION ({})", connection_options.join(", ")); (oss_path, connection_str) } @@ -218,8 +217,9 @@ pub struct GcsBackend { } impl GcsBackend { - pub fn new(config: PrefixedGcsConnection) -> Self { - Self { config } + pub fn new(config: PrefixedGcsConnection) -> Result { + config.validate()?; + Ok(Self { config }) } } @@ -280,8 +280,9 @@ pub struct AzblobBackend { } impl AzblobBackend { - pub fn new(config: PrefixedAzblobConnection) -> Self { - Self { config } + pub fn new(config: PrefixedAzblobConnection) -> Result { + config.validate()?; + Ok(Self { config }) } } @@ -304,10 +305,6 @@ impl StorageExport for AzblobBackend { ), ]; - if !self.config.azblob_endpoint.is_empty() { - connection_options.push(format!("ENDPOINT='{}'", self.config.azblob_endpoint)); - } - if let Some(sas_token) = &self.config.azblob_sas_token { connection_options.push(format!("SAS_TOKEN='{}'", sas_token)); } From 2c012c951dd0466de6b19c20a887731cff76896d Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Sun, 30 Nov 2025 13:04:47 +0800 Subject: [PATCH 10/12] refactor(cli): unify storage configuration for export command - Use macros to simplify storage export implementation Signed-off-by: McKnight22 Co-authored-by: WenyXu --- src/cli/src/data/storage_export.rs | 166 ++++++++++++----------------- 1 file changed, 69 insertions(+), 97 deletions(-) diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index c7456bc8d17e..03d4ba5093a0 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -64,6 +64,63 @@ pub trait StorageExport: Send + Sync { fn mask_sensitive_info(&self, sql: &str) -> String; } +macro_rules! define_backend { + ($name:ident, $config:ty) => { + #[derive(Clone)] + pub struct $name { + config: $config, + } + + impl $name { + pub fn new(config: $config) -> Result { + config.validate()?; + Ok(Self { config }) + } + } + }; +} + +macro_rules! define_storage_type { + ( + pub enum $enum_name:ident { + $($variant:ident($backend:ty)),* $(,)? 
+ } + ) => { + #[derive(Clone)] + pub enum $enum_name { + $($variant($backend)),* + } + + #[async_trait] + impl StorageExport for $enum_name { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + match self { + $(Self::$variant(backend) => backend.get_storage_path(catalog, schema)),* + } + } + + fn format_output_path(&self, catalog: &str, file_path: &str) -> String { + match self { + $(Self::$variant(backend) => backend.format_output_path(catalog, file_path)),* + } + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + match self { + $(Self::$variant(backend) => backend.mask_sensitive_info(sql)),* + } + } + } + + impl $enum_name { + /// Returns true if the storage backend is remote (not local filesystem). + pub fn is_remote_storage(&self) -> bool { + !matches!(self, Self::Fs(_)) + } + } + }; +} + /// Local file system storage backend. #[derive(Clone)] pub struct FsBackend { @@ -99,18 +156,7 @@ impl StorageExport for FsBackend { } } -/// S3 storage backend. -#[derive(Clone)] -pub struct S3Backend { - config: PrefixedS3Connection, -} - -impl S3Backend { - pub fn new(config: PrefixedS3Connection) -> Result { - config.validate()?; - Ok(Self { config }) - } -} +define_backend!(S3Backend, PrefixedS3Connection); #[async_trait] impl StorageExport for S3Backend { @@ -159,19 +205,7 @@ impl StorageExport for S3Backend { ) } } - -/// OSS storage backend. -#[derive(Clone)] -pub struct OssBackend { - config: PrefixedOssConnection, -} - -impl OssBackend { - pub fn new(config: PrefixedOssConnection) -> Result { - config.validate()?; - Ok(Self { config }) - } -} +define_backend!(OssBackend, PrefixedOssConnection); #[async_trait] impl StorageExport for OssBackend { @@ -210,18 +244,7 @@ impl StorageExport for OssBackend { } } -/// GCS storage backend. -#[derive(Clone)] -pub struct GcsBackend { - config: PrefixedGcsConnection, -} - -impl GcsBackend { - pub fn new(config: PrefixedGcsConnection) -> Result { - config.validate()?; - Ok(Self { config }) - } -} +define_backend!(GcsBackend, PrefixedGcsConnection); #[async_trait] impl StorageExport for GcsBackend { @@ -273,18 +296,7 @@ impl StorageExport for GcsBackend { } } -/// Azure Blob storage backend. -#[derive(Clone)] -pub struct AzblobBackend { - config: PrefixedAzblobConnection, -} - -impl AzblobBackend { - pub fn new(config: PrefixedAzblobConnection) -> Result { - config.validate()?; - Ok(Self { config }) - } -} +define_backend!(AzblobBackend, PrefixedAzblobConnection); #[async_trait] impl StorageExport for AzblobBackend { @@ -330,52 +342,12 @@ impl StorageExport for AzblobBackend { } } -/// Enum to represent different storage backend types. 
-#[derive(Clone)]
-pub enum StorageType {
-    Fs(FsBackend),
-    S3(S3Backend),
-    Oss(OssBackend),
-    Gcs(GcsBackend),
-    Azblob(AzblobBackend),
-}
-
-#[async_trait]
-impl StorageExport for StorageType {
-    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
-        match self {
-            StorageType::Fs(backend) => backend.get_storage_path(catalog, schema),
-            StorageType::S3(backend) => backend.get_storage_path(catalog, schema),
-            StorageType::Oss(backend) => backend.get_storage_path(catalog, schema),
-            StorageType::Gcs(backend) => backend.get_storage_path(catalog, schema),
-            StorageType::Azblob(backend) => backend.get_storage_path(catalog, schema),
-        }
-    }
-
-    fn format_output_path(&self, catalog: &str, file_path: &str) -> String {
-        match self {
-            StorageType::Fs(backend) => backend.format_output_path(catalog, file_path),
-            StorageType::S3(backend) => backend.format_output_path(catalog, file_path),
-            StorageType::Oss(backend) => backend.format_output_path(catalog, file_path),
-            StorageType::Gcs(backend) => backend.format_output_path(catalog, file_path),
-            StorageType::Azblob(backend) => backend.format_output_path(catalog, file_path),
-        }
-    }
-
-    fn mask_sensitive_info(&self, sql: &str) -> String {
-        match self {
-            StorageType::Fs(backend) => backend.mask_sensitive_info(sql),
-            StorageType::S3(backend) => backend.mask_sensitive_info(sql),
-            StorageType::Oss(backend) => backend.mask_sensitive_info(sql),
-            StorageType::Gcs(backend) => backend.mask_sensitive_info(sql),
-            StorageType::Azblob(backend) => backend.mask_sensitive_info(sql),
-        }
-    }
-}
-
-impl StorageType {
-    /// Returns true if the storage backend is remote (not local filesystem).
-    pub fn is_remote_storage(&self) -> bool {
-        !matches!(self, StorageType::Fs(_))
+define_storage_type!(
+    pub enum StorageType {
+        Fs(FsBackend),
+        S3(S3Backend),
+        Oss(OssBackend),
+        Gcs(GcsBackend),
+        Azblob(AzblobBackend),
     }
-}
+);

From 0eea99adf98803dc19eb82fdc555a42f94de3ed7 Mon Sep 17 00:00:00 2001
From: McKnight22
Date: Tue, 9 Dec 2025 20:14:19 +0800
Subject: [PATCH 11/12] refactor(cli): unify storage configuration for export command

- Roll back the StorageExport trait implementation from macros to explicit
  match arms, for better code clarity and maintainability
- Introduce a format_uri helper function to unify URI formatting logic
- Fix an OSS URI path bug inherited from legacy code: the configured root
  path was dropped from OSS URIs

Signed-off-by: McKnight22
Co-authored-by: WenyXu
---
 src/cli/src/common/object_store.rs |   4 +-
 src/cli/src/data/export.rs         |  25 ++--
 src/cli/src/data/storage_export.rs | 188 +++++++++++++++++------------
 3 files changed, 119 insertions(+), 98 deletions(-)

diff --git a/src/cli/src/common/object_store.rs b/src/cli/src/common/object_store.rs
index 4e746dbeb346..bfe9662b8d9c 100644
--- a/src/cli/src/common/object_store.rs
+++ b/src/cli/src/common/object_store.rs
@@ -179,7 +179,7 @@ macro_rules!
validate_backend { "{} {} must be set when --{} is enabled.", $backend_name, missing.join(", "), - $backend_name.to_lowercase().replace(" ", "") + $backend_name.to_lowercase() ), } .build(), @@ -216,7 +216,7 @@ impl PrefixedAzblobConnection { pub fn validate(&self) -> Result<(), BoxedError> { validate_backend!( enable: true, - name: "Azure Blob", + name: "AzBlob", required: [ (&self.azblob_container, "container"), (&self.azblob_root, "root"), diff --git a/src/cli/src/data/export.rs b/src/cli/src/data/export.rs index f5c030aa1121..374d32232181 100644 --- a/src/cli/src/data/export.rs +++ b/src/cli/src/data/export.rs @@ -376,8 +376,7 @@ impl Export { "Exported {}.{} database creation SQL to {}", self.catalog, schema, - self.storage_type - .format_output_path(&self.catalog, &file_path) + self.storage_type.format_output_path(&file_path) ); } @@ -435,9 +434,7 @@ impl Export { "Finished exporting {}.{schema} with {} table schemas to path: {}", export_self.catalog, metric_physical_tables.len() + remaining_tables.len() + views.len(), - export_self - .storage_type - .format_output_path(&export_self.catalog, &file_path) + export_self.storage_type.format_output_path(&file_path) ); Ok::<(), Error>(()) @@ -550,9 +547,7 @@ impl Export { "Finished exporting {}.{} copy_from.sql to {}", export_self.catalog, schema, - export_self - .storage_type - .format_output_path(&export_self.catalog, ©_from_path) + export_self.storage_type.format_output_path(©_from_path) ); Ok::<(), Error>(()) @@ -1168,8 +1163,7 @@ mod tests { assert!(result.is_err()); if let Err(err) = result { assert!( - err.to_string() - .contains("Azure Blob account name must be set"), + err.to_string().contains("AzBlob account name must be set"), "Actual error: {}", err ); @@ -1198,9 +1192,8 @@ mod tests { assert!(result.is_err()); if let Err(err) = result { assert!( - err.to_string().contains( - "Azure Blob account key (when sas_token is not provided) must be set" - ), + err.to_string() + .contains("AzBlob account key (when sas_token is not provided) must be set"), "Actual error: {}", err ); @@ -1314,7 +1307,7 @@ mod tests { ]); let result = cmd.build().await; - assert!(result.is_ok(), "Minimal Azure Blob config should succeed"); + assert!(result.is_ok(), "Minimal AzBlob config should succeed"); } #[tokio::test] @@ -1373,7 +1366,7 @@ mod tests { let result = cmd.build().await; assert!( result.is_ok(), - "Azure Blob with only sas_token should succeed: {:?}", + "AzBlob with only sas_token should succeed: {:?}", result.err() ); } @@ -1403,7 +1396,7 @@ mod tests { let result = cmd.build().await; assert!( result.is_ok(), - "Azure Blob with empty account_key but sas_token should succeed: {:?}", + "AzBlob with empty account_key but sas_token should succeed: {:?}", result.err() ); } diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index 03d4ba5093a0..e73839aeb38d 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -50,6 +50,12 @@ fn mask_secrets(mut sql: String, secrets: &[&str]) -> String { sql } +/// Helper function to format storage URI. +fn format_uri(scheme: &str, bucket: &str, root: &str, path: &str) -> String { + let root = format_root_path(root); + format!("{}://{}{}/{}", scheme, bucket, root, path) +} + /// Trait for storage backends that can be used for data export. 
#[async_trait] pub trait StorageExport: Send + Sync { @@ -58,7 +64,7 @@ pub trait StorageExport: Send + Sync { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String); /// Format the output path for logging purposes. - fn format_output_path(&self, catalog: &str, file_path: &str) -> String; + fn format_output_path(&self, file_path: &str) -> String; /// Mask sensitive information in SQL commands for safe logging. fn mask_sensitive_info(&self, sql: &str) -> String; @@ -80,47 +86,6 @@ macro_rules! define_backend { }; } -macro_rules! define_storage_type { - ( - pub enum $enum_name:ident { - $($variant:ident($backend:ty)),* $(,)? - } - ) => { - #[derive(Clone)] - pub enum $enum_name { - $($variant($backend)),* - } - - #[async_trait] - impl StorageExport for $enum_name { - fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - match self { - $(Self::$variant(backend) => backend.get_storage_path(catalog, schema)),* - } - } - - fn format_output_path(&self, catalog: &str, file_path: &str) -> String { - match self { - $(Self::$variant(backend) => backend.format_output_path(catalog, file_path)),* - } - } - - fn mask_sensitive_info(&self, sql: &str) -> String { - match self { - $(Self::$variant(backend) => backend.mask_sensitive_info(sql)),* - } - } - } - - impl $enum_name { - /// Returns true if the storage backend is remote (not local filesystem). - pub fn is_remote_storage(&self) -> bool { - !matches!(self, Self::Fs(_)) - } - } - }; -} - /// Local file system storage backend. #[derive(Clone)] pub struct FsBackend { @@ -147,7 +112,7 @@ impl StorageExport for FsBackend { (path, String::new()) } - fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { + fn format_output_path(&self, file_path: &str) -> String { format!("{}/{}", self.output_dir, file_path) } @@ -161,10 +126,12 @@ define_backend!(S3Backend, PrefixedS3Connection); #[async_trait] impl StorageExport for S3Backend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let bucket = &self.config.s3_bucket; - let root = format_root_path(&self.config.s3_root); - - let s3_path = format!("s3://{}{}/{}/{}/", bucket, root, catalog, schema); + let s3_path = format_uri( + "s3", + &self.config.s3_bucket, + &self.config.s3_root, + &format!("{}/{}/", catalog, schema), + ); let mut connection_options = vec![ format!( @@ -189,10 +156,13 @@ impl StorageExport for S3Backend { (s3_path, connection_str) } - fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { - let bucket = &self.config.s3_bucket; - let root = format_root_path(&self.config.s3_root); - format!("s3://{}{}/{}", bucket, root, file_path) + fn format_output_path(&self, file_path: &str) -> String { + format_uri( + "s3", + &self.config.s3_bucket, + &self.config.s3_root, + file_path, + ) } fn mask_sensitive_info(&self, sql: &str) -> String { @@ -205,13 +175,18 @@ impl StorageExport for S3Backend { ) } } + define_backend!(OssBackend, PrefixedOssConnection); #[async_trait] impl StorageExport for OssBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let bucket = &self.config.oss_bucket; - let oss_path = format!("oss://{}/{}/{}/", bucket, catalog, schema); + let oss_path = format_uri( + "oss", + &self.config.oss_bucket, + &self.config.oss_root, + &format!("{}/{}/", catalog, schema), + ); let connection_options = [ format!( @@ -228,9 +203,13 @@ impl StorageExport for OssBackend { (oss_path, connection_str) } - fn format_output_path(&self, catalog: &str, 
file_path: &str) -> String { - let bucket = &self.config.oss_bucket; - format!("oss://{}/{}/{}", bucket, catalog, file_path) + fn format_output_path(&self, file_path: &str) -> String { + format_uri( + "oss", + &self.config.oss_bucket, + &self.config.oss_root, + file_path, + ) } fn mask_sensitive_info(&self, sql: &str) -> String { @@ -249,10 +228,12 @@ define_backend!(GcsBackend, PrefixedGcsConnection); #[async_trait] impl StorageExport for GcsBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let bucket = &self.config.gcs_bucket; - let root = format_root_path(&self.config.gcs_root); - - let gcs_path = format!("gcs://{}{}/{}/{}/", bucket, root, catalog, schema); + let gcs_path = format_uri( + "gcs", + &self.config.gcs_bucket, + &self.config.gcs_root, + &format!("{}/{}/", catalog, schema), + ); let mut connection_options = Vec::new(); @@ -279,10 +260,13 @@ impl StorageExport for GcsBackend { (gcs_path, connection_str) } - fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { - let bucket = &self.config.gcs_bucket; - let root = format_root_path(&self.config.gcs_root); - format!("gcs://{}{}/{}", bucket, root, file_path) + fn format_output_path(&self, file_path: &str) -> String { + format_uri( + "gcs", + &self.config.gcs_bucket, + &self.config.gcs_root, + file_path, + ) } fn mask_sensitive_info(&self, sql: &str) -> String { @@ -301,10 +285,12 @@ define_backend!(AzblobBackend, PrefixedAzblobConnection); #[async_trait] impl StorageExport for AzblobBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { - let container = &self.config.azblob_container; - let root = format_root_path(&self.config.azblob_root); - - let azblob_path = format!("azblob://{}{}/{}/{}/", container, root, catalog, schema); + let azblob_path = format_uri( + "azblob", + &self.config.azblob_container, + &self.config.azblob_root, + &format!("{}/{}/", catalog, schema), + ); let mut connection_options = vec![ format!( @@ -325,10 +311,13 @@ impl StorageExport for AzblobBackend { (azblob_path, connection_str) } - fn format_output_path(&self, _catalog: &str, file_path: &str) -> String { - let container = &self.config.azblob_container; - let root = format_root_path(&self.config.azblob_root); - format!("azblob://{}{}/{}", container, root, file_path) + fn format_output_path(&self, file_path: &str) -> String { + format_uri( + "azblob", + &self.config.azblob_container, + &self.config.azblob_root, + file_path, + ) } fn mask_sensitive_info(&self, sql: &str) -> String { @@ -342,12 +331,51 @@ impl StorageExport for AzblobBackend { } } -define_storage_type!( - pub enum StorageType { - Fs(FsBackend), - S3(S3Backend), - Oss(OssBackend), - Gcs(GcsBackend), - Azblob(AzblobBackend), +#[derive(Clone)] +pub enum StorageType { + Fs(FsBackend), + S3(S3Backend), + Oss(OssBackend), + Gcs(GcsBackend), + Azblob(AzblobBackend), +} + +#[async_trait] +impl StorageExport for StorageType { + fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { + match self { + StorageType::Fs(backend) => backend.get_storage_path(catalog, schema), + StorageType::S3(backend) => backend.get_storage_path(catalog, schema), + StorageType::Oss(backend) => backend.get_storage_path(catalog, schema), + StorageType::Gcs(backend) => backend.get_storage_path(catalog, schema), + StorageType::Azblob(backend) => backend.get_storage_path(catalog, schema), + } } -); + + fn format_output_path(&self, file_path: &str) -> String { + match self { + StorageType::Fs(backend) => 
backend.format_output_path(file_path), + StorageType::S3(backend) => backend.format_output_path(file_path), + StorageType::Oss(backend) => backend.format_output_path(file_path), + StorageType::Gcs(backend) => backend.format_output_path(file_path), + StorageType::Azblob(backend) => backend.format_output_path(file_path), + } + } + + fn mask_sensitive_info(&self, sql: &str) -> String { + match self { + StorageType::Fs(backend) => backend.mask_sensitive_info(sql), + StorageType::S3(backend) => backend.mask_sensitive_info(sql), + StorageType::Oss(backend) => backend.mask_sensitive_info(sql), + StorageType::Gcs(backend) => backend.mask_sensitive_info(sql), + StorageType::Azblob(backend) => backend.mask_sensitive_info(sql), + } + } +} + +impl StorageType { + /// Returns true if the storage backend is remote (not local filesystem). + pub fn is_remote_storage(&self) -> bool { + !matches!(self, StorageType::Fs(_)) + } +} From c3f1904281f85b39858978820fcaa44db9900c93 Mon Sep 17 00:00:00 2001 From: McKnight22 Date: Mon, 15 Dec 2025 12:40:33 +0800 Subject: [PATCH 12/12] refactor(cli): unify storage configuration for export command - Remove unnecessary async_trait Signed-off-by: McKnight22 Co-authored-by: jeremyhi --- src/cli/src/data/storage_export.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/cli/src/data/storage_export.rs b/src/cli/src/data/storage_export.rs index e73839aeb38d..acec23559812 100644 --- a/src/cli/src/data/storage_export.rs +++ b/src/cli/src/data/storage_export.rs @@ -14,7 +14,6 @@ use std::path::PathBuf; -use async_trait::async_trait; use common_base::secrets::{ExposeSecret, SecretString}; use common_error::ext::BoxedError; @@ -57,7 +56,6 @@ fn format_uri(scheme: &str, bucket: &str, root: &str, path: &str) -> String { } /// Trait for storage backends that can be used for data export. -#[async_trait] pub trait StorageExport: Send + Sync { /// Generate the storage path for COPY DATABASE command. /// Returns (path, connection_string) where connection_string includes CONNECTION clause. @@ -98,7 +96,6 @@ impl FsBackend { } } -#[async_trait] impl StorageExport for FsBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { if self.output_dir.is_empty() { @@ -123,7 +120,6 @@ impl StorageExport for FsBackend { define_backend!(S3Backend, PrefixedS3Connection); -#[async_trait] impl StorageExport for S3Backend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let s3_path = format_uri( @@ -178,7 +174,6 @@ impl StorageExport for S3Backend { define_backend!(OssBackend, PrefixedOssConnection); -#[async_trait] impl StorageExport for OssBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let oss_path = format_uri( @@ -225,7 +220,6 @@ impl StorageExport for OssBackend { define_backend!(GcsBackend, PrefixedGcsConnection); -#[async_trait] impl StorageExport for GcsBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let gcs_path = format_uri( @@ -282,7 +276,6 @@ impl StorageExport for GcsBackend { define_backend!(AzblobBackend, PrefixedAzblobConnection); -#[async_trait] impl StorageExport for AzblobBackend { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { let azblob_path = format_uri( @@ -340,7 +333,6 @@ pub enum StorageType { Azblob(AzblobBackend), } -#[async_trait] impl StorageExport for StorageType { fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) { match self {