Skip to content

Commit 81954a5

Browse files
committed
feat: add verity command for data integrity verification
This commit introduces a new `verity` subcommand to support content integrity verification of data directories using cryptographic hashing (SHA-256). The `verity` command includes three subcommands: - `format`: Scans a data directory, computes SHA-256 hashes for all files, generates metadata (JSON), and outputs a root hash representing the entire directory state. - `verify`: Validates the integrity of a data directory against a known root hash and metadata file, ensuring no files have been modified. - `dump`: Extracts and displays either the full metadata or the root hash from an existing metadata file. Metadata is saved as a JSON file (`cryptpilot.metadata.json` by default), listing each file path and its corresponding hash. The root hash is derived from the serialized metadata, enabling secure anchoring in trusted environments. Implementation details: - New CLI structures added in `cli.rs` with proper argument parsing. - Modular command implementation under `src/cmd/verity/`. - Integration into the global command dispatch via `IntoCommand`. - Files are processed asynchronously using `tokio` and `async_walkdir`. - Excludes metadata file during scanning to prevent self-inclusion. This feature is useful for verifying system-critical directories during boot or in security-sensitive workflows. Signed-off-by: Kun Lai <laikun@linux.alibaba.com>
1 parent 1bf768c commit 81954a5

File tree

6 files changed

+362
-0
lines changed

6 files changed

+362
-0
lines changed

src/cli.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,81 @@ pub enum GlobalSubcommand {
4444
/// Running during system booting (both initrd stage and system stage).
4545
#[command(name = "boot-service")]
4646
BootService(BootServiceOptions),
47+
48+
/// Generate, verify, or dump integrity metadata (SHA-256 hashes) for a data directory.
49+
#[command(name = "verity")]
50+
Verity(VerityOptions),
51+
}
52+
53+
#[derive(Debug, Args)]
54+
#[command(args_conflicts_with_subcommands = true)]
55+
pub struct VerityOptions {
56+
#[command(subcommand)]
57+
pub command: VeritySubcommand,
58+
}
59+
60+
#[derive(Subcommand, Debug)]
61+
pub enum VeritySubcommand {
62+
/// Format and calculate reference values (hashes) for a given data directory.
63+
#[command(name = "format")]
64+
Format(FormatOptions),
65+
66+
/// Verify the integrity of a data directory against reference values.
67+
#[command(name = "verify")]
68+
Verify(VerifyOptions),
69+
70+
/// Dump metadata or root hash of a data directory.
71+
#[command(name = "dump")]
72+
Dump(DumpOptions),
73+
}
74+
75+
#[derive(Parser, Debug)]
76+
pub struct FormatOptions {
77+
/// Path to the data directory to calculate reference values for
78+
#[arg()]
79+
pub data_dir: std::path::PathBuf,
80+
81+
/// Output file path for the metadata JSON result
82+
#[arg(short, long)]
83+
pub metadata: Option<std::path::PathBuf>,
84+
85+
/// Output file path for the root hash ("-" for stdout)
86+
#[arg(long)]
87+
pub hash_output: std::path::PathBuf,
88+
}
89+
90+
#[derive(Parser, Debug)]
91+
pub struct VerifyOptions {
92+
/// Path to the data directory to verify
93+
#[arg()]
94+
pub data_dir: std::path::PathBuf,
95+
96+
/// Expected root hash for verification
97+
#[arg()]
98+
pub hash: String,
99+
100+
/// Path to the metadata JSON file
101+
#[arg(short, long)]
102+
pub metadata: Option<std::path::PathBuf>,
103+
}
104+
105+
#[derive(Parser, Debug)]
106+
pub struct DumpOptions {
107+
/// Path to the data directory
108+
#[arg(long, required_unless_present = "metadata")]
109+
pub data_dir: Option<std::path::PathBuf>,
110+
111+
/// Path to the metadata JSON file
112+
#[arg(long, required_unless_present = "data_dir")]
113+
pub metadata: Option<std::path::PathBuf>,
114+
115+
/// Print full metadata
116+
#[arg(long, required_unless_present = "print_root_hash")]
117+
pub print_metadata: bool,
118+
119+
/// Print only the root hash instead of full metadata
120+
#[arg(long, required_unless_present = "print_metadata")]
121+
pub print_root_hash: bool,
47122
}
48123

49124
#[derive(Parser, Debug)]

src/cmd/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub mod fde;
88
pub mod init;
99
pub mod open;
1010
pub mod show;
11+
pub mod verity;
1112

1213
#[async_trait]
1314
pub trait Command {
@@ -40,6 +41,7 @@ impl IntoCommand for crate::cli::GlobalSubcommand {
4041
})
4142
}
4243
crate::cli::GlobalSubcommand::Fde(fde_options) => fde_options.into_command(),
44+
crate::cli::GlobalSubcommand::Verity(verity_options) => verity_options.into_command(),
4345
}
4446
}
4547
}

src/cmd/verity/dump.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
use anyhow::Result;
2+
use async_trait::async_trait;
3+
use serde::{Deserialize, Serialize};
4+
use sha2::{Digest, Sha256};
5+
use tokio::fs;
6+
7+
use crate::cmd::Command;
8+
9+
const DEFAULT_METADATA_FILE: &str = "cryptpilot.metadata.json";
10+
11+
pub struct DumpCommand {
12+
pub options: crate::cli::DumpOptions,
13+
}
14+
15+
#[derive(Serialize, Deserialize, Debug)]
16+
struct FileInfo {
17+
path: String,
18+
sha256: String,
19+
}
20+
21+
#[async_trait]
22+
impl Command for DumpCommand {
23+
async fn run(&self) -> Result<()> {
24+
tracing::info!("Starting verity dump command");
25+
26+
// Determine the metadata file path
27+
let metadata_path = if let Some(ref metadata) = self.options.metadata {
28+
metadata.clone()
29+
} else if let Some(ref data_dir) = self.options.data_dir {
30+
data_dir.join(DEFAULT_METADATA_FILE)
31+
} else {
32+
anyhow::bail!("Either --metadata or --data-dir must be specified");
33+
};
34+
35+
tracing::info!("Reading metadata from: {:?}", metadata_path);
36+
37+
// Read metadata file
38+
let metadata_content = fs::read_to_string(&metadata_path).await?;
39+
let file_infos: Vec<FileInfo> = serde_json::from_str(&metadata_content)?;
40+
41+
// Handle output based on flags
42+
if self.options.print_root_hash {
43+
// Calculate and print root hash
44+
let mut hasher = Sha256::new();
45+
hasher.update(&metadata_content);
46+
let root_hash = hex::encode(hasher.finalize());
47+
println!("{}", root_hash);
48+
} else if self.options.print_metadata {
49+
// Print full metadata JSON
50+
let metadata_content = serde_json::to_string_pretty(&file_infos)?;
51+
println!("{}", metadata_content);
52+
} else {
53+
anyhow::bail!("Either --print-root-hash or --print-metadata must be specified");
54+
};
55+
56+
Ok(())
57+
}
58+
}

src/cmd/verity/format.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
use anyhow::Result;
2+
use async_trait::async_trait;
3+
use async_walkdir::WalkDir;
4+
use futures::StreamExt;
5+
use serde::{Deserialize, Serialize};
6+
use sha2::{Digest, Sha256};
7+
use std::path::{Path, PathBuf};
8+
use tokio::fs;
9+
10+
use crate::cmd::Command;
11+
12+
const DEFAULT_METADATA_FILE: &str = "cryptpilot.metadata.json";
13+
14+
pub struct FormatCommand {
15+
pub options: crate::cli::FormatOptions,
16+
}
17+
18+
#[derive(Serialize, Deserialize, Debug)]
19+
struct FileInfo {
20+
path: String,
21+
sha256: String,
22+
}
23+
24+
#[async_trait]
25+
impl Command for FormatCommand {
26+
async fn run(&self) -> Result<()> {
27+
tracing::info!("Starting verity format command");
28+
tracing::info!("Data directory: {:?}", self.options.data_dir);
29+
30+
// Collect all file paths
31+
let mut files = Vec::new();
32+
self.collect_files(&self.options.data_dir, &mut files)
33+
.await?;
34+
35+
tracing::info!("Found {} files in data directory", files.len());
36+
37+
// Sort file paths to ensure deterministic output
38+
files.sort();
39+
40+
// Calculate hash for each file
41+
let mut file_infos = Vec::new();
42+
for file_path in files {
43+
tracing::debug!("Processing file: {:?}", file_path);
44+
let content = fs::read(&file_path).await?;
45+
let hash = hex::encode(Sha256::digest(&content));
46+
47+
let relative_path = file_path
48+
.strip_prefix(&self.options.data_dir)?
49+
.to_path_buf();
50+
let path_str = relative_path.to_string_lossy().to_string();
51+
52+
file_infos.push(FileInfo {
53+
path: path_str,
54+
sha256: hash,
55+
});
56+
}
57+
58+
// Generate JSON metadata
59+
let json = serde_json::to_string_pretty(&file_infos)?;
60+
tracing::debug!("Generated metadata JSON with {} entries", file_infos.len());
61+
62+
// Determine the actual metadata file path
63+
let metadata_path = if let Some(ref metadata) = self.options.metadata {
64+
if metadata.is_absolute() {
65+
metadata.clone()
66+
} else {
67+
self.options.data_dir.join(metadata)
68+
}
69+
} else {
70+
self.options.data_dir.join(DEFAULT_METADATA_FILE)
71+
};
72+
73+
tracing::info!("Writing metadata to: {:?}", metadata_path);
74+
75+
// Write JSON metadata to file
76+
fs::write(&metadata_path, &json).await?;
77+
78+
// Calculate overall directory root hash
79+
let mut hasher = Sha256::new();
80+
hasher.update(&json);
81+
let root_hash = hex::encode(hasher.finalize());
82+
83+
tracing::info!("Root hash calculated: {}", root_hash);
84+
85+
// Write root hash to specified output or stdout
86+
if self.options.hash_output.as_os_str() == "-" {
87+
println!("{}", root_hash);
88+
} else {
89+
tracing::info!("Writing root hash to: {:?}", self.options.hash_output);
90+
fs::write(&self.options.hash_output, &root_hash).await?;
91+
}
92+
93+
Ok(())
94+
}
95+
}
96+
97+
impl FormatCommand {
98+
async fn collect_files(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
99+
let mut entries = WalkDir::new(dir);
100+
101+
while let Some(Ok(entry)) = entries.next().await {
102+
if entry.path().file_name() == Some(std::ffi::OsStr::new(DEFAULT_METADATA_FILE)) {
103+
continue;
104+
}
105+
106+
if entry.file_type().await.map_or(false, |ft| ft.is_file()) {
107+
files.push(entry.path());
108+
}
109+
}
110+
111+
Ok(())
112+
}
113+
}

src/cmd/verity/mod.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
use crate::cmd::{Command, IntoCommand};
2+
3+
mod format;
4+
mod verify;
5+
mod dump;
6+
7+
impl IntoCommand for crate::cli::VerityOptions {
8+
fn into_command(self) -> Box<dyn Command> {
9+
match self.command {
10+
crate::cli::VeritySubcommand::Format(format_options) => {
11+
Box::new(format::FormatCommand {
12+
options: format_options,
13+
})
14+
}
15+
crate::cli::VeritySubcommand::Verify(verify_options) => {
16+
Box::new(verify::VerifyCommand {
17+
options: verify_options,
18+
})
19+
}
20+
crate::cli::VeritySubcommand::Dump(dump_options) => {
21+
Box::new(dump::DumpCommand {
22+
options: dump_options,
23+
})
24+
}
25+
}
26+
}
27+
}

src/cmd/verity/verify.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use anyhow::Result;
2+
use async_trait::async_trait;
3+
use serde::{Deserialize, Serialize};
4+
use sha2::{Digest, Sha256};
5+
use tokio::fs;
6+
7+
use crate::cmd::Command;
8+
9+
const DEFAULT_METADATA_FILE: &str = "cryptpilot.metadata.json";
10+
11+
pub struct VerifyCommand {
12+
pub options: crate::cli::VerifyOptions,
13+
}
14+
15+
#[derive(Serialize, Deserialize, Debug)]
16+
struct FileInfo {
17+
path: String,
18+
sha256: String,
19+
}
20+
21+
#[async_trait]
22+
impl Command for VerifyCommand {
23+
async fn run(&self) -> Result<()> {
24+
tracing::info!("Starting verity verify command");
25+
tracing::info!("Data directory: {:?}", self.options.data_dir);
26+
tracing::info!("Expected root hash: {}", self.options.hash);
27+
28+
// Determine the metadata file path
29+
let metadata_path = if let Some(ref metadata) = self.options.metadata {
30+
if metadata.is_absolute() {
31+
metadata.clone()
32+
} else {
33+
self.options.data_dir.join(metadata)
34+
}
35+
} else {
36+
self.options.data_dir.join(DEFAULT_METADATA_FILE)
37+
};
38+
39+
tracing::info!("Reading metadata from: {:?}", metadata_path);
40+
41+
// Read metadata file
42+
let metadata_content = fs::read_to_string(&metadata_path).await?;
43+
let file_infos: Vec<FileInfo> = serde_json::from_str(&metadata_content)?;
44+
45+
// Calculate overall directory root hash from metadata
46+
let mut hasher = Sha256::new();
47+
hasher.update(&metadata_content);
48+
let root_hash = hex::encode(hasher.finalize());
49+
50+
// Compare root hash with expected hash
51+
if root_hash != self.options.hash {
52+
anyhow::bail!(
53+
"Root hash mismatch. Expected: {}, Actual: {}",
54+
self.options.hash,
55+
root_hash
56+
);
57+
}
58+
59+
tracing::info!("Root hash verification passed");
60+
61+
// Verify each file
62+
for file_info in file_infos {
63+
let file_path = self.options.data_dir.join(&file_info.path);
64+
tracing::debug!("Verifying file: {:?}", file_path);
65+
66+
// Read file content
67+
let content = fs::read(&file_path).await?;
68+
69+
// Calculate file hash
70+
let hash = hex::encode(Sha256::digest(&content));
71+
72+
// Compare with expected hash
73+
if hash != file_info.sha256 {
74+
anyhow::bail!(
75+
"File hash mismatch for {}. Expected: {}, Actual: {}",
76+
file_info.path,
77+
file_info.sha256,
78+
hash
79+
);
80+
}
81+
}
82+
83+
tracing::info!("All file hash verifications passed");
84+
85+
Ok(())
86+
}
87+
}

0 commit comments

Comments
 (0)