From 0d8b945d127a102d675b0062fe090033dd8d5d78 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 19 Dec 2025 20:10:18 -0800 Subject: [PATCH 1/2] DebugCollector: first class file archival --- Cargo.lock | 2 + sled-agent/config-reconciler/Cargo.toml | 2 + .../file_archiver/execution.rs | 269 +++++ .../file_archiver/filesystem.rs | 132 +++ .../src/debug_collector/file_archiver/mod.rs | 16 + .../debug_collector/file_archiver/planning.rs | 1001 +++++++++++++++++ .../debug_collector/file_archiver/rules.rs | 297 +++++ .../file_archiver/test_helpers.rs | 377 +++++++ .../src/debug_collector/mod.rs | 1 + .../src/debug_collector/worker.rs | 265 ++--- .../test-data/debug-files.txt | 93 ++ 11 files changed, 2253 insertions(+), 202 deletions(-) create mode 100644 sled-agent/config-reconciler/src/debug_collector/file_archiver/execution.rs create mode 100644 sled-agent/config-reconciler/src/debug_collector/file_archiver/filesystem.rs create mode 100644 sled-agent/config-reconciler/src/debug_collector/file_archiver/mod.rs create mode 100644 sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs create mode 100644 sled-agent/config-reconciler/src/debug_collector/file_archiver/rules.rs create mode 100644 sled-agent/config-reconciler/src/debug_collector/file_archiver/test_helpers.rs create mode 100644 sled-agent/config-reconciler/test-data/debug-files.txt diff --git a/Cargo.lock b/Cargo.lock index 5114b3bb3ca..4933f564b11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12999,6 +12999,7 @@ dependencies = [ "omicron-workspace-hack", "proptest", "rand 0.9.2", + "regex", "schemars 0.8.22", "scopeguard", "serde", @@ -13012,6 +13013,7 @@ dependencies = [ "sled-storage", "slog", "slog-error-chain", + "strum 0.27.2", "test-strategy", "thiserror 2.0.17", "tokio", diff --git a/sled-agent/config-reconciler/Cargo.toml b/sled-agent/config-reconciler/Cargo.toml index d66d9cfcef2..55dd4860630 100644 --- a/sled-agent/config-reconciler/Cargo.toml +++ b/sled-agent/config-reconciler/Cargo.toml @@ -29,6 +29,7 @@ ntp-admin-client.workspace = true omicron-common.workspace = true omicron-uuid-kinds.workspace = true rand.workspace = true +regex.workspace = true serde.workspace = true sha2.workspace = true sled-agent-api.workspace = true @@ -37,6 +38,7 @@ sled-hardware.workspace = true sled-storage.workspace = true slog.workspace = true slog-error-chain.workspace = true +strum.workspace = true thiserror.workspace = true tokio.workspace = true tufaceous-artifact.workspace = true diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/execution.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/execution.rs new file mode 100644 index 00000000000..118e7f5fe5d --- /dev/null +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/execution.rs @@ -0,0 +1,269 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Execution of file archival +//! +//! As much as possible, behavior should **not** live here, but in the planning +//! module instead so that it can be tested without touching the filesystem. 
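+//!
+//! The entry point here is [`execute_archive_step`].  The planning module's
+//! `ArchivePlan::execute` invokes it once per planned step, roughly like this
+//! (a sketch only: `plan_steps`, `log`, and `lister` are placeholders, and in
+//! the real code per-step errors are logged rather than propagated):
+//!
+//! ```ignore
+//! for step in plan_steps {
+//!     if let Ok(step) = step {
+//!         execute_archive_step(&log, step, lister).await?;
+//!     }
+//! }
+//! ```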
+ +use super::filesystem::FileLister; +use super::planning::ArchiveStep; +use anyhow::Context; +use camino::Utf8Path; +use slog::debug; + +pub(crate) async fn execute_archive_step<'a>( + log: &slog::Logger, + step: ArchiveStep<'a>, + lister: &'a (dyn FileLister + Send + Sync), +) -> Result<(), anyhow::Error> { + match step { + ArchiveStep::Mkdir { output_directory } => { + // We assume that the parent of all output directories + // already exists. That's because in practice it should be + // true: all of the output directories are one level below + // the debug dataset itself. (The test suite verifies + // this.) So if we find at runtime that this isn't true, + // that's a bad sign. Maybe somebody has unmounted the + // debug dataset and deleted its mountpoint? We don't want + // to start spewing stuff to the wrong place. That's why we + // don't use create_dir_all() here. + debug!( + log, + "create directory"; + "directory" => %output_directory + ); + tokio::fs::create_dir(&output_directory) + .await + .or_else(|error| { + if error.kind() == std::io::ErrorKind::AlreadyExists { + Ok(()) + } else { + Err(error) + } + }) + .with_context(|| format!("mkdir {output_directory:?}")) + } + ArchiveStep::ArchiveFile(archive_file) => { + match archive_file.choose_filename(lister) { + Err(error) => Err(error), + Ok(output_filename) => { + let input_path = &archive_file.input_path; + let output_path = archive_file + .output_directory + .join(output_filename.as_ref()); + debug!( + log, + "archive file"; + "input_path" => %input_path, + "output_path" => %output_path, + "delete_original" => + archive_file.delete_original, + ); + archive_one( + &input_path, + &output_path, + archive_file.delete_original, + ) + .await + .with_context(|| { + format!("archive {input_path:?} to {output_path:?}") + }) + } + } + } + } +} + +async fn archive_one( + source: &Utf8Path, + dest: &Utf8Path, + delete_original: bool, +) -> tokio::io::Result<()> { + let mut dest_f = tokio::fs::File::create(&dest).await?; + let mut src_f = tokio::fs::File::open(&source).await?; + + tokio::io::copy(&mut src_f, &mut dest_f).await?; + + dest_f.sync_all().await?; + + drop(src_f); + drop(dest_f); + + if delete_original { + tokio::fs::remove_file(source).await?; + } + + Ok(()) +} + +#[cfg(test)] +mod test { + use crate::debug_collector::file_archiver; + use anyhow::Context; + use camino::Utf8Path; + use camino_tempfile::Utf8TempDir; + use chrono::DateTime; + use chrono::Utc; + use file_archiver::planning::ArchiveKind; + use file_archiver::planning::ArchivePlanner; + use omicron_test_utils::dev::test_setup_log; + use slog::info; + + #[tokio::test] + async fn test_real_archival() { + // Set up the test. + let logctx = test_setup_log("test_archiving_basic"); + let log = &logctx.log; + + // Create a temporary directory in which to store some output files. + let tempdir = Utf8TempDir::new().unwrap(); + info!(log, "temporary directory"; "tempdir" => %tempdir.path()); + + // Populate it with a couple of files. + // + // Note that all of the interesting cases around generating archive + // steps are covered elsewhere. We really only need to smoke check + // basic behavior here. 
+ let outdir = tempdir.path().join("out"); + let zone_name = "an-example-zone"; + let zone_root = tempdir.path().join(zone_name); + let logdir = zone_root.join("var/svc/log"); + let file1_live = logdir.join("svc1.log"); + let file2_rotated = logdir.join("svc1.log.0"); + let file3_rotated = logdir.join("svc2.log.0"); + let coredir = tempdir.path().join("crash"); + let file4_core = coredir.join("core.123"); + + let populate_input = |contents: &str| { + std::fs::create_dir_all(&logdir).unwrap(); + std::fs::create_dir_all(&coredir).unwrap(); + for file in + [&file1_live, &file2_rotated, &file3_rotated, &file4_core] + { + let contents = + format!("{}-{contents}", file.file_name().unwrap()); + std::fs::write(&file, contents).unwrap(); + } + }; + + populate_input("first"); + + // Compute the expected filenames. These depend on the mtimes that the + // files wound up with. + let expected_filename = |base: &str, input: &Utf8Path| { + let found_mtime = input.metadata().unwrap().modified().unwrap(); + let mtime: DateTime = DateTime::from(found_mtime); + format!("{base}{}", mtime.timestamp()) + }; + let file1_expected = expected_filename("svc1.", &file1_live); + let file2_expected = expected_filename("svc1.log.", &file2_rotated); + let file3_expected = expected_filename("svc2.log.", &file3_rotated); + + // Run a complete archive. + std::fs::create_dir(&outdir).unwrap(); + let mut planner = ArchivePlanner::new(log, ArchiveKind::Final, &outdir); + planner.include_cores_directory(&coredir); + planner.include_zone(zone_name, &zone_root); + let () = planner.execute().await.expect("successful execution"); + + // Check each of the output log files. This is a little annoying + // because we don't necessarily know what names they were given, since + // it depends on the mtime on the input file. + let verify_logs = |unchanged| { + for (input_path, expected_filename, deleted_original) in [ + (&file1_live, &file1_expected, false), + (&file2_rotated, &file2_expected, true), + (&file3_rotated, &file3_expected, true), + ] { + let expected_path = + outdir.join(zone_name).join(expected_filename); + let contents = std::fs::read_to_string(&expected_path) + .with_context(|| { + format!("read expected output file {expected_path:?}") + }) + .unwrap(); + assert!(contents.starts_with(input_path.file_name().unwrap())); + assert!(contents.ends_with("-first")); + + if deleted_original { + // Check that the original file is gone. + assert!(!input_path.exists()); + } else { + // The input file should exist. It may or may not match + // what it originally did, depending on what the caller + // says. + let input_contents = std::fs::read_to_string(&input_path) + .with_context(|| { + format!("read expected intput file {input_path:?}") + }) + .unwrap(); + if unchanged { + assert_eq!(contents, input_contents); + } + } + } + }; + + verify_logs(true); + + // Check the output core file, too. + let file4_output = outdir.join("core.123"); + let contents = std::fs::read_to_string(&file4_output) + .with_context(|| { + format!("read expected output file {file4_output:?}") + }) + .unwrap(); + assert_eq!(contents, "core.123-first"); + assert!(!file4_core.exists()); + + // Now, check the behavior for file collisions. + // + // First, re-populate the input tree, but with new data so that we can + // tell when things have been clobbered. + populate_input("second"); + + // Run another archive. 
+ let mut planner = ArchivePlanner::new(log, ArchiveKind::Final, &outdir); + planner.include_cores_directory(&coredir); + planner.include_zone(zone_name, &zone_root); + let () = planner.execute().await.expect("successful execution"); + + // The previously archived log file should still exist, still have the + // same (original) contents, and the input files should be gone again. + verify_logs(false); + + // There should now be new versions of the three log files that contain + // the new contents. + for result in outdir.join(zone_name).read_dir_utf8().unwrap() { + let entry = result.unwrap(); + let contents = std::fs::read_to_string(&entry.path()) + .with_context(|| { + format!("read expected intput file {:?}", entry.path()) + }) + .unwrap(); + + if entry.file_name() == &file1_expected + || entry.file_name() == &file2_expected + || entry.file_name() == &file3_expected + { + assert!(contents.ends_with("-first")); + } else { + assert!(contents.ends_with("-second")); + } + } + + // The core file should have been completely overwritten with new + // contents. + assert!(!file4_core.exists()); + let contents = std::fs::read_to_string(&file4_output) + .with_context(|| { + format!("read expected output file {file4_output:?}") + }) + .unwrap(); + assert_eq!(contents, "core.123-second"); + + logctx.cleanup_successful(); + } +} diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/filesystem.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/filesystem.rs new file mode 100644 index 00000000000..fbc1c26f311 --- /dev/null +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/filesystem.rs @@ -0,0 +1,132 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::Context; +use anyhow::anyhow; +use camino::Utf8Path; +use chrono::DateTime; +use chrono::Utc; +use derive_more::AsRef; +use thiserror::Error; + +/// Describes the final component of a path name (that has no `/` in it) +#[derive(AsRef, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub(crate) struct Filename(String); + +#[derive(Debug, Error)] +#[error("string is not a valid filename (has slashes or is '.' or '..')")] +pub(crate) struct BadFilename; + +impl TryFrom for Filename { + type Error = BadFilename; + fn try_from(value: String) -> Result { + if value == "." || value == ".." || value.contains('/') { + Err(BadFilename) + } else { + Ok(Filename(value)) + } + } +} + +/// Helper trait used to swap out basic filesystem functionality for testing +pub(crate) trait FileLister { + /// List the files within a directory + /// + /// This should return an empty vec when the directory does not exist, + /// rather than an error. 
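+    ///
+    /// Failures to open the directory or to read an individual entry are
+    /// reported as `Err` elements of the returned `Vec` rather than failing
+    /// the listing as a whole.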
+ fn list_files( + &self, + path: &Utf8Path, + ) -> Vec>; + + /// Return the modification time of a file + fn file_mtime( + &self, + path: &Utf8Path, + ) -> Result>, anyhow::Error>; + + /// Return whether a file exists + fn file_exists(&self, path: &Utf8Path) -> Result; +} + +/// `FileLister` implementation that uses the real filesystem +pub(crate) struct FilesystemLister; +impl FileLister for FilesystemLister { + fn list_files( + &self, + path: &Utf8Path, + ) -> Vec> { + let entry_iter = match path.read_dir_utf8() { + Ok(iter) => iter, + Err(error) => { + if error.kind() == std::io::ErrorKind::NotFound { + // This interface is more useful if we swallow ENOTFOUND + // rather than propagate it since the caller will treat + // this the same as an empty directory. + return vec![]; + } else { + return vec![Err( + anyhow!(error).context("readdir {path:?}") + )]; + } + } + }; + + entry_iter + .map(|entry| { + entry.context("reading directory entry").and_then(|entry| { + // It should be impossible for this `try_from()` to fail, + // but it's easy enough to handle gracefully. + Filename::try_from(entry.file_name().to_owned()) + .with_context(|| { + format!( + "processing as a file name: {:?}", + entry.file_name(), + ) + }) + }) + }) + .collect() + } + + fn file_mtime( + &self, + path: &Utf8Path, + ) -> Result>, anyhow::Error> { + let metadata = path + .symlink_metadata() + .with_context(|| format!("loading metadata for {path:?}"))?; + + Ok(metadata + .modified() + // This `ok()` ignores an error fetching the mtime. We could + // probably just handle it, since it shouldn't come up. But this + // preserves historical behavior. + .ok() + .map(|m| m.into())) + } + + fn file_exists(&self, path: &Utf8Path) -> Result { + path.try_exists() + .with_context(|| format!("checking existence of {path:?}")) + } +} + +#[cfg(test)] +mod test { + use super::Filename; + + #[test] + fn test_filename() { + assert_eq!( + Filename::try_from(String::from("foo")).unwrap().as_ref(), + "foo" + ); + assert!(Filename::try_from(String::from(".")).is_err()); + assert!(Filename::try_from(String::from("..")).is_err()); + assert!(Filename::try_from(String::from("foo/bar")).is_err()); + assert!(Filename::try_from(String::from("foo/")).is_err()); + assert!(Filename::try_from(String::from("/bar")).is_err()); + } +} diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/mod.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/mod.rs new file mode 100644 index 00000000000..dcc3f20cfd4 --- /dev/null +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/mod.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Configuration and implementation for archiving ordinary files as debug data +//! (e.g., log files) + +mod execution; +mod filesystem; +mod planning; +mod rules; +#[cfg(test)] +mod test_helpers; + +pub use planning::ArchiveKind; +pub use planning::ArchivePlanner; diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs new file mode 100644 index 00000000000..61752b7ab76 --- /dev/null +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs @@ -0,0 +1,1001 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Configuration and implementation for archiving ordinary files as debug data
+//! (e.g., log files)
+//!
+//! This system is designed so that as much as possible is incorporated into
+//! the plan so that it can be tested in simulation without extensive
+//! dependency injection. See also
+//! [the plan-execute pattern](https://mmapped.blog/posts/29-plan-execute).
+
+use super::execution::execute_archive_step;
+use super::filesystem::FileLister;
+use super::filesystem::Filename;
+use super::filesystem::FilesystemLister;
+use super::rules::ALL_RULES;
+use super::rules::ArchiveGroup;
+use super::rules::NamingRule;
+use super::rules::RuleScope;
+use super::rules::Source;
+use anyhow::Context;
+use anyhow::anyhow;
+use camino::Utf8Path;
+use camino::Utf8PathBuf;
+use chrono::DateTime;
+use chrono::Utc;
+use slog::Logger;
+use slog::debug;
+use slog::o;
+use slog::warn;
+use slog_error_chain::InlineErrorChain;
+
+/// Describes what kind of archive operation this is, which affects what debug
+/// data to collect
+#[derive(Debug, Clone, Copy)]
+pub enum ArchiveKind {
+    /// Periodic archive
+    ///
+    /// Periodic archives include immutable files like core files and rotated
+    /// log files, but they ignore live log files since they're still being
+    /// written to. Those will get picked up in a subsequent periodic archive
+    /// (once rotated) or a final archive for this source.
+    Periodic,
+
+    /// Final archive for this source
+    ///
+    /// The final archive for a given source is our last chance to archive
+    /// debug data from it. The source is also generally at rest (or close to
+    /// it). So this includes everything that a periodic archive includes
+    /// *plus* live log files.
+    Final,
+}
+
+/// Used to configure and execute a file archival operation
+pub struct ArchivePlanner<'a> {
+    log: Logger,
+    what: ArchiveKind,
+    debug_dir: Utf8PathBuf,
+    groups: Vec<ArchiveGroup<'static>>,
+    lister: &'a (dyn FileLister + Send + Sync),
+}
+
+impl ArchivePlanner<'static> {
+    /// Begin an archival operation that will store data into `debug_dir`
+    pub fn new(
+        log: &Logger,
+        what: ArchiveKind,
+        debug_dir: &Utf8Path,
+    ) -> ArchivePlanner<'static> {
+        Self::new_with_lister(log, what, debug_dir, &FilesystemLister)
+    }
+}
+
+impl<'a> ArchivePlanner<'a> {
+    // Used by the tests to inject a custom lister.
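+    //
+    // For example, the planning tests construct a planner roughly like this
+    // (a sketch: `TestLister` lives in `test_helpers` and `files` is the
+    // parsed test data):
+    //
+    //     let lister = TestLister::new_for_test_data(&files);
+    //     let mut planner =
+    //         ArchivePlanner::new_with_lister(log, what, output_dir, &lister);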
+ pub(crate) fn new_with_lister( + log: &Logger, + what: ArchiveKind, + debug_dir: &Utf8Path, + lister: &'a (dyn FileLister + Send + Sync), + ) -> ArchivePlanner<'a> { + let log = log.new(o!( + "component" => "DebugCollectorArchiver", + "debug_dir" => debug_dir.to_string(), + "what" => format!("{what:?}"), + )); + debug!(&log, "planning archival"); + + ArchivePlanner { + log, + what, + debug_dir: debug_dir.to_owned(), + groups: Vec::new(), + lister, + } + } + + /// Configure this archive operation to include debug data from the given + /// illumos zone zone + pub fn include_zone(&mut self, zone_name: &str, zone_root: &Utf8Path) { + debug!( + &self.log, + "archiving debug data from zone"; + "zonename" => zone_name, + "zone_root" => %zone_root, + ); + + let source = Source { + input_prefix: zone_root.to_owned(), + output_prefix: self.debug_dir.join(zone_name), + }; + + let rules = + ALL_RULES.iter().filter(|r| match (&r.rule_scope, &self.what) { + (RuleScope::ZoneAlways, _) => true, + (RuleScope::ZoneMutable, ArchiveKind::Final) => true, + (RuleScope::ZoneMutable, ArchiveKind::Periodic) => false, + (RuleScope::CoresDirectory, _) => false, + }); + + for rule in rules { + self.groups.push(ArchiveGroup { source: source.clone(), rule }); + } + } + + /// Configure this archive operation to include debug data from the given + /// cores directory + pub fn include_cores_directory(&mut self, cores_dir: &Utf8Path) { + debug!( + &self.log, + "archiving debug data from cores directory"; + "cores_dir" => %cores_dir, + ); + + let source = Source { + input_prefix: cores_dir.to_owned(), + output_prefix: self.debug_dir.to_owned(), + }; + + let rules = ALL_RULES.iter().filter(|r| match r.rule_scope { + RuleScope::CoresDirectory => true, + RuleScope::ZoneMutable | RuleScope::ZoneAlways => false, + }); + + for rule in rules { + self.groups.push(ArchiveGroup { source: source.clone(), rule }); + } + } + + /// Returns an `ArchivePlan` that describes more specific steps for + /// archiving the requested debug data + pub fn into_plan(self) -> ArchivePlan<'a> { + ArchivePlan { + log: self.log, + groups: self.groups, + debug_dir: self.debug_dir, + lister: self.lister, + } + } + + /// Generates an `ArchivePlan` and immediately executes it, archiving the + /// requested files + /// + /// Returns a single `anyhow::Error` if there are any problems archiving any + /// files. Details are emitted to the log. (It's assumed that consumers + /// don't generally care to act on detailed failures programmatically, just + /// report them to the log.) + pub async fn execute(self) -> Result<(), anyhow::Error> { + if !self.into_plan().execute().await.is_empty() { + Err(anyhow!("one or more archive steps failed (see logs)")) + } else { + Ok(()) + } + } +} + +/// Describes specific steps to carry out an archive operation +/// +/// Constructed with [`ArchivePlanner`]. 
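+///
+/// A typical archival pass looks roughly like this (a sketch: `log`,
+/// `debug_dir`, `cores_dir`, `zone_root`, and the zone name are placeholders):
+///
+/// ```ignore
+/// let mut planner =
+///     ArchivePlanner::new(&log, ArchiveKind::Periodic, &debug_dir);
+/// planner.include_cores_directory(&cores_dir);
+/// planner.include_zone("oxz_example", &zone_root);
+/// let plan = planner.into_plan();
+/// let errors = plan.execute().await; // per-step errors, already logged
+/// ```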
+pub(crate) struct ArchivePlan<'a> { + log: slog::Logger, + debug_dir: Utf8PathBuf, + groups: Vec>, + lister: &'a (dyn FileLister + Send + Sync), +} + +impl ArchivePlan<'_> { + #[cfg(test)] + pub(crate) fn to_steps( + &self, + ) -> impl Iterator, anyhow::Error>> { + Self::to_steps_generic( + &self.log, + &self.groups, + &self.debug_dir, + self.lister, + ) + } + + pub(crate) fn to_steps_generic<'a>( + log: &Logger, + groups: &'a [ArchiveGroup<'static>], + debug_dir: &'a Utf8Path, + lister: &'a (dyn FileLister + Send + Sync), + ) -> impl Iterator, anyhow::Error>> { + // This gigantic combinator iterates the list of archive steps, which + // consist of: + // + // - an `ArchiveStep::Mkdir` for each output directory we need to create + // - an `ArchiveStep::ArchiveFile` for each file that we need to archive + // (all files matching all the rules that have been applied to the + // input sources). + // + // In fact, each item that we iterate is a `Result`: it's either one of + // these archive steps or its an error that was encountered along the + // way. + // + // Being one big expression makes this annoying to read and modify, but + // it has the useful property that it operates in a streaming way: at no + // point are all of the files in all of the matching directories read + // into memory at once. + groups + .iter() + // Start with a `mkdir` for each of the output directories. + .filter_map(move |group| { + let output_directory = group.output_directory(debug_dir); + if output_directory != debug_dir { + Some(Ok(ArchiveStep::Mkdir { output_directory })) + } else { + None + } + }) + // Chain this with a list of all the files we need to archive. + .chain( + groups + .iter() + .flat_map(move |group| { + // Each group essentially identifies one directory that + // we need to scan for files to archive. For each of + // these, list the files in the directory. + let input_directory = group.input_directory(); + + debug!( + log, + "listing directory"; + "input_directory" => %input_directory + ); + lister.list_files(&input_directory).into_iter().map( + move |item| item.map(|filename| (group, filename)), + ) + }) + .filter(move |entry| match entry { + // Errors are passed to the end of this pipeline. + Err(_) => true, + + // Files that we found in an input directory are checked + // against the corresponding rule to see if we should + // include them. + Ok((group, filename)) => { + debug!( + log, + "checking file"; + "file" => %filename.as_ref(), + ); + group.rule.include_file(&filename) + } + }) + .map(|entry| match entry { + // Errors are passed to the end of this pipeline. + Err(error) => Err(error), + + // If we found a matching file, fetch its metadata and + // grab the mtime. This is used for naming the archived + // file. + Ok((group, filename)) => { + let input_path = + group.input_directory().join(filename.as_ref()); + lister + .file_mtime(&input_path) + .map(|mtime| (group, input_path, mtime)) + } + }) + .map(|entry| match entry { + // Errors are passed to the end of this pipeline. + Err(error) => Err(error), + + // If we succeeded so far, we have a matching input + // file, its mtime and the associated group. Construct + // an archive step describing that we need to archive + // this file. 
+ Ok((group, input_path, mtime)) => { + let output_directory = + group.output_directory(debug_dir); + Ok(ArchiveStep::ArchiveFile(ArchiveFile { + input_path, + mtime, + output_directory, + namer: group.rule.naming, + delete_original: group.rule.delete_original, + #[cfg(test)] + rule: group.rule.label, + })) + } + }), + ) + } + + pub(crate) async fn execute(self) -> Vec { + let mut errors = Vec::new(); + let log = &self.log; + let groups = self.groups; + let debug_dir = self.debug_dir; + let lister = self.lister; + for step in Self::to_steps_generic(log, &groups, &debug_dir, lister) { + let result = match step { + Err(error) => Err(error), + Ok(step) => execute_archive_step(log, step, lister).await, + }; + + if let Err(error) = result { + warn!( + log, + "error during archival"; + InlineErrorChain::new(&*error) + ); + errors.push(error); + } + } + + errors + } +} + +pub(crate) enum ArchiveStep<'a> { + Mkdir { output_directory: Utf8PathBuf }, + ArchiveFile(ArchiveFile<'a>), +} + +#[derive(Clone)] +pub(crate) struct ArchiveFile<'a> { + pub(crate) input_path: Utf8PathBuf, + pub(crate) mtime: Option>, + pub(crate) output_directory: Utf8PathBuf, + pub(crate) namer: &'a (dyn NamingRule + Send + Sync), + pub(crate) delete_original: bool, + #[cfg(test)] + pub(crate) rule: &'static str, +} + +impl ArchiveFile<'_> { + pub(crate) fn choose_filename( + &self, + lister: &dyn FileLister, + ) -> Result { + let file_name: Filename = self + .input_path + .file_name() + .ok_or_else(|| { + // This should be impossible, but it's easy enough to handle + // gracefully. + anyhow!( + "file for archival has no filename: {:?}", + &self.input_path + ) + })? + .to_owned() + .try_into() + .context("file_name() returned a non-Filename")?; + self.namer.archived_file_name( + &file_name, + self.mtime, + lister, + &self.output_directory, + ) + } +} + +#[cfg(test)] +mod test { + use crate::debug_collector::file_archiver; + use camino::Utf8Path; + use chrono::DateTime; + use chrono::Timelike; + use chrono::Utc; + use file_archiver::planning::ArchiveFile; + use file_archiver::planning::ArchiveKind; + use file_archiver::planning::ArchiveStep; + use file_archiver::rules::ALL_RULES; + use file_archiver::rules::MAX_COLLIDING_FILENAMES; + use file_archiver::rules::NameRotatedLogFile; + use file_archiver::test_helpers::*; + use omicron_test_utils::dev::test_setup_log; + use slog::debug; + use slog::info; + use slog_error_chain::InlineErrorChain; + use std::collections::BTreeSet; + + /// Fully tests archive planning with a bunch of real-world file paths + #[test] + fn test_archiving_basic() { + // Set up the test. + let logctx = test_setup_log("test_archiving_basic"); + let log = &logctx.log; + + // Load the test data + let files = load_test_files().unwrap(); + + // Run a simulated archive. + let fake_output_dir = Utf8Path::new("/fake-output-directory"); + let lister = TestLister::new_for_test_data(&files); + let plan = test_archive( + log, + &files, + fake_output_dir, + ArchiveKind::Final, + &lister, + ); + + // Now, walk through the archive plan and verify it. + let mut directories_created = BTreeSet::new(); + let mut unarchived_files = files.clone(); + let mut rules_unused: BTreeSet<_> = + ALL_RULES.iter().map(|r| r.label).collect(); + for step in plan.to_steps() { + let step = step.expect("no errors with test lister"); + + match step { + // For a `mkdir`, verify that the parent directory matches our + // output directory. (For more on why, see the code where we + // process this Mkdir.) Then record it. 
We'll use that to + // verify that files are always archived into directories that + // already exist. + ArchiveStep::Mkdir { output_directory } => { + let parent = output_directory + .parent() + .expect("output directory has a parent"); + if parent != fake_output_dir { + panic!( + "archiver created an output directory \ + ({output_directory:?}) whose parent is not the \ + fake debug directory ({fake_output_dir:?}). \ + This is not currently supported." + ); + } + directories_created.insert(output_directory); + } + + ArchiveStep::ArchiveFile(ArchiveFile { + input_path, + delete_original, + output_directory, + rule, + .. + }) => { + println!("archiving: {input_path}"); + + // Check that we have not already archived this file. + // That would imply that two rules matched the same file, + // which would be a bug in the rule definitions. + let test_file = unarchived_files + .remove(input_path.as_path()) + .unwrap_or_else(|| { + panic!( + "attempted to archive the same file multiple \ + times (or it was not in the test dataset): \ + {input_path:?}", + ); + }); + + // Check that we've correctly determined whether to delete + // the original file when archiving it. This is determined + // by the rule that it matched. We check it here against + // what we expect for each kind of file. + match &test_file.kind { + TestFileKind::KernelCrashDump { .. } + | TestFileKind::ProcessCoreDump { .. } + | TestFileKind::LogSmfRotated { .. } + | TestFileKind::LogSyslogRotated { .. } + | TestFileKind::GlobalLogSmfRotated + | TestFileKind::GlobalLogSyslogRotated + | TestFileKind::Ignored => { + assert!( + delete_original, + "expected to delete original file when \ + archiving file of kind {:?}", + test_file.kind, + ); + } + + TestFileKind::LogSmfLive { .. } + | TestFileKind::LogSyslogLive { .. } + | TestFileKind::GlobalLogSmfLive + | TestFileKind::GlobalLogSyslogLive => { + assert!( + !delete_original, + "expected not to delete original file when \ + archiving file of kind {:?}", + test_file.kind, + ); + } + } + + // The output directory must either match the overall output + // directory or else be one of the directories created by a + // Mkdir that we've already processed. + if output_directory != fake_output_dir + && !directories_created.contains(&output_directory) + { + panic!( + "file was archived into a non-existent \ + directory: {}", + test_file.path + ); + } + + // Mark that we've used this rule. It's not a problem if + // we've already done so. + let _ = rules_unused.remove(rule); + } + }; + } + + if !rules_unused.is_empty() { + panic!( + "one or more rules was not covered by the tests: \ + {rules_unused:?}" + ); + } + + println!("files that were not archived: {}", unarchived_files.len()); + for test_file in unarchived_files { + println!(" {}", test_file.path); + if !matches!(test_file.kind, TestFileKind::Ignored) { + panic!( + "non-ignored test file was not archived: {:?}", + test_file.path + ); + } + } + + logctx.cleanup_successful(); + } + + // Tests that when we archive "immutable-only" files: + // - we do archive the stuff we expect + // - we don't archive the stuff that we don't expect + #[test] + fn test_archiving_immutable_only() { + // Set up the test. + let logctx = test_setup_log("test_archiving_immutable_only"); + let log = &logctx.log; + + // Load the test data + let files = load_test_files().unwrap(); + + // Run a simulated archive. 
+ let fake_output_dir = Utf8Path::new("/fake-output-directory"); + let lister = TestLister::new_for_test_data(&files); + let plan = test_archive( + log, + &files, + fake_output_dir, + ArchiveKind::Periodic, + &lister, + ); + + let mut expected_unarchived: BTreeSet<_> = files + .iter() + .filter_map(|test_file| { + let expected = match test_file.kind { + TestFileKind::KernelCrashDump { .. } + | TestFileKind::ProcessCoreDump { .. } + | TestFileKind::LogSmfRotated { .. } + | TestFileKind::LogSyslogRotated { .. } + | TestFileKind::GlobalLogSmfRotated + | TestFileKind::GlobalLogSyslogRotated => true, + TestFileKind::LogSmfLive { .. } + | TestFileKind::LogSyslogLive { .. } + | TestFileKind::GlobalLogSmfLive + | TestFileKind::GlobalLogSyslogLive + | TestFileKind::Ignored => false, + }; + + expected.then_some(&test_file.path) + }) + .collect(); + + // Check that precisely the expected files were collected. + // We do not check all the other expected behaviors around archiving + // here. That's tested in `test_archive_basic` for all files. + for step in plan.to_steps() { + let step = step.expect("no errors with test lister"); + let ArchiveStep::ArchiveFile(archive_file) = step else { + continue; + }; + + let input_path = archive_file.input_path; + let test_file = files.get(input_path.as_path()).expect( + "unexpectedly archived file that was not in the test data", + ); + if matches!(test_file.kind, TestFileKind::Ignored) { + // We don't care whether "ignored" files get archived or not. + continue; + } + + if !expected_unarchived.remove(&input_path) { + panic!( + "unexpectedly archived file (either it should not have \ + been at all or it was archived more than once): \ + {input_path:?}", + ); + } + + // This is technically checked in the other test, but since it's + // related to the file being immutable, we may as well check it + // again here. + assert!( + archive_file.delete_original, + "expected to delete the original when archiving immutable files" + ); + } + + if !expected_unarchived.is_empty() { + panic!( + "did not archive some of the files we expected: {:?}", + expected_unarchived + ); + } + + logctx.cleanup_successful(); + } + + /// Verifies that the archive plan streams rather than pre-computing all the + /// steps it has to do at once + /// + /// This property is important for scalability and memory usage. + #[test] + fn test_archiving_is_streaming() { + // Set up the test. + let logctx = test_setup_log("test_archiving_is_streaming"); + let log = &logctx.log; + + // Load the test data + let files = load_test_files().unwrap(); + + // Begin a simulated archive. + let fake_output_dir = Utf8Path::new("/fake-output-directory"); + let lister = TestLister::new_for_test_data(&files); + let plan = test_archive( + log, + &files, + fake_output_dir, + ArchiveKind::Final, + &lister, + ); + + // Verify that the archiver operates in a streaming way by checking that + // each archived file is contained in the most-recently-listed + // directory. If it's not, then it must have come from some previously + // listed directory, which means that the archiver should have returned + // it before listing the next directory. In other words, that would + // mean that the archiver read ahead of the directory whose files it's + // currently archiving, which is the thing we're trying to check + // doesn't happen. 
+ for step in plan.to_steps() { + let step = step.expect("test lister does not produce errors"); + let ArchiveStep::ArchiveFile(archive_file) = &step else { + continue; + }; + + let last_listed = lister.last_listed(); + let last = last_listed + .as_ref() + .expect("listed a directory before archiving any files"); + assert!( + archive_file.input_path.starts_with(last), + "archived file is not in the most-recently-listed directory", + ); + } + + logctx.cleanup_successful(); + } + + /// Verifies that failure to list a directory does not affect archiving + /// other directories + #[test] + fn test_directory_list_error() { + // Set up the test. + let logctx = test_setup_log("test_directory_list_error"); + let log = &logctx.log; + + // Load the test data + let files = load_test_files().unwrap(); + + // Choose a directory for which to inject an error. + let fail_dir = files + .iter() + .find_map(|test_file| { + if matches!(&test_file.kind, TestFileKind::Ignored) { + None + } else { + let parent = test_file.path.parent().unwrap(); + Some(Utf8Path::new(parent)) + } + }) + .expect("at least one non-ignored file in test data"); + info!( + log, + "injecting error for directory"; + "directory" => fail_dir.as_str(), + ); + + // Begin a simulated archive. Configure the lister to inject an error + // for the directory that we chose. + let fake_output_dir = Utf8Path::new("/fake-output-directory"); + let mut lister = TestLister::new_for_test_data(&files); + lister.inject_error(fail_dir); + let plan = test_archive( + log, + &files, + fake_output_dir, + ArchiveKind::Final, + &lister, + ); + + // Now walk through the archive plan and make sure: + // (1) Everything that's not in this directory gets archived. + // (2) There's an error produced for this directory. + // (3) Nothing is archived within this directory. + let mut unarchived_files = files.clone(); + let mut nerrors = 0; + for step in plan.to_steps() { + let step = match step { + Err(error) => { + let error = InlineErrorChain::new(&*error); + let error_str = error.to_string(); + debug!(log, "found error"; error); + assert!(error_str.contains(fail_dir.as_str())); + assert!(error_str.contains("injected error")); + nerrors += 1; + continue; + } + Ok(step) => step, + }; + + let ArchiveStep::ArchiveFile(archive_file) = &step else { + continue; + }; + + assert!( + !archive_file.input_path.starts_with(fail_dir), + "archived file in the directory where we injected an error" + ); + + let _ = unarchived_files + .remove(archive_file.input_path.as_path()) + .expect("archived file was in list of test files"); + } + + // We should see one error for each time the directory that we chose was + // listed. That should always be at least once. It could be more than + // once, depending on how rules are configured. For example, with two + // rules for syslog (/var/adm/messages.* and /var/adm/messages), there + // would be two errors for /var/adm. + assert_ne!( + nerrors, 0, + "expected at least one error after injecting one" + ); + + for file in unarchived_files { + assert!( + file.path.starts_with(fail_dir), + "missed file: {:?}", + file.path + ); + } + + logctx.cleanup_successful(); + } + + /// Verifies that failure to fetch file details does not affect archiving + /// other files + #[test] + fn test_file_metadata_error() { + // Set up the test. + let logctx = test_setup_log("test_file_metadata_error"); + let log = &logctx.log; + + // Load the test data + let files = load_test_files().unwrap(); + + // Find a directory that contains at least two files. 
We'll inject an + // error for one of those files. + let mut fail_file = None; + { + let mut dirs_with_files: BTreeSet<_> = BTreeSet::new(); + for test_file in &files { + if matches!(&test_file.kind, TestFileKind::Ignored) { + continue; + } + let file = &test_file.path; + let dir = Utf8Path::new( + file.parent().expect("test file has parent directory"), + ); + if dirs_with_files.contains(dir) { + fail_file = Some(file); + break; + } + + dirs_with_files.insert(dir); + } + }; + let Some(fail_file) = fail_file else { + panic!( + "test data had no directory with multiple non-ignored files" + ); + }; + + // Begin a simulated archive. Configure the lister to inject an error + // on the path that we selected above. + let fake_output_dir = Utf8Path::new("/fake-output-directory"); + let mut lister = TestLister::new_for_test_data(&files); + lister.inject_error(fail_file); + let plan = test_archive( + log, + &files, + fake_output_dir, + ArchiveKind::Final, + &lister, + ); + + // Run through the archive plan and verify: + // + // (1) We get exactly one error and it's for the path we injected an + // error for. + // (2) That file does not get archived. + // (2) Every other file gets archived. + let mut unarchived_files = files.clone(); + let mut nerrors = 0; + for step in plan.to_steps() { + let step = match step { + Err(error) => { + let error = InlineErrorChain::new(&*error); + let error_str = error.to_string(); + debug!(log, "found error"; error); + assert!(error_str.contains(fail_file.as_str())); + assert!(error_str.contains("injected error")); + nerrors += 1; + continue; + } + Ok(step) => step, + }; + + let ArchiveStep::ArchiveFile(ArchiveFile { input_path, .. }) = + &step + else { + continue; + }; + + assert!( + input_path != fail_file, + "unexpectedly archived file for which we injected an error" + ); + + let _ = unarchived_files + .remove(input_path.as_path()) + .expect("archived file was in list of test files"); + } + + // There should be exactly one error. + assert_eq!( + nerrors, 1, + "expected exatcly one error after injecting only one error \ + on a file path", + ); + + // There should be exactly one file that was not archived. + assert_eq!(unarchived_files.len(), 1); + assert!(unarchived_files.contains_key(fail_file.as_path())); + + logctx.cleanup_successful(); + } + + #[test] + fn test_naming_logs() { + // template used for other tests + let template = ArchiveFile { + input_path: Utf8Path::new("/nonexistent/one/two.log.0").to_owned(), + mtime: Some("2025-12-12T16:51:00-07:00".parse().unwrap()), + output_directory: Utf8Path::new("/nonexistent/out").to_owned(), + namer: &NameRotatedLogFile, + delete_original: true, + rule: "dummy rule", + }; + + let empty_lister = TestLister::empty(); + + // ordinary case of a rotated log file name: output filename generated + // based on input and mtime + let input = ArchiveFile { + input_path: Utf8Path::new("/nonexistent/one/two.log.0").to_owned(), + ..template.clone() + }; + let filename = input.choose_filename(&empty_lister).unwrap(); + assert_eq!(filename.as_ref(), "two.log.1765583460"); + + // ordinary case with a live log file name + let input = ArchiveFile { + input_path: Utf8Path::new("/nonexistent/one/two.log").to_owned(), + ..template.clone() + }; + let filename = input.choose_filename(&empty_lister).unwrap(); + assert_eq!(filename.as_ref(), "two.1765583460"); + + // case: rotated log file, no mtime available + // (this may never happen in practice) + // + // The current mtime should be used instead. 
+ let input = ArchiveFile { + input_path: Utf8Path::new("/nonexistent/one/two.log.0").to_owned(), + mtime: None, + ..template.clone() + }; + let before = Utc::now().with_nanosecond(0).unwrap(); + let filename = input.choose_filename(&empty_lister).unwrap(); + let after = Utc::now(); + assert!(before <= after); + // The resulting filename should be "two.log.MTIME". + let (prefix, mtime) = + filename.as_ref().rsplit_once(".").expect("unexpected filename"); + assert_eq!(prefix, "two.log"); + let parsed: DateTime = DateTime::from_timestamp( + mtime.parse().expect("expected Unix timestamp in filename"), + 0, + ) + .unwrap(); + assert!(before <= parsed); + assert!(parsed <= after); + + // case: live log file, no mtime available + // (this may never happen in practice) + // + // The current mtime should be used instead. + let input = ArchiveFile { + input_path: Utf8Path::new("/nonexistent/one/two.log").to_owned(), + mtime: None, + ..template.clone() + }; + let before = Utc::now().with_nanosecond(0).unwrap(); + let filename = input.choose_filename(&empty_lister).unwrap(); + let after = Utc::now(); + assert!(before <= after); + // The resulting filename should be "two.MTIME". + let (prefix, mtime) = + filename.as_ref().rsplit_once(".").expect("unexpected filename"); + assert_eq!(prefix, "two"); + let parsed: DateTime = DateTime::from_timestamp( + mtime.parse().expect("expected Unix timestamp in filename"), + 0, + ) + .unwrap(); + assert!(before <= parsed); + assert!(parsed <= after); + + // case: the normal output filename already exists + // expected behavior: the "mtime" in the filename is incremented + let input = ArchiveFile { + input_path: Utf8Path::new("/nonexistent/one/two.log.0").to_owned(), + ..template.clone() + }; + let lister = TestLister::new(["/nonexistent/out/two.log.1765583460"]); + let filename = input.choose_filename(&lister).unwrap(); + assert_eq!(filename.as_ref(), "two.log.1765583461"); + + // case: several closely-named output filenames also exist + let lister = TestLister::new([ + "/nonexistent/out/two.log.1765583460", + "/nonexistent/out/two.log.1765583461", + "/nonexistent/out/two.log.1765583462", + "/nonexistent/out/two.log.1765583464", + ]); + let filename = input.choose_filename(&lister).unwrap(); + assert_eq!(filename.as_ref(), "two.log.1765583463"); + + // case: too many closely-named output files also exist + let colliding_filenames: Vec<_> = (0..=MAX_COLLIDING_FILENAMES) + .map(|i| { + format!( + "/nonexistent/out/two.log.{}", + 1765583460u64 + u64::from(i) + ) + }) + .collect(); + let lister = TestLister::new(colliding_filenames.iter()); + let error = input.choose_filename(&lister).unwrap_err(); + assert!( + error.to_string().contains("too many files with colliding names") + ); + } +} diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/rules.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/rules.rs new file mode 100644 index 00000000000..480697fbc0c --- /dev/null +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/rules.rs @@ -0,0 +1,297 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Rules used for determining what debug data to collect + +use super::filesystem::FileLister; +use super::filesystem::Filename; +use anyhow::anyhow; +use camino::Utf8Path; +use camino::Utf8PathBuf; +use chrono::DateTime; +use chrono::Utc; +use iddqd::IdOrdItem; +use iddqd::IdOrdMap; +use iddqd::id_upcast; +use regex::Regex; +use std::sync::LazyLock; + +/// Describes a source of debug data +/// +/// In practice, this corresponds to either: +/// +/// * the root filesystem of an illumos **zone** +/// * a **cores** (also called **crash**) dataset where user process core dumps +/// and kernel crash dumps are initially stored +#[derive(Clone)] +pub(crate) struct Source { + pub(crate) input_prefix: Utf8PathBuf, + pub(crate) output_prefix: Utf8PathBuf, +} + +/// Describes debug data to be archived from within some `Source`. +/// +/// Rules specify a path within the source where the files are found (e.g., +/// "var/svc/log") and a pattern for specifying files within that directory that +/// should be covered by the rule (e.g., "*.log"). The rule is applied across +/// several sources (in this case: illumos zones). A rule might cover "all the +/// files in a given cores dataset" or "the rotated SMF log files for a given +/// zone". +/// +/// It may be easiest to understand this by example. See [`ALL_RULES`] for all +/// of the rules. +/// +/// There are basically two kinds of rules: +/// +/// * **Zone** rules are applied to root filesystems of illumos zones, +/// including the global zone and non-global zones. These have scope +/// `RuleScope::ZoneMutable` or `RuleScope::ZoneAlways`. These describe how +/// to find the zone's log files. +/// +/// * **Cores** rules are applied to cores datasets (also known as "crash +/// datasets"), which contain kernel crash dumps and process core dumps. +pub(crate) struct Rule { + /// human-readable description of the rule + pub label: &'static str, + /// identifies what types of sources this rule is supposed to be applied to + pub rule_scope: RuleScope, + /// identifies the path to a directory within a source's input directory + /// that contains the data described by this rule + pub directory: Utf8PathBuf, + /// describes which files within `directory` are identified by this rule + regex: Regex, + /// configures whether the original files associated with this rule should + /// be deleted once they're archived + /// + /// For example, rotated log files are deleted when archived. Live log + /// files are not. 
+    pub delete_original: bool,
+    /// Describes how to construct the name of a file that's being archived
+    pub naming: &'static (dyn NamingRule + Send + Sync),
+}
+
+impl Rule {
+    /// Returns true if this rule specifies that the given `filename` should be
+    /// archived
+    pub(crate) fn include_file(&self, filename: &Filename) -> bool {
+        self.regex.is_match(filename.as_ref())
+    }
+}
+
+impl IdOrdItem for Rule {
+    type Key<'a> = &'static str;
+    fn key(&self) -> Self::Key<'_> {
+        self.label
+    }
+    id_upcast!();
+}
+
+/// Describes what Sources a rule can be applied to
+pub(crate) enum RuleScope {
+    /// this rule applies to all cores directories
+    CoresDirectory,
+    /// this rule applies to zone roots for "everything" collections, but not
+    /// "immutable" ones
+    ZoneMutable,
+    /// this rule applies to zone roots always, regardless of whether or not
+    /// we're collecting immutable data only
+    ZoneAlways,
+}
+
+/// path within a zone's root filesystem to its SMF logs
+static VAR_SVC_LOG: &str = "var/svc/log";
+/// path within a zone's root filesystem to its syslog
+static VAR_ADM: &str = "var/adm";
+
+/// List of all archive rules in the system
+///
+/// **NOTE:** If you change these rules, you may also need to update the testing
+/// data used by the test suite. The test suite uses path names from real
+/// systems to test various properties about these rules:
+///
+/// * that all files in the test data are covered by exactly one rule
+///   (rules should not specify overlapping files)
+/// * that all rules are covered by the test data
+pub(crate) static ALL_RULES: LazyLock<IdOrdMap<Rule>> = LazyLock::new(|| {
+    let rules = [
+        Rule {
+            label: "process core files and kernel crash dumps",
+            rule_scope: RuleScope::CoresDirectory,
+            directory: ".".parse().unwrap(),
+            regex: "^.*$".parse().unwrap(),
+            delete_original: true,
+            naming: &NameIdentity,
+        },
+        Rule {
+            label: "live SMF log files",
+            rule_scope: RuleScope::ZoneMutable,
+            directory: VAR_SVC_LOG.parse().unwrap(),
+            regex: "^.*\\.log$".parse().unwrap(),
+            delete_original: false,
+            naming: &NameLiveLogFile,
+        },
+        Rule {
+            label: "live syslog files",
+            rule_scope: RuleScope::ZoneMutable,
+            directory: VAR_ADM.parse().unwrap(),
+            regex: "^messages$".parse().unwrap(),
+            delete_original: false,
+            naming: &NameLiveLogFile,
+        },
+        Rule {
+            label: "rotated SMF log files",
+            rule_scope: RuleScope::ZoneAlways,
+            directory: VAR_SVC_LOG.parse().unwrap(),
+            regex: "^.*\\.log\\.[0-9]+$".parse().unwrap(),
+            delete_original: true,
+            naming: &NameRotatedLogFile,
+        },
+        Rule {
+            label: "rotated syslog files",
+            rule_scope: RuleScope::ZoneAlways,
+            directory: VAR_ADM.parse().unwrap(),
+            regex: "^messages\\.[0-9]+$".parse().unwrap(),
+            delete_original: true,
+            naming: &NameRotatedLogFile,
+        },
+    ];
+
+    // We could do this more concisely with a `collect()` or `IdOrdMap::from`,
+    // but those would silently discard duplicates. We want to detect these and
+    // provide a clear error message.
+    let mut rv = IdOrdMap::new();
+    for rule in rules {
+        let label = rule.label;
+        if let Err(_) = rv.insert_unique(rule) {
+            panic!("found multiple rules with the same label: {:?}", label);
+        }
+    }
+
+    rv
+});
+
+/// Describes a combination of `source` and `rule`
+///
+/// This essentially takes a `Rule` and applies it to a specific source. For
+/// example, a rule might say how to find the log files within a given zone. A
+/// specific zone will be its own `Source`. An `ArchiveGroup` puts these
+/// together to represent the collection of log files from a specific zone.
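+///
+/// For example (hypothetical paths): applying the "rotated SMF log files" rule
+/// to the `Source` built for a zone named `oxz_example` yields a group whose
+/// `input_directory()` is `<zone root>/var/svc/log` and whose
+/// `output_directory(debug_dir)` is `<debug_dir>/oxz_example`.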
+pub(crate) struct ArchiveGroup<'a> {
+    pub(crate) source: Source,
+    pub(crate) rule: &'a Rule,
+}
+
+impl<'a> ArchiveGroup<'a> {
+    pub(crate) fn input_directory(&self) -> Utf8PathBuf {
+        self.source.input_prefix.join(&self.rule.directory)
+    }
+
+    pub(crate) fn output_directory(&self, debug_dir: &Utf8Path) -> Utf8PathBuf {
+        debug_dir.join(&self.source.output_prefix)
+    }
+}
+
+/// Describes how to construct an archived file's final name based on its
+/// original name and mtime
+///
+/// `archived_file_name` is provided with `lister`, which can be used to
+/// determine if the desired output filename already exists and choose another
+/// name. **If the name of an existing file is returned, that file will be
+/// overwritten by the file that's being archived.**
+pub(crate) trait NamingRule {
+    fn archived_file_name(
+        &self,
+        source_file_name: &Filename,
+        source_file_mtime: Option<DateTime<Utc>>,
+        lister: &dyn FileLister,
+        output_directory: &Utf8Path,
+    ) -> Result<Filename, anyhow::Error>;
+}
+
+pub(crate) const MAX_COLLIDING_FILENAMES: u16 = 30;
+
+/// `NamingRule` that's used for rotated log files
+///
+/// These files are typically named `foo.0`, `foo.1`, etc. The integer at the
+/// end is provided by logadm(8) and has no meaning for us. This implementation
+/// replaces that integer with the file's `mtime` as a Unix timestamp. When
+/// that would collide with an existing filename, it increments the `mtime`
+/// until it gets a unique value (up to `MAX_COLLIDING_FILENAMES` tries). This
+/// behavior is historical and should potentially be revisited.
+pub(crate) struct NameRotatedLogFile;
+impl NamingRule for NameRotatedLogFile {
+    fn archived_file_name(
+        &self,
+        source_file_name: &Filename,
+        source_file_mtime: Option<DateTime<Utc>>,
+        lister: &dyn FileLister,
+        output_directory: &Utf8Path,
+    ) -> Result<Filename, anyhow::Error> {
+        let filename_base = match source_file_name.as_ref().rsplit_once('.') {
+            Some((base, _extension)) => base,
+            None => source_file_name.as_ref(),
+        };
+
+        let mtime_as_seconds =
+            source_file_mtime.unwrap_or_else(|| Utc::now()).timestamp();
+        for i in 0..MAX_COLLIDING_FILENAMES {
+            let rv =
+                format!("{filename_base}.{}", mtime_as_seconds + i64::from(i));
+            let dest = output_directory.join(&rv);
+            if !lister.file_exists(&dest)? {
+                // unwrap(): we started with a valid `Filename` and did not add
+                // any slashes here.
+                return Ok(Filename::try_from(rv).unwrap());
+            }
+        }
+
+        Err(anyhow!(
+            "failed to choose archive file name for file {source_file_name:?} \
+            because there are too many files with colliding names (at least \
+            {MAX_COLLIDING_FILENAMES})"
+        ))
+    }
+}
+
+/// `NamingRule` that's used for live log files
+///
+/// These files can have an arbitrary name `foo`. (SMF log files have a `.log`
+/// suffix, but syslog files do not.) For historical reasons, this uses the
+/// same implementation as `NameRotatedLogFile`. This behavior should probably
+/// be revisited.
+struct NameLiveLogFile;
+impl NamingRule for NameLiveLogFile {
+    fn archived_file_name(
+        &self,
+        source_file_name: &Filename,
+        source_file_mtime: Option<DateTime<Utc>>,
+        lister: &dyn FileLister,
+        output_directory: &Utf8Path,
+    ) -> Result<Filename, anyhow::Error> {
+        NameRotatedLogFile.archived_file_name(
+            source_file_name,
+            source_file_mtime,
+            lister,
+            output_directory,
+        )
+    }
+}
+
+/// `NamingRule` that's used for files whose names get preserved across archival
+///
+/// This includes kernel crash dumps, process core dumps, etc. This behavior is
+/// historical. It does not account for cases where the output filename already
+/// exists, which means those files may be overwritten.
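+/// For example, a core file named `core.123` is archived as `core.123`; if a
+/// later pass archives another file with the same name, it replaces the
+/// earlier copy.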
+struct NameIdentity; +impl NamingRule for NameIdentity { + fn archived_file_name( + &self, + source_file_name: &Filename, + _source_file_mtime: Option>, + _lister: &dyn FileLister, + _output_directory: &Utf8Path, + ) -> Result { + Ok(source_file_name.clone()) + } +} diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/test_helpers.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/test_helpers.rs new file mode 100644 index 00000000000..7720cd635a2 --- /dev/null +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/test_helpers.rs @@ -0,0 +1,377 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Helpers for working with the testing data used in the test suite + +use super::filesystem::FileLister; +use super::filesystem::Filename; +use super::planning::ArchiveKind; +use super::planning::ArchivePlan; +use super::planning::ArchivePlanner; +use anyhow::Context; +use anyhow::anyhow; +use anyhow::bail; +use camino::Utf8Path; +use camino::Utf8PathBuf; +use chrono::DateTime; +use chrono::Utc; +use iddqd::IdOrdItem; +use iddqd::IdOrdMap; +use iddqd::id_upcast; +use regex::Regex; +use slog::Logger; +use slog::debug; +use std::collections::BTreeSet; +use std::sync::LazyLock; +use std::sync::Mutex; +use strum::Display; +use strum::EnumDiscriminants; +use strum::EnumIter; +use strum::IntoDiscriminant; +use strum::IntoEnumIterator; + +/// Loads the filenames in the test data +pub(crate) fn load_test_files() -> anyhow::Result> { + load_test_data_paths()? + .into_iter() + .map(|path| { + TestFileKind::try_from(path.as_ref()) + .context("may need to update load_test_files()?") + .map(|kind| TestFile { path, kind }) + }) + .collect() +} + +fn load_test_data_paths() -> anyhow::Result> { + let path = "test-data/debug-files.txt"; + std::fs::read_to_string(&path) + .with_context(|| format!("read {path:?}"))? + .lines() + .enumerate() + .map(|(i, l)| (i, l.trim())) + .filter(|(_i, l)| !l.is_empty() && !l.starts_with("#")) + .map(|(i, l)| { + Utf8PathBuf::try_from(l).map_err(|_err| { + anyhow!("{path:?} line {}: non-UTF8 file path", i + 1) + }) + }) + .collect() +} + +/// Test that our test data includes all the kinds of things that we expect. +/// If you see this test failing, presumably you updated the test data and +/// you'll need to make sure it's still representative. +#[test] +fn test_test_data() { + // Load the test data and determine what kind of file each one is. + let files = load_test_files().unwrap(); + + // Create a set of all the kinds of test files that we have not seen so + // far. We'll remove from this set as we find files of this kind. Any + // kinds left over at the end are missing from our test data. + let mut all_kinds: BTreeSet<_> = + TestFileKindDiscriminants::iter().collect(); + // We don't care about finding the "ignored" kind. 
+    all_kinds.remove(&TestFileKindDiscriminants::Ignored);
+    for test_file in files {
+        println!("{} {}", test_file.kind, test_file.path);
+        all_kinds.remove(&test_file.kind.discriminant());
+    }
+
+    if !all_kinds.is_empty() {
+        panic!("missing file in test data for kinds: {:?}", all_kinds);
+    }
+}
+
+/// Plan an archive operation based on the testing data
+pub(crate) fn test_archive<'a>(
+    log: &Logger,
+    test_files: &IdOrdMap<TestFile>,
+    output_dir: &Utf8Path,
+    what: ArchiveKind,
+    lister: &'a TestLister,
+) -> ArchivePlan<'a> {
+    // Construct sources that correspond with the test data.
+    let cores_datasets: BTreeSet<_> = test_files
+        .iter()
+        .filter_map(|test_file| test_file.kind.cores_directory())
+        .collect();
+    let zone_infos: BTreeSet<_> = test_files
+        .iter()
+        .filter_map(|test_file| test_file.kind.zone_info())
+        .collect();
+
+    // Plan an archival pass.
+    let mut planner =
+        ArchivePlanner::new_with_lister(log, what, output_dir, lister);
+
+    for cores_dir in cores_datasets {
+        debug!(log, "including cores directory"; "cores_dir" => %cores_dir);
+        planner.include_cores_directory(cores_dir);
+    }
+
+    for (zone_name, zone_root) in zone_infos {
+        debug!(
+            log,
+            "including zone";
+            "zone_name" => zone_name,
+            "zone_root" => %zone_root,
+        );
+        planner.include_zone(zone_name, zone_root);
+    }
+
+    planner.into_plan()
+}
+
+/// Describes one file path in the testing data
+#[derive(Clone)]
+pub(crate) struct TestFile {
+    /// path to the file
+    pub path: Utf8PathBuf,
+    /// what kind of file we determined it to be, based on its path
+    pub kind: TestFileKind,
+}
+
+impl IdOrdItem for TestFile {
+    type Key<'a> = &'a Utf8Path;
+
+    fn key(&self) -> Self::Key<'_> {
+        &self.path
+    }
+
+    id_upcast!();
+}
+
+/// Describes what kind of file we're looking at and what source it's in
+#[derive(Clone, Debug, Display, EnumIter, EnumDiscriminants)]
+#[strum_discriminants(derive(EnumIter, Ord, PartialOrd))]
+pub(crate) enum TestFileKind {
+    KernelCrashDump {
+        cores_directory: String,
+    },
+    ProcessCoreDump {
+        cores_directory: String,
+    },
+    LogSmfRotated {
+        zone_name: String,
+        zone_root: String,
+    },
+    LogSmfLive {
+        zone_name: String,
+        zone_root: String,
+    },
+    LogSyslogRotated {
+        zone_name: String,
+        zone_root: String,
+    },
+    LogSyslogLive {
+        zone_name: String,
+        zone_root: String,
+    },
+    GlobalLogSmfRotated,
+    GlobalLogSmfLive,
+    GlobalLogSyslogRotated,
+    GlobalLogSyslogLive,
+    /// files we don't especially care about, but are in the test data to
+    /// ensure that they don't create a problem
+    Ignored,
+}
+
+impl TestFileKind {
+    /// Returns information about the cores directory this file is in, if any
+    pub fn cores_directory(&self) -> Option<&Utf8Path> {
+        match self {
+            TestFileKind::KernelCrashDump { cores_directory }
+            | TestFileKind::ProcessCoreDump { cores_directory } => {
+                Some(Utf8Path::new(cores_directory))
+            }
+            TestFileKind::LogSmfRotated { .. }
+            | TestFileKind::LogSmfLive { .. }
+            | TestFileKind::LogSyslogRotated { .. }
+            | TestFileKind::LogSyslogLive { .. }
+            | TestFileKind::GlobalLogSmfRotated
+            | TestFileKind::GlobalLogSmfLive
+            | TestFileKind::GlobalLogSyslogRotated
+            | TestFileKind::GlobalLogSyslogLive
+            | TestFileKind::Ignored => None,
+        }
+    }
+
+    /// Returns information about the zone this file is in, if any
+    pub fn zone_info(&self) -> Option<(&str, &Utf8Path)> {
+        match self {
+            TestFileKind::KernelCrashDump { .. }
+            | TestFileKind::ProcessCoreDump { .. }
+            | TestFileKind::Ignored => None,
+            TestFileKind::LogSmfRotated { zone_name, zone_root }
+            | TestFileKind::LogSmfLive { zone_name, zone_root }
+            | TestFileKind::LogSyslogRotated { zone_name, zone_root }
+            | TestFileKind::LogSyslogLive { zone_name, zone_root } => {
+                Some((zone_name, Utf8Path::new(zone_root)))
+            }
+            TestFileKind::GlobalLogSmfRotated
+            | TestFileKind::GlobalLogSmfLive
+            | TestFileKind::GlobalLogSyslogRotated
+            | TestFileKind::GlobalLogSyslogLive => {
+                Some(("global", Utf8Path::new("/")))
+            }
+        }
+    }
+}
+
+static RE_CORES_DATASET: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new("^(/pool/int/[^/]+/crash)/[^/]+$").unwrap());
+
+static RE_NONGLOBAL_ZONE: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new("^/pool/ext/[^/]+/crypt/zone/([^/]+)/root").unwrap()
+});
+
+impl TryFrom<&Utf8Path> for TestFileKind {
+    type Error = anyhow::Error;
+
+    fn try_from(value: &Utf8Path) -> Result<Self, Self::Error> {
+        let s = value.as_str();
+
+        if let Some(c) = RE_CORES_DATASET.captures(s) {
+            let (_, [cores_directory]) = c.extract();
+            let cores_directory = cores_directory.to_owned();
+            if s.ends_with("bounds") {
+                Ok(TestFileKind::Ignored)
+            } else if s.contains("/vmdump.") {
+                Ok(TestFileKind::KernelCrashDump { cores_directory })
+            } else if s.contains("/core.") {
+                Ok(TestFileKind::ProcessCoreDump { cores_directory })
+            } else {
+                Err(anyhow!("unknown cores dataset test file kind"))
+            }
+        } else if let Some(c) = RE_NONGLOBAL_ZONE.captures(s) {
+            let (zone_root, [zone_name]) = c.extract();
+            let zone_root = zone_root.to_owned();
+            let zone_name = zone_name.to_owned();
+            if s.ends_with("/messages") {
+                Ok(TestFileKind::LogSyslogLive { zone_name, zone_root })
+            } else if s.contains("/messages.") {
+                Ok(TestFileKind::LogSyslogRotated { zone_name, zone_root })
+            } else if s.contains("/var/svc/log") {
+                if s.ends_with(".log") {
+                    Ok(TestFileKind::LogSmfLive { zone_name, zone_root })
+                } else {
+                    Ok(TestFileKind::LogSmfRotated { zone_name, zone_root })
+                }
+            } else {
+                Err(anyhow!("unknown non-global zone test file kind"))
+            }
+        } else {
+            if s == "/var/adm/messages" {
+                Ok(TestFileKind::GlobalLogSyslogLive)
+            } else if s.starts_with("/var/adm") && s.contains("/messages.") {
+                Ok(TestFileKind::GlobalLogSyslogRotated)
+            } else if s.starts_with("/var/svc/log") {
+                if s.ends_with(".log") {
+                    Ok(TestFileKind::GlobalLogSmfLive)
+                } else {
+                    Ok(TestFileKind::GlobalLogSmfRotated)
+                }
+            } else {
+                Err(anyhow!("unknown test file kind"))
+            }
+        }
+    }
+}
+
+/// Implementation of `FileLister` built atop the testing data
+pub(crate) struct TestLister<'a> {
+    /// files in our fake filesystem
+    files: BTreeSet<&'a Utf8Path>,
+    /// describes the last path listed (used in tests to verify behavior)
+    last_listed: Mutex<Option<Utf8PathBuf>>,
+    /// inject errors when operating on this path
+    injected_error: Option<&'a Utf8Path>,
+}
+
+impl<'a> TestLister<'a> {
+    /// Returns a lister that reports no files
+    pub fn empty() -> Self {
+        Self::new::<_, &'a str>(std::iter::empty())
+    }
+
+    /// Returns a lister for the test data
+    pub fn new_for_test_data(files: &'a IdOrdMap<TestFile>) -> Self {
+        Self::new(files.iter().map(|test_file| test_file.path.as_path()))
+    }
+
+    /// Returns a lister backed by the specified files
+    pub fn new<I, P>(files: I) -> Self
+    where
+        I: IntoIterator<Item = &'a P>,
+        P: AsRef<Utf8Path> + ?Sized + 'a,
+    {
+        Self {
+            files: files.into_iter().map(|p| p.as_ref()).collect(),
+            last_listed: Mutex::new(None),
+            injected_error: None,
+        }
+    }
+
+    /// Configure this lister to inject errors when accessing this path
+    ///
+    /// Clears any previously injected error.
+    pub fn inject_error(&mut self, fail_path: &'a Utf8Path) {
+        self.injected_error = Some(fail_path);
+    }
+
+    pub fn last_listed(&self) -> Option<Utf8PathBuf> {
+        self.last_listed.lock().unwrap().clone()
+    }
+}
+
+impl FileLister for TestLister<'_> {
+    fn list_files(
+        &self,
+        path: &Utf8Path,
+    ) -> Vec<Result<Filename, anyhow::Error>> {
+        // Keep track of the last path that was listed.
+        *self.last_listed.lock().unwrap() = Some(path.to_owned());
+
+        // Inject any errors we've been configured to inject.
+        if let Some(fail_path) = self.injected_error {
+            if path == fail_path {
+                return vec![Err(anyhow!("injected error for {fail_path:?}"))];
+            }
+        }
+
+        // Create a directory listing from the files in our test data.
+        self.files
+            .iter()
+            .filter_map(|file_path| {
+                let directory =
+                    file_path.parent().expect("test file has a parent");
+                (directory == path).then(|| {
+                    let filename = file_path
+                        .file_name()
+                        .expect("test file has a filename");
+                    Ok(Filename::try_from(filename.to_owned())
+                        .expect("filename has no slashes"))
+                })
+            })
+            .collect()
+    }
+
+    fn file_mtime(
+        &self,
+        path: &Utf8Path,
+    ) -> Result<Option<DateTime<Utc>>, anyhow::Error> {
+        if let Some(fail_path) = self.injected_error {
+            if path == fail_path {
+                bail!("injected error for {fail_path:?}");
+            }
+        }
+
+        Ok(Some("2025-12-12T16:51:00-07:00".parse().unwrap()))
+    }
+
+    fn file_exists(&self, path: &Utf8Path) -> Result<bool, anyhow::Error> {
+        Ok(self.files.contains(path))
+    }
+}
diff --git a/sled-agent/config-reconciler/src/debug_collector/mod.rs b/sled-agent/config-reconciler/src/debug_collector/mod.rs
index 367b75029ed..c1764d65708 100644
--- a/sled-agent/config-reconciler/src/debug_collector/mod.rs
+++ b/sled-agent/config-reconciler/src/debug_collector/mod.rs
@@ -84,6 +84,7 @@
 //!    +----------------------+
 //!    ```
 
+mod file_archiver;
 mod handle;
 mod helpers;
 mod task;
diff --git a/sled-agent/config-reconciler/src/debug_collector/worker.rs b/sled-agent/config-reconciler/src/debug_collector/worker.rs
index c092a5a4843..e1b2cf1f408 100644
--- a/sled-agent/config-reconciler/src/debug_collector/worker.rs
+++ b/sled-agent/config-reconciler/src/debug_collector/worker.rs
@@ -186,6 +186,8 @@
 //! the _live_ log files are also archived, since they will not have a chance
 //! to get rotated and so would otherwise be lost.
+use super::file_archiver::ArchiveKind; +use super::file_archiver::ArchivePlanner; use super::helpers::CoreDumpAdmInvoker; use super::helpers::ZFS_PROP_AVAILABLE; use super::helpers::ZFS_PROP_USED; @@ -210,12 +212,10 @@ use slog::warn; use slog_error_chain::InlineErrorChain; use std::collections::HashSet; use std::ffi::OsString; -use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; +use std::time::{Duration, SystemTimeError, UNIX_EPOCH}; use tokio::sync::mpsc::Receiver; use tokio::sync::oneshot; -use zone::ZoneError; // Parameters related to management of storage on debug datasets @@ -884,110 +884,62 @@ impl DebugCollectorWorker { } } - async fn archive_files(&self) -> tokio::io::Result<()> { - if let Some(debug_dir) = &self.chosen_debug_dir { - if self.known_core_dirs.is_empty() { - info!(self.log, "No core dump locations yet known."); - } - for core_dir in &self.known_core_dirs { - if let Ok(dir) = core_dir.as_ref().read_dir() { - for entry in dir.flatten() { - if let Some(path) = entry.file_name().to_str() { - let dest = debug_dir.as_ref().join(path); - - if let Err(err) = - Self::copy_sync_and_remove(&entry.path(), &dest) - .await - { - error!( - self.log, - "Failed to archive {entry:?}: {err:?}" - ); - } else { - info!( - self.log, - "Relocated {entry:?} to {dest:?}" - ); - } - } else { + async fn archive_files(&self) -> Result<(), anyhow::Error> { + let log = &self.log; + let Some(debug_dir) = &self.chosen_debug_dir else { + error!( + &log, + "Archiving skipped: no archival destination available" + ); + return Ok(()); + }; + + info!(&log, "Archiving files"); + let mut archiver = ArchivePlanner::new( + log, + ArchiveKind::Periodic, + &debug_dir.as_ref(), + ); + if self.known_core_dirs.is_empty() { + warn!(self.log, "No core dump locations yet known."); + } + for core_dir in &self.known_core_dirs { + archiver.include_cores_directory(core_dir.as_ref()); + } + + match self.zone_invoker.get_zones().await { + Ok(zones) => { + for zone in zones { + let zone_path: &Utf8Path = match zone.path().try_into() { + Ok(zone_path) => zone_path, + Err(error) => { + // This should be impossible in practice. + let error = InlineErrorChain::new(&error); error!( - self.log, - "Non-UTF8 path found while archiving core \ - dumps: {entry:?}" + log, + "Cannot archive zone because its path is \ + not UTF-8"; + "zone_name" => zone.name(), + error ); + continue; } - } + }; + let zone_root = if zone.global() { + zone_path.to_owned() + } else { + zone_path.join("root") + }; + archiver.include_zone(zone.name(), &zone_root); } } - } else { - info!( - self.log, - "No archival destination for crash dumps yet chosen." 
-            );
-        }
-
-        if let Err(err) = self.archive_logs_from_running_zones().await {
-            if !matches!(err, ArchiveLogsError::NoDebugDirYet) {
-                error!(
-                    self.log,
-                    "Failure while trying to archive logs to debug dataset: \
-                    {err:?}"
-                );
+            Err(error) => {
+                let error = InlineErrorChain::new(&error);
+                warn!(log, "Failed to list running zones"; error);
             }
-        }
-
-        Ok(())
-    }
-
-    async fn copy_sync_and_remove(
-        source: impl AsRef<Path>,
-        dest: impl AsRef<Path>,
-    ) -> tokio::io::Result<()> {
-        let source = source.as_ref();
-        let dest = dest.as_ref();
-        let mut dest_f = tokio::fs::File::create(&dest).await?;
-        let mut src_f = tokio::fs::File::open(&source).await?;
-
-        tokio::io::copy(&mut src_f, &mut dest_f).await?;
-
-        dest_f.sync_all().await?;
-
-        drop(src_f);
-        drop(dest_f);
-
-        tokio::fs::remove_file(source).await?;
-        Ok(())
-    }
-
-    async fn archive_logs_from_running_zones(
-        &self,
-    ) -> Result<(), ArchiveLogsError> {
-        let debug_dir = self
-            .chosen_debug_dir
-            .as_ref()
-            .ok_or(ArchiveLogsError::NoDebugDirYet)?;
-        let oxz_zones = self.zone_invoker.get_zones().await?;
-
-        for zone in oxz_zones {
-            let zone_root = if zone.global() {
-                zone.path().to_owned()
-            } else {
-                zone.path().join("root")
-            };
-            let logdir = zone_root.join("var/svc/log");
-            let zone_name = zone.name();
-            self.archive_logs_from_zone_path(
-                debug_dir, logdir, "*.log", zone_name, false,
-            )
-            .await?;
+        };
 
-            let adm_logdir = zone_root.join("var/adm");
-            self.archive_logs_from_zone_path(
-                debug_dir, adm_logdir, "messages", zone_name, false,
-            )
-            .await?;
-        }
-        Ok(())
+
+        archiver.execute().await
     }
 
     async fn do_archive_former_zone_root(
@@ -1000,16 +952,13 @@ impl DebugCollectorWorker {
             .chosen_debug_dir
             .as_ref()
             .ok_or(ArchiveLogsError::NoDebugDirYet)?;
-        let logdir = zone_root.join("root/var/svc/log");
-        let rv = self
-            .archive_logs_from_zone_path(
-                debug_dir,
-                logdir.into(),
-                "*.log",
-                zone_name,
-                true,
-            )
-            .await;
+        let mut archiver = ArchivePlanner::new(
+            &self.log,
+            ArchiveKind::Final,
+            &debug_dir.as_ref(),
+        );
+        archiver.include_zone(zone_name, zone_root);
+        archiver.execute().await.map_err(ArchiveLogsError::Archiver)?;
         if let Err(()) = completion_tx.send(()) {
             // In practice, it would be surprising for our caller to have
             // dropped this channel.  Make a note.
@@ -1019,90 +968,6 @@ impl DebugCollectorWorker {
                 "error" => "completion channel closed",
             );
         }
-        rv
-    }
-
-    // Archives log files found in `logdir` for zone `zone_name` to the
-    // destination debug dataset.
-    //
-    // `log_name_pattern` should be a glob pattern that matches against file
-    // names, e.g., `*.log`, `mylog`.  If `include_live` is `true`, this will
-    // archive all logs, matching on `{log_name_pattern}*`.  If it is `false`,
-    // only rotated logs will be archived, matching on
-    // `{log_name_pattern}.[0-9]`.
-    async fn archive_logs_from_zone_path(
-        &self,
-        debug_dir: &DebugDataset,
-        logdir: PathBuf,
-        log_name_pattern: &str,
-        zone_name: &str,
-        include_live: bool,
-    ) -> Result<(), ArchiveLogsError> {
-        let mut rotated_log_files = Vec::new();
-        if include_live {
-            let pattern = logdir
-                .join(format!("{log_name_pattern}*"))
-                .to_str()
-                .ok_or_else(|| ArchiveLogsError::Utf8(zone_name.to_string()))?
-                .to_string();
-            rotated_log_files.extend(glob::glob(&pattern)?.flatten());
-        } else {
-            // patterns matching archived logs, e.g.
foo.log.3 - // keep checking for greater numbers of digits until we don't find - // any - for n in 1..9 { - let pattern = logdir - .join(format!("{log_name_pattern}.{}", "[0-9]".repeat(n))) - .to_str() - .ok_or_else(|| { - ArchiveLogsError::Utf8(zone_name.to_string()) - })? - .to_string(); - rotated_log_files.extend(glob::glob(&pattern)?.flatten()); - } - } - let dest_dir = debug_dir.as_ref().join(zone_name).into_std_path_buf(); - if !rotated_log_files.is_empty() { - tokio::fs::create_dir_all(&dest_dir).await?; - let count = rotated_log_files.len(); - info!( - self.log, - "Archiving {count} log files from {zone_name} zone" - ); - } else if include_live { - warn!( - self.log, - "Found no log files from {zone_name} zone, including live \ - log files" - ); - } - for entry in rotated_log_files { - let src_name = entry.file_name().unwrap(); - // as we archive them, logadm will keep resetting to .log.0, - // so we need to maintain our own numbering in the dest dataset. - // we'll use the modified date of the rotated log file, or try - // falling back to the time of archival if that fails, and - // falling back to counting up from 0 if *that* somehow fails. - let mut n = entry - .metadata() - .and_then(|m| m.modified()) - .unwrap_or_else(|_| SystemTime::now()) - .duration_since(UNIX_EPOCH) - .map(|d| d.as_secs()) - .unwrap_or(0); - let mut dest; - loop { - dest = dest_dir.join(src_name).with_extension(format!("{n}")); - if dest.exists() { - n += 1; - } else { - break; - } - } - if let Err(err) = Self::copy_sync_and_remove(&entry, dest).await { - warn!(self.log, "Failed to archive {entry:?}: {err:?}"); - } - } Ok(()) } @@ -1275,18 +1140,12 @@ impl DebugCollectorWorker { #[derive(thiserror::Error, Debug)] pub enum ArchiveLogsError { - #[error("I/O error: {0}")] - IoError(#[from] tokio::io::Error), - #[error("Error calling zoneadm: {0}")] - Zoneadm(#[from] ZoneError), - #[error("Non-UTF8 zone path for zone {0}")] - Utf8(String), - #[error("Glob pattern invalid: {0}")] - Glob(#[from] glob::PatternError), #[error( "No debug dir into which we should archive logs has yet been chosen" )] NoDebugDirYet, + #[error("Archive error")] + Archiver(#[source] anyhow::Error), } #[derive(thiserror::Error, Debug)] @@ -1324,8 +1183,10 @@ mod tests { use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; use std::collections::HashMap; use std::str::FromStr; + use std::time::SystemTime; use tokio::io::AsyncWriteExt; use zone::Zone; + use zone::ZoneError; impl Clone for ZfsGetError { fn clone(&self) -> Self { diff --git a/sled-agent/config-reconciler/test-data/debug-files.txt b/sled-agent/config-reconciler/test-data/debug-files.txt new file mode 100644 index 00000000000..cc146e4b1f4 --- /dev/null +++ b/sled-agent/config-reconciler/test-data/debug-files.txt @@ -0,0 +1,93 @@ +# In this file, blank lines and lines starting with '#' are ignored. +# +# This file contains the full paths to files seen on deployed systems +# (like the dogfood environment) that the debug collector is responsible for +# archiving. There are tests that use these paths to verify that the debug +# collector's path rules correctly collect files from production systems. +# +# If you need to update or regenerate this, you'll want to use something like: +# +# find PATHS -type f +# +# for each of several different paths. Using `find` like this makes sure that +# these are real paths from real systems. If you hand-construct a path here to +# match what the test expects, that defeats the point! 
+# +# Here are paths that we include: +# +# # Not-yet-archived kernel crash dumps and core files +# /pool/int/*/crash +# +# # Pick some zone and get its un-archived SMF logs and syslog. +# # You'll want to make sure to find one with recently rotated +# # log files (which is a little tricky since logadm only rotates +# # them every hour and the archiver picks them up within 5 +# # minutes). +# /pool/ext/*/crypt/zone/$PICK_A_ZONE/root/var/svc/log/* +# /pool/ext/*/crypt/zone/$PICK_A_ZONE/root/var/adm/messages* +# +# though you should beware that this will produce an enormous amount of output +# on systems that have been deployed for a long time. You will probably want to +# prune most of the log files out. +# +# The output here has been constructed from similar invocations on several +# systems in order to assemble a representative set. + +# Kernel crash dumps and related files in the "crash" dataset +/pool/int/35dcb885-18cf-4842-a17f-fb63e76a5f2c/crash/vmdump.0 +/pool/int/35dcb885-18cf-4842-a17f-fb63e76a5f2c/crash/bounds +/pool/int/35dcb885-18cf-4842-a17f-fb63e76a5f2c/crash/vmdump.1 + +# User process core files in the "crash" dataset +/pool/int/5a058adc-8208-4bff-b4f3-44e2651435b0/crash/core.oxz_switch.cat.27751.1765577436 +/pool/int/7c377f39-95bf-4074-8dda-7a7a102b9d2c/crash/core.oxz_propolis-server_081c9d2b-2d89-4830-b3c5-ff4439013794.propolis-server.5047.1765579111 +/pool/int/41eda85a-0820-4c91-8067-7f28af0cd408/crash/core.oxz_propolis-server_1182aa44-0367-4ea5-be41-f207a95b52bf.propolis-server.28517.1765579115 + +# Live SMF log files +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/application-management-net-snmp:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/application-security-tcsd:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/milestone-devices:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/network-tcpkey:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/oxide-crucible-agent:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/oxide-crucible-downstairs:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/oxide-crucible-downstairs:downstairs-5275dc20-1d32-4304-8b61-a62a575839ad.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/svc/log/svc.startd.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_ntp_0460bff0-4cef-487f-aa5c-fd7e1ecef3e0/root/var/svc/log/oxide-chrony-setup:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_ntp_0460bff0-4cef-487f-aa5c-fd7e1ecef3e0/root/var/svc/log/oxide-ntp-admin:default.log +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_ntp_0460bff0-4cef-487f-aa5c-fd7e1ecef3e0/root/var/svc/log/oxide-ntp:default.log + +# Live syslog files +/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_crucible_5cc7c840-8e6b-48c8-ac4b-f4297f8cf61a/root/var/adm/messages 
+/pool/ext/0c4ef358-5533-43db-ad38-a8eff716e53a/crypt/zone/oxz_ntp_0460bff0-4cef-487f-aa5c-fd7e1ecef3e0/root/var/adm/messages +/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone/oxz_crucible_b12aa520-a769-4eac-b56b-09960550a831/root/var/adm/messages +/pool/ext/34dadf3f-f60c-4acc-b82b-4b0c82224222/crypt/zone/oxz_propolis-server_c78103ae-b241-4655-93fb-5f2f8bf50041/root/var/adm/messages +/pool/ext/416232c1-bc8f-403f-bacb-28403dd8fced/crypt/zone/oxz_cockroachdb_e86845b5-eabd-49f5-9a10-6dfef9066209/root/var/adm/messages +/pool/ext/416232c1-bc8f-403f-bacb-28403dd8fced/crypt/zone/oxz_crucible_85bd9bdb-1ec5-4a8d-badb-8b5d502546a1/root/var/adm/messages + +# Rotated SMF log files +/pool/ext/2115b084-be0f-4fba-941b-33a659798a9e/crypt/zone/oxz_ntp_a700528f-f600-4908-94ac-9c06442ef6b4/root/var/svc/log/application-management-net-snmp:default.log.0 +/pool/ext/2115b084-be0f-4fba-941b-33a659798a9e/crypt/zone/oxz_ntp_a700528f-f600-4908-94ac-9c06442ef6b4/root/var/svc/log/oxide-ntp:default.log.0 + +# Rotated syslog files +/pool/ext/2115b084-be0f-4fba-941b-33a659798a9e/crypt/zone/oxz_ntp_a700528f-f600-4908-94ac-9c06442ef6b4/root/var/adm/messages.0 + +# Global zone: live syslog +/var/adm/messages + +# Global zone: rotated syslog +/var/adm/messages.0 +/var/adm/messages.1 +/var/adm/messages.2 +/var/adm/messages.3 + +# Global zone: live SMF logs +/var/svc/log/site-postboot:default.log +/var/svc/log/oxide-pumpkind:default.log +/var/svc/log/oxide-sled-agent:default.log + +# Global zone: rotated SMF logs +/var/svc/log/oxide-mg-ddm:default.log.0 +/var/svc/log/oxide-mg-ddm:default.log.1 +/var/svc/log/oxide-sled-agent:default.log.0 +/var/svc/log/oxide-sled-agent:default.log.1 +/var/svc/log/oxide-sled-agent:default.log.2 From 372db0ad65a709768b974f3e53f262390e787032 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 22 Dec 2025 09:14:48 -0800 Subject: [PATCH 2/2] fix link --- .../src/debug_collector/file_archiver/planning.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs b/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs index 61752b7ab76..2c906c0f050 100644 --- a/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs +++ b/sled-agent/config-reconciler/src/debug_collector/file_archiver/planning.rs @@ -7,8 +7,8 @@ //! //! This system is designed so that as much possible is incorporated into the //! plan so that it can be tested in simulation without extensive dependency -//! injection. See also [https://mmapped.blog/posts/29-plan-execute](the -//! plan-execute pattern). +//! injection. See also [the plan-execute +//! pattern](https://mmapped.blog/posts/29-plan-execute). use super::execution::execute_archive_step; use super::filesystem::FileLister;