3 changes: 2 additions & 1 deletion .gitignore
@@ -18,4 +18,5 @@ models/*

results/features_residual/
results/residual_analysis/
results/features_updated/
results/features_updated/
.demo.ipynb
21 changes: 10 additions & 11 deletions README.md
@@ -26,9 +26,7 @@ This repo provides a simple command‑line interface to invoke the tool and exam

Future work includes the development of an automated testing framework and evaluation suite, expanding the scope of research to include a wider diversity of synthetic and original human-generated datasets, benchmarking against comparable methods, and exploring additional model architectures.

![Bar and grid graph comparing variance of the synthetic and real images](results/score_explained_variance.png)
![Graph comparing before and after pca transform operation of dataset](results/pca_transform_map.png)
![Graph comparing confusion matrix of the synthetic and real images](results/score_confusion_matrix.png)
![Bar and grid graph comparing variance of the synthetic and real images](results/combined_plots.png)

## Requirements

@@ -69,32 +67,33 @@ Set-ExecutionPolicy Bypass -Scope Process -Force; .venv\Scripts\Activate.ps1
Basic Syntax:

```sh
usage: negate [-h] {train,check} ...
usage: negate [-h] {train,check,compare} ...

Negate CLI

positional arguments:
{train,check}
train Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.
check Check whether an image at the provided path is synthetic or original.
{train,check,compare}
train Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.
check Check whether an image at the provided path is synthetic or original.
compare Run extraction and training using all possible VAE.

options:
-h, --help show this help message and exit
-h, --help show this help message and exit
```
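
For example, each subcommand can be run directly from the shell (illustrative only; `path/to/photo.png` is a placeholder):

```sh
# Train on the default dataset under `assets/` with the default VAE
negate train

# Check a single image; -g / -s optionally mark the ground-truth label for evaluation
negate check -g path/to/photo.png

# Run extraction and training once for every supported VAE
negate compare
```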

Training syntax:

```sh
usage: negate train [-h]
[-m {exdysa/dc-ae-f32c32-sana-1.1-diffusers,zai-org/GLM-Image,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}]
[-m {exdysa/dc-ae-f32c32-sana-1.1-diffusers,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}]
[path]

positional arguments:
path Dataset path
path Genuine/Human-original dataset path

options:
-h, --help show this help message and exit
-m, --model {exdysa/dc-ae-f32c32-sana-1.1-diffusers,zai-org/GLM-Image,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}
-m, --model {exdysa/dc-ae-f32c32-sana-1.1-diffusers,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}
Change the VAE model to use for training to a supported HuggingFace repo. Accuracy and memory use decrease from left to right
```
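
A hypothetical full invocation, with an explicit dataset path (placeholder) and one of the supported VAE repos:

```sh
negate train datasets/originals -m black-forest-labs/FLUX.2-klein-4B
```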

6 changes: 3 additions & 3 deletions _version.py
@@ -28,7 +28,7 @@
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.1.dev41+g786446a62.d20260131'
__version_tuple__ = version_tuple = (0, 1, 'dev41', 'g786446a62.d20260131')
__version__ = version = '0.1.dev57+gded5bb62c.d20260203'
__version_tuple__ = version_tuple = (0, 1, 'dev57', 'gded5bb62c.d20260203')

__commit_id__ = commit_id = 'g786446a62'
__commit_id__ = commit_id = 'gded5bb62c'
23 changes: 23 additions & 0 deletions config/config.yaml
@@ -0,0 +1,23 @@
# Advanced Configuration for Negate CLI
batch_size: 4 # Feature extraction batch size, zero to disable batching
cache_features: true # Cache extracted features between runs
vae_tiling: false # Enable VAE tiling
vae_slicing: false # Enable VAE slicing
patch_size: 768 # Patch resolution
top_k: 1 # Number of top patches to keep
use_onnx: false # True → ONNX, False → native XGBoost
dtype: bfloat16 # vae dtype
default_vae: "Freepik/F-Lite-Texture" # Model path

train:
n_components: 0.95 # Training PCA num components
num_boost_round: 200 # Boosted training rounds
early_stopping_rounds: 10 # Early stop training
colsample_bytree: 0.8
eval_metric: ["logloss", "aucpr"]
learning_rate: 0.1
max_depth: 4
objective: "binary:logistic"
subsample: 0.8
scale_pos_weight: null
seed: 0
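
The `train:` block mirrors standard XGBoost parameters. A minimal sketch, assuming `grade()` forwards these values to `xgboost.train` (the random data below is placeholder only; the real feature matrix comes from the VAE extraction and PCA steps):

```python
import numpy as np
import xgboost as xgb

# Placeholder features/labels standing in for the PCA-reduced VAE features
rng = np.random.default_rng(0)
X, y = rng.normal(size=(64, 16)), rng.integers(0, 2, size=64)
dtrain = xgb.DMatrix(X[:48], label=y[:48])
dval = xgb.DMatrix(X[48:], label=y[48:])

params = {
    "objective": "binary:logistic",       # train.objective
    "eval_metric": ["logloss", "aucpr"],  # train.eval_metric
    "learning_rate": 0.1,                 # train.learning_rate
    "max_depth": 4,                       # train.max_depth
    "subsample": 0.8,                     # train.subsample
    "colsample_bytree": 0.8,              # train.colsample_bytree
    "seed": 0,                            # train.seed
}
booster = xgb.train(
    params,
    dtrain,
    num_boost_round=200,        # train.num_boost_round
    early_stopping_rounds=10,   # train.early_stopping_rounds
    evals=[(dval, "val")],
)
```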
8 changes: 5 additions & 3 deletions negate/__init__.py
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0
# <!-- // /* d a r k s h a p e s */ -->
# ruff: noqa

from negate.datasets import build_datasets, dataset_to_nparray, generate_dataset
from negate.config import negate_options as negate_opt
from negate.datasets import build_datasets, generate_dataset
from negate.extract import FeatureExtractor, DeviceName, features, VAEModel
from negate.train import TrainResult, grade, get_time, model_path
from negate.train import TrainResult, grade, generate_datestamp_path, datestamped_folder, get_time, model_path
from negate.track import in_console, on_graph
from negate.save import save_model, save_to_onnx
from negate.save import save_models, save_to_onnx
from negate.residuals import Residual
161 changes: 120 additions & 41 deletions negate/__main__.py
@@ -1,57 +1,115 @@
# SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0
# <!-- // /* d a r k s h a p e s */ -->

"""Negate CLI entry point for training and inference.\n
:returns: None."""

from pathlib import Path
from typing import Any

import numpy as np

from negate import TrainResult, build_datasets, features, generate_dataset, grade, in_console, save_to_onnx, on_graph, VAEModel
from negate import (
TrainResult,
VAEModel,
build_datasets,
datestamped_folder,
features,
generate_dataset,
generate_datestamp_path,
grade,
in_console,
model_path,
negate_opt,
on_graph,
save_models,
save_to_onnx,
)


def run_native(features_array) -> np.ndarray:
"""Run inference using XGBoost with PCA pre-processing.\n
:param features_array: Feature array.\n
:return: Prediction array."""
import pickle

import xgboost as xgb

model_file_path_named = model_path / "negate.ubj"

if not model_file_path_named.exists():
raise FileNotFoundError(f"Model file not found: {str(model_file_path_named)}. Please run 'train' first to create the model.")
else:
model_file_path_named = str(model_file_path_named)

pca_file_path_named = model_path / "negate_pca.pkl"
with open(pca_file_path_named, "rb") as pca_file:
pca = pickle.load(pca_file)

features_pca = pca.transform(features_array)

model = xgb.Booster()
model.load_model(model_file_path_named)

result = model.predict(xgb.DMatrix(features_pca))

return result

def evaluate(prediction: np.ndarray, ground_truth: np.ndarray) -> None:
"""Print accuracy and class distribution.\n
:param prediction: Model outputs (0 = genuine, 1 = synthetic).\n
:param ground_truth: Ground-truth labels.\n
:return: None."""

prediction = prediction.astype(int)
ground_truth = ground_truth.astype(int)
def run_onnx(features_array) -> Any:
"""Run inference using ONNX Runtime with PCA pre-processing.\n
:param features_array: Feature array.\n
:return: Prediction array."""
import onnxruntime as ort
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as ONNXRuntimeError
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument

model_file_path_named = model_path / "negate.onnx"
if not model_file_path_named.exists():
raise FileNotFoundError(f"Model file not found: {str(model_file_path_named)}. Please run 'train' first to create the model.")
else:
model_file_path_named = str(model_file_path_named)

# pca_file_path_named = model_path / "negate_pca.onnx"
# session_pca = ort.InferenceSession(pca_file_path_named)
# input_name_pca = session_pca.get_inputs()[0].name
# features_pca = session_pca.run(None, {input_name_pca: features_array})[0]

acc = float(np.mean(prediction == ground_truth))
# input_name = ort.get_available_providers()[0]
features_model = features_array.astype(np.float32) # type: ignore

genu_cnt = int(np.sum(ground_truth == 0))
synth_cnt = int(np.sum(ground_truth == 1))
session = ort.InferenceSession(model_file_path_named)
print(f"Model '{model_file_path_named}' loaded.")
input_name = session.get_inputs()[0].name
try:
result = session.run(None, {input_name: features_model})[0] # type: ignore
return result
except (InvalidArgument, ONNXRuntimeError) as error_log:
import sys

print(f"Accuracy: {acc:.2%}")
print(f"Genuine: {genu_cnt} Synthetic: {synth_cnt}")
print(error_log)
sys.exit()


def predict(image_path: Path, vae_type: VAEModel = VAEModel.MITSUA_FP16, true_label: int | None = None) -> np.ndarray:
def predict(image_path: Path, vae_type: VAEModel, true_label: int | None = None) -> np.ndarray:
"""Predict synthetic or original for given image. (0 = genuine, 1 = synthetic)\n
:param image_path: Path to image file or folder.
:param vae_type: VAE model to use for feature extraction.
:return: Prediction array.
"""
:return: Prediction array."""
from datasets import Dataset
import onnxruntime as ort
from onnxruntime import SparseTensor

print(f"{'Evaluation' if true_label is not None else 'Detection'} selected.")

models_location = Path(__file__).parent.parent / "models"
model_file = models_location / "negate.onnx"

if not model_file.exists():
raise FileNotFoundError(f"Model file not found: {model_file}. Please run 'train' first to create the model.")
print(f"""{"Evaluation" if true_label is not None else "Detection"} selected.
Checking path '{image_path}' with {vae_type.value}""")

dataset: Dataset = generate_dataset(image_path)
features_dataset: Dataset = features(dataset, vae_type)
features_array = np.array(features_dataset["features"], dtype=np.float32) # type: ignore[arg-type]

features_array = np.array(features_dataset["features"]).astype(np.float32) # type: ignore[arg-type]
result = run_onnx(features_array) if negate_opt.use_onnx else run_native(features_array)

session = ort.InferenceSession(str(model_file))
input_name = session.get_inputs()[0].name
result: SparseTensor = session.run(None, {input_name: features_array})[0] # type: ignore
print(result)
thresh = 0.5
predictions = (result > thresh).astype(int)
match true_label:
case None:
for prediction in result: # type: ignore
@@ -60,21 +118,26 @@ def predict(image_path: Path, vae_type: VAEModel = VAEModel.MITSUA_FP16, true_la
else:
print("image is SYNTHETIC")
case _:
evaluate(result, np.array([true_label])) # type: ignore
ground_truth = np.full(predictions.shape, true_label, dtype=int)
acc = float(np.mean(predictions == ground_truth))
print(f"Accuracy: {acc:.2%}")

return result # type: ignore[return-value]
return result, predictions # type: ignore[return-value]


def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None) -> None:
"""Train model using dataset at path.\n
:param path: Dataset root."""
def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None, compare: bool = False) -> None:
"""Train \n
# xgb00OOst\n
model using dataset at path.\n
:param path: Dataset root folder."""
from datasets import Dataset

print("Training selected.")
dataset: Dataset = build_datasets(file_or_folder_path)
features_dataset: Dataset = features(dataset, vae_type)
train_result: TrainResult = grade(features_dataset)
save_to_onnx(train_result)
save_models(train_result, compare)
in_console(train_result, vae_type)
on_graph(train_result)

@@ -90,14 +153,15 @@ def main() -> None:
subparsers = parser.add_subparsers(dest="cmd", required=True)

train_parser = subparsers.add_parser("train", help="Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.")
train_parser.add_argument("path", help="Dataset path", nargs="?", default=None)
train_parser.add_argument("path", help="Genunie/Human-original dataset path", nargs="?", default=None)
train_parser.add_argument(
"-m",
"--model",
choices=[m.value for m in VAEModel],
default=VAEModel.MITSUA_FP16,
help="Change the VAE model to use for training to a supported HuggingFace repo. Accuracy and memory use decrease from left to right",
default=negate_opt.default_vae,
help=f"Change the VAE model to use for training to a supported HuggingFace repo (default {negate_opt.default_vae}). Accuracy and memory use decrease from left to right",
)

check_parser = subparsers.add_parser(
"check",
help="Check whether an image at the provided path is synthetic or original.",
@@ -106,7 +170,7 @@ def main() -> None:
label_grp = check_parser.add_mutually_exclusive_group()
label_grp.add_argument("-s", "--synthetic", action="store_const", const=1, dest="label", help="Mark image as synthetic (label = 1) for evaluation.")
label_grp.add_argument("-g", "--genuine", action="store_const", const=0, dest="label", help="Mark image as genuine (label = 0) for evaluation.")

subparsers.add_parser("compare", help="Run extraction and training using all possible VAE.")
args = parser.parse_args(argv[1:])

match args.cmd:
@@ -115,12 +179,27 @@
dataset_location: Path | None = Path(args.path)
else:
dataset_location: Path | None = None
datestamped_folder.mkdir(parents=True, exist_ok=True)

vae_type = VAEModel(args.model)
training_run(file_or_folder_path=dataset_location, vae_type=vae_type)
training_run(
vae_type=vae_type,
file_or_folder_path=dataset_location,
)
case "check":
if args.path is None:
raise ValueError("Check requires an image path.")
predict(Path(args.path), true_label=args.label)
import json

results_file_path = model_path / "results.json"
with open(results_file_path) as result_metadata:
train_metadata = json.load(result_metadata)
vae_type = VAEModel(train_metadata["vae_type"])
predict(Path(args.path), vae_type=vae_type, true_label=args.label)
case "compare":
for model in VAEModel:
_regenerate = generate_datestamp_path("test")
training_run(vae_type=VAEModel(model.value), compare=True)
case _:
raise NotImplementedError

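A hedged sketch of calling the revised `predict()` programmatically instead of via the CLI (assumes a trained model already exists on disk, that `negate/__main__.py` only invokes `main()` under its entry-point guard, and that `samples/example.png` is a placeholder path):

```python
from pathlib import Path

from negate import VAEModel
from negate.__main__ import predict  # assumption: importable without side effects

# predict() now returns both raw scores and thresholded labels (0 = genuine, 1 = synthetic)
scores, labels = predict(
    Path("samples/example.png"),    # placeholder image path
    vae_type=VAEModel.MITSUA_FP16,
    true_label=None,                # pass 0 or 1 to print accuracy instead
)
print(scores, labels)
```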
52 changes: 52 additions & 0 deletions negate/config.py
@@ -0,0 +1,52 @@
from typing import NamedTuple


class NegateConfig(NamedTuple):
"""YAML config values.\n
:param patch_size: Patch width for residuals.\n
:param top_k: Number of patches.\n
:param vae_tiling: Enable tiling.\n
:param vae_slicing: Enable slicing.\n
:param use_onnx: Use ONNX for inference.\n
:return: Config instance.""" # noqa: D401

batch_size: int
cache_features: bool
default_vae: str
dtype: str
n_components: float
num_boost_round: int
patch_size: int
top_k: int
use_onnx: bool
vae_slicing: bool
vae_tiling: bool
early_stopping_rounds: int
colsample_bytree: float
eval_metric: list
learning_rate: float
max_depth: int
objective: str
subsample: float
scale_pos_weight: float | None
seed: int


def load_config_options() -> NegateConfig:
"""Load YAML configuration options.\n
:return: NegateConfig instance."""

from pathlib import Path

import yaml

config_path = Path(__file__).parent.parent / "config" / "config.yaml"
with open(config_path, "r") as config_file:
data = yaml.safe_load(config_file)
train_cfg = data.pop("train", {})
data.update(train_cfg)
return NegateConfig(**data)


negate_options = load_config_options()
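
A minimal usage sketch of the loaded options (the singleton is also re-exported as `negate_opt` from `negate/__init__.py`; the values in the comments reflect `config/config.yaml` from this change):

```python
from negate.config import negate_options

print(negate_options.default_vae)      # "Freepik/F-Lite-Texture"
print(negate_options.use_onnx)         # False -> run_native (PCA + XGBoost) path
print(negate_options.num_boost_round)  # 200
```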