diff --git a/.gitignore b/.gitignore index a7c63c9..7a51b52 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,5 @@ models/* results/features_residual/ results/residual_analysis/ -results/features_updated/ \ No newline at end of file +results/features_updated/ +.demo.ipynb diff --git a/README.md b/README.md index 219372a..d9e23d0 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,7 @@ This repo provides a simple command‑line interface to invoke the tool and exam Future work includes the development of an automated testing framework and evaluation suite, expanding the scope of research to include a wider diversity of synthetic and original human-generated datasets, benchmarking against comparable methods, and exploring additional model architectures. -![Bar and grid graph comparing variance of the synthetic and real images](results/score_explained_variance.png) -![Graph comparing before and after pca transform operation of dataset](results/pca_transform_map.png) -![Graph comparing confusion matrix of the synthetic and real images](results/score_confusion_matrix.png) +![Combined plots of PCA explained variance, leading components, confusion matrix, feature scatter, and correlation heatmap for the synthetic and real images](results/combined_plots.png) ## Requirements @@ -69,32 +67,33 @@ Set-ExecutionPolicy Bypass -Scope Process -Force; .venv\Scripts\Activate.ps1 Basic Syntax: ```sh -usage: negate [-h] {train,check} ... +usage: negate [-h] {train,check,compare} ... Negate CLI positional arguments: - {train,check} - train Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk. - check Check whether an image at the provided path is synthetic or original. + {train,check,compare} + train Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk. + check Check whether an image at the provided path is synthetic or original. + compare Run extraction and training using every supported VAE. options: - -h, --help show this help message and exit + -h, --help show this help message and exit ``` Training syntax: ```sh usage: negate train [-h] - [-m {exdysa/dc-ae-f32c32-sana-1.1-diffusers,zai-org/GLM-Image,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}] + [-m {exdysa/dc-ae-f32c32-sana-1.1-diffusers,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}] [path] positional arguments: - path Dataset path + path Genuine/human-original dataset path options: -h, --help show this help message and exit - -m, --model {exdysa/dc-ae-f32c32-sana-1.1-diffusers,zai-org/GLM-Image,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS} + -m, --model {exdysa/dc-ae-f32c32-sana-1.1-diffusers,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS} Change the VAE model to use for training to a supported HuggingFace repo.
Accuracy and memory use decrease from left to right ``` diff --git a/_version.py b/_version.py index aac473e..bf13d14 100644 --- a/_version.py +++ b/_version.py @@ -28,7 +28,7 @@ commit_id: COMMIT_ID __commit_id__: COMMIT_ID -__version__ = version = '0.1.dev41+g786446a62.d20260131' -__version_tuple__ = version_tuple = (0, 1, 'dev41', 'g786446a62.d20260131') +__version__ = version = '0.1.dev57+gded5bb62c.d20260203' +__version_tuple__ = version_tuple = (0, 1, 'dev57', 'gded5bb62c.d20260203') -__commit_id__ = commit_id = 'g786446a62' +__commit_id__ = commit_id = 'gded5bb62c' diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000..8ec8f3c --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,23 @@ +# Advanced Configuration for Negate CLI +batch_size: 4 # Feature extraction batch size, zero to disable batching +cache_features: true # Enable feature caching +vae_tiling: false # Enable VAE tiling +vae_slicing: false # Enable VAE slicing +patch_size: 768 # Patch resolution +top_k: 1 # Number of top patches to keep +use_onnx: false # True → ONNX, False → native XGBoost +dtype: bfloat16 # VAE dtype +default_vae: "Freepik/F-Lite-Texture" # Default VAE (HuggingFace repo ID) + +train: + n_components: 0.95 # PCA components (count, or fraction of variance to keep) + num_boost_round: 200 # Boosted training rounds + early_stopping_rounds: 10 # Early-stopping patience in rounds + colsample_bytree: 0.8 + eval_metric: ["logloss", "aucpr"] + learning_rate: 0.1 + max_depth: 4 + objective: "binary:logistic" + subsample: 0.8 + scale_pos_weight: null + seed: 0 diff --git a/negate/__init__.py b/negate/__init__.py index 45e22b7..47695e3 100644 --- a/negate/__init__.py +++ b/negate/__init__.py @@ -1,9 +1,11 @@ # SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0 # +# ruff: noqa -from negate.datasets import build_datasets, dataset_to_nparray, generate_dataset +from negate.config import negate_options as negate_opt +from negate.datasets import build_datasets, generate_dataset from negate.extract import FeatureExtractor, DeviceName, features, VAEModel -from negate.train import TrainResult, grade, get_time, model_path +from negate.train import TrainResult, grade, generate_datestamp_path, datestamped_folder, get_time, model_path from negate.track import in_console, on_graph -from negate.save import save_model, save_to_onnx +from negate.save import save_models, save_to_onnx from negate.residuals import Residual diff --git a/negate/__main__.py b/negate/__main__.py index 1e6c61d..86424a7 100644 --- a/negate/__main__.py +++ b/negate/__main__.py @@ -1,57 +1,115 @@ # SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0 # +"""Negate CLI entry point for training and inference.\n +Provides the train, check, and compare subcommands.""" + from pathlib import Path +from typing import Any + import numpy as np -from negate import TrainResult, build_datasets, features, generate_dataset, grade, in_console, save_to_onnx, on_graph, VAEModel +from negate import ( + TrainResult, + VAEModel, + build_datasets, + datestamped_folder, + features, + generate_dataset, + generate_datestamp_path, + grade, + in_console, + model_path, + negate_opt, + on_graph, + save_models, + save_to_onnx, +) + + +def run_native(features_array: np.ndarray) -> np.ndarray: + """Run inference using XGBoost with PCA pre-processing.\n + :param features_array: Feature array.\n + :raises FileNotFoundError: If no trained model exists on disk.\n + :return: Prediction array.""" + import pickle + + import xgboost as xgb + + model_file_path_named = model_path / "negate.ubj" + + if not model_file_path_named.exists():
raise FileNotFoundError(f"Model file not found: {str(model_file_path_named)}. Please run 'train' first to create the model.") + else: + model_file_path_named = str(model_file_path_named) + + pca_file_path_named = model_path / "negate_pca.pkl" + with open(pca_file_path_named, "rb") as pca_file: + pca = pickle.load(pca_file) + + features_pca = pca.transform(features_array) + + model = xgb.Booster() + model.load_model(model_file_path_named) + + result = model.predict(xgb.DMatrix(features_pca)) + return result -def evaluate(prediction: np.ndarray, ground_truth: np.ndarray) -> None: - """Print accuracy and class distribution.\n - :param prediction: Model outputs (0 = genuine, 1 = synthetic).\n - :param ground_truth: Ground-truth labels.\n - :return: None.""" - prediction = prediction.astype(int) - ground_truth = ground_truth.astype(int) +def run_onnx(features_array) -> Any: + """Run inference using ONNX Runtime with PCA pre-processing.\n + :param features_array: Feature array.\n + :return: Prediction array.""" + import onnxruntime as ort + from onnxruntime.capi.onnxruntime_pybind11_state import Fail as ONNXRuntimeError + from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument + + model_file_path_named = model_path / "negate.onnx" + if not model_file_path_named.exists(): + raise FileNotFoundError(f"Model file not found: {str(model_file_path_named)}. Please run 'train' first to create the model.") + else: + model_file_path_named = str(model_file_path_named) + + # pca_file_path_named = model_path / "negate_pca.onnx" + # session_pca = ort.InferenceSession(pca_file_path_named) + # input_name_pca = session_pca.get_inputs()[0].name + # features_pca = session_pca.run(None, {input_name_pca: features_array})[0] - acc = float(np.mean(prediction == ground_truth)) + # input_name = ort.get_available_providers()[0] + features_model = features_array.astype(np.float32) # type: ignore - genu_cnt = int(np.sum(ground_truth == 0)) - synth_cnt = int(np.sum(ground_truth == 1)) + session = ort.InferenceSession(model_file_path_named) + print(f"Model '{model_file_path_named}' loaded.") + input_name = session.get_inputs()[0].name + try: + result = session.run(None, {input_name: features_model})[0] # type: ignore + return result + except (InvalidArgument, ONNXRuntimeError) as error_log: + import sys - print(f"Accuracy: {acc:.2%}") - print(f"Genuine: {genu_cnt} Synthetic: {synth_cnt}") + print(error_log) + sys.exit() -def predict(image_path: Path, vae_type: VAEModel = VAEModel.MITSUA_FP16, true_label: int | None = None) -> np.ndarray: +def predict(image_path: Path, vae_type: VAEModel, true_label: int | None = None) -> np.ndarray: """Predict synthetic or original for given image. (0 = genuine, 1 = synthetic)\n :param image_path: Path to image file or folder. :param vae_type: VAE model to use for feature extraction. - :return: Prediction array. - """ + :return: Prediction array.""" from datasets import Dataset - import onnxruntime as ort - from onnxruntime import SparseTensor - - print(f"{'Evaluation' if true_label is not None else 'Detection'} selected.") - models_location = Path(__file__).parent.parent / "models" - model_file = models_location / "negate.onnx" - - if not model_file.exists(): - raise FileNotFoundError(f"Model file not found: {model_file}. Please run 'train' first to create the model.") + print(f"""{"Evaluation" if true_label is not None else "Detection"} selected. 
+Checking path '{image_path}' with {vae_type.value}""") dataset: Dataset = generate_dataset(image_path) features_dataset: Dataset = features(dataset, vae_type) + features_array = np.array(features_dataset["features"], dtype=np.float32) # type: ignore[arg-type] - features_array = np.array(features_dataset["features"]).astype(np.float32) # type: ignore[arg-type] + result = run_onnx(features_array) if negate_opt.use_onnx else run_native(features_array) - session = ort.InferenceSession(str(model_file)) - input_name = session.get_inputs()[0].name - result: SparseTensor = session.run(None, {input_name: features_array})[0] # type: ignore - print(result) + thresh = 0.5 + predictions = (result > thresh).astype(int) match true_label: case None: for prediction in result: # type: ignore @@ -60,14 +118,18 @@ def predict(image_path: Path, vae_type: VAEModel = VAEModel.MITSUA_FP16, true_la else: print("image is SYNTHETIC") case _: - evaluate(result, np.array([true_label])) # type: ignore + ground_truth = np.full(predictions.shape, true_label, dtype=int) + acc = float(np.mean(predictions == ground_truth)) + print(f"Accuracy: {acc:.2%}") - return result # type: ignore[return-value] + return result, predictions -def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None) -> None: - """Train model using dataset at path.\n - :param path: Dataset root.""" +def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None, compare: bool = False) -> None: + """Train XGBoost model using dataset at path.\n + :param vae_type: VAE model used for feature extraction.\n + :param file_or_folder_path: Dataset root folder.\n + :param compare: When True, keep models only in the datestamped run folder.""" from datasets import Dataset print("Training selected.") @@ -75,6 +137,7 @@ def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None) -> features_dataset: Dataset = features(dataset, vae_type) train_result: TrainResult = grade(features_dataset) save_to_onnx(train_result) + save_models(train_result, compare) in_console(train_result, vae_type) on_graph(train_result) @@ -90,14 +153,15 @@ def main() -> None: subparsers = parser.add_subparsers(dest="cmd", required=True) train_parser = subparsers.add_parser("train", help="Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.") - train_parser.add_argument("path", help="Dataset path", nargs="?", default=None) + train_parser.add_argument("path", help="Genuine/human-original dataset path", nargs="?", default=None) train_parser.add_argument( "-m", "--model", choices=[m.value for m in VAEModel], - default=VAEModel.MITSUA_FP16, - help="Change the VAE model to use for training to a supported HuggingFace repo. Accuracy and memory use decrease from left to right", + default=negate_opt.default_vae, + help=f"Change the VAE model to use for training to a supported HuggingFace repo (default {negate_opt.default_vae}).
Accuracy and memory use decrease from left to right", ) + check_parser = subparsers.add_parser( "check", help="Check whether an image at the provided path is synthetic or original.", @@ -106,7 +170,7 @@ def main() -> None: label_grp = check_parser.add_mutually_exclusive_group() label_grp.add_argument("-s", "--synthetic", action="store_const", const=1, dest="label", help="Mark image as synthetic (label = 1) for evaluation.") label_grp.add_argument("-g", "--genuine", action="store_const", const=0, dest="label", help="Mark image as genuine (label = 0) for evaluation.") - + subparsers.add_parser("compare", help="Run extraction and training using every supported VAE.") args = parser.parse_args(argv[1:]) match args.cmd: @@ -115,12 +179,27 @@ dataset_location: Path | None = Path(args.path) else: dataset_location: Path | None = None + datestamped_folder.mkdir(parents=True, exist_ok=True) + vae_type = VAEModel(args.model) - training_run(file_or_folder_path=dataset_location, vae_type=vae_type) + training_run( + vae_type=vae_type, + file_or_folder_path=dataset_location, + ) case "check": if args.path is None: raise ValueError("Check requires an image path.") - predict(Path(args.path), true_label=args.label) + import json + + results_file_path = model_path / "results.json" + with open(results_file_path) as result_metadata: + train_metadata = json.load(result_metadata) + vae_type = VAEModel(train_metadata["vae_type"]) + predict(Path(args.path), vae_type=vae_type, true_label=args.label) + case "compare": + for model in VAEModel: + _regenerate = generate_datestamp_path("test") + training_run(vae_type=model, compare=True) case _: raise NotImplementedError diff --git a/negate/config.py b/negate/config.py new file mode 100644 index 0000000..7d9c4f1 --- /dev/null +++ b/negate/config.py @@ -0,0 +1,52 @@ +from typing import NamedTuple + + +class NegateConfig(NamedTuple): + """YAML config values.\n + :param patch_size: Patch width for residuals.\n + :param top_k: Number of patches.\n + :param vae_tiling: Enable tiling.\n + :param vae_slicing: Enable slicing.\n + :param use_onnx: Use ONNX for inference.\n + :param batch_size: Feature extraction batch size; zero disables batching.\n + :return: Config instance.""" # noqa: D401 + + batch_size: int + cache_features: bool + default_vae: str + dtype: str + n_components: float + num_boost_round: int + patch_size: int + top_k: int + use_onnx: bool + vae_slicing: bool + vae_tiling: bool + early_stopping_rounds: int + colsample_bytree: float + eval_metric: list + learning_rate: float + max_depth: int + objective: str + subsample: float + scale_pos_weight: float | None + seed: int + + +def load_config_options() -> NegateConfig: + """Load YAML configuration options.\n + :return: Populated NegateConfig instance.""" + + from pathlib import Path + + import yaml + + config_path = Path(__file__).parent.parent / "config" / "config.yaml" + with open(config_path, "r") as config_file: + data = yaml.safe_load(config_file) + train_cfg = data.pop("train", {}) + data.update(train_cfg) + return NegateConfig(**data) + + +negate_options = load_config_options() diff --git a/negate/datasets.py b/negate/datasets.py index 5de86b8..ca9f0ab 100644 --- a/negate/datasets.py +++ b/negate/datasets.py @@ -3,7 +3,6 @@ from pathlib import Path -import numpy as np from datasets import Dataset, Image, concatenate_datasets, load_dataset @@ -22,10 +21,11 @@ def detect_nans(dataset: Dataset) -> Dataset: return dataset -def load_remote_dataset(repo: str, folder_path: Path, label: int) -> Dataset: +def load_remote_dataset(repo:
str, folder_path: Path, label: int = 1) -> Dataset: """Load a remote dataset and attach a default label.\n :param repo: Repository ID of the dataset. :param folder_path: Local path to cache the dataset. + :param label: The default label to assign to all images in the dataset :return: Dataset with a ``label`` column added and NaNs removed.""" remote_dataset = load_dataset(repo, cache_dir=str(folder_path), split="train").cast_column("image", Image(decode=True)) @@ -41,6 +41,7 @@ def generate_dataset(input_path: Path) -> Dataset: from PIL import Image as PillowImage + print(f"Using local images from {input_path}") validated_paths = [] valid_extensions = {".jpg", ".webp", ".jpeg", ".png", ".tif", ".tiff"} if input_path.is_dir(): @@ -55,6 +56,8 @@ def generate_dataset(input_path: Path) -> Dataset: validated_paths.append({"image": str(img_path)}) elif input_path.is_file() and input_path.suffix.lower() in valid_extensions: validated_paths.append({"image": str(input_path)}) + else: + raise ValueError(f"Invalid path {input_path}") dataset = Dataset.from_list(validated_paths) # NaN Prevention: decode separately @@ -80,35 +83,10 @@ def build_datasets(input_folder: Path | None = None) -> Dataset: original_input_folder = Path(__file__).parent.parent / "assets" original_input_folder.mkdir(parents=True, exist_ok=True) - slice_dataset = load_remote_dataset("exdysa/nano-banana-pro-generated-1k-clone", synthetic_input_folder, 1) - rnd_synthetic_dataset = load_remote_dataset("exdysa/rnd_synthetic_img", synthetic_input_folder, 1) + slice_dataset = load_remote_dataset("exdysa/nano-banana-pro-generated-1k-clone", synthetic_input_folder) + rnd_synthetic_dataset = load_remote_dataset("exdysa/rnd_synthetic_img", synthetic_input_folder) - # Check if assets folder has images, otherwise use placeholder dataset - valid_extensions = {".jpg", ".webp", ".jpeg", ".png", ".tif", ".tiff"} - has_images = original_input_folder.exists() and any(f.is_file() and f.suffix.lower() in valid_extensions for f in original_input_folder.iterdir()) - - if has_images: - print(f"Using local images from {original_input_folder}") - original_dataset = generate_dataset(original_input_folder) - else: - print(f"No images found in {original_input_folder}, using placeholder dataset 'darkshapes/a_slice'") - original_dataset = load_remote_dataset("darkshapes/a_slice", original_input_folder, 0) + original_dataset = generate_dataset(original_input_folder) dataset = concatenate_datasets([slice_dataset, rnd_synthetic_dataset, original_dataset]) return dataset - - -def dataset_to_nparray(dataset: Dataset, column_names: list[str] | None = None) -> np.ndarray: - """Convert Dataset to ndarray.\n - :param dataset: HuggingFace Dataset of images. - :param columns: Columns to keep. If None all columns are used. 
- :return: Array of shape (n_samples, n_features) or (n_samples,) if a single column.""" - - if column_names is None: - column_names = dataset.column_names - - data = {name: dataset[name] for name in column_names} - - if len(column_names) == 1: - return np.array(data[column_names[0]]) - return np.vstack([np.array(data[name]) for name in column_names]).T diff --git a/negate/extract.py b/negate/extract.py index bfdd76d..49622b3 100644 --- a/negate/extract.py +++ b/negate/extract.py @@ -1,22 +1,26 @@ # SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0 # -from enum import Enum from dataclasses import dataclass +from enum import Enum +from pathlib import Path + from datasets import Dataset +from PIL.Image import Image + +from negate.config import negate_options as negate_opt class VAEModel(str, Enum): """Choose the name and size of the VAE model to use for extraction.""" SANA_FP32 = "exdysa/dc-ae-f32c32-sana-1.1-diffusers" - SANA_FP16 = "exdysa/dc-ae-f32c32-sana-1.1-diffusers" - GLM_BF16 = "zai-org/GLM-Image" - FLUX2_FP32 = "black-forest-labs/FLUX.2-dev" - FLUX2_FP16 = "black-forest-labs/FLUX.2-klein-4B" - FLUX1_FP32 = "Tongyi-MAI/Z-Image" - FLUX1_FP16 = "Freepik/F-Lite-Texture" - MITSUA_FP16 = "exdysa/mitsua-vae-SAFETENSORS" + SANA_FP16 = "exdysa/dc-ae-f32c32-sana-1.1-diffusers" # dc_ae 'accuracy': 0.8235294117647058, + FLUX2_FP32 = "black-forest-labs/FLUX.2-dev" # f2 dev 'accuracy': 0.9313725490196079, + FLUX2_FP16 = "black-forest-labs/FLUX.2-klein-4B" # f2 klein 'accuracy': 0.9215686274509803, + FLUX1_FP32 = "Tongyi-MAI/Z-Image" # zimage 'accuracy': 0.9411764705882353, + FLUX1_FP16 = "Freepik/F-Lite-Texture" # flite 'accuracy': 0.9509803921568627, + MITSUA_FP16 = "exdysa/mitsua-vae-SAFETENSORS" # mitsua 'accuracy': 0.9509803921568627, @dataclass @@ -27,11 +31,11 @@ class VAEInfo: MODEL_MAP = { VAEModel.MITSUA_FP16: VAEInfo(VAEModel.MITSUA_FP16, "autoencoders.autoencoder_kl.AutoencoderKL"), - VAEModel.GLM_BF16: VAEInfo(VAEModel.GLM_BF16, "autoencoders.autoencoder_kl.AutoencoderKL"), VAEModel.FLUX1_FP32: VAEInfo(VAEModel.FLUX1_FP32, "autoencoders.autoencoder_kl.AutoencoderKL"), VAEModel.FLUX1_FP16: VAEInfo(VAEModel.FLUX1_FP16, "autoencoders.autoencoder_kl.AutoencoderKL"), VAEModel.FLUX2_FP32: VAEInfo(VAEModel.FLUX2_FP32, "autoencoders.autoencoder_kl_flux2.AutoencoderKLFlux2"), - VAEModel.FLUX2_FP16: VAEInfo(VAEModel.FLUX1_FP16, "autoencoders.autoencoder_kl_flux2.AutoencoderKLFlux2"), + VAEModel.FLUX2_FP16: VAEInfo(VAEModel.FLUX2_FP16, "autoencoders.autoencoder_kl_flux2.AutoencoderKLFlux2"), + VAEModel.SANA_FP16: VAEInfo(VAEModel.SANA_FP16, "autoencoders.autoencoder_dc.AutoencoderDC"), VAEModel.SANA_FP32: VAEInfo(VAEModel.SANA_FP32, "autoencoders.autoencoder_dc.AutoencoderDC"), } @@ -58,19 +62,17 @@ class FeatureExtractor: def __init__(self, vae_type: VAEModel, device: DeviceName, dtype: torch.dtype) -> None: """Set up the extractor with a VAE model.\n - :param model: Repository ID of the VAE. + :param vae_type: VAEModel ID of the VAE. :param device: Target device. 
:param dtype: Data type for tensors.""" - from diffusers.models.autoencoders.vae import AutoencoderMixin from negate import Residual # `B̴̨̒e̷w̷͇̃ȁ̵͈r̸͔͛ę̵͂ ̷̫̚t̵̻̐h̶̜͒ȩ̸̋ ̵̪̄ő̷̦ù̵̥r̷͇̂o̷̫͑b̷̲͒ò̷̫r̴̢͒ô̵͍s̵̩̈́` #type: ignore - self.device = device + self.residual_transform = Residual(patch_size=negate_opt.patch_size, top_k=negate_opt.top_k) + self.device = device.value self.dtype = dtype self.model: VAEInfo = MODEL_MAP[vae_type] - self.vae: AutoencoderMixin | None = None - self.residual_transform = Residual() - if self.vae is None: + if not hasattr(self, "vae"): self.create_vae() def create_vae(self): @@ -79,34 +81,52 @@ def create_vae(self): import os from diffusers.models import autoencoders - from huggingface_hub.errors import LocalEntryNotFoundError from huggingface_hub import snapshot_download + from huggingface_hub.errors import LocalEntryNotFoundError + + if negate_opt.vae_tiling: + self.vae.enable_tiling() + if negate_opt.vae_slicing: + self.vae.enable_slicing() - autoencoder_cls = getattr(autoencoders, self.model.module.split(".")[-1]) + autoencoder_cls = getattr(autoencoders, self.model.module.split(".")[-1], None) try: - vae_model = autoencoder_cls.from_pretrained(self.model.enum.value, torch_dtype=self.dtype, local_files_only=True).to(self.device.value) - except (LocalEntryNotFoundError, OSError): + vae_model = autoencoder_cls.from_pretrained(self.model.enum.value, torch_dtype=self.dtype, local_files_only=True).to(self.device) # type: ignore + except (LocalEntryNotFoundError, OSError, AttributeError): print("Downloading model...") - vae_path: str = snapshot_download(self.model.enum.value, allow_patterns=["vae/*"]) # type: ignore - vae_path = os.path.join(vae_path, "vae") - vae_model = autoencoder_cls.from_pretrained(vae_path, torch_dtype=self.dtype, local_files_only=True).to(self.device.value) + vae_path: str = snapshot_download(self.model.enum.value, allow_patterns=["vae/*"]) # type: ignore + vae_path = os.path.join(vae_path, "vae") + vae_model = autoencoder_cls.from_pretrained(vae_path, torch_dtype=self.dtype, local_files_only=True).to(self.device) # type: ignore vae_model.eval() self.vae = vae_model - def cleanup(self) -> None: - """Free the VAE and GPU memory.""" + def _extract_generic(self, batch: "torch.Tensor"): + """Encode with standard Diffusers VAE and return mean latent.\n + :param batch: Tensor of image + patches. + :return: NumPy mean latent.""" - import gc + latent = self.vae.encode(batch).latent_dist.sample() # type: ignore + return latent.mean(dim=0).cpu().float().numpy() + + def _extract_special(self, batch: "torch.Tensor", image: Image): + """Handle SANA and AuraEqui models.\n + :param batch: Tensor of image + patches. + :param img: Original PIL image. + :return: NumPy mean latent.""" import torch + from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution + from torch import Tensor - device = self.device - if device != "cpu": - gpu = getattr(torch, device) - gpu.empty_cache() - del self.vae - gc.collect() + latent: Tensor = self.vae.encode(batch) # type: ignore + mean = torch.mean(latent.latent, dim=0).cpu().float() # type: ignore + + logvar = torch.zeros_like(mean).cpu().float() + params = torch.cat([mean, logvar], dim=1) + dist = DiagonalGaussianDistribution(params) + sample = dist.sample() + return sample.mean(dim=0).cpu().float().numpy() def batch_extract(self, dataset: Dataset): """Extract VAE features from a batch of images. 
@@ -120,35 +140,38 @@ def batch_extract(self, dataset: Dataset): patch_stack = [] for image in dataset["image"]: - color_image = image.convert("RGB") - color_tensor = self.transform(color_image) - patches = self.residual_transform.crop_select(image, size=768, top_k=1) - for patch in patches: - patch_image = patch.convert("RGB") - patch_tensor = self.transform(patch_image) - patch_stack.append(patch_tensor) - - batch_tensor = torch.stack([color_tensor, *patch_stack]).to(self.device, dtype=self.dtype) + rgb = image.convert("RGB") + col = self.transform(rgb) + for patch in self.residual_transform.crop_select(image): + patch_stack.append(self.transform(patch.convert("RGB"))) + + batch = torch.stack([col, *patch_stack]).to(self.device, self.dtype) with torch.no_grad(): - if self.model.enum != VAEModel.SANA_FP32: # type: ignore can't access encode - latents_2_dim_h_w = self.vae.encode(batch_tensor).latent_dist.sample() # type: ignore can't access encode - mean_latent = latents_2_dim_h_w.mean(dim=0).cpu().float().numpy() - else: - from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution - - latent = self.vae.encode(batch_tensor) # # type: ignore can't access encode - mean_latent = torch.mean(latent.latent, dim=0).cpu().float() # distribution with mean - logvar_latent = torch.zeros_like(mean_latent).cpu().float() # & logvar - params = torch.cat([mean_latent, logvar_latent], dim=1) - distribution = DiagonalGaussianDistribution(params) - sample = distribution.sample() - mean_latent = sample.mean(dim=0).cpu().float().numpy() - feature_vec = mean_latent.flatten() - - features_list.append(feature_vec) + match self.model.enum: + case VAEModel.SANA_FP32 | VAEModel.SANA_FP16: + mean_latent = self._extract_special(batch, image) + case _: + mean_latent = self._extract_generic(batch) + + features_list.append(mean_latent.flatten()) return {"features": features_list} + def cleanup(self) -> None: + """Free the VAE and GPU memory.""" + + import gc + + import torch + + if self.device != "cpu": + gpu = getattr(torch, self.device) + gpu.empty_cache() + del gpu + del self.vae + del self.device + gc.collect() + def __enter__(self) -> "FeatureExtractor": return self @@ -157,6 +180,30 @@ def __exit__(self, exc_type, exc, tb) -> None: self.cleanup() +def feature_cache(dataset: Dataset, vae_type: VAEModel) -> Path: + """Generate cache filename based on dataset fingerprint and VAE model.\n + Dataset fingerprint automatically changes when data changes. + + :param dataset: The incoming dataset to be processed. + :param vae_type: The VAE model type selected for feature extraction. + :returns: Location to cache results of feature extraction.""" + + import hashlib + + cache_dir = Path(".cache/features") + cache_dir.mkdir(parents=True, exist_ok=True) + + dataset_hash = dataset._fingerprint if hasattr(dataset, "_fingerprint") else hashlib.md5(str(dataset).encode()).hexdigest()[:8] + vae_name = vae_type.value.split("/")[-1].replace("-", "_") + cache_file = cache_dir / f"features_{vae_name}_{dataset_hash}.arrow" + + if cache_file.exists(): + print(f"Using cached features from {cache_file}") + else: + print(f"Extracting features (will cache to {cache_file})") + return cache_file + + def features(dataset: Dataset, vae_type: VAEModel) -> Dataset: """Generate a feature dataset from images.\n :param dataset: Dataset containing images.
@@ -166,17 +213,23 @@ import torch device = DeviceName.CUDA if torch.cuda.is_available() else DeviceName.MPS if torch.mps.is_available() else DeviceName.CPU - dtype = torch.bfloat16 - # was here + dtype = getattr(torch, negate_opt.dtype) + kwargs = {} + if negate_opt.batch_size > 0: + kwargs.setdefault("batch_size", negate_opt.batch_size) + if negate_opt.cache_features: + cache_file_name = str(feature_cache(dataset, vae_type)) + kwargs.setdefault("cache_file_name", cache_file_name) with FeatureExtractor(vae_type, device, dtype) as extractor: features_dataset = dataset.map( extractor.batch_extract, - batched=True, - batch_size=4, + batched=negate_opt.batch_size > 0, remove_columns=["image"], desc="Extracting features...", + **kwargs, ) features_dataset.set_format(type="numpy", columns=["features", "label"]) diff --git a/negate/residuals.py b/negate/residuals.py index 726005d..55878f0 100644 --- a/negate/residuals.py +++ b/negate/residuals.py @@ -9,12 +9,14 @@ class Residual: - def __init__(self, dtype: np.typing.DTypeLike = np.float32): + def __init__(self, top_k: int, patch_size: int = 512, dtype: np.typing.DTypeLike = np.float32) -> None: """Initialize residual class for residual image processing.\n :param dtype: dtype for internal numpy conversion. return: None.""" self.dtype = dtype + self.top_k = top_k + self.patch_size = patch_size def __call__(self, image: Image) -> Image: """Create a 3-channel residual from a grayscale image.\n @@ -128,7 +130,7 @@ def image_from_fourier(self, masked_spectrum: NDArray, fourier_shift: NDArray, m return fromarray(reconstructed_uint8, mode="L").convert("RGB") - def mask_patches(self, numeric_image: NDArray, size: int): + def mask_patches(self, numeric_image: NDArray): """Crop patches and compute freq divergence.\n :return: List of (divergence, patch Image). :param numeric_image: Image converted into an array. @@ -137,17 +139,17 @@ from PIL.Image import fromarray metrics: list[tuple[float, Image]] = [] - + patch_size = self.patch_size h, w = numeric_image.shape - nx = (w + size - 1) // size - ny = (h + size - 1) // size + nx = (w + patch_size - 1) // patch_size + ny = (h + patch_size - 1) // patch_size for iy in range(ny): for ix in range(nx): - x0 = ix * size - y0 = iy * size - patch_arr = numeric_image[y0 : y0 + size, x0 : x0 + size] - if patch_arr.shape != (size, size): - pad = np.zeros((size, size), dtype=self.dtype) + x0 = ix * patch_size + y0 = iy * patch_size + patch_arr = numeric_image[y0 : y0 + patch_size, x0 : x0 + patch_size] + if patch_arr.shape != (patch_size, patch_size): + pad = np.zeros((patch_size, patch_size), dtype=self.dtype) pad[: patch_arr.shape[0], : patch_arr.shape[1]] = patch_arr patch_arr = pad @@ -163,12 +165,7 @@ metrics.append((div, patch_img)) return metrics - def crop_select( - self, - image: Image, - size: int, - top_k: int = 5, - ) -> list[Image]: + def crop_select(self, image: Image) -> list[Image]: """Crop image into patches, compute freq-divergence, return most extreme patches.\n :param image: PIL image to process. :param size: Patch dimension.
@@ -179,11 +176,12 @@ def crop_select( gray = image.convert("L") numeric_image = np.array(gray, dtype=self.dtype) - metrics: list[tuple[float, Image]] = self.mask_patches(numeric_image, size=size) + metrics: list[tuple[float, Image]] = self.mask_patches(numeric_image) metrics.sort(key=lambda x: x[0], reverse=True) chosen: list[Image] = [] + top_k = self.top_k chosen.extend([p for _, p in metrics[:top_k]]) # high diverges chosen.extend([p for _, p in metrics[-top_k:]]) # low diverges diff --git a/negate/save.py b/negate/save.py index 63d59aa..c31b93f 100644 --- a/negate/save.py +++ b/negate/save.py @@ -1,7 +1,10 @@ # SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0 # -from negate import TrainResult, model_path +import shutil +from pathlib import Path + +from negate import TrainResult, generate_datestamp_path, model_path def save_metadata(train_result: TrainResult, file_name: str = "negate") -> str: @@ -15,32 +18,39 @@ def save_metadata(train_result: TrainResult, file_name: str = "negate") -> str: scale_pos_weight: float = train_result.scale_pos_weight seed: int = train_result.seed - metadata_file_name = model_path(f"{file_name}_metadata.npz") + metadata_file_name = generate_datestamp_path(f"{file_name}_metadata.npz") np.savez(metadata_file_name, seed=seed, scale_pos_weight=scale_pos_weight) return metadata_file_name -def save_model(train_result: TrainResult, file_name: str = "negate") -> None: +def save_models(train_result: TrainResult, compare: bool, file_name: str = "negate") -> None: """Persist a trained model and its PCA transformer.\n :param train_result: Training output containing model, PCA and metadata. :param file_name: Base name for the files written to the *models* folder. :return: None""" + import pickle from sklearn.decomposition import PCA from xgboost import Booster + datestamp_path = generate_datestamp_path(file_name) + model: Booster = train_result.model pca: PCA = train_result.pca - - pca_file_name = model_path(f"{file_name}_pca.pkl") + pca_file_name = datestamp_path + "_pca.pkl" with open(pca_file_name, "wb") as f: pickle.dump(pca, f) - negate_xgb_file_name = model_path(f"{file_name}.json") + negate_xgb_file_name = datestamp_path + ".ubj" model.save_model(negate_xgb_file_name) metadata_file_name = save_metadata(train_result) + + if not compare: + for src in (negate_xgb_file_name, pca_file_name): + shutil.copy(src, model_path / Path(src).name) # type: ignore no overloads + print(f"Models saved to disk. {pca_file_name} {negate_xgb_file_name} {metadata_file_name}") @@ -49,15 +59,14 @@ def save_to_onnx(train_result: TrainResult, file_name: str = "negate"): :param train_result: Training output containing the XGBoost model. 
:param file_name: Base name for the ONNX file.""" - import shutil - from pathlib import Path - import onnx from skl2onnx import convert_sklearn from skl2onnx.common.data_types import FloatTensorType from negate.to_onnx import DataType, IOShape, ModelInputFormat, ONNXConverter + datestamp_path = generate_datestamp_path(file_name) + model = train_result.model num_features = train_result.feature_matrix.shape[1] pca = train_result.pca @@ -68,22 +77,19 @@ def save_to_onnx(train_result: TrainResult, file_name: str = "negate"): name="input", format=ModelInputFormat.FORMAT_NONE, # Used for TensorRT ) - negate_onnx_file_name = model_path(f"{file_name}.onnx") + negate_onnx_file_name = datestamp_path + ".onnx" onnx_model = ONNXConverter.from_xgboost(model, inputs=[input_shape], opset=12) onnx.save(onnx_model, negate_onnx_file_name) initial_pca_types = [("input", FloatTensorType([None, num_features]))] negate_pca_onnx_raw = convert_sklearn(pca, initial_types=initial_pca_types) negate_pca_onnx = ONNXConverter.optim_onnx(negate_pca_onnx_raw) # type: ignore[arg-type] - pca_file_name = model_path(f"{file_name}_pca.onnx") + pca_file_name = datestamp_path + "_pca.onnx" onnx.save(negate_pca_onnx, pca_file_name) metadata_file_name = save_metadata(train_result) - models_dir = Path(__file__).parent.parent / "models" - models_dir.mkdir(parents=True, exist_ok=True) - - for src in (negate_onnx_file_name, pca_file_name, metadata_file_name): - shutil.copy(src, models_dir / Path(src).name) # type: ignore no overloads + for src in (negate_onnx_file_name, pca_file_name): + shutil.copy(src, model_path / Path(src).name) # type: ignore no overloads print(f"Models saved to disk. {pca_file_name} {negate_onnx_file_name} {metadata_file_name}") diff --git a/negate/track.py b/negate/track.py index 2f71807..e550e7b 100644 --- a/negate/track.py +++ b/negate/track.py @@ -1,12 +1,15 @@ # SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0 # -from negate import TrainResult, VAEModel, get_time, model_path +from negate import TrainResult, VAEModel, get_time, generate_datestamp_path, model_path +import matplotlib.pyplot as plt def in_console(train_result: TrainResult, vae_type: VAEModel) -> None: """Print diagnostics and plots for a trained model.\n :param train_result: Result object from training.""" + from pathlib import Path + import shutil import json from pprint import pprint @@ -61,11 +64,11 @@ def in_console(train_result: TrainResult, vae_type: VAEModel) -> None: } pprint(results) - results_file = model_path("results.json") + results_file = generate_datestamp_path("results.json") result_format = {k: str(v) for k, v in results.items()} with open(results_file, "tw", encoding="utf-8") as out_file: json.dump(result_format, out_file, ensure_ascii=False, indent=4, sort_keys=True) - + shutil.copy(results_file, model_path / Path(results_file).name) # type: ignore no overloads separator = lambda: print("=" * 60) separator() print("CLASSIFICATION RESULTS") @@ -76,11 +79,10 @@ def in_console(train_result: TrainResult, vae_type: VAEModel) -> None: def on_graph(train_result: TrainResult) -> None: """Save and show PCA variance plots for a trained model.\n :param train_result: Result object from training.""" - - import matplotlib.pyplot as plt import numpy as np from numpy.typing import NDArray from sklearn.metrics import confusion_matrix + import seaborn as sns X_train: NDArray = train_result.X_train X_train_pca = train_result.X_train_pca @@ -90,73 +92,69 @@ def on_graph(train_result: TrainResult) -> None: 
y_pred = (y_pred_proba > 0.5).astype(int) pca = train_result.pca - plt.figure(figsize=(10, 5)) - plt.subplot(1, 2, 1) - plt.plot(np.cumsum(pca.explained_variance_ratio_), color="aqua") - plt.xlabel("Number of Components") - plt.ylabel("Cumulative Explained Variance") - plt.title("PCA Explained Variance") - plt.grid(True) - - plt.subplot(1, 2, 2) - plt.bar(range(min(20, len(pca.explained_variance_ratio_))), pca.explained_variance_ratio_[:20], color="aqua") - plt.xlabel("Component") - plt.ylabel("Explained Variance Ratio") - plt.title("First 20 Components") - plt.tight_layout() - plt.savefig(model_path("score_explained_variance.png")) - plt.show() - cm = confusion_matrix(train_result.y_test, y_pred) - fig, ax = plt.subplots() - cax = ax.imshow(cm, interpolation="nearest", cmap="Reds") - - ax.set_xticks(np.arange(cm.shape[1])) - ax.set_yticks(np.arange(cm.shape[0])) - ax.set_xticklabels(["Real", "Synthetic"]) - ax.set_yticklabels(["Real", "Synthetic"]) - plt.setp(ax.get_xticklabels(), rotation=45, ha="right") + # Create a single figure with 6 subplots (2 rows × 3 columns) + fig, axes = plt.subplots(2, 3, figsize=(18, 12)) + ax_cum = axes[0, 0] + ax_bar = axes[0, 1] + ax_conf = axes[0, 2] + ax_orig = axes[1, 0] + ax_pca = axes[1, 1] + ax_heat = axes[1, 2] + + # 1. Cumulative explained variance + ax_cum.plot(np.cumsum(pca.explained_variance_ratio_), color="aqua") + ax_cum.set_xlabel("Number of Components") + ax_cum.set_ylabel("Cumulative Explained Variance") + ax_cum.set_title("PCA Explained Variance") + ax_cum.grid(True) + + # 2. First 20 components + ax_bar.bar( + range(min(20, len(pca.explained_variance_ratio_))), + pca.explained_variance_ratio_[:20], + color="aqua", + ) + ax_bar.set_xlabel("Component") + ax_bar.set_ylabel("Explained Variance Ratio") + ax_bar.set_title("First 20 Components") + # 3. Confusion matrix + cm = confusion_matrix(train_result.y_test, y_pred) + cax = ax_conf.imshow(cm, interpolation="nearest", cmap="Reds") + ax_conf.set_xticks(np.arange(cm.shape[1])) + ax_conf.set_yticks(np.arange(cm.shape[0])) + ax_conf.set_xticklabels(["Real", "Synthetic"]) + ax_conf.set_yticklabels(["Real", "Synthetic"]) + plt.setp(ax_conf.get_xticklabels(), rotation=45, ha="right") for i in range(cm.shape[0]): for j in range(cm.shape[1]): - ax.text(j, i, cm[i, j], ha="center", va="center", color="black") - - ax.set_xlabel("Predicted") - ax.set_ylabel("Actual") - ax.set_title("Confusion Matrix") - fig.colorbar(cax) - plt.savefig(model_path("score_confusion_matrix.png")) - plt.show() - - plt.figure(figsize=(10, 5)) - plt.subplot(1, 2, 1) - plt.scatter(X_train[:, 0], X_train[:, 1], c=y_plot, cmap="coolwarm", edgecolor="k") - plt.xlabel("Feature 1") - plt.ylabel("Feature 2") - plt.title("Original Data (First Two Features)") - plt.colorbar(label="Prediction") - - plt.subplot(1, 2, 2) - plt.scatter(X_train_pca[:, 0], X_train_pca[:, 1], c=y_plot, cmap="coolwarm", edgecolor="k") - plt.xlabel("Principal Component 1") - plt.ylabel("Principal Component 2") - plt.title("PCA Transformed Data") - plt.colorbar(label="Prediction") - plt.tight_layout() - plt.savefig(model_path("pca_transform_map.png")) - plt.show() - - import seaborn as sns - + ax_conf.text(j, i, cm[i, j], ha="center", va="center", color="black") + ax_conf.set_xlabel("Predicted") + ax_conf.set_ylabel("Actual") + ax_conf.set_title("Confusion Matrix") + fig.colorbar(cax, ax=ax_conf) + + # 4. 
Original data scatter + sc_orig = ax_orig.scatter(X_train[:, 0], X_train[:, 1], c=y_plot, cmap="coolwarm", edgecolor="k") + ax_orig.set_xlabel("Feature 1") + ax_orig.set_ylabel("Feature 2") + ax_orig.set_title("Original Data (First Two Features)") + fig.colorbar(sc_orig, ax=ax_orig, label="Prediction") + + # 5. PCA transformed scatter + sc_pca = ax_pca.scatter(X_train_pca[:, 0], X_train_pca[:, 1], c=y_plot, cmap="coolwarm", edgecolor="k") + ax_pca.set_xlabel("Principal Component 1") + ax_pca.set_ylabel("Principal Component 2") + ax_pca.set_title("PCA Transformed Data") + fig.colorbar(sc_pca, ax=ax_pca, label="Prediction") + + # 6. Correlation heatmap corr = np.corrcoef(X_train_pca, rowvar=False) upper_triangle_mask = np.triu(np.ones_like(corr, dtype=bool)) - - # Get actual min/max from the lower triangle (excluding diagonal) lower_triangle = corr[np.tril_indices_from(corr, k=-1)] vmin = lower_triangle.min() vmax = lower_triangle.max() - - figure, ax = plt.subplots(figsize=(12, 10)) cmap = sns.diverging_palette(20, 230, as_cmap=True) sns.heatmap( corr, @@ -168,8 +166,9 @@ def on_graph(train_result: TrainResult) -> None: square=True, linewidths=0.5, cbar_kws={"shrink": 0.5}, + ax=ax_heat, ) - ax.set_title(f"Feature Correlation Heatmap (PCA Components)\nRange: [{vmin:.3e}, {vmax:.3e}]") - plt.tight_layout() - figure.savefig(model_path("correlation_heatmap.png")) - plt.show() + ax_heat.set_title(f"Feature Correlation Heatmap (PCA Components)\nRange: [{vmin:.3e}, {vmax:.3e}]") + + plt.tight_layout(pad=0.5) + plt.savefig(generate_datestamp_path("combined_plots.png")) diff --git a/negate/train.py b/negate/train.py index 870fb71..30dac90 100644 --- a/negate/train.py +++ b/negate/train.py @@ -11,24 +11,31 @@ from sklearn.decomposition import PCA from xgboost import Booster +from negate.config import negate_options as negate_opt + get_time = lambda: datetime.now().strftime("%Y%m%d_%H%M%S") -folder = Path("models", get_time()) -folder.mkdir(parents=True, exist_ok=True) -model_path = lambda file_name: str(folder / file_name) +datestamped_folder = Path("models", get_time()) +model_path = Path(__file__).parent.parent / "models" + + +def generate_datestamp_path(file_name: str) -> str: + datestamped_folder.mkdir(parents=True, exist_ok=True) + generated_path = str(datestamped_folder / file_name) + return generated_path @dataclass class TrainingParameters: """Container holding main model parameters""" - colsample_bytree: float = 0.8 - eval_metric: list = field(default_factory=lambda: ["logloss", "aucpr"]) - learning_rate: float = 0.1 - max_depth: int = 4 - objective: str = "binary:logistic" - scale_pos_weight: float | None = None - seed: int | None = None - subsample: float = 0.8 + seed: int = negate_opt.seed + colsample_bytree: float = negate_opt.colsample_bytree + eval_metric: list = field(default_factory=lambda: list(negate_opt.eval_metric)) + learning_rate: float = negate_opt.learning_rate + max_depth: int = negate_opt.max_depth + objective: str = negate_opt.objective + scale_pos_weight: float | None = negate_opt.scale_pos_weight + subsample: float = negate_opt.subsample @dataclass @@ -41,7 +48,7 @@ class TrainResult: model: Booster num_features: int pca: PCA - scale_pos_weight: float + scale_pos_weight: float | None seed: int X_train_pca: NDArray X_train: NDArray @@ -58,26 +65,25 @@ def grade(features_dataset: Dataset) -> TrainResult: from numpy.random import default_rng from sklearn.model_selection import train_test_split - feature_matrix = np.array([sample["features"] for sample in features_dataset]).astype(np.float32) # type: ignore no overloads +
feature_matrix = np.asarray([sample["features"] for sample in features_dataset], dtype=np.float32) # type: ignore labels = np.array([sample["label"] for sample in features_dataset]) # type: ignore no overloads rng = default_rng(1) random_state = lambda: int(np.round(rng.random() * 0xFFFFFFFF)) seed = random_state() - X_train, X_test, y_train, y_test = train_test_split(feature_matrix, labels, test_size=0.2, stratify=labels, random_state=seed) + params = TrainingParameters( + seed=seed, + ) + X_train, X_test, y_train, y_test = train_test_split(feature_matrix, labels, test_size=0.2, stratify=labels, random_state=params.seed) - pca: PCA = PCA(n_components=0.95, random_state=seed) # dimensionality .95 + pca: PCA = PCA(n_components=negate_opt.n_components, random_state=params.seed) # dimensionality target from config (default 0.95) X_train_pca = pca.fit_transform(X_train) X_test_pca = pca.transform(X_test) - scale_pos_weight = np.sum(y_train == 0) / np.sum(y_train == 1) + params.scale_pos_weight = np.sum(y_train == 0) / np.sum(y_train == 1) d_matrix_train = xgb.DMatrix(X_train_pca, label=y_train) d_matrix_test = xgb.DMatrix(X_test_pca, label=y_test) - params = TrainingParameters( - scale_pos_weight=scale_pos_weight, - seed=seed, - ) training_parameters = asdict(params) evaluation_parameters = [(d_matrix_train, "train"), (d_matrix_test, "test")] evaluation_result = {} @@ -91,11 +97,11 @@ pca=pca, d_matrix_test=d_matrix_test, # type: ignore model=model, - scale_pos_weight=scale_pos_weight, + scale_pos_weight=params.scale_pos_weight, X_train_pca=X_train_pca, y_test=y_test, # type: ignore labels=labels, feature_matrix=feature_matrix, - seed=seed, + seed=params.seed, num_features=model.num_features(), ) diff --git a/pyproject.toml b/pyproject.toml index b2bb723..be0be7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,9 +6,7 @@ requires = ["setuptools", "setuptools_scm"] build-backend = "setuptools.build_meta" [project] -authors = [ - { name = "darkshapes", email = "91800957+exdysa@users.noreply.github.com" }, -] +authors = [{ name = "darkshapes", email = "91800957+exdysa@users.noreply.github.com" }] name = "negate" description = "Synthetic image detector" readme = "README.md" @@ -33,6 +31,7 @@ dependencies = [ "accelerate>=1.12.0", "datasets>=4.5.0", "diffusers>=0.36.0", + "einops>=0.8.2", "huggingface-hub>=1.3.2", "matplotlib>=3.10.8", "mongoengine>=0.29.1", @@ -54,12 +53,7 @@ dependencies = [ "xgboost>=3.1.3", ] [dependency-groups] -dev = [ - "pyright>=1.1.408", - "pytest>=9.0.2", - "pytest-asyncio>=1.3.0", - "ruff>=0.14.13", -] +dev = ["pyright>=1.1.408", "pytest>=9.0.2", "pytest-asyncio>=1.3.0", "ruff>=0.14.13"] [project.optional-dependencies] # -- frameworks -- @@ -80,15 +74,7 @@ torch-backend = "auto" prerelease = "allow" preview = true - -conflicts = [ - [ - { extra = "cpu" }, - { extra = "mps" }, - { extra = "rocm" }, - { extra = "cuda" }, - ], -] +conflicts = [[{ extra = "cpu" }, { extra = "mps" }, { extra = "rocm" }, { extra = "cuda" }]] [[tool.uv.index]] name = "pytorch-cpu" @@ -107,13 +93,9 @@ explicit = true [tool.uv.sources] -torchvision = [ - { index = "pytorch-cpu", extra = "mps", marker = "sys_platform == 'darwin'" }, -] +torchvision = [{ index = "pytorch-cpu", extra = "mps", marker = "sys_platform == 'darwin'" }] -torch = [ - { index = "pytorch-cpu", extra = "mps", marker = "sys_platform == 'darwin'" }, -] +torch = [{ index = "pytorch-cpu", extra = "mps", marker = "sys_platform == 'darwin'" }] [tool.setuptools_scm] write_to = "_version.py" @@ -141,7 +123,7 @@ ignore-paths
= ["^tests/.*$", "test_.*$"] disable = ["C0415"] [tool.pyright] -include = ["nnll"] +include = ["negate"] exclude = [ "^tests/.*$", "**/__pycache__", # cache directories diff --git a/results/combined_plots.png b/results/combined_plots.png new file mode 100644 index 0000000..9031ac1 Binary files /dev/null and b/results/combined_plots.png differ diff --git a/results/pca_transform_map.png b/results/pca_transform_map.png deleted file mode 100644 index 094f3fe..0000000 Binary files a/results/pca_transform_map.png and /dev/null differ diff --git a/results/results.json b/results/results.json deleted file mode 100644 index e1d9202..0000000 --- a/results/results.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "accuracy": "0.9690721649484536", - "best_iter": "53", - "best_score": "0.9933269449816234", - "cumulative:": "[0.07007149 0.12663467 0.15042071 0.17022693 0.18916745 0.20264165\n 0.21424904 0.22467917 0.23394077 0.24301024 0.25173762 0.2600328\n 0.26813537 0.27596053 0.28343022 0.29078788 0.2979155 0.30497494\n 0.3119168 0.31863883 0.3252243 0.33161107 0.33782476 0.34401444\n 0.34997436 0.35590667 0.36177313 0.3675905 0.37325367 0.3787788\n 0.384235 0.38964587 0.39498276 0.40025353 0.40543026 0.41055965\n 0.4156341 0.42061973 0.4255008 0.43036702 0.4351645 0.43992177\n 0.44464865 0.4493351 0.45390958 0.45841444 0.46289667 0.4673559\n 0.4717829 0.47617012 0.48050338 0.4848013 0.489041 0.49326256\n 0.4974451 0.5015809 0.50571084 0.5098176 0.5138784 0.5179276\n 0.52193904 0.525917 0.529865 0.53380066 0.5377231 0.54159486\n 0.5454342 0.54926467 0.5530502 0.556791 0.56052 0.5641965\n 0.5678675 0.5714872 0.57509106 0.57867914 0.5822311 0.5857644\n 0.58928233 0.592793 0.59627086 0.59971887 0.60316205 0.60658395\n 0.6099794 0.61334175 0.61667436 0.619985 0.6232716 0.6265461\n 0.62981224 0.6330606 0.63630044 0.63946843 0.64262515 0.6457716\n 0.6488872 0.65199125 0.65502757 0.6580535 0.6610565 0.6640251\n 0.6669438 0.6698483 0.6727014 0.67552817 0.6783299 0.6810903\n 0.6838292 0.6865456 0.6892405 0.6919044 0.6945346 0.69714165\n 0.69970524 0.70223397 0.70471185 0.7071842 0.7095903 0.7119819\n 0.71437263 0.71673536 0.71906656 0.72137076 0.72366214 0.7259256\n 0.72818136 0.73042464 0.7326233 0.73481643 0.7369742 0.7391216\n 0.74125946 0.7433683 0.74547356 0.7475682 0.74964374 0.75171167\n 0.75376904 0.7558107 0.757825 0.75982887 0.7618149 0.7637917\n 0.76576257 0.767709 0.7696543 0.7715925 0.7735119 0.77542216\n 0.77732575 0.77922314 0.7810983 0.7829713 0.78483254 0.7866745\n 0.7885077 0.7903256 0.7921303 0.7939335 0.7957258 0.7975064\n 0.79927003 0.80102766 0.8027792 0.80452484 0.8062575 0.8079841\n 0.8096921 0.81139535 0.813073 0.8147449 0.8164148 0.8180765\n 0.81972486 0.82136554 0.82299864 0.8246217 0.82623744 0.8278416\n 0.8294405 0.8310295 0.832608 0.83417296 0.8357336 0.83728594\n 0.8388256 0.8403523 0.841871 0.84337974 0.84488434 0.8463729\n 0.8478495 0.8493148 0.850769 0.8522208 0.85365826 0.8550832\n 0.8565004 0.85791075 0.85931104 0.8606994 0.86207896 0.86343545\n 0.86477923 0.86610985 0.86742765 0.86871994 0.8700045 0.8712725\n 0.87253964 0.87379074 0.87502587 0.8762541 0.8774646 0.87864786\n 0.8798055 0.88094205 0.882077 0.88319016 0.884288 0.88536936\n 0.8864456 0.8875099 0.88856953 0.8896209 0.890664 0.8917057\n 0.89274216 0.89376813 0.8947927 0.8958099 0.89682394 0.89783007\n 0.8988324 0.899825 0.9008152 0.90179825 0.90277 0.9037378\n 0.9046995 0.9056541 0.90660703 0.90755624 0.9084997 0.90944076\n 0.91037524 0.91130465 0.91222924 0.9131488 0.91406435 0.9149753\n 0.9158825 0.916784 0.9176836 
0.9185801 0.9194655 0.9203469\n 0.9212241 0.92210066 0.92296964 0.9238352 0.92469907 0.92556065\n 0.9264135 0.92726207 0.9281086 0.9289509 0.9297887 0.93062186\n 0.93144906 0.93227077 0.93309164 0.93390757 0.93472147 0.93552625\n 0.93633 0.93713075 0.93792987 0.9387237 0.9395151 0.94030124\n 0.94108176 0.9418612 0.9426338 0.94340473 0.9441706 0.9449316\n 0.94568676 0.9464374 0.9471839 0.947925 0.9486617 0.9493933\n 0.9501213 ]", - "explained_var": "0.95012134", - "f1_macro": "0.9690194825934206", - "f1_weighted": "0.9690853355372118", - "feature_shape": "(484, 131072)", - "imbalance_ratio": "0.9206349206349206", - "label_dist": "{'real': 232, 'synthetic': 252, 'real_pct': np.float64(47.93388429752066), 'synthetic_pct': np.float64(52.066115702479344)}", - "labels_shape": "(484,)", - "n_components": "0.95", - "original_dim": "131072", - "pca_dim": "295", - "roc_auc": "0.9936061381074169", - "scale_pos_weight": "0.9253731343283582", - "seed": "2198257139", - "timestamp": "20260131_164505" -} \ No newline at end of file diff --git a/results/score_confusion_matrix.png b/results/score_confusion_matrix.png deleted file mode 100644 index 0ac5fee..0000000 Binary files a/results/score_confusion_matrix.png and /dev/null differ diff --git a/results/score_explained_variance.png b/results/score_explained_variance.png deleted file mode 100644 index bcfb2ba..0000000 Binary files a/results/score_explained_variance.png and /dev/null differ diff --git a/results/vae_comparison.txt b/results/vae_comparison.txt new file mode 100644 index 0000000..0a9f27c --- /dev/null +++ b/results/vae_comparison.txt @@ -0,0 +1,706 @@ +{'accuracy': 0.8235294117647058, + 'best_iter': 32, + 'best_score': 0.9447935070036866, + 'cumulative:': array([0.32039887, 0.37570587, 0.4090894 , 0.436353 , 0.44910192, + 0.46047893, 0.4705638 , 0.4795027 , 0.48775637, 0.49585357, + 0.50377613, 0.5113055 , 0.5186545 , 0.52593285, 0.5329015 , + 0.539597 , 0.5461374 , 0.5525876 , 0.55888116, 0.56508017, + 0.5712244 , 0.57724464, 0.5830629 , 0.5887853 , 0.59437275, + 0.59993416, 0.6053227 , 0.61063474, 0.61582214, 0.6209211 , + 0.62599427, 0.63095903, 0.6358681 , 0.6406707 , 0.64545375, + 0.6501189 , 0.6547675 , 0.659283 , 0.6637747 , 0.66820705, + 0.67259544, 0.6769708 , 0.68126285, 0.6854735 , 0.68963486, + 0.69374937, 0.6977799 , 0.70176166, 0.7056897 , 0.709554 , + 0.7133815 , 0.7171504 , 0.72091293, 0.7246123 , 0.72825223, + 0.7318754 , 0.7354328 , 0.73897654, 0.7424681 , 0.7459095 , + 0.7493197 , 0.7526981 , 0.7560446 , 0.7593642 , 0.76262933, + 0.7658778 , 0.7691024 , 0.7722846 , 0.7754338 , 0.7785385 , + 0.78160787, 0.7846422 , 0.78762335, 0.79057777, 0.7935175 , + 0.7963818 , 0.799237 , 0.8020655 , 0.8048832 , 0.80764866, + 0.81039196, 0.8131009 , 0.81579715, 0.8184613 , 0.8210898 , + 0.8236765 , 0.8262456 , 0.8287908 , 0.8313098 , 0.8337678 , + 0.83620983, 0.8386336 , 0.84102917, 0.8433846 , 0.84568655, + 0.8479637 , 0.85022783, 0.8524689 , 0.8546968 , 0.8568957 , + 0.85904723, 0.8611882 , 0.8633105 , 0.86540455, 0.867487 , + 0.86954474, 0.8715774 , 0.87358457, 0.87557405, 0.87754464, + 0.87949663, 0.88143903, 0.88336784, 0.88526374, 0.8871446 , + 0.88901025, 0.89084035, 0.89265585, 0.8944312 , 0.8961898 , + 0.89793426, 0.89964986, 0.9013583 , 0.9030427 , 0.90468526, + 0.9063262 , 0.9079446 , 0.9095402 , 0.9111285 , 0.9127047 , + 0.9142477 , 0.91576916, 0.9172699 , 0.91875887, 0.9202242 , + 0.9216819 , 0.9231232 , 0.9245376 , 0.92593247, 0.92731655, + 0.92867297, 0.9300219 , 0.9313516 , 0.9326673 , 0.93395793, + 0.93523085, 0.9364834 , 
0.9377117 , 0.93892545, 0.9401313 , + 0.9413293 , 0.9425155 , 0.94368446, 0.94484085, 0.9459849 , + 0.94712096, 0.9482242 , 0.9493191 , 0.95039463], dtype=float32), + 'explained_var': np.float32(0.9503943), + 'f1_macro': 0.8224371373307543, + 'f1_weighted': 0.823256343156218, + 'feature_shape': (510, 256), + 'imbalance_ratio': np.float64(0.895910780669145), + 'label_dist': {'real': 241, + 'real_pct': np.float64(47.25490196078431), + 'synthetic': 269, + 'synthetic_pct': np.float64(52.74509803921569)}, + 'labels_shape': (510,), + 'n_components': np.int64(159), + 'original_dim': 256, + 'pca_dim': 159, + 'roc_auc': 0.9251543209876544, + 'scale_pos_weight': np.float64(0.8976744186046511), + 'seed': 2198257139, + 'timestamp': '20260203_113430', + 'vae_type': 'exdysa/dc-ae-f32c32-sana-1.1-diffusers'} +============================================================ +CLASSIFICATION RESULTS +============================================================ + precision recall f1-score support + + Real 0.83 0.79 0.81 48 + Synthetic 0.82 0.85 0.84 54 + + accuracy 0.82 102 + macro avg 0.82 0.82 0.82 102 +weighted avg 0.82 0.82 0.82 102 + +Training selected. +Resolving data files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 69591.90it/s] +Resolving data files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 69/69 [00:00<00:00, 50218.11it/s] +Using local images from /Users/e6d64/Documents/GitHub/darkshapes/negate/assets +Downloading model... +Fetching 2 files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 4728.64it/s] +Download complete: : 0.00B [00:00, ?B/s] | 0/2 [00:00 from negate.extract import VAEModel, MODEL_MAP -import pytest def test_vae_model_map_entries() -> None: diff --git a/uv.lock b/uv.lock index 907c388..52ee3b2 100644 --- a/uv.lock +++ b/uv.lock @@ -414,6 +414,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, ] +[[package]] +name = "einops" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/77/850bef8d72ffb9219f0b1aac23fbc1bf7d038ee6ea666f331fa273031aa2/einops-0.8.2.tar.gz", hash = "sha256:609da665570e5e265e27283aab09e7f279ade90c4f01bcfca111f3d3e13f2827", size = 56261, upload-time = "2026-01-26T04:13:17.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/09/f8d8f8f31e4483c10a906437b4ce31bdf3d6d417b73fe33f1a8b59e34228/einops-0.8.2-py3-none-any.whl", hash = "sha256:54058201ac7087911181bfec4af6091bb59380360f069276601256a76af08193", size = 65638, upload-time = "2026-01-26T04:13:18.546Z" }, +] + [[package]] name = "filelock" version = "3.20.3" @@ -1041,6 +1050,7 @@ dependencies = [ { name = "accelerate" }, { name = "datasets" }, { name = "diffusers" }, + { name = "einops" }, { name = "huggingface-hub" }, { name = "matplotlib" }, { name = "mongoengine" }, @@ -1097,6 +1107,7 @@ requires-dist = [ { name = "accelerate", specifier = ">=1.12.0" }, { name = "datasets", specifier = ">=4.5.0" }, { name = "diffusers", specifier = ">=0.36.0" }, + { name = "einops", specifier = ">=0.8.2" }, { name = "huggingface-hub", specifier = ">=1.3.2" }, 
{ name = "matplotlib", specifier = ">=3.10.8" }, { name = "mongoengine", specifier = ">=0.29.1" },