3 changes: 2 additions & 1 deletion .gitignore
@@ -18,4 +18,5 @@ models/*

results/features_residual/
results/residual_analysis/
results/features_updated/
results/features_updated/
.demo.ipynb
21 changes: 10 additions & 11 deletions README.md
@@ -26,9 +26,7 @@ This repo provides a simple command‑line interface to invoke the tool and exam

Future work includes the development of an automated testing framework and evaluation suite, expanding the scope of research to include a wider diversity of synthetic and original human-generated datasets, benchmarking against comparable methods, and exploring additional model architectures.

![Bar and grid graph comparing variance of the synthetic and real images](results/score_explained_variance.png)
![Graph comparing before and after pca transform operation of dataset](results/pca_transform_map.png)
![Graph comparing confusion matrix of the synthetic and real images](results/score_confusion_matrix.png)
![Bar and grid graph comparing variance of the synthetic and real images](results/combined_plots.png)

## Requirements

@@ -69,32 +67,33 @@ Set-ExecutionPolicy Bypass -Scope Process -Force; .venv\Scripts\Activate.ps1
Basic Syntax:

```sh
usage: negate [-h] {train,check} ...
usage: negate [-h] {train,check,compare} ...

Negate CLI

positional arguments:
{train,check}
train Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.
check Check whether an image at the provided path is synthetic or original.
{train,check,compare}
train Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.
check Check whether an image at the provided path is synthetic or original.
compare Run extraction and training using all possible VAE.

options:
-h, --help show this help message and exit
-h, --help show this help message and exit
```
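
For example, each subcommand can be run directly from the shell (illustrative only; `path/to/photo.png` is a placeholder):

```sh
# Train on the default dataset under `assets/` with the default VAE
negate train

# Check a single image; -g / -s optionally mark the ground-truth label for evaluation
negate check -g path/to/photo.png

# Run extraction and training once for every supported VAE
negate compare
```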

Training syntax:

```sh
usage: negate train [-h]
[-m {exdysa/dc-ae-f32c32-sana-1.1-diffusers,zai-org/GLM-Image,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}]
[-m {exdysa/dc-ae-f32c32-sana-1.1-diffusers,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}]
[path]

positional arguments:
path Dataset path
path Genuine/Human-original dataset path

options:
-h, --help show this help message and exit
-m, --model {exdysa/dc-ae-f32c32-sana-1.1-diffusers,zai-org/GLM-Image,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}
-m, --model {exdysa/dc-ae-f32c32-sana-1.1-diffusers,black-forest-labs/FLUX.2-dev,black-forest-labs/FLUX.2-klein-4B,Tongyi-MAI/Z-Image,Freepik/F-Lite-Texture,exdysa/mitsua-vae-SAFETENSORS}
Change the VAE model to use for training to a supported HuggingFace repo. Accuracy and memory use decrease from left to right
```
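
A hypothetical full invocation, with an explicit dataset path (placeholder) and one of the supported VAE repos:

```sh
negate train datasets/originals -m black-forest-labs/FLUX.2-klein-4B
```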

6 changes: 3 additions & 3 deletions _version.py
@@ -28,7 +28,7 @@
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.1.dev41+g786446a62.d20260131'
__version_tuple__ = version_tuple = (0, 1, 'dev41', 'g786446a62.d20260131')
__version__ = version = '0.1.dev57+gded5bb62c.d20260203'
__version_tuple__ = version_tuple = (0, 1, 'dev57', 'gded5bb62c.d20260203')

__commit_id__ = commit_id = 'g786446a62'
__commit_id__ = commit_id = 'gded5bb62c'
23 changes: 23 additions & 0 deletions config/config.yaml
@@ -0,0 +1,23 @@
# Advanced Configuration for Negate CLI
batch_size: 4 # Feature extraction batch size, zero to disable batching
cache_features: true # Cache extracted features between runs
vae_tiling: false # Enable VAE tiling
vae_slicing: false # Enable VAE slicing
patch_size: 768 # Patch resolution
top_k: 1 # Number of top patches to keep
use_onnx: false # True → ONNX, False → native XGBoost
dtype: bfloat16 # vae dtype
default_vae: "Freepik/F-Lite-Texture" # Model path

train:
n_components: 0.95 # Training PCA num components
num_boost_round: 200 # Boosted training rounds
early_stopping_rounds: 10 # Early stop training
colsample_bytree: 0.8
eval_metric: ["logloss", "aucpr"]
learning_rate: 0.1
max_depth: 4
objective: "binary:logistic"
subsample: 0.8
scale_pos_weight: null
seed: 0
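
The `train:` block mirrors standard XGBoost parameters. A minimal sketch, assuming `grade()` forwards these values to `xgboost.train` (the random data below is placeholder only; the real feature matrix comes from the VAE extraction and PCA steps):

```python
import numpy as np
import xgboost as xgb

# Placeholder features/labels standing in for the PCA-reduced VAE features
rng = np.random.default_rng(0)
X, y = rng.normal(size=(64, 16)), rng.integers(0, 2, size=64)
dtrain = xgb.DMatrix(X[:48], label=y[:48])
dval = xgb.DMatrix(X[48:], label=y[48:])

params = {
    "objective": "binary:logistic",       # train.objective
    "eval_metric": ["logloss", "aucpr"],  # train.eval_metric
    "learning_rate": 0.1,                 # train.learning_rate
    "max_depth": 4,                       # train.max_depth
    "subsample": 0.8,                     # train.subsample
    "colsample_bytree": 0.8,              # train.colsample_bytree
    "seed": 0,                            # train.seed
}
booster = xgb.train(
    params,
    dtrain,
    num_boost_round=200,        # train.num_boost_round
    early_stopping_rounds=10,   # train.early_stopping_rounds
    evals=[(dval, "val")],
)
```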
8 changes: 5 additions & 3 deletions negate/__init__.py
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0
# <!-- // /* d a r k s h a p e s */ -->
# ruff: noqa

from negate.datasets import build_datasets, dataset_to_nparray, generate_dataset
from negate.config import negate_options as negate_opt
from negate.datasets import build_datasets, generate_dataset
from negate.extract import FeatureExtractor, DeviceName, features, VAEModel
from negate.train import TrainResult, grade, get_time, model_path
from negate.train import TrainResult, grade, generate_datestamp_path, datestamped_folder, get_time, model_path
from negate.track import in_console, on_graph
from negate.save import save_model, save_to_onnx
from negate.save import save_models, save_to_onnx
from negate.residuals import Residual
161 changes: 120 additions & 41 deletions negate/__main__.py
@@ -1,57 +1,115 @@
# SPDX-License-Identifier: MPL-2.0 AND LicenseRef-Commons-Clause-License-Condition-1.0
# <!-- // /* d a r k s h a p e s */ -->

"""Negate CLI entry point for training and inference.\n
:returns: None."""

from pathlib import Path
from typing import Any

import numpy as np

from negate import TrainResult, build_datasets, features, generate_dataset, grade, in_console, save_to_onnx, on_graph, VAEModel
from negate import (
TrainResult,
VAEModel,
build_datasets,
datestamped_folder,
features,
generate_dataset,
generate_datestamp_path,
grade,
in_console,
model_path,
negate_opt,
on_graph,
save_models,
save_to_onnx,
)


def run_native(features_array) -> np.ndarray:
"""Run inference using XGBoost with PCA pre-processing.\n
:param features_array: Feature array.\n
:return: Prediction array."""
import pickle

import xgboost as xgb

model_file_path_named = model_path / "negate.ubj"

if not model_file_path_named.exists():
raise FileNotFoundError(f"Model file not found: {str(model_file_path_named)}. Please run 'train' first to create the model.")
else:
model_file_path_named = str(model_file_path_named)

pca_file_path_named = model_path / "negate_pca.pkl"
with open(pca_file_path_named, "rb") as pca_file:
pca = pickle.load(pca_file)

features_pca = pca.transform(features_array)

model = xgb.Booster()
model.load_model(model_file_path_named)

result = model.predict(xgb.DMatrix(features_pca))

return result

def evaluate(prediction: np.ndarray, ground_truth: np.ndarray) -> None:
"""Print accuracy and class distribution.\n
:param prediction: Model outputs (0 = genuine, 1 = synthetic).\n
:param ground_truth: Ground-truth labels.\n
:return: None."""

prediction = prediction.astype(int)
ground_truth = ground_truth.astype(int)
def run_onnx(features_array) -> Any:
"""Run inference using ONNX Runtime with PCA pre-processing.\n
:param features_array: Feature array.\n
:return: Prediction array."""
import onnxruntime as ort
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as ONNXRuntimeError
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidArgument

model_file_path_named = model_path / "negate.onnx"
if not model_file_path_named.exists():
raise FileNotFoundError(f"Model file not found: {str(model_file_path_named)}. Please run 'train' first to create the model.")
else:
model_file_path_named = str(model_file_path_named)

# pca_file_path_named = model_path / "negate_pca.onnx"
# session_pca = ort.InferenceSession(pca_file_path_named)
# input_name_pca = session_pca.get_inputs()[0].name
# features_pca = session_pca.run(None, {input_name_pca: features_array})[0]

acc = float(np.mean(prediction == ground_truth))
# input_name = ort.get_available_providers()[0]
features_model = features_array.astype(np.float32) # type: ignore

genu_cnt = int(np.sum(ground_truth == 0))
synth_cnt = int(np.sum(ground_truth == 1))
session = ort.InferenceSession(model_file_path_named)
print(f"Model '{model_file_path_named}' loaded.")
input_name = session.get_inputs()[0].name
try:
result = session.run(None, {input_name: features_model})[0] # type: ignore
return result
except (InvalidArgument, ONNXRuntimeError) as error_log:
import sys

print(f"Accuracy: {acc:.2%}")
print(f"Genuine: {genu_cnt} Synthetic: {synth_cnt}")
print(error_log)
sys.exit()


def predict(image_path: Path, vae_type: VAEModel = VAEModel.MITSUA_FP16, true_label: int | None = None) -> np.ndarray:
def predict(image_path: Path, vae_type: VAEModel, true_label: int | None = None) -> np.ndarray:
"""Predict synthetic or original for given image. (0 = genuine, 1 = synthetic)\n
:param image_path: Path to image file or folder.
:param vae_type: VAE model to use for feature extraction.
:return: Prediction array.
"""
:return: Prediction array."""
from datasets import Dataset
import onnxruntime as ort
from onnxruntime import SparseTensor

print(f"{'Evaluation' if true_label is not None else 'Detection'} selected.")

models_location = Path(__file__).parent.parent / "models"
model_file = models_location / "negate.onnx"

if not model_file.exists():
raise FileNotFoundError(f"Model file not found: {model_file}. Please run 'train' first to create the model.")
print(f"""{"Evaluation" if true_label is not None else "Detection"} selected.
Checking path '{image_path}' with {vae_type.value}""")

dataset: Dataset = generate_dataset(image_path)
features_dataset: Dataset = features(dataset, vae_type)
features_array = np.array(features_dataset["features"], dtype=np.float32) # type: ignore[arg-type]

features_array = np.array(features_dataset["features"]).astype(np.float32) # type: ignore[arg-type]
result = run_onnx(features_array) if negate_opt.use_onnx else run_native(features_array)

session = ort.InferenceSession(str(model_file))
input_name = session.get_inputs()[0].name
result: SparseTensor = session.run(None, {input_name: features_array})[0] # type: ignore
print(result)
thresh = 0.5
predictions = (result > thresh).astype(int)
match true_label:
case None:
for prediction in result: # type: ignore
@@ -60,21 +118,26 @@ def predict(image_path: Path, vae_type: VAEModel = VAEModel.MITSUA_FP16, true_la
else:
print("image is SYNTHETIC")
case _:
evaluate(result, np.array([true_label])) # type: ignore
ground_truth = np.full(predictions.shape, true_label, dtype=int)
acc = float(np.mean(predictions == ground_truth))
print(f"Accuracy: {acc:.2%}")

return result # type: ignore[return-value]
return result, predictions # type: ignore[return-value]


def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None) -> None:
"""Train model using dataset at path.\n
:param path: Dataset root."""
def training_run(vae_type: VAEModel, file_or_folder_path: Path | None = None, compare: bool = False) -> None:
"""Train \n
# xgb00OOst\n
model using dataset at path.\n
:param path: Dataset root folder."""
from datasets import Dataset

print("Training selected.")
dataset: Dataset = build_datasets(file_or_folder_path)
features_dataset: Dataset = features(dataset, vae_type)
train_result: TrainResult = grade(features_dataset)
save_to_onnx(train_result)
save_models(train_result, compare)
in_console(train_result, vae_type)
on_graph(train_result)

@@ -90,14 +153,15 @@ def main() -> None:
subparsers = parser.add_subparsers(dest="cmd", required=True)

train_parser = subparsers.add_parser("train", help="Train model on the dataset in the provided path or `assets/`. The resulting model will be saved to disk.")
train_parser.add_argument("path", help="Dataset path", nargs="?", default=None)
train_parser.add_argument("path", help="Genunie/Human-original dataset path", nargs="?", default=None)
train_parser.add_argument(
"-m",
"--model",
choices=[m.value for m in VAEModel],
default=VAEModel.MITSUA_FP16,
help="Change the VAE model to use for training to a supported HuggingFace repo. Accuracy and memory use decrease from left to right",
default=negate_opt.default_vae,
help=f"Change the VAE model to use for training to a supported HuggingFace repo (default {negate_opt.default_vae}). Accuracy and memory use decrease from left to right",
)

check_parser = subparsers.add_parser(
"check",
help="Check whether an image at the provided path is synthetic or original.",
@@ -106,7 +170,7 @@ def main() -> None:
label_grp = check_parser.add_mutually_exclusive_group()
label_grp.add_argument("-s", "--synthetic", action="store_const", const=1, dest="label", help="Mark image as synthetic (label = 1) for evaluation.")
label_grp.add_argument("-g", "--genuine", action="store_const", const=0, dest="label", help="Mark image as genuine (label = 0) for evaluation.")

subparsers.add_parser("compare", help="Run extraction and training using all possible VAE.")
args = parser.parse_args(argv[1:])

match args.cmd:
@@ -115,12 +179,27 @@
dataset_location: Path | None = Path(args.path)
else:
dataset_location: Path | None = None
datestamped_folder.mkdir(parents=True, exist_ok=True)

vae_type = VAEModel(args.model)
training_run(file_or_folder_path=dataset_location, vae_type=vae_type)
training_run(
vae_type=vae_type,
file_or_folder_path=dataset_location,
)
case "check":
if args.path is None:
raise ValueError("Check requires an image path.")
predict(Path(args.path), true_label=args.label)
import json

results_file_path = model_path / "results.json"
with open(results_file_path) as result_metadata:
train_metadata = json.load(result_metadata)
vae_type = VAEModel(train_metadata["vae_type"])
predict(Path(args.path), vae_type=vae_type, true_label=args.label)
case "compare":
for model in VAEModel:
_regenerate = generate_datestamp_path("test")
training_run(vae_type=VAEModel(model.value), compare=True)
case _:
raise NotImplementedError

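A hedged sketch of calling the revised `predict()` programmatically instead of via the CLI (assumes a trained model already exists on disk, that `negate/__main__.py` only invokes `main()` under its entry-point guard, and that `samples/example.png` is a placeholder path):

```python
from pathlib import Path

from negate import VAEModel
from negate.__main__ import predict  # assumption: importable without side effects

# predict() now returns both raw scores and thresholded labels (0 = genuine, 1 = synthetic)
scores, labels = predict(
    Path("samples/example.png"),    # placeholder image path
    vae_type=VAEModel.MITSUA_FP16,
    true_label=None,                # pass 0 or 1 to print accuracy instead
)
print(scores, labels)
```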
52 changes: 52 additions & 0 deletions negate/config.py
@@ -0,0 +1,52 @@
from typing import NamedTuple


class NegateConfig(NamedTuple):
"""YAML config values.\n
:param patch_size: Patch width for residuals.\n
:param top_k: Number of patches.\n
:param vae_tiling: Enable tiling.\n
:param vae_slicing: Enable slicing.\n
:param use_onnx: Use ONNX for inference.\n
:return: Config instance.""" # noqa: D401

batch_size: int
cache_features: bool
default_vae: str
dtype: str
n_components: float
num_boost_round: int
patch_size: int
top_k: int
use_onnx: bool
vae_slicing: bool
vae_tiling: bool
early_stopping_rounds: int
colsample_bytree: float
eval_metric: list
learning_rate: float
max_depth: int
objective: str
subsample: float
scale_pos_weight: float | None
seed: int


def load_config_options() -> NegateConfig:
"""Load YAML configuration options.\n
:return: NegateConfig instance."""

from pathlib import Path

import yaml

config_path = Path(__file__).parent.parent / "config" / "config.yaml"
with open(config_path, "r") as config_file:
data = yaml.safe_load(config_file)
train_cfg = data.pop("train", {})
data.update(train_cfg)
return NegateConfig(**data)


negate_options = load_config_options()
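
A minimal usage sketch of the loaded options (the singleton is also re-exported as `negate_opt` from `negate/__init__.py`; the values in the comments reflect `config/config.yaml` from this change):

```python
from negate.config import negate_options

print(negate_options.default_vae)      # "Freepik/F-Lite-Texture"
print(negate_options.use_onnx)         # False -> run_native (PCA + XGBoost) path
print(negate_options.num_boost_round)  # 200
```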