Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ plot_duplicates(image_dir='path/to/image/directory',
duplicate_map=duplicates,
filename='ukbench00120.jpg')
```

Alternatively, you can use the library from the command line:
```bash
imagededup find-duplicates --image_dir path/to/image/directory --method PHash
```
Use the `--help` flag to get more information on the available options.

For more examples, refer to [this](https://github.com/idealo/imagededup/tree/master/examples) part of the
repository.

Expand Down
Empty file added imagededup/client/__init__.py
Empty file.
48 changes: 48 additions & 0 deletions imagededup/client/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import importlib
import click

from typing import Optional
from pathlib import PosixPath


@click.group()
def cli():
    """Command line interface of imagededup.

    Root click group; sub-commands such as ``find_duplicates`` register
    themselves on it via ``@cli.command()``. Click uses this docstring as the
    description shown by ``--help``.
    """


@cli.command()
@click.option('--image_dir', help='Path to the directory containing all the images.', type=str, required=True)
@click.option('--method', help='Select which algorithm to use.',
              type=click.Choice(['PHash', 'DHash', 'WHash', 'AHash', 'CNN']), required=True)
@click.option('--outfile', help='Name of the file the results should be written to.', type=str)
@click.option('--min_similarity_threshold',
              help='For CNN only: threshold value (must be float between -1.0 and 1.0). Default is 0.9.',
              type=click.FloatRange(-1.0, 1.0),
              default=0.9)
@click.option('--max_distance_threshold',
              help='For hashing methods only: threshold value (must be integer between 0 and 64). Default is 10.',
              type=click.IntRange(0, 64), default=10)
@click.option('--scores',
              help='Boolean indicating whether scores are to be returned along with retrieved duplicates.',
              type=bool, default=False)
def find_duplicates(image_dir: str,
                    method: str,
                    outfile: Optional[str],
                    min_similarity_threshold: float,
                    max_distance_threshold: int,
                    scores: bool) -> None:
    """Find duplicate images in a directory using the selected method.

    \f
    Everything after the form feed above is hidden from the ``--help`` output.

    Args:
        image_dir: Directory containing the images (click passes it as ``str``).
        method: Name of the class in ``imagededup.methods`` to instantiate;
            restricted by click to PHash, DHash, WHash, AHash or CNN.
        outfile: Optional output file name, forwarded to the method's
            ``find_duplicates``. When omitted, the result is echoed instead.
        min_similarity_threshold: Forwarded only when ``method`` is ``CNN``.
        max_distance_threshold: Forwarded only for the non-CNN methods.
        scores: Forwarded to the method's ``find_duplicates``; defaults to
            ``False`` when the option is not given.
    """
    # Resolve the method class by name, so the --method choices map directly
    # onto attributes of the ``imagededup.methods`` module.
    methods_module = importlib.import_module('imagededup.methods')
    selected_method = getattr(methods_module, method)()
    encodings = selected_method.encode_images(image_dir)

    # CNN and the hashing methods take differently named threshold arguments;
    # only the one matching the chosen method is passed on.
    if method == 'CNN':
        threshold_kwargs = {'min_similarity_threshold': min_similarity_threshold}
    else:
        threshold_kwargs = {'max_distance_threshold': max_distance_threshold}

    duplicates = selected_method.find_duplicates(encoding_map=encodings,
                                                 outfile=outfile,
                                                 scores=scores,
                                                 **threshold_kwargs)

    # Without an output file the result would otherwise be lost, so print it.
    if outfile is None:
        click.echo(duplicates)
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
'tensorflow~=2.0.0',
'tqdm',
'scikit-learn',
'matplotlib'
'matplotlib',
'Click'
],
extras_require={
'tests': ['pytest', 'pytest-cov', 'pytest-mock', 'codecov'],
Expand All @@ -55,4 +56,5 @@
'Topic :: Software Development :: Libraries :: Python Modules',
],
packages=find_packages(exclude=('tests',)),
entry_points={'console_scripts': ['imagededup=imagededup.client.client:cli']},
)
153 changes: 153 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import os

from click.testing import CliRunner
from imagededup.client.client import find_duplicates

# Image directory handed to the CLI through ``--image_dir`` in these tests.
PATH_IMAGE_DIR = 'tests/data/mixed_images'
# Output file handed to the CLI through ``--outfile``; removed again by the test that creates it.
FILENAME = 'tests/test_output.json'


def test_no_image_dir_given():
    """Leaving out the required ``--image_dir`` option exits with code 2."""
    runner = CliRunner()
    # A valid --method is supplied so that the only possible usage error is
    # the missing --image_dir option this test is named after.
    result = runner.invoke(find_duplicates, ['--method', 'PHash'])
    assert result.exit_code == 2


def test_image_dir_given_but_no_method():
    """Leaving out the required ``--method`` option exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR]
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_image_dir_given_and_method():
    """Both required options present: the command exits with code 0."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_image_dir_given_but_wrong_method():
    """A ``--method`` value outside the allowed choices exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'LHash']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_file_is_created():
    """Passing ``--outfile`` makes the command succeed and create that file."""
    runner = CliRunner()
    try:
        result = runner.invoke(find_duplicates,
                               ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--outfile', FILENAME])
        assert result.exit_code == 0
        assert os.path.isfile(FILENAME)
    finally:
        # Clean up even when an assertion fails, so a stale output file cannot
        # leak into later runs; guard the removal so a missing file does not
        # mask the original assertion error.
        if os.path.isfile(FILENAME):
            os.remove(FILENAME)


def test_hash_max_distance_threshold_int():
    """An integer ``--max_distance_threshold`` is accepted for a hashing method."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--max_distance_threshold', '20']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_hash_max_distance_threshold_no_int():
    """A non-integer ``--max_distance_threshold`` exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--max_distance_threshold', '0.5']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_hash_max_distance_threshold_in_range_left_interval():
    """The lower bound (0) of ``--max_distance_threshold`` is accepted."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--max_distance_threshold', '0']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_hash_max_distance_threshold_in_range_right_interval():
    """The upper bound (64) of ``--max_distance_threshold`` is accepted."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--max_distance_threshold', '64']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_hash_max_distance_threshold_out_of_range_negative():
    """A ``--max_distance_threshold`` below the allowed range exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--max_distance_threshold', '-30']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_hash_max_distance_threshold_out_of_range_positive():
    """A ``--max_distance_threshold`` above the allowed range exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--max_distance_threshold', '900']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_hash_min_similarity_threshold_has_no_effect():
    """Passing ``--min_similarity_threshold`` with a hashing method still exits with code 0."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--min_similarity_threshold', '0.5']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_cnn_min_similarity_threshold_float():
    """A float ``--min_similarity_threshold`` is accepted for the CNN method."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--min_similarity_threshold', '0.5']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_cnn_min_similarity_threshold_no_float():
    """A ``--min_similarity_threshold`` that is not a number exits with code 2."""
    runner = CliRunner()
    # Use a value that cannot be parsed as a float at all. A numeric string
    # such as '10' would be converted and rejected only by the range check,
    # which the out-of-range tests already cover.
    result = runner.invoke(find_duplicates,
                           ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--min_similarity_threshold', 'hello'])
    assert result.exit_code == 2


def test_cnn_min_similarity_threshold_in_range_left_interval():
    """The lower bound (-1.0) of ``--min_similarity_threshold`` is accepted."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--min_similarity_threshold', '-1.0']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_cnn_min_similarity_threshold_in_range_right_interval():
    """The upper bound (1.0) of ``--min_similarity_threshold`` is accepted."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--min_similarity_threshold', '1.0']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_cnn_min_similarity_threshold_out_of_range_negative():
    """A ``--min_similarity_threshold`` below the allowed range exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--min_similarity_threshold', '-1.5']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_cnn_min_similarity_threshold_out_of_range_positive():
    """A ``--min_similarity_threshold`` above the allowed range exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--min_similarity_threshold', '1.5']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2


def test_cnn_max_distance_threshold_has_no_effect():
    """Passing ``--max_distance_threshold`` with the CNN method still exits with code 0."""
    runner = CliRunner()
    # The method must be CNN here: with a hashing method the option is used as
    # intended, and the CNN code path would never be exercised.
    result = runner.invoke(find_duplicates,
                           ['--image_dir', PATH_IMAGE_DIR, '--method', 'CNN', '--max_distance_threshold', '10'])
    assert result.exit_code == 0


def test_scores_boolean():
    """A boolean value for ``--scores`` is accepted."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--scores', 'False']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 0


def test_scores_no_boolean():
    """A non-boolean value for ``--scores`` exits with code 2."""
    cli_args = ['--image_dir', PATH_IMAGE_DIR, '--method', 'PHash', '--scores', 'hello']
    outcome = CliRunner().invoke(find_duplicates, cli_args)
    assert outcome.exit_code == 2