Skip to content
4 changes: 2 additions & 2 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[versions]
guava_version = "33.5.0-jre"
junit_version = "6.0.1"
log4j_version = "2.25.2"
junit_version = "6.0.2"
log4j_version = "2.25.3"

[libraries]
guava = { module = "com.google.guava:guava", version.ref = "guava_version" }
Expand Down
26 changes: 8 additions & 18 deletions src/main/java/com/wildermods/masshash/Blob.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;

import com.wildermods.masshash.exception.IntegrityException;
import com.wildermods.masshash.utils.ByteUtil;
Expand All @@ -15,6 +16,11 @@
*/
public record Blob(byte[] data, String hash) implements IBlob {

/**
 * Canonical constructor. Rejects {@code null} components so that every Blob
 * carries both its data and its hash.
 *
 * @throws NullPointerException if {@code data} or {@code hash} is {@code null};
 *         the exception message names the offending component
 */
public Blob {
    Objects.requireNonNull(data, "data");
    Objects.requireNonNull(hash, "hash");
}

/**
* Constructs a Blob from the given data and computes its hash.
*
Expand Down Expand Up @@ -118,23 +124,7 @@ public Blob(InputStream stream, Hash hash) throws IOException, IntegrityExceptio
* @return A new {@link Hash} object that represents this blob, but with no associated data.
*/
public Hash dropData() {
    // A data-less result can no longer be a Blob: the canonical constructor rejects
    // null data, so building one here would always throw. Return a hash-only object
    // instead; this Blob keeps its data and is not modified.
    return Hash.of(hash());
}

@Override
Expand All @@ -143,7 +133,7 @@ public int hashCode() {
}

/**
* Compares this object with another Hash object for equality. All {@link Blob} objects are also instances of {@link Hash}.
* Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}.
* <p>
* Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares
* the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash},
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/com/wildermods/masshash/Data.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.wildermods.masshash;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

/**
Expand All @@ -15,6 +18,21 @@ public interface Data {
*/
public byte[] data();

/**
 * Opens an {@link InputStream} over this object's data.
 * <p>
 * The default implementation wraps the array returned by {@link #data()} in a
 * {@link ByteArrayInputStream}. Classes that can stream their data without holding
 * it in memory should override this method.
 * </p>
 *
 * @return an {@link InputStream} for the data
 * @throws IllegalStateException if {@link #isTransient()} reports {@code true}
 * @throws IOException declared so that overriding implementations may report I/O failures;
 *         the default implementation does not throw it
 */
public default InputStream dataStream() throws IOException {
    if(!isTransient()) {
        return new ByteArrayInputStream(data());
    }
    throw new IllegalStateException("No data to stream!");
}

/**
* Checks if the data is transient, meaning the data is null or otherwise unavailable.
*
Expand Down
47 changes: 46 additions & 1 deletion src/main/java/com/wildermods/masshash/Hash.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,51 @@ public default boolean hashEquals(String hash) {
* @return a new {@link Hash} instance.
*/
public static Hash of(String hash) {
    // Fail fast: a null hash would otherwise only surface later, as a
    // NullPointerException from hashCode() or equals() on the returned object.
    if(hash == null) {
        throw new NullPointerException("hash");
    }
    // The result is deliberately a hash-only object rather than a Blob,
    // because Blob's canonical constructor requires non-null data.
    return new Hash() {

        @Override
        public String hash() {
            return hash;
        }

        @Override
        public int hashCode() {
            return hash.hashCode();
        }

        /**
         * Compares this object with another object for equality. All {@link IBlob} objects are also instances of {@link Hash}.
         * <p>
         * Two {@link Hash} objects are considered equal if their hashes are the same. This method compares
         * the hash of the other object with the hash of this object. If the other object is not an instance
         * of {@link Hash}, the method returns {@code false}.
         * </p>
         *
         * @param o the object to compare with this Hash object.
         * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise.
         */
        @Override
        public boolean equals(Object o) {
            return o instanceof Hash other && hash.equals(other.hash());
        }

        /**
         * Returns a string representation of this Hash, which is its hash value.
         *
         * @return the hash as a string.
         */
        @Override
        public String toString() {
            return hash;
        }

    };
}
}
14 changes: 7 additions & 7 deletions src/main/java/com/wildermods/masshash/Hasher.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public Hasher(final Stream<Path> files) throws IOException {
* before being added to the result map. The updated reference value will be associated with the computed hash.
* @throws IOException if an I/O error occurs during hashing
*/
public Hasher(final Stream<Path> files, final BiConsumer<Reference<Path>, Blob> forEachBlob) throws IOException {
public Hasher(final Stream<Path> files, final BiConsumer<Reference<Path>, IBlob> forEachBlob) throws IOException {
this(files, (p) -> true, forEachBlob);
}

Expand All @@ -112,7 +112,7 @@ public Hasher(final Stream<Path> files, final BiConsumer<Reference<Path>, Blob>
* @throws IOException if an I/O error occurs during hashing
* @throws IllegalArgumentException if no files match the predicate
*/
public Hasher(final Stream<Path> files, final Predicate<Path> predicate, final BiConsumer<Reference<Path>, Blob> forEachBlob) throws IOException {
public Hasher(final Stream<Path> files, final Predicate<Path> predicate, final BiConsumer<Reference<Path>, IBlob> forEachBlob) throws IOException {
this(files, Runtime.getRuntime().availableProcessors(), predicate, forEachBlob);
}

Expand Down Expand Up @@ -147,7 +147,7 @@ public Hasher(final Stream<Path> files, final Predicate<Path> predicate, final B
* @throws IOException if an error occurs while reading files or during thread execution
* @throws IllegalArgumentException if no files matched the provided predicate
*/
public Hasher(final Stream<Path> files, int threads, final Predicate<Path> predicate, final BiConsumer<Reference<Path>,Blob> forEachBlob) throws IOException {
public Hasher(final Stream<Path> files, int threads, final Predicate<Path> predicate, final BiConsumer<Reference<Path>,IBlob> forEachBlob) throws IOException {
final int processors = Runtime.getRuntime().availableProcessors();
Objects.requireNonNull(files);
Objects.requireNonNull(predicate);
Expand Down Expand Up @@ -214,12 +214,12 @@ public Hasher(final Stream<Path> files, int threads, final Predicate<Path> predi
for (Path file : sublist) {
Reference<Path> newFile = new Reference<>(file);
//Read and hash the file into a Blob, then discard the Blob’s data to conserve memory
Hash blob = new Blob(file);
forEachBlob.accept(newFile, (Blob) blob);
((Blob) blob).dropData();
IBlob blob = LightBlob.from(file);
forEachBlob.accept(newFile, (IBlob) blob);
Hash hash = blob.dropData();

//Group files by their content hash. Files with the same hash will share the same key
local.computeIfAbsent(blob, k -> new HashSet<>()).add(newFile.get());
local.computeIfAbsent(hash, k -> new HashSet<>()).add(newFile.get());
}
return local;
}));
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/com/wildermods/masshash/IBlob.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,13 @@ public interface IBlob extends Data, Hash {
* indicating data corruption or alteration.
*/
public void verify() throws IntegrityException;

/**
 * Returns a new {@link Hash} that carries only this blob's hash, without the associated data.
 * Despite the name, this object is not stripped of its data: the original blob still holds
 * the data for as long as you keep it referenced.
 *
 * @return A new {@link Hash} object that represents this blob, but with no associated data.
 */
public Hash dropData();

}
Loading
Loading