diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 56c1252..83b44b9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,7 @@ [versions] guava_version = "33.5.0-jre" -junit_version = "6.0.1" -log4j_version = "2.25.2" +junit_version = "6.0.2" +log4j_version = "2.25.3" [libraries] guava = { module = "com.google.guava:guava", version.ref = "guava_version" } diff --git a/src/main/java/com/wildermods/masshash/Blob.java b/src/main/java/com/wildermods/masshash/Blob.java index bd832cf..07bfa89 100644 --- a/src/main/java/com/wildermods/masshash/Blob.java +++ b/src/main/java/com/wildermods/masshash/Blob.java @@ -4,6 +4,7 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Objects; import com.wildermods.masshash.exception.IntegrityException; import com.wildermods.masshash.utils.ByteUtil; @@ -15,6 +16,11 @@ */ public record Blob(byte[] data, String hash) implements IBlob { + public Blob { + Objects.requireNonNull(data); + Objects.requireNonNull(hash); + } + /** * Constructs a Blob from the given data and computes its hash. * @@ -118,23 +124,7 @@ public Blob(InputStream stream, Hash hash) throws IOException, IntegrityExceptio * @return A new {@link Hash} object that represents this blob, but with no associated data. */ public Hash dropData() { - if(isTransient()) { - throw new UnsupportedOperationException("Data already dropped!"); - } - return new Blob((byte[])null, hash); - } - - /** - * Returns the data associated with this Blob - * - * @return a byte array that contains the data stored in this blob - */ - @Override - public byte[] data() { - if(data == null) { - throw new UnsupportedOperationException("Null data! Was the data dropped?"); - } - return data; + return Hash.of(hash()); } @Override @@ -143,7 +133,7 @@ public int hashCode() { } /** - * Compares this object with another Hash object for equality. All {@link Blob} objects are also instances of {@link Hash}. 
+ * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. *
+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, diff --git a/src/main/java/com/wildermods/masshash/Data.java b/src/main/java/com/wildermods/masshash/Data.java index 2a7b974..dcbdad3 100644 --- a/src/main/java/com/wildermods/masshash/Data.java +++ b/src/main/java/com/wildermods/masshash/Data.java @@ -1,5 +1,8 @@ package com.wildermods.masshash; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; /** @@ -15,6 +18,21 @@ public interface Data { */ public byte[] data(); + /** + * Returns an {@link InputStream} for reading the data. + * + * Default implementation wraps {@link #data()} in a {@link ByteArrayInputStream}. + * Classes that can stream data without holding it in memory should override this. + * + * @return an {@link InputStream} for the data + */ + public default InputStream dataStream() throws IOException { + if(isTransient()) { + throw new IllegalStateException("No data to stream!"); + } + return new ByteArrayInputStream(data()); + } + /** * Checks if the data is transient, meaning the data is null or otherwise unavailable. * diff --git a/src/main/java/com/wildermods/masshash/Hash.java b/src/main/java/com/wildermods/masshash/Hash.java index 6bb4893..866372d 100644 --- a/src/main/java/com/wildermods/masshash/Hash.java +++ b/src/main/java/com/wildermods/masshash/Hash.java @@ -41,6 +41,51 @@ public default boolean hashEquals(String hash) { * @return a new {@link Hash} instance. */ public static Hash of(String hash) { - return new Blob((byte[])null, hash); + return new Hash() { + + @Override + public String hash() { + return hash; + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. 
All {@link IBlob} objects are also instances of {@link Hash}. + *
+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *
+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); + } + return false; + } + + /** + * Returns a string representation of this Hash, which is its hash value. + *+ * This method overrides the default {@link Object#toString()} method to provide a more meaningful + * string representation of the Hash. + *
+ * + * @return the hash of the Blob as a string. + */ + @Override + public String toString() { + return hash(); + } + + }; } } \ No newline at end of file diff --git a/src/main/java/com/wildermods/masshash/Hasher.java b/src/main/java/com/wildermods/masshash/Hasher.java index 5913f0f..19be387 100644 --- a/src/main/java/com/wildermods/masshash/Hasher.java +++ b/src/main/java/com/wildermods/masshash/Hasher.java @@ -93,7 +93,7 @@ public Hasher(final Stream+ * This is particularly useful for large files or streams (e.g., files on disk, network streams), + * where reading the entire content into memory is undesirable. The hash of the data is always stored + * and can be verified without retaining the raw bytes. + *
+ */ +public record LightBlob(Supplier+ * Deprecated because reading the entire data into memory may be expensive for large streams. + * Prefer {@link #dataStream()} instead. + *
+ * + * @return the byte array of the blob + * @throws UncheckedIOException if reading the stream fails + */ + @Override + @Deprecated + public byte[] data() { + try (InputStream stream = dataStream()){ + return stream.readAllBytes(); + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Checks if this blob is transient, meaning the underlying stream cannot be opened. + *+ * This can occur if the file is deleted, the network stream fails, or any other I/O error + * prevents the stream from being accessed. + *
+ * + * @return {@code true} if the data stream cannot be opened, {@code false} otherwise + */ + @Override + public boolean isTransient() { + try (InputStream stream = streamSupplier.get()){ + return false; + } + catch(Exception e) { + return true; + } + } + + /** + * Returns a fresh {@link InputStream} for reading the blob's data. + *+ * Each call returns a new stream. The caller is responsible for closing it. + *
+ * + * @return a fresh {@link InputStream} for reading the blob's contents + * @throws IOException if the stream cannot be opened + */ + @Override + public InputStream dataStream() throws IOException { + try { + return streamSupplier.get(); + } + catch(Exception e) { + throw new IOException(e); + } + } + + /** + * Verifies that the data matches the provided hash. + *+ * This method computes the hash of the current data and compares it to the expected hash. If the hashes do not match, + * an {@link IntegrityException} is thrown. This method ensures the integrity of the data. + *
+ * + * @throws IntegrityException if the computed hash of the data does not match the expected hash. + */ + @Override + public void verify() throws IntegrityException { + try (InputStream stream = dataStream()){ + String actualHash = ByteUtil.hash(stream); + if(!actualHash.equals(hash)) { + throw new IntegrityException("Expected hash " + hash + " but got " + actualHash); + } + } + catch(IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Drops the data from the current object and returns a new Hash that represents the hash of this Blob. + * The original blob still holds the data for as long as you keep it referenced. + * + * @return A new {@link Hash} object that represents this blob, but with no associated data. + */ + @Override + public Hash dropData() { + return Hash.of(hash()); + } + + @Override + public int hashCode() { + return hash.hashCode(); + } + + /** + * Compares this object with another Hash object for equality. All {@link IBlob} objects are also instances of {@link Hash}. + *+ * Two {@link Hash} objects are considered equal if their hashes are the same. This method specifically compares + * the hash of the other object with the hash of this object. If the other object is not an instance of {@link Hash}, + * the method returns {@code false}. + *
+ * + * @param o the object to compare with this Hash object. + * @return {@code true} if the other object is a {@link Hash} and has the same hash; {@code false} otherwise. + */ + @Override + public boolean equals(Object o) { + if(o instanceof Hash) { + return hash().equals(((Hash) o).hash()); + } + return false; + } + + /** + * Returns a string representation of this Blob, which is its hash value. + *+ * This method overrides the default {@link Object#toString()} method to provide a more meaningful + * string representation of the Blob. + *
+ * + * @return the hash of the Blob as a string. + */ + @Override + public String toString() { + return hash(); + } + +} diff --git a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java index c430c8d..11d765f 100644 --- a/src/main/java/com/wildermods/masshash/utils/ByteUtil.java +++ b/src/main/java/com/wildermods/masshash/utils/ByteUtil.java @@ -1,5 +1,7 @@ package com.wildermods.masshash.utils; +import java.io.IOException; +import java.io.InputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Objects; @@ -29,6 +31,30 @@ public static String hash(byte[] bytes) { } } + /** + * Hashes the contents of an InputStream using SHA-1 without loading all bytes into memory. + * The stream is read sequentially in 1 MiB chunks. + * + * @param stream the InputStream to hash + * @return the hexadecimal SHA-1 hash + * @throws IOException if an I/O error occurs reading the stream + * @throws NullPointerException if the stream is null + */ + public static String hash(InputStream stream) throws IOException { + Objects.requireNonNull(stream, "InputStream cannot be null."); + try { + MessageDigest digest = MessageDigest.getInstance("SHA-1"); + byte[] buffer = new byte[1048576]; // 1 MiB buffer + int bytesRead; + while ((bytesRead = stream.read(buffer)) != -1) { + digest.update(buffer, 0, bytesRead); + } + return bytesToHex(digest.digest()); + } catch (NoSuchAlgorithmException e) { + throw new AssertionError("SHA-1 algorithm is unavailable.", e); + } + } + /** * Converts a byte array into a hexadecimal string representation. *
diff --git a/src/test/java/com/wildermods/masshash/BlobTests.java b/src/test/java/com/wildermods/masshash/BlobTests.java
index 0f92860..b560e33 100644
--- a/src/test/java/com/wildermods/masshash/BlobTests.java
+++ b/src/test/java/com/wildermods/masshash/BlobTests.java
@@ -3,21 +3,41 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
+import java.util.function.Supplier;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+
import org.junit.jupiter.api.Test;
import com.wildermods.masshash.exception.IntegrityException;
+import com.wildermods.masshash.utils.ByteUtil;
public class BlobTests {
+ private static final String testHash = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3";
+
private static final Blob testBlob = new Blob("test".getBytes());
private static final Blob testBlob2 = new Blob("test".getBytes());
+ private static final LightBlob lightBlob = new LightBlob(
+ (Supplier