From ac988fff01a9371d346a4a28e0112f8ac854ffc3 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Tue, 2 Sep 2025 19:05:30 +0530 Subject: [PATCH 01/50] * Add boilerplate --- ingest-v2/.gitignore | 49 +++++ ingest-v2/README.md | 38 ++++ ingest-v2/pom.xml | 185 ++++++++++++++++++ ingest-v2/src/main/kotlin/Application.kt | 13 ++ ingest-v2/src/main/kotlin/HTTP.kt | 20 ++ ingest-v2/src/main/kotlin/Routing.kt | 17 ++ ingest-v2/src/main/kotlin/Serialization.kt | 20 ++ ingest-v2/src/main/resources/application.yaml | 6 + ingest-v2/src/main/resources/logback.xml | 12 ++ ingest-v2/src/test/kotlin/ApplicationTest.kt | 21 ++ pom.xml | 6 +- 11 files changed, 384 insertions(+), 3 deletions(-) create mode 100644 ingest-v2/.gitignore create mode 100644 ingest-v2/README.md create mode 100644 ingest-v2/pom.xml create mode 100644 ingest-v2/src/main/kotlin/Application.kt create mode 100644 ingest-v2/src/main/kotlin/HTTP.kt create mode 100644 ingest-v2/src/main/kotlin/Routing.kt create mode 100644 ingest-v2/src/main/kotlin/Serialization.kt create mode 100644 ingest-v2/src/main/resources/application.yaml create mode 100644 ingest-v2/src/main/resources/logback.xml create mode 100644 ingest-v2/src/test/kotlin/ApplicationTest.kt diff --git a/ingest-v2/.gitignore b/ingest-v2/.gitignore new file mode 100644 index 000000000..4eb2a1222 --- /dev/null +++ b/ingest-v2/.gitignore @@ -0,0 +1,49 @@ +# Created by https://www.toptal.com/developers/gitignore/api/kotlin,maven +# Edit at https://www.toptal.com/developers/gitignore?templates=kotlin,maven + +### Kotlin ### +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* +replay_pid* + +### Maven ### +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +# https://github.com/takari/maven-wrapper#usage-without-binary-jar +.mvn/wrapper/maven-wrapper.jar + +# Eclipse m2e generated files +# Eclipse Core +.project +# JDT-specific (Eclipse Java Development Tools) +.classpath + +# End of https://www.toptal.com/developers/gitignore/api/kotlin,maven \ No newline at end of file diff --git a/ingest-v2/README.md b/ingest-v2/README.md new file mode 100644 index 000000000..d90c4540b --- /dev/null +++ b/ingest-v2/README.md @@ -0,0 +1,38 @@ +# ingest-v2 + +This project was created using the [Ktor Project Generator](https://start.ktor.io). + +Here are some useful links to get you started: + +- [Ktor Documentation](https://ktor.io/docs/home.html) +- [Ktor GitHub page](https://github.com/ktorio/ktor) +- The [Ktor Slack chat](https://app.slack.com/client/T09229ZC6/C0A974TJ9). You'll need to [request an invite](https://surveys.jetbrains.com/s3/kotlin-slack-sign-up) to join. 
+ +## Features + +Here's a list of features included in this project: + +| Name | Description | +| ------------------------------------------------------------------------|------------------------------------------------------------------------------------ | +| [Content Negotiation](https://start.ktor.io/p/content-negotiation) | Provides automatic content conversion according to Content-Type and Accept headers | +| [Routing](https://start.ktor.io/p/routing) | Provides a structured routing DSL | +| [kotlinx.serialization](https://start.ktor.io/p/kotlinx-serialization) | Handles JSON serialization using kotlinx.serialization library | +| [AsyncAPI](https://start.ktor.io/p/asyncapi) | Generates and serves AsyncAPI documentation | + +## Building & Running + +To build or run the project, use one of the following tasks: + +| Task | Description | +| --------------------------------------------------------------|------------------- | +| `mvn test` | Run the tests | +| `mvn package` | Build the project | +| `java -jar target/ingest-v2-0.0.1-jar-with-dependencies.jar` | Run the server | + +If the server starts successfully, you'll see the following output: + +``` +2024-12-04 14:32:45.584 [main] INFO Application - Application started in 0.303 seconds. +2024-12-04 14:32:45.682 [main] INFO Application - Responding at http://0.0.0.0:8080 +``` + diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml new file mode 100644 index 000000000..9ec6858b7 --- /dev/null +++ b/ingest-v2/pom.xml @@ -0,0 +1,185 @@ + + + 4.0.0 + com.microsoft.azure.kusto + ingest-v2 + 0.0.1 + ingest-v2 + ingest-v2 + + official + 2.1.10 + 3.2.3 + 3.1.1 + 1.4.14 + 2.0.9 + UTF-8 + true + io.ktor.server.netty.EngineMain + + + + + + io.ktor + ktor-server-content-negotiation-jvm + ${ktor_version} + + + io.ktor + ktor-server-core-jvm + ${ktor_version} + + + io.ktor + ktor-serialization-kotlinx-json-jvm + ${ktor_version} + + + org.openfolder + kotlin-asyncapi-ktor + ${ktor_async_api_version} + + + io.ktor + ktor-server-netty-jvm + ${ktor_version} + + + ch.qos.logback + logback-classic + ${logback_version} + + + org.slf4j + slf4j-api + ${slf4j_version} + + + io.ktor + ktor-server-config-yaml-jvm + ${ktor_version} + + + io.ktor + ktor-server-test-host-jvm + ${ktor_version} + test + + + org.jetbrains.kotlin + kotlin-test-junit + ${kotlin_version} + test + + + org.jetbrains.kotlinx + kotlinx-coroutines-debug + 1.6.4 + test + + + + ${project.basedir}/src/main/kotlin + ${project.basedir}/src/test/kotlin + + + ${project.basedir}/src/main/resources + + + + + + kotlin-maven-plugin + org.jetbrains.kotlin + ${kotlin_version} + + 1.8 + + + + compile + compile + + compile + + + + test-compile + test-compile + + test-compile + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + + + + java + + + + + ${main.class} + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.6 + + + jar-with-dependencies + + + + true + ${main.class} + + + + + + assemble-all + package + + single + + + + + + org.jetbrains.kotlin + kotlin-maven-plugin + ${kotlin_version} + + + compile + compile + + compile + + + + + + kotlinx-serialization + + + + + org.jetbrains.kotlin + kotlin-maven-serialization + ${kotlin_version} + + + + + + \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/Application.kt b/ingest-v2/src/main/kotlin/Application.kt new file mode 100644 index 000000000..bc09aa42c --- /dev/null +++ b/ingest-v2/src/main/kotlin/Application.kt @@ -0,0 +1,13 @@ +package com.microsoft.azure.kusto + +import io.ktor.server.application.* + +fun main(args: Array) { + 
io.ktor.server.netty.EngineMain.main(args) +} + +fun Application.module() { + configureSerialization() + configureHTTP() + configureRouting() +} diff --git a/ingest-v2/src/main/kotlin/HTTP.kt b/ingest-v2/src/main/kotlin/HTTP.kt new file mode 100644 index 000000000..07da277dc --- /dev/null +++ b/ingest-v2/src/main/kotlin/HTTP.kt @@ -0,0 +1,20 @@ +package com.microsoft.azure.kusto + +import com.asyncapi.kotlinasyncapi.context.service.AsyncApiExtension +import com.asyncapi.kotlinasyncapi.ktor.AsyncApiPlugin +import io.ktor.serialization.kotlinx.json.* +import io.ktor.server.application.* +import io.ktor.server.plugins.contentnegotiation.* +import io.ktor.server.response.* +import io.ktor.server.routing.* + +fun Application.configureHTTP() { + install(AsyncApiPlugin) { + extension = AsyncApiExtension.builder { + info { + title("Sample API") + version("1.0.0") + } + } + } +} diff --git a/ingest-v2/src/main/kotlin/Routing.kt b/ingest-v2/src/main/kotlin/Routing.kt new file mode 100644 index 000000000..c48c05c69 --- /dev/null +++ b/ingest-v2/src/main/kotlin/Routing.kt @@ -0,0 +1,17 @@ +package com.microsoft.azure.kusto + +import com.asyncapi.kotlinasyncapi.context.service.AsyncApiExtension +import com.asyncapi.kotlinasyncapi.ktor.AsyncApiPlugin +import io.ktor.serialization.kotlinx.json.* +import io.ktor.server.application.* +import io.ktor.server.plugins.contentnegotiation.* +import io.ktor.server.response.* +import io.ktor.server.routing.* + +fun Application.configureRouting() { + routing { + get("/") { + call.respondText("Hello World!") + } + } +} diff --git a/ingest-v2/src/main/kotlin/Serialization.kt b/ingest-v2/src/main/kotlin/Serialization.kt new file mode 100644 index 000000000..00d52586e --- /dev/null +++ b/ingest-v2/src/main/kotlin/Serialization.kt @@ -0,0 +1,20 @@ +package com.microsoft.azure.kusto + +import com.asyncapi.kotlinasyncapi.context.service.AsyncApiExtension +import com.asyncapi.kotlinasyncapi.ktor.AsyncApiPlugin +import io.ktor.serialization.kotlinx.json.* +import io.ktor.server.application.* +import io.ktor.server.plugins.contentnegotiation.* +import io.ktor.server.response.* +import io.ktor.server.routing.* + +fun Application.configureSerialization() { + install(ContentNegotiation) { + json() + } + routing { + get("/json/kotlinx-serialization") { + call.respond(mapOf("hello" to "world")) + } + } +} diff --git a/ingest-v2/src/main/resources/application.yaml b/ingest-v2/src/main/resources/application.yaml new file mode 100644 index 000000000..88e6eff80 --- /dev/null +++ b/ingest-v2/src/main/resources/application.yaml @@ -0,0 +1,6 @@ +ktor: + application: + modules: + - com.microsoft.azure.kusto.ApplicationKt.module + deployment: + port: 8080 diff --git a/ingest-v2/src/main/resources/logback.xml b/ingest-v2/src/main/resources/logback.xml new file mode 100644 index 000000000..aadef5d5b --- /dev/null +++ b/ingest-v2/src/main/resources/logback.xml @@ -0,0 +1,12 @@ + + + + %d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + \ No newline at end of file diff --git a/ingest-v2/src/test/kotlin/ApplicationTest.kt b/ingest-v2/src/test/kotlin/ApplicationTest.kt new file mode 100644 index 000000000..04731a58a --- /dev/null +++ b/ingest-v2/src/test/kotlin/ApplicationTest.kt @@ -0,0 +1,21 @@ +package com.microsoft.azure.kusto + +import io.ktor.client.request.* +import io.ktor.http.* +import io.ktor.server.testing.* +import kotlin.test.Test +import kotlin.test.assertEquals + +class ApplicationTest { + + @Test + fun testRoot() = testApplication { + application 
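+    // EngineMain (above) bootstraps Netty from application.yaml on the classpath,
+    // which wires in Application.module and the listening port (8080 in this project).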
{ + module() + } + client.get("/").apply { + assertEquals(HttpStatusCode.OK, status) + } + } + +} diff --git a/pom.xml b/pom.xml index 705519246..048e636a0 100644 --- a/pom.xml +++ b/pom.xml @@ -35,12 +35,12 @@ 8.0.0 UTF-8 11 - 1.2.28 + 1.2.37 1.7.36 - 3.14.0 + 3.18.0 1.17.0 4.5.14 3.6.11 @@ -63,12 +63,12 @@ 5.11.0 0.8.11 - ingest data samples quickstart + ingest-v2 From 40ffe449a3671d33cbb82ffa267f689d258be637 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Tue, 2 Sep 2025 21:25:54 +0530 Subject: [PATCH 02/50] * Add boilerplate and code generator --- ingest-v2/pom.xml | 72 +++++- ingest-v2/src/main/resources/openapi.yaml | 258 ++++++++++++++++++++++ 2 files changed, 325 insertions(+), 5 deletions(-) create mode 100644 ingest-v2/src/main/resources/openapi.yaml diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 9ec6858b7..9b8b5182f 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -2,14 +2,13 @@ 4.0.0 - com.microsoft.azure.kusto ingest-v2 - 0.0.1 + ${revision} ingest-v2 ingest-v2 official - 2.1.10 + 2.1.21 3.2.3 3.1.1 1.4.14 @@ -18,8 +17,12 @@ true io.ktor.server.netty.EngineMain - - + + kusto-client + com.microsoft.azure.kusto + + ${revision} + io.ktor @@ -61,6 +64,31 @@ ktor-server-config-yaml-jvm ${ktor_version} + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-base + ${fasterxml.jackson.core.version} + + + com.fasterxml.jackson.core + jackson-core + ${fasterxml.jackson.core.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${fasterxml.jackson.core.version} + + + com.fasterxml.jackson.core + jackson-databind + ${fasterxml.jackson.core.version} + + + org.openapitools + jackson-databind-nullable + 0.2.7 + io.ktor ktor-server-test-host-jvm @@ -180,6 +208,40 @@ + + + org.openapitools + openapi-generator-maven-plugin + + 7.15.0 + + + + default + + generate + + + + ${project.basedir}/src/main/resources/openapi.yaml + true + + kotlin + + + true + jvm-ktor + + + com.microsoft.azure.kusto.ingest.v2 + true + java8 + + jvm-ktor + + + + \ No newline at end of file diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml new file mode 100644 index 000000000..766fb6d4b --- /dev/null +++ b/ingest-v2/src/main/resources/openapi.yaml @@ -0,0 +1,258 @@ +openapi: 3.0.0 +info: + title: Kusto Ingest REST API + version: 0.0.1 + description: API for ingesting data into Kusto (excluding streaming ingest) +servers: + - description: A kusto cluster ingest endpoint + url: https://ingest-{clusterName}.kusto.windows.net + variables: + clusterName: + default: "mycluster.swedencentral" + description: The name of the Kusto cluster, including the region, e.g., "mycluster.swedencentral". 
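+            // Loads the real module() into Ktor's in-process test engine; no network port is bound.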
+paths: + /queued: + post: + summary: Submit an ingest request + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/IngestRequest' + responses: + '200': + description: Ingest operation accepted + content: + application/json: + schema: + $ref: '#/components/schemas/IngestResponse' + /configuration: + post: + summary: Get ingest configuration + security: + - BearerAuth: [] + responses: + '200': + description: Configuration response + content: + application/json: + schema: + $ref: '#/components/schemas/ConfigurationResponse' + /status: + post: + summary: Get status of an ingest operation + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/StatusRequest' + responses: + '200': + description: Status response + content: + application/json: + schema: + $ref: '#/components/schemas/StatusResponse' +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + schemas: + IngestRequest: + type: object + properties: + Timestamp: + type: string + format: date-time + Blobs: + type: array + items: + $ref: '#/components/schemas/Blob' + Properties: + type: object + description: Ingestion options for the request + properties: + format: + type: string + description: Data format (e.g., csv, tsv, scsv, sohsv, psv, txt, raw, tsve, json, singlejson, multijson, avro, parquet, sstream, orc, apacheavro, w3clogfile, azmonstream) + enum: + - csv + - tsv + - scsv + - sohsv + - psv + - txt + - raw + - tsve + - json + - singlejson + - multijson + - avro + - parquet + - sstream + - orc + - apacheavro + - w3clogfile + - azmonstream + minLength: 1 + enableTracking: + type: boolean + description: Enable tracking for the ingest operation + nullable: true + tags: + type: array + items: + type: string + description: List of tags for the ingest operation + nullable: true + ingestIfNotExists: + type: array + items: + type: string + description: Tags for ingest-if-not-exists + nullable: true + skipBatching: + type: boolean + description: Skip batching during ingestion + nullable: true + deleteAfterDownload: + type: boolean + description: Delete blob after download + nullable: true + mappingReference: + type: string + description: Reference to a named mapping policy + nullable: true + mapping: + type: string + description: Ingestion mapping object + nullable: true + validationPolicy: + type: string + description: Validation policy for ingestion + nullable: true + ignoreSizeLimit: + type: boolean + description: Ignore size limit during ingestion + nullable: true + ignoreFirstRecord: + type: boolean + description: Skip the first record (e.g., header) + nullable: true + ignoreLastRecordIfInvalid: + type: boolean + description: Ignore last record if invalid + nullable: true + creationTime: + type: string + format: date-time + description: Creation time for ingested data extents + nullable: true + zipPattern: + type: string + description: Regex pattern for selecting files in ZIP archive + nullable: true + extend_schema: + type: boolean + description: Extend table schema if needed + nullable: true + recreate_schema: + type: boolean + description: Recreate table schema if needed + nullable: true + required: + - format + additionalProperties: true + Blob: + type: object + properties: + Url: + type: string + SourceId: + type: string + RawSize: + type: integer + nullable: true + IngestResponse: + type: object + properties: + IngestionOperationId: 
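+  # Control-plane surface: submit a queued ingest, fetch ingest configuration, and poll operation status.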
+ type: string + ConfigurationResponse: + type: object + properties: + ContainerSettings: + $ref: '#/components/schemas/ContainerSettings' + IngestionSettings: + $ref: '#/components/schemas/IngestionSettings' + ContainerSettings: + type: object + properties: + Containers: + type: array + items: + $ref: '#/components/schemas/ContainerInfo' + LakeFolders: + type: array + items: + $ref: '#/components/schemas/ContainerInfo' + RefreshInterval: + type: string + PreferredUploadMethod: + type: string + nullable: true + IngestionSettings: + type: object + properties: + MaxBlobsPerBatch: + type: integer + MaxDataSize: + type: integer + PreferredIngestionMethod: + type: string + nullable: true + ContainerInfo: + type: object + properties: + Path: + type: string + StatusRequest: + type: object + properties: + Table: + type: string + Database: + type: string + OperationId: + type: string + Details: + type: boolean + StatusResponse: + type: object + properties: + StartTime: + type: string + format: date-time + LastUpdated: + type: string + format: date-time + Status: + $ref: '#/components/schemas/Status' + Details: + type: array + items: + $ref: '#/components/schemas/BlobStatus' + Status: + type: object + # Define properties as needed + BlobStatus: + type: object + # Define properties as needed From 72607226732d04aad12678ba7304ebefd3ade5e2 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Tue, 2 Sep 2025 21:37:51 +0530 Subject: [PATCH 03/50] * Add boilerplate and code generator --- ingest-v2/pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 9b8b5182f..6fbd86213 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -229,13 +229,14 @@ kotlin - true jvm-ktor com.microsoft.azure.kusto.ingest.v2 true java8 + true + jackson jvm-ktor From 9762b20391fc2cdcfccb1f33cf36364f28a0ce12 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Wed, 3 Sep 2025 07:51:49 +0530 Subject: [PATCH 04/50] * Edits to code --- data/pom.xml | 16 --------- ingest-v2/pom.xml | 36 ++------------------ ingest-v2/src/main/kotlin/Application.kt | 13 ------- ingest-v2/src/main/kotlin/HTTP.kt | 20 ----------- ingest-v2/src/main/kotlin/Routing.kt | 17 --------- ingest-v2/src/main/kotlin/Serialization.kt | 20 ----------- ingest-v2/src/test/kotlin/ApplicationTest.kt | 21 ------------ ingest/pom.xml | 26 ++++++-------- pom.xml | 20 ++++++++++- 9 files changed, 32 insertions(+), 157 deletions(-) delete mode 100644 ingest-v2/src/main/kotlin/Application.kt delete mode 100644 ingest-v2/src/main/kotlin/HTTP.kt delete mode 100644 ingest-v2/src/main/kotlin/Routing.kt delete mode 100644 ingest-v2/src/main/kotlin/Serialization.kt delete mode 100644 ingest-v2/src/test/kotlin/ApplicationTest.kt diff --git a/data/pom.xml b/data/pom.xml index 42f4bb66d..2cd5c012c 100644 --- a/data/pom.xml +++ b/data/pom.xml @@ -18,18 +18,6 @@ ${revision} - - - - com.azure - azure-sdk-bom - ${azure-bom-version} - pom - import - - - - @@ -186,12 +174,10 @@ jackson-databind com.fasterxml.jackson.core - ${fasterxml.jackson.core.version} jackson-annotations com.fasterxml.jackson.core - ${fasterxml.jackson.core.version} org.slf4j @@ -244,12 +230,10 @@ jackson-core com.fasterxml.jackson.core - ${fasterxml.jackson.core.version} com.fasterxml.jackson.datatype jackson-datatype-jsr310 - ${fasterxml.jackson.core.version} io.projectreactor diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 6fbd86213..ca6b75202 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -64,25 +64,17 @@ 
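+          # Correlation id for the submitted batch; callers return it as StatusRequest.OperationId when polling /status.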
ktor-server-config-yaml-jvm ${ktor_version} - - com.fasterxml.jackson.jaxrs - jackson-jaxrs-base - ${fasterxml.jackson.core.version} - com.fasterxml.jackson.core jackson-core - ${fasterxml.jackson.core.version} com.fasterxml.jackson.core jackson-annotations - ${fasterxml.jackson.core.version} com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson.core.version} org.openapitools @@ -116,32 +108,7 @@ ${project.basedir}/src/main/resources - - - kotlin-maven-plugin - org.jetbrains.kotlin - ${kotlin_version} - - 1.8 - - - - compile - compile - - compile - - - - test-compile - test-compile - - test-compile - - - - org.codehaus.mojo exec-maven-plugin @@ -234,9 +201,12 @@ com.microsoft.azure.kusto.ingest.v2 true + java8 + true jackson + true jvm-ktor diff --git a/ingest-v2/src/main/kotlin/Application.kt b/ingest-v2/src/main/kotlin/Application.kt deleted file mode 100644 index bc09aa42c..000000000 --- a/ingest-v2/src/main/kotlin/Application.kt +++ /dev/null @@ -1,13 +0,0 @@ -package com.microsoft.azure.kusto - -import io.ktor.server.application.* - -fun main(args: Array) { - io.ktor.server.netty.EngineMain.main(args) -} - -fun Application.module() { - configureSerialization() - configureHTTP() - configureRouting() -} diff --git a/ingest-v2/src/main/kotlin/HTTP.kt b/ingest-v2/src/main/kotlin/HTTP.kt deleted file mode 100644 index 07da277dc..000000000 --- a/ingest-v2/src/main/kotlin/HTTP.kt +++ /dev/null @@ -1,20 +0,0 @@ -package com.microsoft.azure.kusto - -import com.asyncapi.kotlinasyncapi.context.service.AsyncApiExtension -import com.asyncapi.kotlinasyncapi.ktor.AsyncApiPlugin -import io.ktor.serialization.kotlinx.json.* -import io.ktor.server.application.* -import io.ktor.server.plugins.contentnegotiation.* -import io.ktor.server.response.* -import io.ktor.server.routing.* - -fun Application.configureHTTP() { - install(AsyncApiPlugin) { - extension = AsyncApiExtension.builder { - info { - title("Sample API") - version("1.0.0") - } - } - } -} diff --git a/ingest-v2/src/main/kotlin/Routing.kt b/ingest-v2/src/main/kotlin/Routing.kt deleted file mode 100644 index c48c05c69..000000000 --- a/ingest-v2/src/main/kotlin/Routing.kt +++ /dev/null @@ -1,17 +0,0 @@ -package com.microsoft.azure.kusto - -import com.asyncapi.kotlinasyncapi.context.service.AsyncApiExtension -import com.asyncapi.kotlinasyncapi.ktor.AsyncApiPlugin -import io.ktor.serialization.kotlinx.json.* -import io.ktor.server.application.* -import io.ktor.server.plugins.contentnegotiation.* -import io.ktor.server.response.* -import io.ktor.server.routing.* - -fun Application.configureRouting() { - routing { - get("/") { - call.respondText("Hello World!") - } - } -} diff --git a/ingest-v2/src/main/kotlin/Serialization.kt b/ingest-v2/src/main/kotlin/Serialization.kt deleted file mode 100644 index 00d52586e..000000000 --- a/ingest-v2/src/main/kotlin/Serialization.kt +++ /dev/null @@ -1,20 +0,0 @@ -package com.microsoft.azure.kusto - -import com.asyncapi.kotlinasyncapi.context.service.AsyncApiExtension -import com.asyncapi.kotlinasyncapi.ktor.AsyncApiPlugin -import io.ktor.serialization.kotlinx.json.* -import io.ktor.server.application.* -import io.ktor.server.plugins.contentnegotiation.* -import io.ktor.server.response.* -import io.ktor.server.routing.* - -fun Application.configureSerialization() { - install(ContentNegotiation) { - json() - } - routing { - get("/json/kotlinx-serialization") { - call.respond(mapOf("hello" to "world")) - } - } -} diff --git a/ingest-v2/src/test/kotlin/ApplicationTest.kt 
b/ingest-v2/src/test/kotlin/ApplicationTest.kt deleted file mode 100644 index 04731a58a..000000000 --- a/ingest-v2/src/test/kotlin/ApplicationTest.kt +++ /dev/null @@ -1,21 +0,0 @@ -package com.microsoft.azure.kusto - -import io.ktor.client.request.* -import io.ktor.http.* -import io.ktor.server.testing.* -import kotlin.test.Test -import kotlin.test.assertEquals - -class ApplicationTest { - - @Test - fun testRoot() = testApplication { - application { - module() - } - client.get("/").apply { - assertEquals(HttpStatusCode.OK, status) - } - } - -} diff --git a/ingest/pom.xml b/ingest/pom.xml index 88681863f..9c9aeae48 100644 --- a/ingest/pom.xml +++ b/ingest/pom.xml @@ -18,18 +18,6 @@ ${revision} - - - - com.azure - azure-sdk-bom - ${azure-bom-version} - pom - import - - - - @@ -180,7 +168,6 @@ com.azure azure-core - org.slf4j slf4j-api @@ -189,7 +176,6 @@ com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson.core.version} jackson-annotations @@ -200,7 +186,6 @@ com.fasterxml.jackson.core jackson-annotations - ${fasterxml.jackson.core.version} com.univocity @@ -247,7 +232,16 @@ annotations ${annotations.version} - + + io.github.resilience4j + resilience4j-retry + ${resilience4j.version} + + + io.vavr + vavr + ${io.vavr.version} + io.projectreactor reactor-test diff --git a/pom.xml b/pom.xml index 048e636a0..307da5599 100644 --- a/pom.xml +++ b/pom.xml @@ -44,7 +44,7 @@ 1.17.0 4.5.14 3.6.11 - 2.16.0 + 2.19.2 2.9.1 1.77 @@ -111,4 +111,22 @@ + + + + com.azure + azure-sdk-bom + ${azure-bom-version} + pom + import + + + com.fasterxml.jackson + jackson-bom + ${fasterxml.jackson.core.version} + import + pom + + + From 41baae2b20f78be0c50c960ec51623e4933296fe Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Wed, 3 Sep 2025 17:58:25 +0530 Subject: [PATCH 05/50] * Edits to code --- ingest-v2/pom.xml | 26 +- .../kusto/ingest/v2/KustoIngestClient.kt | 76 ++++++ .../ingest/v2/common/ConfigurationCache.kt | 11 + .../ingest/v2/common/IngestRetryPolicy.kt | 58 +++++ .../auth/KustoTokenCredentialsProvider.kt | 25 ++ .../ingest/v2/common/models/ClientDetails.kt | 3 + .../v2/common/models/KustoTokenCredentials.kt | 21 ++ ingest-v2/src/main/resources/openapi.yaml | 246 +++++++++++++----- 8 files changed, 372 insertions(+), 94 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index ca6b75202..7a6032778 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -26,12 +26,12 @@ io.ktor - ktor-server-content-negotiation-jvm + ktor-client-auth-jvm ${ktor_version} io.ktor - ktor-server-core-jvm + ktor-client-content-negotiation-jvm ${ktor_version} @@ -39,31 +39,11 @@ ktor-serialization-kotlinx-json-jvm ${ktor_version} - - org.openfolder - kotlin-asyncapi-ktor - ${ktor_async_api_version} - - - io.ktor - ktor-server-netty-jvm - ${ktor_version} - - - ch.qos.logback - logback-classic - ${logback_version} - org.slf4j - 
slf4j-api + slf4j-simple ${slf4j_version} - - io.ktor - ktor-server-config-yaml-jvm - ${ktor_version} - com.fasterxml.jackson.core jackson-core diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt new file mode 100644 index 000000000..890502816 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt @@ -0,0 +1,76 @@ +package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.common.SimpleRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.common.auth.KustoTokenCredentialsProvider +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials +import io.ktor.client.* +import io.ktor.client.plugins.* +import io.ktor.client.plugins.auth.* +import io.ktor.client.plugins.auth.providers.* +import io.ktor.client.plugins.contentnegotiation.* +import io.ktor.client.request.* +import io.ktor.serialization.kotlinx.json.* +import io.ktor.client.statement.bodyAsText +import io.ktor.client.statement.HttpResponse +import io.ktor.http.HttpMethod +import kotlinx.serialization.json.Json +import kotlinx.serialization.decodeFromString +import kotlin.reflect.KClass +import kotlinx.coroutines.CancellationException + +import java.net.URI + +open class KustoIngestClient( + val clusterUrl: String, + val clientDetails: ClientDetails, + val kustoTokenCredentials: KustoTokenCredentials?, + retryPolicy: IngestRetryPolicy? = null, + val skipSecurityChecks: Boolean = false +) { + val retryPolicy: IngestRetryPolicy = retryPolicy ?: SimpleRetryPolicy() + private var authInitialized = false + private var audience: String = "https://kusto.kusto.windows.net" + + + init { + if (!skipSecurityChecks) { + val uri = URI(clusterUrl) + val scheme = uri.scheme?.lowercase() + if (!(scheme == "https" || (scheme == "http" && kustoTokenCredentials != null))) { + throw IllegalArgumentException("The provided endpoint is not a valid endpoint") + } + } + } + + + protected val setupConfig: suspend (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } + + private suspend fun getClientConfig(config: HttpClientConfig<*>) { + config.install(DefaultRequest) { + header("Content-Type", "application/json") + } + + kustoTokenCredentials!!.tokenValue?.let { bearerToken -> + config.install(Auth) { + bearer { + loadTokens { BearerTokens(bearerToken, refreshToken = "") } + } + } + } + config.install(ContentNegotiation) { + json() + } + } + + // Authenticates the request by setting the Authorization header using the token provider. 
+ suspend fun authenticate(request: HttpRequestBuilder) { + if (kustoTokenCredentials == null) return + if (!authInitialized) { + authInitialized = true + // For now, use a constant for the audience as a placeholder + } + request.headers.append("Authorization", "Bearer ${kustoTokenCredentials.tokenValue}") + } +} \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt new file mode 100644 index 000000000..09bb0c9c9 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -0,0 +1,11 @@ +package com.microsoft.azure.kusto.ingest.v2.common + +//import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import java.time.Duration + +interface ConfigurationCache { + val refreshInterval: Duration + + /** Gets the configuration response data. */ +// fun getConfiguration(): ConfigurationResponse +} \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt new file mode 100644 index 000000000..dc3965dd2 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -0,0 +1,58 @@ +package com.microsoft.azure.kusto.ingest.v2.common + +import java.time.Duration + +/** + * Represents a retry policy for ingesting data into Kusto. + */ +interface IngestRetryPolicy { + /** + * Determines whether the operation should be retried based on the retryNumber. + * @param retryNumber The retry attempt number (zero-based). + * @return Pair of (shouldRetry, retryInterval) + */ + fun next(retryNumber: Int): Pair +} + +/** + * No retries will be attempted. + */ +object NoRetryPolicy : IngestRetryPolicy { + override fun next(retryNumber: Int): Pair = Pair(false, Duration.ZERO) +} + +/** + * Simple retry policy with a constant duration between retry attempts. + */ +class SimpleRetryPolicy( + val intervalDuration: Duration = Duration.ofSeconds(10), + val totalRetries: Int = 3 +) : IngestRetryPolicy { + init { + require(totalRetries > 0) { "totalRetries must be positive" } + } + override fun next(retryNumber: Int): Pair { + require(retryNumber >= 0) { "retryNumber must be non-negative" } + return if (retryNumber < totalRetries) Pair(true, intervalDuration) + else Pair(false, Duration.ZERO) + } +} + +/** + * Custom retry policy with a collection of interval durations between retry attempts. 
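+        // No-op when credentials are absent; otherwise the raw token value is attached.
+        // Expiry-based refresh is not implemented yet and the audience is a placeholder constant.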
+ */ +class CustomRetryPolicy( + val intervalDurations: List = listOf( + Duration.ofSeconds(1), + Duration.ofSeconds(3), + Duration.ofSeconds(7) + ) +) : IngestRetryPolicy { + override fun next(retryNumber: Int): Pair { + return if (retryNumber < intervalDurations.size) { + Pair(true, intervalDurations[retryNumber]) + } else { + Pair(false, Duration.ZERO) + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt new file mode 100644 index 000000000..b335dd423 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt @@ -0,0 +1,25 @@ +package com.microsoft.azure.kusto.ingest.v2.common.auth + +import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials +import java.util.concurrent.CompletableFuture + +interface KustoTokenCredentialsProvider { + /** + * Retrieves (or creates) a [KustoTokenCredentials] object for [targetResource]. + * @param targetResource The target resource for which the credentials are needed. + * @return The [KustoTokenCredentials] concrete object to use when accessing the target resource. + */ + suspend fun getCredentialsAsync(targetResource: String): KustoTokenCredentials + + /** + * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId]. + * Note this API is NOT always supported. Make sure the implementation you use supports this API. + */ + suspend fun getCredentialsAsync(targetResource: String, tenantId: String): KustoTokenCredentials + + /** + * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId] with retries. + * Note this API is NOT always supported. Make sure the implementation you use supports this API. + */ + suspend fun getCredentialsAsync(targetResource: String, retries: Int, tenantId: String? = null): KustoTokenCredentials +} \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt new file mode 100644 index 000000000..dd12af9d5 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt @@ -0,0 +1,3 @@ +package com.microsoft.azure.kusto.ingest.v2.common.models + +data class ClientDetails(val name: String, val version: String) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt new file mode 100644 index 000000000..b0e8b763a --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt @@ -0,0 +1,21 @@ +package com.microsoft.azure.kusto.ingest.v2.common.models + +import java.time.LocalDateTime + +/** + * Represents a token credentials holder, capable (at least) of authenticating over an HTTPS "Authorization" header. + */ +data class KustoTokenCredentials( + val tokenScheme: String? = null, + val tokenValue: String? = null, + val expiresOn: LocalDateTime? = null +) { + /** + * Returns the secure representation of this instance. 
+ */ + fun toSecureString(): String { + return "${this::class.simpleName}:$tokenScheme:*****" + } + + override fun toString(): String = toSecureString() +} \ No newline at end of file diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml index 766fb6d4b..8236b20c4 100644 --- a/ingest-v2/src/main/resources/openapi.yaml +++ b/ingest-v2/src/main/resources/openapi.yaml @@ -11,11 +11,22 @@ servers: default: "mycluster.swedencentral" description: The name of the Kusto cluster, including the region, e.g., "mycluster.swedencentral". paths: - /queued: + /v1/rest/ingestion/queued/{database}/{table}: post: summary: Submit an ingest request security: - - BearerAuth: [] + - BearerAuth: [ ] + parameters: + - name: database + in: path + required: true + schema: + type: string + - name: table + in: path + required: true + schema: + type: string requestBody: required: true content: @@ -29,11 +40,11 @@ paths: application/json: schema: $ref: '#/components/schemas/IngestResponse' - /configuration: - post: + /v1/rest/ingestion/configuration: + get: summary: Get ingest configuration security: - - BearerAuth: [] + - BearerAuth: [ ] responses: '200': description: Configuration response @@ -41,17 +52,33 @@ paths: application/json: schema: $ref: '#/components/schemas/ConfigurationResponse' - /status: - post: + /v1/rest/ingestion/queued/{database}/{table}/{operationId}: + get: summary: Get status of an ingest operation + parameters: + - name: database + in: path + required: true + schema: + type: string + - name: table + in: path + required: true + schema: + type: string + - name: operationId + in: path + required: true + schema: + type: string + - name: details + in: query + required: false + schema: + type: boolean + default: false security: - - BearerAuth: [] - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/StatusRequest' + - BearerAuth: [ ] responses: '200': description: Status response @@ -59,6 +86,54 @@ paths: application/json: schema: $ref: '#/components/schemas/StatusResponse' + /v1/rest/ingest/{database}/{table}: + servers: + - url: https://{clusterName}.kusto.windows.net + variables: + clusterName: + default: "mycluster.swedencentral" + description: The name of the Kusto cluster, including the region, e.g., "mycluster.swedencentral". 
+ post: + summary: Streaming Ingest + security: + - BearerAuth: [ ] + parameters: + - name: database + in: path + required: true + schema: + type: string + - name: table + in: path + required: true + schema: + type: string + - name: streamFormat + in: query + required: true + schema: + $ref: '#/components/schemas/Format' + - name: mappingName + in: query + required: false + schema: + type: string + - name: sourceKind + in: query + required: false + schema: + type: string + enum: + - uri + responses: + '200': + description: Ingestion mappings response + requestBody: + required: true + content: + application/octet-stream: + schema: + format: binary components: securitySchemes: BearerAuth: @@ -66,43 +141,44 @@ components: scheme: bearer bearerFormat: JWT schemas: + Format: + type: string + description: Data format (e.g., csv, tsv, scsv, sohsv, psv, txt, raw, tsve, json, singlejson, multijson, avro, parquet, sstream, orc, apacheavro, w3clogfile, azmonstream) + enum: + - csv + - tsv + - scsv + - sohsv + - psv + - txt + - raw + - tsve + - json + - singlejson + - multijson + - avro + - parquet + - sstream + - orc + - apacheavro + - w3clogfile + - azmonstream IngestRequest: type: object properties: - Timestamp: + timestamp: type: string format: date-time - Blobs: + blobs: type: array items: $ref: '#/components/schemas/Blob' - Properties: + properties: type: object description: Ingestion options for the request properties: format: - type: string - description: Data format (e.g., csv, tsv, scsv, sohsv, psv, txt, raw, tsve, json, singlejson, multijson, avro, parquet, sstream, orc, apacheavro, w3clogfile, azmonstream) - enum: - - csv - - tsv - - scsv - - sohsv - - psv - - txt - - raw - - tsve - - json - - singlejson - - multijson - - avro - - parquet - - sstream - - orc - - apacheavro - - w3clogfile - - azmonstream - minLength: 1 + $ref: '#/components/schemas/Format' enableTracking: type: boolean description: Enable tracking for the ingest operation @@ -174,85 +250,113 @@ components: Blob: type: object properties: - Url: + url: type: string - SourceId: + sourceId: type: string - RawSize: + rawSize: type: integer nullable: true IngestResponse: type: object properties: - IngestionOperationId: + ingestionOperationId: type: string ConfigurationResponse: type: object properties: - ContainerSettings: + containerSettings: $ref: '#/components/schemas/ContainerSettings' - IngestionSettings: + ingestionSettings: $ref: '#/components/schemas/IngestionSettings' ContainerSettings: type: object properties: - Containers: + containers: type: array items: $ref: '#/components/schemas/ContainerInfo' - LakeFolders: + lakeFolders: type: array items: $ref: '#/components/schemas/ContainerInfo' - RefreshInterval: + refreshInterval: type: string - PreferredUploadMethod: + preferredUploadMethod: type: string nullable: true IngestionSettings: type: object properties: - MaxBlobsPerBatch: + maxBlobsPerBatch: type: integer - MaxDataSize: + maxDataSize: type: integer - PreferredIngestionMethod: + preferredIngestionMethod: type: string nullable: true ContainerInfo: type: object properties: - Path: - type: string - StatusRequest: - type: object - properties: - Table: - type: string - Database: + path: type: string - OperationId: - type: string - Details: - type: boolean StatusResponse: type: object properties: - StartTime: + startTime: type: string format: date-time - LastUpdated: + lastUpdated: type: string format: date-time - Status: - $ref: '#/components/schemas/Status' - Details: + status: + $ref: "#/components/schemas/Status" + 
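+      # Served by the engine host ({clusterName} without the ingest- prefix), per the servers override above.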
details: type: array items: $ref: '#/components/schemas/BlobStatus' Status: type: object - # Define properties as needed + properties: + succeeded: + type: integer + failed: + type: integer + inProgress: + type: integer + canceled: + type: integer + BlobStatus: type: object - # Define properties as needed + properties: + sourceId: + type: string + status: + type: string + enum: + - Queued + - InProgress + - Succeeded + - Failed + - Canceled + startedAt: + type: string + format: date-time + lastUpdateTime: + type: string + format: date-time + errorCode: + type: string + nullable: true + failureStatus: + type: string + enum: + - Unknown + - Permanent + - Transient + - Exhausted + nullable: true + details: + type: string + nullable: true From 73af48be69119d685cbd91add7584f989eca37af Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Mon, 8 Sep 2025 18:31:35 +0530 Subject: [PATCH 06/50] * Move code forward --- ingest-v2/pom.xml | 105 ++++++++- .../ingest/v2/ConfigurationApiWrapper.kt | 39 ++++ .../kusto/ingest/v2/KustoBaseApiClient.kt | 52 +++++ .../kusto/ingest/v2/KustoIngestClient.kt | 76 ------- .../ingest/v2/common/ConfigurationCache.kt | 44 +++- .../ingest/v2/common/IngestRetryPolicy.kt | 65 +++--- .../ingest/v2/common/RetryPolicyExtensions.kt | 73 +++++++ .../auth/AzCliTokenCredentialsProvider.kt | 29 +++ ...rovider.kt => TokenCredentialsProvider.kt} | 21 +- .../v2/common/exceptions/IngestException.kt | 205 ++++++++++++++++++ .../ingest/v2/common/models/ClientDetails.kt | 1 + .../v2/common/models/KustoTokenCredentials.kt | 15 +- .../kusto/ingest/v2/common/utils/PathUtils.kt | 71 ++++++ .../ingest/v2/container/ContainerBase.kt | 7 + .../v2/container/UploadContainerBase.kt | 8 + .../kusto/ingest/v2/source/BlobSource.kt | 35 +++ .../kusto/ingest/v2/source/CompressionType.kt | 13 ++ .../ingest/v2/source/DataSourceFormat.kt | 56 +++++ .../kusto/ingest/v2/source/IngestionSource.kt | 25 +++ .../kusto/ingest/v2/source/LocalSource.kt | 52 +++++ .../ingest/v2/ConfigurationApiWrapperTest.kt | 55 +++++ .../v2/common/RetryPolicyExtensionsTest.kt | 31 +++ 22 files changed, 932 insertions(+), 146 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt rename ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/{KustoTokenCredentialsProvider.kt => TokenCredentialsProvider.kt} (59%) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt create mode 100644 
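+          # Per-blob BlobStatus entries; pairs with the details=true query parameter on the status call.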
ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 7a6032778..c2a7b54fc 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -13,6 +13,7 @@ 3.1.1 1.4.14 2.0.9 + 2.46.1 UTF-8 true io.ktor.server.netty.EngineMain @@ -39,23 +40,41 @@ ktor-serialization-kotlinx-json-jvm ${ktor_version} + + io.ktor + ktor-serialization-jackson + ${ktor_version} + org.slf4j slf4j-simple ${slf4j_version} - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-databind + com.azure + azure-identity + + + + + + + + + + + + + + + + + + + + + org.openapitools jackson-databind-nullable @@ -79,6 +98,12 @@ 1.6.4 test + + io.mockk + mockk-jvm + 1.14.5 + test + ${project.basedir}/src/main/kotlin @@ -185,7 +210,7 @@ java8 true - jackson + kotlinx_serialization true jvm-ktor @@ -193,6 +218,64 @@ + + com.diffplug.spotless + spotless-maven-plugin + ${spotless.version} + + + + + src/main/kotlin/**/*.kt + src/test/kotlin/**/*.kt + + + + 0.51 + + 120 + 4 + 8 + false + true + + + 1.0.0 + + true + true + + intellij_idea + + + io.nlopez.compose.rules:ktlint:0.4.25 + + + + /* (C)$YEAR */ + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.6.1 + + + add-openapi-generated-sources + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/openapi/src/main/kotlin + + + + + \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt new file mode 100644 index 000000000..22055668f --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -0,0 +1,39 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi +import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse + +class ConfigurationApiWrapper( + override val clusterUrl: String, + val tokenCredentialsProvider: TokenCredentialsProvider, + override val skipSecurityChecks: Boolean = false, + // Allow injection for testing + private val configurationApi: DefaultApi? 
= null, +) : KustoBaseApiClient(clusterUrl, tokenCredentialsProvider, skipSecurityChecks) { + // Add Logging using slf4j + private val logger = org.slf4j.LoggerFactory.getLogger(ConfigurationApiWrapper::class.java) + private val api: DefaultApi = + configurationApi ?: DefaultApi(baseUrl = "$clusterUrl/v1/rest/ingest", httpClientConfig = setupConfig) + + suspend fun getConfigurationDetails(): ConfigurationResponse { + val configurationHttpResponse: HttpResponse = api.v1RestIngestionConfigurationGet() + if (configurationHttpResponse.success) { + logger.info( + "Successfully retrieved configuration details from $clusterUrl with status: ${configurationHttpResponse.status}", + ) + logger.debug("Configuration details: {}", configurationHttpResponse.body()) + return configurationHttpResponse.body() + } else { + logger.error( + "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, Body: ${configurationHttpResponse.body()}", + ) + throw IngestException( + "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, Body: ${configurationHttpResponse.body()}", + ) + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt new file mode 100644 index 000000000..3599d5e33 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -0,0 +1,52 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider +import io.ktor.client.HttpClientConfig +import io.ktor.client.plugins.DefaultRequest +import io.ktor.client.plugins.auth.Auth +import io.ktor.client.plugins.auth.providers.BearerTokens +import io.ktor.client.plugins.auth.providers.bearer +import io.ktor.client.plugins.contentnegotiation.ContentNegotiation +import io.ktor.client.request.header +import io.ktor.serialization.kotlinx.json.json +import java.net.URI + +open class KustoBaseApiClient( + open val clusterUrl: String, + open val tokenCredentialsProvider: TokenCredentialsProvider, + open val skipSecurityChecks: Boolean = false, +) { + init { + if (!skipSecurityChecks) { + val uri = URI(clusterUrl) + val scheme = uri.scheme?.lowercase() + if (scheme != "https") { + throw IllegalArgumentException("The provided endpoint is not a valid endpoint") + } + } + } + + protected val setupConfig: (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } + + private fun getClientConfig(config: HttpClientConfig<*>) { + config.install(DefaultRequest) { header("Content-Type", "application/json") } + config.install(Auth) { + bearer { + loadTokens { + // Always null so refreshTokens is always called + tokenCredentialsProvider.getCredentialsAsync(clusterUrl).tokenValue?.let { + BearerTokens(accessToken = it, refreshToken = null) + } + } + refreshTokens { + // Always null so refreshTokens is always called + tokenCredentialsProvider.getCredentialsAsync(clusterUrl).tokenValue?.let { + BearerTokens(accessToken = it, refreshToken = null) + } + } + } + } + config.install(ContentNegotiation) { json() } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt deleted file mode 100644 index 890502816..000000000 --- 
a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoIngestClient.kt +++ /dev/null @@ -1,76 +0,0 @@ -package com.microsoft.azure.kusto.ingest.v2 - -import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy -import com.microsoft.azure.kusto.ingest.v2.common.SimpleRetryPolicy -import com.microsoft.azure.kusto.ingest.v2.common.auth.KustoTokenCredentialsProvider -import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails -import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials -import io.ktor.client.* -import io.ktor.client.plugins.* -import io.ktor.client.plugins.auth.* -import io.ktor.client.plugins.auth.providers.* -import io.ktor.client.plugins.contentnegotiation.* -import io.ktor.client.request.* -import io.ktor.serialization.kotlinx.json.* -import io.ktor.client.statement.bodyAsText -import io.ktor.client.statement.HttpResponse -import io.ktor.http.HttpMethod -import kotlinx.serialization.json.Json -import kotlinx.serialization.decodeFromString -import kotlin.reflect.KClass -import kotlinx.coroutines.CancellationException - -import java.net.URI - -open class KustoIngestClient( - val clusterUrl: String, - val clientDetails: ClientDetails, - val kustoTokenCredentials: KustoTokenCredentials?, - retryPolicy: IngestRetryPolicy? = null, - val skipSecurityChecks: Boolean = false -) { - val retryPolicy: IngestRetryPolicy = retryPolicy ?: SimpleRetryPolicy() - private var authInitialized = false - private var audience: String = "https://kusto.kusto.windows.net" - - - init { - if (!skipSecurityChecks) { - val uri = URI(clusterUrl) - val scheme = uri.scheme?.lowercase() - if (!(scheme == "https" || (scheme == "http" && kustoTokenCredentials != null))) { - throw IllegalArgumentException("The provided endpoint is not a valid endpoint") - } - } - } - - - protected val setupConfig: suspend (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } - - private suspend fun getClientConfig(config: HttpClientConfig<*>) { - config.install(DefaultRequest) { - header("Content-Type", "application/json") - } - - kustoTokenCredentials!!.tokenValue?.let { bearerToken -> - config.install(Auth) { - bearer { - loadTokens { BearerTokens(bearerToken, refreshToken = "") } - } - } - } - config.install(ContentNegotiation) { - json() - } - } - - // Authenticates the request by setting the Authorization header using the token provider. 
- suspend fun authenticate(request: HttpRequestBuilder) { - if (kustoTokenCredentials == null) return - if (!authInitialized) { - authInitialized = true - // For now, use a constant for the audience as a placeholder - } - request.headers.append("Authorization", "Bearer ${kustoTokenCredentials.tokenValue}") - } -} \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 09bb0c9c9..1c5c5ecae 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -1,11 +1,45 @@ +/* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2.common -//import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import java.lang.AutoCloseable import java.time.Duration +import kotlin.text.compareTo -interface ConfigurationCache { +interface ConfigurationCache : AutoCloseable { val refreshInterval: Duration - /** Gets the configuration response data. */ -// fun getConfiguration(): ConfigurationResponse -} \ No newline at end of file + suspend fun getConfiguration(): ConfigurationResponse + + override fun close() +} + +class DefaultConfigurationCache( + override val refreshInterval: Duration = Duration.ofHours(1), + private val configurationProvider: suspend () -> ConfigurationResponse, +) : ConfigurationCache { + @Volatile private var cachedConfiguration: ConfigurationResponse? = null + private var lastRefreshTime: Long = 0 + + override suspend fun getConfiguration(): ConfigurationResponse { + val currentTime = System.currentTimeMillis() + val needsRefresh = cachedConfiguration == null || (currentTime - lastRefreshTime) >= refreshInterval.toMillis() + if (needsRefresh) { + val newConfig = runCatching { configurationProvider() }.getOrElse { cachedConfiguration ?: throw it } + synchronized(this) { + // Double-check in case another thread refreshed while we were waiting + val stillNeedsRefresh = + cachedConfiguration == null || (currentTime - lastRefreshTime) >= refreshInterval.toMillis() + if (stillNeedsRefresh) { + cachedConfiguration = newConfig + lastRefreshTime = currentTime + } + } + } + return cachedConfiguration!! + } + + override fun close() { + // No resources to clean up in this implementation + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index dc3965dd2..fc2acda98 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -1,58 +1,49 @@ +/* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2.common import java.time.Duration -/** - * Represents a retry policy for ingesting data into Kusto. - */ interface IngestRetryPolicy { /** - * Determines whether the operation should be retried based on the retryNumber. - * @param retryNumber The retry attempt number (zero-based). - * @return Pair of (shouldRetry, retryInterval) + * Determines whether the operation should be retried based on the retryNumber. Returns a Pair + * indicating whether to retry and the duration of the retry interval. 
*/ - fun next(retryNumber: Int): Pair + fun moveNext(retryNumber: UInt): Pair } -/** - * No retries will be attempted. - */ object NoRetryPolicy : IngestRetryPolicy { - override fun next(retryNumber: Int): Pair = Pair(false, Duration.ZERO) + override fun moveNext(retryNumber: UInt): Pair { + return Pair(false, Duration.ZERO) + } } -/** - * Simple retry policy with a constant duration between retry attempts. - */ -class SimpleRetryPolicy( - val intervalDuration: Duration = Duration.ofSeconds(10), - val totalRetries: Int = 3 -) : IngestRetryPolicy { +class SimpleRetryPolicy(val intervalDuration: Duration = Duration.ofSeconds(10), val totalRetries: Int = 3) : + IngestRetryPolicy { init { require(totalRetries > 0) { "totalRetries must be positive" } } - override fun next(retryNumber: Int): Pair { - require(retryNumber >= 0) { "retryNumber must be non-negative" } - return if (retryNumber < totalRetries) Pair(true, intervalDuration) - else Pair(false, Duration.ZERO) + + override fun moveNext(retryNumber: UInt): Pair { + require(retryNumber > 0u) { "retryNumber must be positive" } + if (retryNumber >= totalRetries.toUInt()) { + return Pair(false, Duration.ZERO) + } + return Pair(true, intervalDuration) } } -/** - * Custom retry policy with a collection of interval durations between retry attempts. - */ -class CustomRetryPolicy( - val intervalDurations: List = listOf( - Duration.ofSeconds(1), - Duration.ofSeconds(3), - Duration.ofSeconds(7) - ) -) : IngestRetryPolicy { - override fun next(retryNumber: Int): Pair { - return if (retryNumber < intervalDurations.size) { - Pair(true, intervalDurations[retryNumber]) - } else { - Pair(false, Duration.ZERO) +class CustomRetryPolicy(intervalDurations: Array? = null) : IngestRetryPolicy { + private val intervalDurations: Array = + intervalDurations ?: arrayOf(Duration.ofSeconds(1), Duration.ofSeconds(3), Duration.ofSeconds(7)) + + val intervals: List + get() = intervalDurations.toList() + + override fun moveNext(retryNumber: UInt): Pair { + val idx = retryNumber.toInt() + if (idx >= intervalDurations.size) { + return Pair(false, Duration.ZERO) } + return Pair(true, intervalDurations[idx]) } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt new file mode 100644 index 000000000..b3e331dde --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt @@ -0,0 +1,73 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.common + +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.delay + +enum class RetryDecision { + Continue, + ContinueWithoutDelay, + Throw, + Break, +} + +suspend fun IngestRetryPolicy.runWithRetry( + action: suspend (UInt) -> T, + // retry attempt number, exception, isPermanent + onRetry: ((UInt, Exception, Boolean) -> Unit)? = null, + // retry attempt number, exception, isPermanent + onError: ((UInt, Exception, Boolean) -> Unit)? = null, + shouldRetry: ((UInt, Exception, Boolean) -> RetryDecision)? = null, + throwOnExhaustedRetries: Boolean = true, + tracer: ((String) -> Unit)? = null, + cancellationChecker: (() -> Boolean)? = null, +): T? 
{ + var attempt: UInt = 1u + while (true) { + try { + return action(attempt) + } catch (ex: Exception) { + val isPermanent = false // Stub: add logic if needed + onError?.invoke(attempt, ex, isPermanent) + val decision = + shouldRetry?.invoke(attempt, ex, isPermanent) + ?: if (isPermanent) RetryDecision.Throw else RetryDecision.Continue + + when (decision) { + RetryDecision.Throw -> { + tracer?.invoke( + "Decision to throw on attempt $attempt. Is Permanent: $isPermanent. Exception: ${ex.message}", + ) + throw ex + } + + RetryDecision.Break -> { + tracer?.invoke("Breaking out of retry loop early, on attempt $attempt. Exception: ${ex.message}") + return null + } + + else -> { + val (shouldRetry, delayDuration) = this.moveNext(attempt) + if (!shouldRetry) { + tracer?.invoke( + "Retry policy exhausted on attempt $attempt. No more retries will be attempted. throwOnExhaustedRetries: $throwOnExhaustedRetries. Exception: ${ex.message}", + ) + if (throwOnExhaustedRetries) throw ex + return null + } + tracer?.invoke("Transient error occurred: ${ex.message}. Retrying attempt $attempt.") + if (decision != RetryDecision.ContinueWithoutDelay) { + if (delayDuration.toMillis() > 0) { + if (cancellationChecker?.invoke() == true) { + throw CancellationException("Cancelled during retry delay") + } + delay(delayDuration.toMillis()) + } + } + onRetry?.invoke(attempt, ex, isPermanent) + } + } + } + attempt++ + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt new file mode 100644 index 000000000..1595e4b14 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt @@ -0,0 +1,29 @@ +package com.microsoft.azure.kusto.ingest.v2.common.auth + +import com.azure.core.credential.TokenRequestContext +import com.azure.identity.AzureCliCredentialBuilder +import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials + +class AzCliTokenCredentialsProvider: TokenCredentialsProvider { + override suspend fun getCredentialsAsync(targetResource: String):KustoTokenCredentials { + val azureCliCredential = AzureCliCredentialBuilder().build() + val tokenRequestContext = TokenRequestContext().addScopes("$targetResource/.default") + val token = azureCliCredential.getToken(tokenRequestContext).block()?.token + val expiresOn = azureCliCredential.getToken(tokenRequestContext).block()?.expiresAt + return KustoTokenCredentials("JWT",token ?: throw Exception("Failed to acquire token"), expiresOn) + } + + override suspend fun getCredentialsAsync(targetResource: String, tenantId: String):KustoTokenCredentials { + val azureCliCredential = AzureCliCredentialBuilder().tenantId(tenantId).build() + val tokenRequestContext = TokenRequestContext().setTenantId(tenantId).addScopes("$targetResource/.default") + val token = azureCliCredential.getToken(tokenRequestContext).block()?.token + val expiresOn = azureCliCredential.getToken(tokenRequestContext).block()?.expiresAt + return KustoTokenCredentials("JWT",token ?: throw Exception("Failed to acquire token"), expiresOn) + + } + + override suspend fun getCredentialsAsync(targetResource: String, retries: Int, tenantId: String?) 
:KustoTokenCredentials { + //TODO: implement retries + return getCredentialsAsync(targetResource, tenantId ?: "") + } +} \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt similarity index 59% rename from ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt rename to ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt index b335dd423..79381cd24 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/KustoTokenCredentialsProvider.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt @@ -1,25 +1,30 @@ +/* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2.common.auth import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials -import java.util.concurrent.CompletableFuture -interface KustoTokenCredentialsProvider { +interface TokenCredentialsProvider { /** * Retrieves (or creates) a [KustoTokenCredentials] object for [targetResource]. + * * @param targetResource The target resource for which the credentials are needed. * @return The [KustoTokenCredentials] concrete object to use when accessing the target resource. */ suspend fun getCredentialsAsync(targetResource: String): KustoTokenCredentials /** - * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId]. - * Note this API is NOT always supported. Make sure the implementation you use supports this API. + * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId]. Note + * this API is NOT always supported. Make sure the implementation you use supports this API. */ suspend fun getCredentialsAsync(targetResource: String, tenantId: String): KustoTokenCredentials /** - * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId] with retries. - * Note this API is NOT always supported. Make sure the implementation you use supports this API. + * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId] with + * retries. Note this API is NOT always supported. Make sure the implementation you use supports this API. */ - suspend fun getCredentialsAsync(targetResource: String, retries: Int, tenantId: String? = null): KustoTokenCredentials -} \ No newline at end of file + suspend fun getCredentialsAsync( + targetResource: String, + retries: Int, + tenantId: String? = null, + ): KustoTokenCredentials +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt new file mode 100644 index 000000000..3e1b29dfe --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt @@ -0,0 +1,205 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.common.exceptions + +open class IngestException( + message: String? = null, + cause: Throwable? = null, + val failureCode: Int? = null, + val failureSubCode: String? = null, + val isPermanent: Boolean? 
= null, +) : Exception(message, cause) { + open val alreadyTraced: Boolean = false + open val creationMessage: String? = message + + override val message: String + get() = creationMessage ?: "Something went wrong calling into a Kusto client library (fallback message)." + + override fun toString(): String = message +} + +class IngestRequestException( + val errorCode: String? = null, + val errorReason: String? = null, + val errorMessage: String? = null, + val dataSource: String? = null, + val databaseName: String? = null, + val clientRequestId: String? = null, + val activityId: String? = null, + failureCode: Int? = null, + failureSubCode: String? = null, + isPermanent: Boolean? = true, + message: String? = null, + cause: Throwable? = null, +) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { + override val message: String + get() = + creationMessage + ?: "${errorReason ?: ""} (${errorCode ?: ""}): ${errorMessage ?: ""}. This normally represents a permanent error, and retrying is unlikely to help." +} + +class IngestServiceException( + val errorCode: String? = null, + val errorReason: String? = null, + val errorMessage: String? = null, + val dataSource: String? = null, + val clientRequestId: String? = null, + val activityId: String? = null, + failureCode: Int? = 500, + failureSubCode: String? = null, + isPermanent: Boolean? = null, + message: String? = null, + cause: Throwable? = null, +) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { + override val message: String + get() = + creationMessage + ?: "${errorReason ?: ""} (${errorCode ?: ""}): ${errorMessage ?: ""}. This normally represents a temporary error, and retrying after some backoff period might help." +} + +open class IngestClientException( + val ingestionSourceId: String? = null, + val ingestionSource: String? = null, + val error: String? = null, + failureCode: Int? = 400, + failureSubCode: String? = null, + isPermanent: Boolean? = null, + message: String? = null, + cause: Throwable? = null, +) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { + override val message: String + get() = creationMessage ?: "An error occurred for source: '${ingestionSource ?: ""}'. Error: '${error ?: ""}'" +} + +class IngestSizeLimitExceededException( + val size: Long, + val maxSize: Long, + ingestionSourceId: String? = null, + ingestionSource: String? = null, + error: String? = null, + failureCode: Int? = 400, + failureSubCode: String? = null, + isPermanent: Boolean? = true, + message: String? = null, + cause: Throwable? = null, +) : + IngestClientException( + ingestionSourceId, + ingestionSource, + error, + failureCode, + failureSubCode, + isPermanent, + message, + cause, + ) { + override val message: String + get() = + creationMessage + ?: "Size too large to ingest: Source: '${ingestionSource ?: ""}' size in bytes is '$size' which exceeds the maximal size of '$maxSize'" +} + +class InvalidIngestionMappingException( + ingestionSourceId: String? = null, + ingestionSource: String? = null, + error: String? = null, + failureCode: Int? = 400, + failureSubCode: String? = null, + isPermanent: Boolean? = true, + message: String? = null, + cause: Throwable? 
= null, +) : + IngestClientException( + ingestionSourceId, + ingestionSource, + error, + failureCode, + failureSubCode, + isPermanent, + message, + cause, + ) { + override val message: String + get() = creationMessage ?: "Ingestion mapping is invalid: ${super.message ?: ""}" +} + +class MultipleIngestionMappingPropertiesException( + ingestionSourceId: String? = null, + ingestionSource: String? = null, + error: String? = null, + failureCode: Int? = 400, + failureSubCode: String? = null, + isPermanent: Boolean? = true, + message: String? = null, + cause: Throwable? = null, +) : + IngestClientException( + ingestionSourceId, + ingestionSource, + error, + failureCode, + failureSubCode, + isPermanent, + message, + cause, + ) { + override val message: String + get() = + creationMessage + ?: "At most one property of type ingestion mapping or ingestion mapping reference must be present." +} + +open class UploadFailedException( + val fileName: String? = null, + val blobName: String? = null, + failureCode: Int? = null, + failureSubCode: String? = null, + isPermanent: Boolean? = null, + message: String? = null, + cause: Throwable? = null, +) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { + override val message: String + get() = creationMessage ?: "An error occurred while attempting to upload file `$fileName` to blob `$blobName`." +} + +class NoAvailableIngestContainersException( + fileName: String? = null, + blobName: String? = null, + failureCode: Int? = 500, + failureSubCode: String? = null, + isPermanent: Boolean? = false, + message: String? = null, + cause: Throwable? = null, +) : UploadFailedException(fileName, blobName, failureCode, failureSubCode, isPermanent, message, cause) { + override val message: String + get() = creationMessage ?: "No available containers for upload." +} + +class InvalidUploadStreamException( + fileName: String? = null, + blobName: String? = null, + failureCode: Int? = null, + failureSubCode: String? = null, + isPermanent: Boolean? = true, + message: String? = null, + cause: Throwable? = null, +) : UploadFailedException(fileName, blobName, failureCode, failureSubCode, isPermanent, message, cause) { + override val message: String + get() = creationMessage ?: "The stream provided for upload is invalid - $failureSubCode." +} + +class UploadSizeLimitExceededException( + val size: Long, + val maxSize: Long, + fileName: String? = null, + blobName: String? = null, + failureCode: Int? = null, + failureSubCode: String? = null, + isPermanent: Boolean? = true, + message: String? = null, + cause: Throwable? = null, +) : UploadFailedException(fileName, blobName, failureCode, failureSubCode, isPermanent, message, cause) { + override val message: String + get() = + creationMessage + ?: "The file `$fileName` is too large to upload. Size: $size bytes, Max size: $maxSize bytes." 
+} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt index dd12af9d5..6175b0a49 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt @@ -1,3 +1,4 @@ +/* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2.common.models data class ClientDetails(val name: String, val version: String) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt index b0e8b763a..50ecde5d3 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt @@ -1,21 +1,18 @@ +/* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2.common.models -import java.time.LocalDateTime +import java.time.OffsetDateTime -/** - * Represents a token credentials holder, capable (at least) of authenticating over an HTTPS "Authorization" header. - */ +/** Represents a token credentials holder, capable (at least) of authenticating over an HTTPS "Authorization" header. */ data class KustoTokenCredentials( val tokenScheme: String? = null, val tokenValue: String? = null, - val expiresOn: LocalDateTime? = null + val expiresOn: OffsetDateTime? = null, ) { - /** - * Returns the secure representation of this instance. - */ + /** Returns the secure representation of this instance. */ fun toSecureString(): String { return "${this::class.simpleName}:$tokenScheme:*****" } override fun toString(): String = toSecureString() -} \ No newline at end of file +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt new file mode 100644 index 000000000..e9d910eb8 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -0,0 +1,71 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.common.utils + +import java.net.URI +import java.util.* +import java.util.regex.Pattern + +object PathUtils { + private const val PREFIX = "Ingest.V2.Dotnet" + private const val FILE_NAME_SEGMENT_MAX_LENGTH = 120 + private const val TOTAL_TWO_SEGMENT_MAX_LENGTH = 160 + private const val TRUNCATION_SUFFIX = "__trunc" + private val URI_FRAGMENT_SEPARATORS = charArrayOf('?', '#', ';') + + // Only allow a-z, A-Z, 0-9, and hyphen (-) in the sanitized file name. 
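+    // Note: \w also matches the underscore (_), so sanitized names may keep
+    // underscores as well; CASE_INSENSITIVE has no effect on \w or on "-".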
+ private val FORBIDDEN_CHARS = Pattern.compile("[^\\w-]", Pattern.CASE_INSENSITIVE) + + fun sanitizeFileName(baseName: String?, sourceId: String?): String { + val base = getBasename(baseName) + val fileNameSegment = sanitize(base, FILE_NAME_SEGMENT_MAX_LENGTH) + val baseNamePart = if (!base.isNullOrEmpty()) "_$fileNameSegment" else "" + return sanitize(sourceId, TOTAL_TWO_SEGMENT_MAX_LENGTH - fileNameSegment.length) + baseNamePart + } + + private fun sanitize(name: String?, maxSize: Int): String { + if (name.isNullOrEmpty()) return "" + var sanitized = FORBIDDEN_CHARS.matcher(name).replaceAll("-") + if (sanitized.length > maxSize) { + sanitized = sanitized.take(maxSize - TRUNCATION_SUFFIX.length) + TRUNCATION_SUFFIX + } + return sanitized + } + + // Format: Ingest.V2.Dotnet_{timestamp}_{random}_{format}_{sourceId}_{name} + // Sample: + // Ingest.V2.Dotnet_20250702080158084_874b2e9373414f64aa5a9f9c0d240b07_file_e493b23d-684f-4f4c-8ba8-3edfaca09427_dataset-json.multijson.gz + fun createFileNameForUpload(name: String): String { + val timestamp = + java.time.format.DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSS") + .format(java.time.Instant.now().atZone(java.time.ZoneOffset.UTC)) + return PREFIX + "_$timestamp" + "_${UUID.randomUUID().toString().replace("-", "")}" + "_$name" + } + + /** + * Returns the base name of the file, with extensions and without any path. Works for both local paths and URLs. + * Examples: + * - "C:\path\to\file.csv.gz" -> "file.csv.gz" + * - "https://example.com/path/to/file.csv.gz" -> "file.csv.gz" + */ + fun getBasename(uri: String?): String? { + if (uri.isNullOrBlank()) return uri + val uriObj = + try { + URI(uri) + } catch (e: Exception) { + null + } + if (uriObj == null || !uriObj.isAbsolute) { + // Not a valid absolute URI, treat as path + return uri.substringAfterLast('/', uri).substringAfterLast('\\', uri) + } + // For web URIs, extract last segment of the path, remove query/fragment + val path = uriObj.path ?: "" + var lastSegment = path.split('/', '\\').lastOrNull() ?: "" + val queryIndex = lastSegment.indexOfAny(URI_FRAGMENT_SEPARATORS) + if (queryIndex >= 0) { + lastSegment = lastSegment.take(queryIndex) + } + return lastSegment + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt new file mode 100644 index 000000000..ccc93a611 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt @@ -0,0 +1,7 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.container + +interface ContainerBase { + val uri: String + val name: String +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt new file mode 100644 index 000000000..727665014 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt @@ -0,0 +1,8 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.container + +import java.io.InputStream + +interface UploadContainerBase : ContainerBase { + suspend fun uploadAsync(name: String, stream: InputStream): String +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt new file mode 100644 index 000000000..f103fc199 --- 
/dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -0,0 +1,35 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.source + +class BlobSource : IngestionSource { + override val url: String + val exactSize: Int? + + constructor( + url: String, + format: DataFormat, + compression: CompressionType? = null, + sourceId: String? = null, + ) : super(format, compression ?: ExtendedDataSourceCompressionType.detectFromUri(url), url, sourceId) { + this.url = url + this.exactSize = null + } + + internal constructor( + url: String, + localSource: LocalSource, + exactSize: Int? = null, + ) : super(localSource.format, localSource.compressionType, url, localSource.sourceId) { + this.url = url + this.exactSize = exactSize + } + + override fun toString(): String { + // Assuming FormatWithInvariantCulture is replaced by Kotlin string interpolation + return "$url SourceId: $sourceId" + } + + override fun close() { + TODO("Not yet implemented") + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt new file mode 100644 index 000000000..5dbd9328a --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt @@ -0,0 +1,13 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.source + +enum class CompressionType { + GZIP, + ZIP, + NONE, + ; + + override fun toString(): String { + return if (this == NONE) "" else name + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt new file mode 100644 index 000000000..df42f719e --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt @@ -0,0 +1,56 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.source + +enum class DataFormat( + val kustoValue: String, + private val ingestionMappingKind: IngestionMappingKind, + compressible: Boolean, +) { + CSV("csv", IngestionMappingKind.CSV, true), + TSV("tsv", IngestionMappingKind.CSV, true), + SCSV("scsv", IngestionMappingKind.CSV, true), + SOHSV("sohsv", IngestionMappingKind.CSV, true), + PSV("psv", IngestionMappingKind.CSV, true), + TXT("txt", IngestionMappingKind.CSV, true), + TSVE("tsve", IngestionMappingKind.CSV, true), + JSON("json", IngestionMappingKind.JSON, true), + SINGLEJSON("singlejson", IngestionMappingKind.JSON, true), + MULTIJSON("multijson", IngestionMappingKind.JSON, true), + AVRO("avro", IngestionMappingKind.AVRO, false), + APACHEAVRO("apacheavro", IngestionMappingKind.APACHEAVRO, false), + PARQUET("parquet", IngestionMappingKind.PARQUET, false), + SSTREAM("sstream", IngestionMappingKind.SSTREAM, false), + ORC("orc", IngestionMappingKind.ORC, false), + RAW("raw", IngestionMappingKind.CSV, true), + W3CLOGFILE("w3clogfile", IngestionMappingKind.W3CLOGFILE, true), + ; + + val isCompressible: Boolean = compressible + + fun getIngestionMappingKind(): IngestionMappingKind { + return ingestionMappingKind + } + + fun isBinaryFormat(): Boolean { + return this == AVRO || this == APACHEAVRO || this == PARQUET || this == SSTREAM || this == ORC + } + + fun isJsonFormat(): Boolean { + return this == JSON || this == MULTIJSON || this == SINGLEJSON + } + + fun toKustoValue(): String { + return kustoValue + } +} + +enum class IngestionMappingKind(val kustoValue: String) { 
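+    // kustoValue preserves the exact casing the service expects for each
+    // mapping kind, unlike DataFormat.kustoValue, which is all lowercase.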
+ CSV("Csv"), + JSON("Json"), + AVRO("Avro"), + PARQUET("Parquet"), + SSTREAM("SStream"), + ORC("Orc"), + APACHEAVRO("ApacheAvro"), + W3CLOGFILE("W3CLogFile"), +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt new file mode 100644 index 000000000..19fc8649d --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt @@ -0,0 +1,25 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.common.utils.PathUtils +import java.lang.AutoCloseable + +abstract class IngestionSource( + open val format: DataFormat, + open val compressionType: CompressionType?, + open val url: String?, + open val sourceId: String?, +) : AutoCloseable { + var name: String? = null + private set + + fun initName(baseName: String? = null) { + val type = this::class.simpleName?.lowercase()?.removeSuffix("source") ?: "unknown" + name = "${type}_${PathUtils.sanitizeFileName(baseName, sourceId)}${format.toKustoValue()}$compressionType" + } +} + +// Placeholder classes for missing dependencies +object ExtendedDataSourceCompressionType { + fun detectFromUri(url: String): CompressionType? = null +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt new file mode 100644 index 000000000..a16682e34 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -0,0 +1,52 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.source + +import java.io.InputStream + +abstract class LocalSource( + override val format: DataFormat, + val leaveOpen: Boolean, + override val compressionType: CompressionType = CompressionType.NONE, + val baseName: String? = null, + override val sourceId: String? = null, +) : IngestionSource(format, compressionType, baseName, sourceId) { + + protected var mStream: InputStream? = null + + // Indicates whether the stream should be left open after ingestion. + // val leaveOpen: Boolean // Already a constructor property + + internal val shouldCompress: Boolean + get() = (compressionType == CompressionType.NONE) && !format.isBinaryFormat() + + abstract fun data(): InputStream + + fun reset() { + data().reset() + } + + override fun close() { + if (!leaveOpen) { + mStream?.close() + } + } +} + +class StreamSource( + stream: InputStream, + format: DataFormat, + sourceCompression: CompressionType, + sourceId: String? = null, + name: String? = null, + leaveOpen: Boolean = false, +) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) { + + init { + mStream = stream + initName(name) + } + + override fun data(): InputStream { + return mStream!! 
+ } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt new file mode 100644 index 000000000..f99baf172 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -0,0 +1,55 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi +import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import io.mockk.coEvery +import io.mockk.every +import io.mockk.mockk +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertThrows +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test + +class ConfigurationApiWrapperTest { + private lateinit var defaultApi: DefaultApi + private lateinit var wrapper: ConfigurationApiWrapper + private val clusterUrl = "https://testcluster.kusto.windows.net" + private val tokenProvider = mockk(relaxed = true) + + @BeforeEach + fun setup() { + defaultApi = mockk(relaxed = true) + wrapper = ConfigurationApiWrapper(clusterUrl, tokenProvider, false, defaultApi) + } + + @Test + fun `getConfigurationDetails returns configuration on success`() = runBlocking { + val configResponse = ConfigurationResponse() // Fill with test data if needed + val httpResponse = mockk>() + every { httpResponse.success } returns true + every { httpResponse.status } returns 200 + every { httpResponse.body() } returns configResponse + coEvery { defaultApi.v1RestIngestionConfigurationGet() } returns httpResponse + + val result = wrapper.getConfigurationDetails() + assertEquals(configResponse, result) + } + + @Test + fun `getConfigurationDetails throws IngestException on failure`() = runBlocking { + val httpResponse = mockk>() + every { httpResponse.success } returns false + every { httpResponse.status } returns 500 + every { httpResponse.body() } returns null + coEvery { defaultApi.v1RestIngestionConfigurationGet() } returns httpResponse + + val ex = assertThrows(IngestException::class.java) { runBlocking { wrapper.getConfigurationDetails() } } + assertTrue(ex.message!!.contains("Failed to retrieve configuration details")) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt new file mode 100644 index 000000000..5073a1d17 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt @@ -0,0 +1,31 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.common + +import kotlinx.coroutines.test.runTest +import org.junit.Test +import kotlin.test.assertEquals + +class TestIngestRetryPolicy : IngestRetryPolicy { + override fun moveNext(retryNumber: UInt): Pair { + // Allow up to 3 attempts, 100ms delay + return Pair(retryNumber < 3u, java.time.Duration.ofMillis(100)) + } +} + +class RetryPolicyExtensionsTest { + @Test + fun testRunWithRetrySuccessAfterRetry() = runTest { + val policy = TestIngestRetryPolicy() + var 
callCount = 0 + val result = + policy.runWithRetry( + action = { attempt -> + callCount++ + if (attempt < 2u) throw RuntimeException("Fail") + "Success" + }, + ) + assertEquals(2, callCount) + assertEquals("Success", result) + } +} From 5d6d331db0a02e38d6510095f301dac468355b3e Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Mon, 8 Sep 2025 18:53:45 +0530 Subject: [PATCH 07/50] * Move code forward --- .../ingest/v2/ConfigurationApiWrapper.kt | 2 +- .../kusto/ingest/v2/KustoBaseApiClient.kt | 15 +++-- .../ingest/v2/ConfigurationApiWrapperTest.kt | 64 +++++++------------ 3 files changed, 31 insertions(+), 50 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 22055668f..17e435fdc 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -9,7 +9,7 @@ import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse class ConfigurationApiWrapper( override val clusterUrl: String, - val tokenCredentialsProvider: TokenCredentialsProvider, + override val tokenCredentialsProvider: TokenCredentialsProvider, override val skipSecurityChecks: Boolean = false, // Allow injection for testing private val configurationApi: DefaultApi? = null, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 3599d5e33..2a18ce5d2 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -10,7 +10,7 @@ import io.ktor.client.plugins.auth.providers.bearer import io.ktor.client.plugins.contentnegotiation.ContentNegotiation import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json -import java.net.URI +import java.net.URL open class KustoBaseApiClient( open val clusterUrl: String, @@ -18,12 +18,13 @@ open class KustoBaseApiClient( open val skipSecurityChecks: Boolean = false, ) { init { - if (!skipSecurityChecks) { - val uri = URI(clusterUrl) - val scheme = uri.scheme?.lowercase() - if (scheme != "https") { - throw IllegalArgumentException("The provided endpoint is not a valid endpoint") - } + val uri = try { + URL(clusterUrl) + } catch (e: Exception) { + throw IllegalArgumentException("Invalid cluster URL: $clusterUrl", e) + } + if (uri.protocol != "https" && !skipSecurityChecks) { + throw IllegalArgumentException("Cluster URL must use HTTPS: $clusterUrl") } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index f99baf172..aa77c5d85 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -2,54 +2,34 @@ package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi +import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProvider import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider -import 
com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse -import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse -import io.mockk.coEvery -import io.mockk.every -import io.mockk.mockk import kotlinx.coroutines.runBlocking -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Assertions.assertThrows -import org.junit.jupiter.api.Assertions.assertTrue -import org.junit.jupiter.api.BeforeEach + import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertNotNull class ConfigurationApiWrapperTest { - private lateinit var defaultApi: DefaultApi - private lateinit var wrapper: ConfigurationApiWrapper - private val clusterUrl = "https://testcluster.kusto.windows.net" - private val tokenProvider = mockk(relaxed = true) - - @BeforeEach - fun setup() { - defaultApi = mockk(relaxed = true) - wrapper = ConfigurationApiWrapper(clusterUrl, tokenProvider, false, defaultApi) - } - - @Test - fun `getConfigurationDetails returns configuration on success`() = runBlocking { - val configResponse = ConfigurationResponse() // Fill with test data if needed - val httpResponse = mockk>() - every { httpResponse.success } returns true - every { httpResponse.status } returns 200 - every { httpResponse.body() } returns configResponse - coEvery { defaultApi.v1RestIngestionConfigurationGet() } returns httpResponse - - val result = wrapper.getConfigurationDetails() - assertEquals(configResponse, result) - } @Test - fun `getConfigurationDetails throws IngestException on failure`() = runBlocking { - val httpResponse = mockk>() - every { httpResponse.success } returns false - every { httpResponse.status } returns 500 - every { httpResponse.body() } returns null - coEvery { defaultApi.v1RestIngestionConfigurationGet() } returns httpResponse - - val ex = assertThrows(IngestException::class.java) { runBlocking { wrapper.getConfigurationDetails() } } - assertTrue(ex.message!!.contains("Failed to retrieve configuration details")) + fun `run e2e test with an actual cluster`(): Unit = runBlocking { + val actualTokenProvider = AzCliTokenCredentialsProvider() // Replace with a real token provider + val cluster = System.getenv("ENGINE_CONNECTION_STRING") + val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) + try { + val config = actualWrapper.getConfigurationDetails() + println("E2E Test Success: Retrieved configuration: $config") + assertNotNull(config, "Configuration should not be null") + assertNotNull(config.containerSettings, "ContainerSettings should not be null") + assertNotNull(config.containerSettings.preferredUploadMethod, "Preferred upload should not be null") + config.containerSettings.containers?.forEach { containerInfo -> + run { + assertNotNull(containerInfo.path, "Container path should not be null") + } + } + } catch (ex: Exception) { + println("E2E Test Failed: ${ex.message}") + throw ex + } } } From 6e3f21a195de3f97f1fe6cdf3ea26297cb8ba573 Mon Sep 17 00:00:00 2001 From: ramacg Date: Tue, 9 Sep 2025 10:43:44 +0530 Subject: [PATCH 08/50] *Additional edits --- ingest-v2/pom.xml | 4 +- .../ingest/v2/ConfigurationApiWrapper.kt | 34 +++++++--- .../kusto/ingest/v2/KustoBaseApiClient.kt | 43 +++++++------ .../ingest/v2/common/ConfigurationCache.kt | 13 +++- .../ingest/v2/common/IngestRetryPolicy.kt | 21 ++++-- .../ingest/v2/common/RetryPolicyExtensions.kt | 18 ++++-- .../auth/AzCliTokenCredentialsProvider.kt | 64 ++++++++++++++----- 
.../common/auth/TokenCredentialsProvider.kt | 29 ++++++--- .../v2/common/exceptions/IngestException.kt | 53 ++++++++++++--- .../v2/common/models/KustoTokenCredentials.kt | 5 +- .../kusto/ingest/v2/common/utils/PathUtils.kt | 36 ++++++++--- .../kusto/ingest/v2/source/BlobSource.kt | 14 +++- .../ingest/v2/source/DataSourceFormat.kt | 6 +- .../kusto/ingest/v2/source/IngestionSource.kt | 7 +- .../kusto/ingest/v2/source/LocalSource.kt | 4 +- .../ingest/v2/ConfigurationApiWrapperTest.kt | 24 ++++--- .../v2/common/RetryPolicyExtensionsTest.kt | 4 +- 17 files changed, 276 insertions(+), 103 deletions(-) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index c2a7b54fc..e47f186c4 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -233,10 +233,10 @@ 0.51 - 120 + 80 4 8 - false + true true diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 17e435fdc..6a94843ed 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -11,28 +11,42 @@ class ConfigurationApiWrapper( override val clusterUrl: String, override val tokenCredentialsProvider: TokenCredentialsProvider, override val skipSecurityChecks: Boolean = false, - // Allow injection for testing - private val configurationApi: DefaultApi? = null, -) : KustoBaseApiClient(clusterUrl, tokenCredentialsProvider, skipSecurityChecks) { - // Add Logging using slf4j - private val logger = org.slf4j.LoggerFactory.getLogger(ConfigurationApiWrapper::class.java) +) : + KustoBaseApiClient( + clusterUrl, + tokenCredentialsProvider, + skipSecurityChecks, + ) { + private val logger = + org.slf4j.LoggerFactory.getLogger( + ConfigurationApiWrapper::class.java, + ) private val api: DefaultApi = - configurationApi ?: DefaultApi(baseUrl = "$clusterUrl/v1/rest/ingest", httpClientConfig = setupConfig) + DefaultApi( + baseUrl = "$clusterUrl/v1/rest/ingest", + httpClientConfig = setupConfig, + ) suspend fun getConfigurationDetails(): ConfigurationResponse { - val configurationHttpResponse: HttpResponse = api.v1RestIngestionConfigurationGet() + val configurationHttpResponse: HttpResponse = + api.v1RestIngestionConfigurationGet() if (configurationHttpResponse.success) { logger.info( "Successfully retrieved configuration details from $clusterUrl with status: ${configurationHttpResponse.status}", ) - logger.debug("Configuration details: {}", configurationHttpResponse.body()) + logger.debug( + "Configuration details: {}", + configurationHttpResponse.body(), + ) return configurationHttpResponse.body() } else { logger.error( - "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, Body: ${configurationHttpResponse.body()}", + "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, " + + "Body: ${configurationHttpResponse.body()}", ) throw IngestException( - "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, Body: ${configurationHttpResponse.body()}", + "Failed to retrieve configuration details from $clusterUrl. 
Status: ${configurationHttpResponse.status}, " + + "Body: ${configurationHttpResponse.body()}", ) } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 2a18ce5d2..0b849af75 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -10,41 +10,46 @@ import io.ktor.client.plugins.auth.providers.bearer import io.ktor.client.plugins.contentnegotiation.ContentNegotiation import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json -import java.net.URL open class KustoBaseApiClient( open val clusterUrl: String, open val tokenCredentialsProvider: TokenCredentialsProvider, open val skipSecurityChecks: Boolean = false, ) { - init { - val uri = try { - URL(clusterUrl) - } catch (e: Exception) { - throw IllegalArgumentException("Invalid cluster URL: $clusterUrl", e) - } - if (uri.protocol != "https" && !skipSecurityChecks) { - throw IllegalArgumentException("Cluster URL must use HTTPS: $clusterUrl") - } - } - protected val setupConfig: (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } + protected val setupConfig: (HttpClientConfig<*>) -> Unit = { config -> + getClientConfig(config) + } private fun getClientConfig(config: HttpClientConfig<*>) { - config.install(DefaultRequest) { header("Content-Type", "application/json") } + config.install(DefaultRequest) { + header("Content-Type", "application/json") + } config.install(Auth) { bearer { loadTokens { // Always null so refreshTokens is always called - tokenCredentialsProvider.getCredentialsAsync(clusterUrl).tokenValue?.let { - BearerTokens(accessToken = it, refreshToken = null) - } + tokenCredentialsProvider + .getCredentialsAsync(clusterUrl) + .tokenValue + ?.let { + BearerTokens( + accessToken = it, + refreshToken = null, + ) + } } refreshTokens { // Always null so refreshTokens is always called - tokenCredentialsProvider.getCredentialsAsync(clusterUrl).tokenValue?.let { - BearerTokens(accessToken = it, refreshToken = null) - } + tokenCredentialsProvider + .getCredentialsAsync(clusterUrl) + .tokenValue + ?.let { + BearerTokens( + accessToken = it, + refreshToken = null, + ) + } } } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 1c5c5ecae..5441783d2 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -23,13 +23,20 @@ class DefaultConfigurationCache( override suspend fun getConfiguration(): ConfigurationResponse { val currentTime = System.currentTimeMillis() - val needsRefresh = cachedConfiguration == null || (currentTime - lastRefreshTime) >= refreshInterval.toMillis() + val needsRefresh = + cachedConfiguration == null || + (currentTime - lastRefreshTime) >= + refreshInterval.toMillis() if (needsRefresh) { - val newConfig = runCatching { configurationProvider() }.getOrElse { cachedConfiguration ?: throw it } + val newConfig = + runCatching { configurationProvider() } + .getOrElse { cachedConfiguration ?: throw it } synchronized(this) { // Double-check in case another thread refreshed while we were waiting val stillNeedsRefresh = - 
cachedConfiguration == null || (currentTime - lastRefreshTime) >= refreshInterval.toMillis() + cachedConfiguration == null || + (currentTime - lastRefreshTime) >= + refreshInterval.toMillis() if (stillNeedsRefresh) { cachedConfiguration = newConfig lastRefreshTime = currentTime diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index fc2acda98..73697108b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -5,8 +5,9 @@ import java.time.Duration interface IngestRetryPolicy { /** - * Determines whether the operation should be retried based on the retryNumber. Returns a Pair - * indicating whether to retry and the duration of the retry interval. + * Determines whether the operation should be retried based on the + * retryNumber. Returns a Pair indicating whether to + * retry and the duration of the retry interval. */ fun moveNext(retryNumber: UInt): Pair } @@ -17,8 +18,10 @@ object NoRetryPolicy : IngestRetryPolicy { } } -class SimpleRetryPolicy(val intervalDuration: Duration = Duration.ofSeconds(10), val totalRetries: Int = 3) : - IngestRetryPolicy { +class SimpleRetryPolicy( + val intervalDuration: Duration = Duration.ofSeconds(10), + val totalRetries: Int = 3, +) : IngestRetryPolicy { init { require(totalRetries > 0) { "totalRetries must be positive" } } @@ -32,9 +35,15 @@ class SimpleRetryPolicy(val intervalDuration: Duration = Duration.ofSeconds(10), } } -class CustomRetryPolicy(intervalDurations: Array? = null) : IngestRetryPolicy { +class CustomRetryPolicy(intervalDurations: Array? = null) : + IngestRetryPolicy { private val intervalDurations: Array = - intervalDurations ?: arrayOf(Duration.ofSeconds(1), Duration.ofSeconds(3), Duration.ofSeconds(7)) + intervalDurations + ?: arrayOf( + Duration.ofSeconds(1), + Duration.ofSeconds(3), + Duration.ofSeconds(7), + ) val intervals: List get() = intervalDurations.toList() diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt index b3e331dde..7dc7a9cf5 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt @@ -31,7 +31,11 @@ suspend fun IngestRetryPolicy.runWithRetry( onError?.invoke(attempt, ex, isPermanent) val decision = shouldRetry?.invoke(attempt, ex, isPermanent) - ?: if (isPermanent) RetryDecision.Throw else RetryDecision.Continue + ?: if (isPermanent) { + RetryDecision.Throw + } else { + RetryDecision.Continue + } when (decision) { RetryDecision.Throw -> { @@ -42,7 +46,9 @@ suspend fun IngestRetryPolicy.runWithRetry( } RetryDecision.Break -> { - tracer?.invoke("Breaking out of retry loop early, on attempt $attempt. Exception: ${ex.message}") + tracer?.invoke( + "Breaking out of retry loop early, on attempt $attempt. Exception: ${ex.message}", + ) return null } @@ -55,11 +61,15 @@ suspend fun IngestRetryPolicy.runWithRetry( if (throwOnExhaustedRetries) throw ex return null } - tracer?.invoke("Transient error occurred: ${ex.message}. Retrying attempt $attempt.") + tracer?.invoke( + "Transient error occurred: ${ex.message}. 
Retrying attempt $attempt.", + ) if (decision != RetryDecision.ContinueWithoutDelay) { if (delayDuration.toMillis() > 0) { if (cancellationChecker?.invoke() == true) { - throw CancellationException("Cancelled during retry delay") + throw CancellationException( + "Cancelled during retry delay", + ) } delay(delayDuration.toMillis()) } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt index 1595e4b14..b2830ed10 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt @@ -1,29 +1,61 @@ +/* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2.common.auth import com.azure.core.credential.TokenRequestContext import com.azure.identity.AzureCliCredentialBuilder import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials -class AzCliTokenCredentialsProvider: TokenCredentialsProvider { - override suspend fun getCredentialsAsync(targetResource: String):KustoTokenCredentials { +class AzCliTokenCredentialsProvider : TokenCredentialsProvider { + override suspend fun getCredentialsAsync( + targetResource: String, + ): KustoTokenCredentials { val azureCliCredential = AzureCliCredentialBuilder().build() - val tokenRequestContext = TokenRequestContext().addScopes("$targetResource/.default") - val token = azureCliCredential.getToken(tokenRequestContext).block()?.token - val expiresOn = azureCliCredential.getToken(tokenRequestContext).block()?.expiresAt - return KustoTokenCredentials("JWT",token ?: throw Exception("Failed to acquire token"), expiresOn) + val tokenRequestContext = + TokenRequestContext().addScopes("$targetResource/.default") + val token = + azureCliCredential.getToken(tokenRequestContext).block()?.token + val expiresOn = + azureCliCredential + .getToken(tokenRequestContext) + .block() + ?.expiresAt + return KustoTokenCredentials( + "JWT", + token ?: throw Exception("Failed to acquire token"), + expiresOn, + ) } - override suspend fun getCredentialsAsync(targetResource: String, tenantId: String):KustoTokenCredentials { - val azureCliCredential = AzureCliCredentialBuilder().tenantId(tenantId).build() - val tokenRequestContext = TokenRequestContext().setTenantId(tenantId).addScopes("$targetResource/.default") - val token = azureCliCredential.getToken(tokenRequestContext).block()?.token - val expiresOn = azureCliCredential.getToken(tokenRequestContext).block()?.expiresAt - return KustoTokenCredentials("JWT",token ?: throw Exception("Failed to acquire token"), expiresOn) - + override suspend fun getCredentialsAsync( + targetResource: String, + tenantId: String, + ): KustoTokenCredentials { + val azureCliCredential = + AzureCliCredentialBuilder().tenantId(tenantId).build() + val tokenRequestContext = + TokenRequestContext() + .setTenantId(tenantId) + .addScopes("$targetResource/.default") + val token = + azureCliCredential.getToken(tokenRequestContext).block()?.token + val expiresOn = + azureCliCredential + .getToken(tokenRequestContext) + .block() + ?.expiresAt + return KustoTokenCredentials( + "JWT", + token ?: throw Exception("Failed to acquire token"), + expiresOn, + ) } - override suspend fun getCredentialsAsync(targetResource: String, retries: Int, tenantId: String?) 
:KustoTokenCredentials { - //TODO: implement retries + override suspend fun getCredentialsAsync( + targetResource: String, + retries: Int, + tenantId: String?, + ): KustoTokenCredentials { + // TODO: implement retries return getCredentialsAsync(targetResource, tenantId ?: "") } -} \ No newline at end of file +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt index 79381cd24..083890c27 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt @@ -5,22 +5,33 @@ import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials interface TokenCredentialsProvider { /** - * Retrieves (or creates) a [KustoTokenCredentials] object for [targetResource]. + * Retrieves (or creates) a [KustoTokenCredentials] object for + * [targetResource]. * - * @param targetResource The target resource for which the credentials are needed. - * @return The [KustoTokenCredentials] concrete object to use when accessing the target resource. + * @param targetResource The target resource for which the credentials are + * needed. + * @return The [KustoTokenCredentials] concrete object to use when accessing + * the target resource. */ - suspend fun getCredentialsAsync(targetResource: String): KustoTokenCredentials + suspend fun getCredentialsAsync( + targetResource: String, + ): KustoTokenCredentials /** - * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId]. Note - * this API is NOT always supported. Make sure the implementation you use supports this API. + * Retrieves (or creates) a [KustoTokenCredentials] object for the + * [targetResource] on a tenant [tenantId]. Note this API is NOT always + * supported. Make sure the implementation you use supports this API. */ - suspend fun getCredentialsAsync(targetResource: String, tenantId: String): KustoTokenCredentials + suspend fun getCredentialsAsync( + targetResource: String, + tenantId: String, + ): KustoTokenCredentials /** - * Retrieves (or creates) a [KustoTokenCredentials] object for the [targetResource] on a tenant [tenantId] with - * retries. Note this API is NOT always supported. Make sure the implementation you use supports this API. + * Retrieves (or creates) a [KustoTokenCredentials] object for the + * [targetResource] on a tenant [tenantId] with retries. Note this API is + * NOT always supported. Make sure the implementation you use supports this + * API. */ suspend fun getCredentialsAsync( targetResource: String, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt index 3e1b29dfe..6287663cb 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt @@ -12,7 +12,9 @@ open class IngestException( open val creationMessage: String? = message override val message: String - get() = creationMessage ?: "Something went wrong calling into a Kusto client library (fallback message)." 
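+        // creationMessage keeps the constructor-supplied message so each
+        // subclass can substitute its own fallback text when it is absent.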
+ get() = + creationMessage + ?: "Something went wrong calling into a Kusto client library (fallback message)." override fun toString(): String = message } @@ -67,7 +69,9 @@ open class IngestClientException( cause: Throwable? = null, ) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { override val message: String - get() = creationMessage ?: "An error occurred for source: '${ingestionSource ?: ""}'. Error: '${error ?: ""}'" + get() = + creationMessage + ?: "An error occurred for source: '${ingestionSource ?: ""}'. Error: '${error ?: ""}'" } class IngestSizeLimitExceededException( @@ -119,7 +123,9 @@ class InvalidIngestionMappingException( cause, ) { override val message: String - get() = creationMessage ?: "Ingestion mapping is invalid: ${super.message ?: ""}" + get() = + creationMessage + ?: "Ingestion mapping is invalid: ${super.message ?: ""}" } class MultipleIngestionMappingPropertiesException( @@ -158,7 +164,9 @@ open class UploadFailedException( cause: Throwable? = null, ) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { override val message: String - get() = creationMessage ?: "An error occurred while attempting to upload file `$fileName` to blob `$blobName`." + get() = + creationMessage + ?: "An error occurred while attempting to upload file `$fileName` to blob `$blobName`." } class NoAvailableIngestContainersException( @@ -169,7 +177,16 @@ class NoAvailableIngestContainersException( isPermanent: Boolean? = false, message: String? = null, cause: Throwable? = null, -) : UploadFailedException(fileName, blobName, failureCode, failureSubCode, isPermanent, message, cause) { +) : + UploadFailedException( + fileName, + blobName, + failureCode, + failureSubCode, + isPermanent, + message, + cause, + ) { override val message: String get() = creationMessage ?: "No available containers for upload." } @@ -182,9 +199,20 @@ class InvalidUploadStreamException( isPermanent: Boolean? = true, message: String? = null, cause: Throwable? = null, -) : UploadFailedException(fileName, blobName, failureCode, failureSubCode, isPermanent, message, cause) { +) : + UploadFailedException( + fileName, + blobName, + failureCode, + failureSubCode, + isPermanent, + message, + cause, + ) { override val message: String - get() = creationMessage ?: "The stream provided for upload is invalid - $failureSubCode." + get() = + creationMessage + ?: "The stream provided for upload is invalid - $failureSubCode." } class UploadSizeLimitExceededException( @@ -197,7 +225,16 @@ class UploadSizeLimitExceededException( isPermanent: Boolean? = true, message: String? = null, cause: Throwable? 
= null, -) : UploadFailedException(fileName, blobName, failureCode, failureSubCode, isPermanent, message, cause) { +) : + UploadFailedException( + fileName, + blobName, + failureCode, + failureSubCode, + isPermanent, + message, + cause, + ) { override val message: String get() = creationMessage diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt index 50ecde5d3..bc8e9d377 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt @@ -3,7 +3,10 @@ package com.microsoft.azure.kusto.ingest.v2.common.models import java.time.OffsetDateTime -/** Represents a token credentials holder, capable (at least) of authenticating over an HTTPS "Authorization" header. */ +/** + * Represents a token credentials holder, capable (at least) of authenticating + * over an HTTPS "Authorization" header. + */ data class KustoTokenCredentials( val tokenScheme: String? = null, val tokenValue: String? = null, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index e9d910eb8..fdfe7adf4 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -13,20 +13,27 @@ object PathUtils { private val URI_FRAGMENT_SEPARATORS = charArrayOf('?', '#', ';') // Only allow a-z, A-Z, 0-9, and hyphen (-) in the sanitized file name. 
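// Note on FORBIDDEN_CHARS below: `\w` matches [a-zA-Z0-9_], so the sanitizer
// keeps underscores as well as hyphens (the comment above is slightly narrower
// than the regex), and CASE_INSENSITIVE is a no-op for this character class.
// A self-contained sketch of the sanitize step, mirroring this hunk (sketch
// only, not part of the patch):

import java.util.regex.Pattern

private val FORBIDDEN = Pattern.compile("[^\\w-]")
private const val SUFFIX = "__trunc"

fun sanitizeSketch(name: String, maxSize: Int): String {
    // Replace every character outside [a-zA-Z0-9_-] with a hyphen.
    var s = FORBIDDEN.matcher(name).replaceAll("-")
    if (s.length > maxSize) {
        // Truncate and mark the truncation explicitly.
        s = s.take(maxSize - SUFFIX.length) + SUFFIX
    }
    return s
}

fun main() {
    println(sanitizeSketch("my file (1).csv", 64)) // my-file--1--csv
    println(sanitizeSketch("a".repeat(80), 16)) // aaaaaaaaa__trunc
}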
- private val FORBIDDEN_CHARS = Pattern.compile("[^\\w-]", Pattern.CASE_INSENSITIVE) + private val FORBIDDEN_CHARS = + Pattern.compile("[^\\w-]", Pattern.CASE_INSENSITIVE) fun sanitizeFileName(baseName: String?, sourceId: String?): String { val base = getBasename(baseName) val fileNameSegment = sanitize(base, FILE_NAME_SEGMENT_MAX_LENGTH) - val baseNamePart = if (!base.isNullOrEmpty()) "_$fileNameSegment" else "" - return sanitize(sourceId, TOTAL_TWO_SEGMENT_MAX_LENGTH - fileNameSegment.length) + baseNamePart + val baseNamePart = + if (!base.isNullOrEmpty()) "_$fileNameSegment" else "" + return sanitize( + sourceId, + TOTAL_TWO_SEGMENT_MAX_LENGTH - fileNameSegment.length, + ) + baseNamePart } private fun sanitize(name: String?, maxSize: Int): String { if (name.isNullOrEmpty()) return "" var sanitized = FORBIDDEN_CHARS.matcher(name).replaceAll("-") if (sanitized.length > maxSize) { - sanitized = sanitized.take(maxSize - TRUNCATION_SUFFIX.length) + TRUNCATION_SUFFIX + sanitized = + sanitized.take(maxSize - TRUNCATION_SUFFIX.length) + + TRUNCATION_SUFFIX } return sanitized } @@ -36,14 +43,22 @@ object PathUtils { // Ingest.V2.Dotnet_20250702080158084_874b2e9373414f64aa5a9f9c0d240b07_file_e493b23d-684f-4f4c-8ba8-3edfaca09427_dataset-json.multijson.gz fun createFileNameForUpload(name: String): String { val timestamp = - java.time.format.DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSS") - .format(java.time.Instant.now().atZone(java.time.ZoneOffset.UTC)) - return PREFIX + "_$timestamp" + "_${UUID.randomUUID().toString().replace("-", "")}" + "_$name" + java.time.format.DateTimeFormatter.ofPattern( + "yyyyMMddHHmmssSSS", + ) + .format( + java.time.Instant.now() + .atZone(java.time.ZoneOffset.UTC), + ) + return PREFIX + + "_$timestamp" + + "_${UUID.randomUUID().toString().replace("-", "")}" + + "_$name" } /** - * Returns the base name of the file, with extensions and without any path. Works for both local paths and URLs. - * Examples: + * Returns the base name of the file, with extensions and without any path. + * Works for both local paths and URLs. Examples: * - "C:\path\to\file.csv.gz" -> "file.csv.gz" * - "https://example.com/path/to/file.csv.gz" -> "file.csv.gz" */ @@ -57,7 +72,8 @@ object PathUtils { } if (uriObj == null || !uriObj.isAbsolute) { // Not a valid absolute URI, treat as path - return uri.substringAfterLast('/', uri).substringAfterLast('\\', uri) + return uri.substringAfterLast('/', uri) + .substringAfterLast('\\', uri) } // For web URIs, extract last segment of the path, remove query/fragment val path = uriObj.path ?: "" diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index f103fc199..144e3df43 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -10,7 +10,12 @@ class BlobSource : IngestionSource { format: DataFormat, compression: CompressionType? = null, sourceId: String? = null, - ) : super(format, compression ?: ExtendedDataSourceCompressionType.detectFromUri(url), url, sourceId) { + ) : super( + format, + compression ?: ExtendedDataSourceCompressionType.detectFromUri(url), + url, + sourceId, + ) { this.url = url this.exactSize = null } @@ -19,7 +24,12 @@ class BlobSource : IngestionSource { url: String, localSource: LocalSource, exactSize: Int? 
= null, - ) : super(localSource.format, localSource.compressionType, url, localSource.sourceId) { + ) : super( + localSource.format, + localSource.compressionType, + url, + localSource.sourceId, + ) { this.url = url this.exactSize = exactSize } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt index df42f719e..71af76202 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt @@ -32,7 +32,11 @@ enum class DataFormat( } fun isBinaryFormat(): Boolean { - return this == AVRO || this == APACHEAVRO || this == PARQUET || this == SSTREAM || this == ORC + return this == AVRO || + this == APACHEAVRO || + this == PARQUET || + this == SSTREAM || + this == ORC } fun isJsonFormat(): Boolean { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt index 19fc8649d..7c585d3a9 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt @@ -14,8 +14,11 @@ abstract class IngestionSource( private set fun initName(baseName: String? = null) { - val type = this::class.simpleName?.lowercase()?.removeSuffix("source") ?: "unknown" - name = "${type}_${PathUtils.sanitizeFileName(baseName, sourceId)}${format.toKustoValue()}$compressionType" + val type = + this::class.simpleName?.lowercase()?.removeSuffix("source") + ?: "unknown" + name = + "${type}_${PathUtils.sanitizeFileName(baseName, sourceId)}${format.toKustoValue()}$compressionType" } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index a16682e34..acab0ecca 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -17,7 +17,9 @@ abstract class LocalSource( // val leaveOpen: Boolean // Already a constructor property internal val shouldCompress: Boolean - get() = (compressionType == CompressionType.NONE) && !format.isBinaryFormat() + get() = + (compressionType == CompressionType.NONE) && + !format.isBinaryFormat() abstract fun data(): InputStream diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index aa77c5d85..e37ff2c57 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -1,11 +1,8 @@ /* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2 -import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProvider -import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider import kotlinx.coroutines.runBlocking - import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertNotNull @@ -13,18 +10,29 @@ class ConfigurationApiWrapperTest { @Test fun `run e2e 
test with an actual cluster`(): Unit = runBlocking { - val actualTokenProvider = AzCliTokenCredentialsProvider() // Replace with a real token provider + val actualTokenProvider = + AzCliTokenCredentialsProvider() // Replace with a real token provider val cluster = System.getenv("ENGINE_CONNECTION_STRING") - val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) + val actualWrapper = + ConfigurationApiWrapper(cluster, actualTokenProvider, true) try { val config = actualWrapper.getConfigurationDetails() println("E2E Test Success: Retrieved configuration: $config") assertNotNull(config, "Configuration should not be null") - assertNotNull(config.containerSettings, "ContainerSettings should not be null") - assertNotNull(config.containerSettings.preferredUploadMethod, "Preferred upload should not be null") + assertNotNull( + config.containerSettings, + "ContainerSettings should not be null", + ) + assertNotNull( + config.containerSettings.preferredUploadMethod, + "Preferred upload should not be null", + ) config.containerSettings.containers?.forEach { containerInfo -> run { - assertNotNull(containerInfo.path, "Container path should not be null") + assertNotNull( + containerInfo.path, + "Container path should not be null", + ) } } } catch (ex: Exception) { diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt index 5073a1d17..da120cac8 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt @@ -6,7 +6,9 @@ import org.junit.Test import kotlin.test.assertEquals class TestIngestRetryPolicy : IngestRetryPolicy { - override fun moveNext(retryNumber: UInt): Pair { + override fun moveNext( + retryNumber: UInt, + ): Pair { // Allow up to 3 attempts, 100ms delay return Pair(retryNumber < 3u, java.time.Duration.ofMillis(100)) } From 68c171440a6c4a28f903974c6efc66700bf8a97a Mon Sep 17 00:00:00 2001 From: ramacg Date: Tue, 9 Sep 2025 11:15:52 +0530 Subject: [PATCH 09/50] *Additional edits --- ingest-v2/pom.xml | 28 +++++-------------- .../ingest/v2/ConfigurationApiWrapper.kt | 16 ++++++----- .../kusto/ingest/v2/KustoBaseApiClient.kt | 6 ++-- ingest-v2/src/main/resources/openapi.yaml | 2 +- .../ingest/v2/ConfigurationApiWrapperTest.kt | 9 ++++-- 5 files changed, 27 insertions(+), 34 deletions(-) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index e47f186c4..9867a473b 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -54,27 +54,6 @@ com.azure azure-identity - - - - - - - - - - - - - - - - - - - - - org.openapitools jackson-databind-nullable @@ -166,6 +145,13 @@ compile + + test-compile + test-compile + + test-compile + + diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 6a94843ed..212f14d74 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -6,24 +6,26 @@ import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import 
com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import org.slf4j.LoggerFactory class ConfigurationApiWrapper( - override val clusterUrl: String, + override val dmUrl: String, override val tokenCredentialsProvider: TokenCredentialsProvider, override val skipSecurityChecks: Boolean = false, ) : KustoBaseApiClient( - clusterUrl, + dmUrl, tokenCredentialsProvider, skipSecurityChecks, ) { private val logger = - org.slf4j.LoggerFactory.getLogger( + LoggerFactory.getLogger( ConfigurationApiWrapper::class.java, ) + private val baseUrl = "$dmUrl/v1/rest/ingestion/configuration" private val api: DefaultApi = DefaultApi( - baseUrl = "$clusterUrl/v1/rest/ingest", + baseUrl = dmUrl, httpClientConfig = setupConfig, ) @@ -32,7 +34,7 @@ class ConfigurationApiWrapper( api.v1RestIngestionConfigurationGet() if (configurationHttpResponse.success) { logger.info( - "Successfully retrieved configuration details from $clusterUrl with status: ${configurationHttpResponse.status}", + "Successfully retrieved configuration details from $dmUrl with status: ${configurationHttpResponse.status}", ) logger.debug( "Configuration details: {}", @@ -41,11 +43,11 @@ class ConfigurationApiWrapper( return configurationHttpResponse.body() } else { logger.error( - "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, " + + "Failed to retrieve configuration details from $baseUrl. Status: ${configurationHttpResponse.status}, " + "Body: ${configurationHttpResponse.body()}", ) throw IngestException( - "Failed to retrieve configuration details from $clusterUrl. Status: ${configurationHttpResponse.status}, " + + "Failed to retrieve configuration details from $baseUrl. 
Status: ${configurationHttpResponse.status}, " + "Body: ${configurationHttpResponse.body()}", ) } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 0b849af75..a44675ea0 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -12,7 +12,7 @@ import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json open class KustoBaseApiClient( - open val clusterUrl: String, + open val dmUrl: String, open val tokenCredentialsProvider: TokenCredentialsProvider, open val skipSecurityChecks: Boolean = false, ) { @@ -30,7 +30,7 @@ open class KustoBaseApiClient( loadTokens { // Always null so refreshTokens is always called tokenCredentialsProvider - .getCredentialsAsync(clusterUrl) + .getCredentialsAsync(dmUrl) .tokenValue ?.let { BearerTokens( @@ -42,7 +42,7 @@ open class KustoBaseApiClient( refreshTokens { // Always null so refreshTokens is always called tokenCredentialsProvider - .getCredentialsAsync(clusterUrl) + .getCredentialsAsync(dmUrl) .tokenValue ?.let { BearerTokens( diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml index 8236b20c4..21b940357 100644 --- a/ingest-v2/src/main/resources/openapi.yaml +++ b/ingest-v2/src/main/resources/openapi.yaml @@ -291,7 +291,8 @@ components: maxBlobsPerBatch: type: integer maxDataSize: type: integer + format: int64 preferredIngestionMethod: type: string nullable: true diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index e37ff2c57..91be141bb 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -5,19 +5,24 @@ import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProv import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertNotNull +import org.slf4j.LoggerFactory class ConfigurationApiWrapperTest { + private val logger = + LoggerFactory.getLogger( + ConfigurationApiWrapperTest::class.java, + ) @Test fun `run e2e test with an actual cluster`(): Unit = runBlocking { val actualTokenProvider = AzCliTokenCredentialsProvider() // Replace with a real token provider - val cluster = System.getenv("ENGINE_CONNECTION_STRING") + val cluster = System.getenv("DM_CONNECTION_STRING") val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) try { val config = actualWrapper.getConfigurationDetails() - println("E2E Test Success: Retrieved configuration: $config") + logger.debug("E2E Test Success: Retrieved configuration: $config") assertNotNull(config, "Configuration should not be null") assertNotNull( config.containerSettings, From 18afb7b11457eb11eae1967270d662838005702e Mon Sep 17 00:00:00 2001 From: ramacg Date: Tue, 9 Sep 2025 11:17:31 +0530 Subject: [PATCH 10/50] *Additional edits --- .../azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt
b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index 91be141bb..ec0b1445e 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -22,7 +22,7 @@ class ConfigurationApiWrapperTest { ConfigurationApiWrapper(cluster, actualTokenProvider, true) try { val config = actualWrapper.getConfigurationDetails() - logger.debug("E2E Test Success: Retrieved configuration: $config") + logger.debug("E2E Test Success: Retrieved configuration: {}", config) assertNotNull(config, "Configuration should not be null") assertNotNull( config.containerSettings, @@ -41,7 +41,7 @@ class ConfigurationApiWrapperTest { } } } catch (ex: Exception) { - println("E2E Test Failed: ${ex.message}") + logger.error("E2E Test Failed", ex) throw ex } } From 3daa586c7ef41a78518472f01017bef7f86f07ce Mon Sep 17 00:00:00 2001 From: ramacg Date: Tue, 9 Sep 2025 10:09:56 +0000 Subject: [PATCH 11/50] *Add tests --- ingest-v2/pom.xml | 13 +-- .../ingest/v2/ConfigurationApiWrapper.kt | 16 +-- .../ingest/v2/ConfigurationApiWrapperTest.kt | 28 ++++-- .../common/DefaultConfigurationCacheTest.kt | 97 +++++++++++++++++++ .../v2/common/RetryPolicyExtensionsTest.kt | 33 ------- 5 files changed, 123 insertions(+), 64 deletions(-) create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt delete mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 9867a473b..ccae958cd 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -42,7 +42,7 @@ io.ktor - ktor-serialization-jackson + ktor-client-java-jvm ${ktor_version} @@ -54,17 +54,6 @@ com.azure azure-identity - - org.openapitools - jackson-databind-nullable - 0.2.7 - - - io.ktor - ktor-server-test-host-jvm - ${ktor_version} - test - org.jetbrains.kotlin kotlin-test-junit diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 212f14d74..88e3b4628 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -12,22 +12,12 @@ class ConfigurationApiWrapper( override val dmUrl: String, override val tokenCredentialsProvider: TokenCredentialsProvider, override val skipSecurityChecks: Boolean = false, -) : - KustoBaseApiClient( - dmUrl, - tokenCredentialsProvider, - skipSecurityChecks, - ) { +) : KustoBaseApiClient(dmUrl, tokenCredentialsProvider, skipSecurityChecks) { private val logger = - LoggerFactory.getLogger( - ConfigurationApiWrapper::class.java, - ) + LoggerFactory.getLogger(ConfigurationApiWrapper::class.java) private val baseUrl = "$dmUrl/v1/rest/ingestion/configuration" private val api: DefaultApi = - DefaultApi( - baseUrl = dmUrl, - httpClientConfig = setupConfig, - ) + DefaultApi(baseUrl = dmUrl, httpClientConfig = setupConfig) suspend fun getConfigurationDetails(): ConfigurationResponse { val configurationHttpResponse: HttpResponse = diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index 
ec0b1445e..93fdc2c76 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -1,6 +1,7 @@ /* (C)2025 */ package com.microsoft.azure.kusto.ingest.v2 +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProvider import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Test @@ -10,9 +11,8 @@ import org.slf4j.LoggerFactory class ConfigurationApiWrapperTest { private val logger = - LoggerFactory.getLogger( - ConfigurationApiWrapperTest::class.java, - ) + LoggerFactory.getLogger(ConfigurationApiWrapperTest::class.java) + @Test fun `run e2e test with an actual cluster`(): Unit = runBlocking { val actualTokenProvider = @@ -21,9 +21,25 @@ class ConfigurationApiWrapperTest { val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) try { - val config = actualWrapper.getConfigurationDetails() - logger.debug("E2E Test Success: Retrieved configuration: {}", config) - assertNotNull(config, "Configuration should not be null") + val defaultCachedConfig = + DefaultConfigurationCache( + configurationProvider = { + actualWrapper.getConfigurationDetails() + }, + ) + logger.debug( + "E2E Test Success: Retrieved configuration: {}", + defaultCachedConfig, + ) + assertNotNull( + defaultCachedConfig, + "DefaultConfiguration should not be null", + ) + val config = defaultCachedConfig.getConfiguration() + assertNotNull( + defaultCachedConfig, + "Configuration should not be null", + ) assertNotNull( config.containerSettings, "ContainerSettings should not be null", diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt new file mode 100644 index 000000000..e5e6f8e0f --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt @@ -0,0 +1,97 @@ +/* (C)2025 */ +package com.microsoft.azure.kusto.ingest.v2.common + +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo +import com.microsoft.azure.kusto.ingest.v2.models.ContainerSettings +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Test +import java.time.Duration + +class DefaultConfigurationCacheTest { + + // create a function that returns a ConfigurationResponse + // First return value1 then value2 + private var callCount = 0 + + private suspend fun mockConfigurationProvider(): ConfigurationResponse { + callCount++ + return if (callCount == 1) { + ConfigurationResponse( + containerSettings = + ContainerSettings( + preferredUploadMethod = "REST", + containers = + listOf( + ContainerInfo( + path = + "https://example1.blob.core.windows.net/container1", + ), + ), + ), + ) + } else { + ConfigurationResponse( + containerSettings = + ContainerSettings( + preferredUploadMethod = "QUEUE", + containers = + listOf( + ContainerInfo( + path = + "https://example2.blob.core.windows.net/container2", + ), + ), + ), + ) + } + } + + @Test + fun `when cache expires, configuration API is invoked`(): Unit = + runBlocking { + val refreshInterval = Duration.ofMillis(500) // 0.5 
seconds + val cache = + DefaultConfigurationCache( + refreshInterval = refreshInterval, + configurationProvider = + ::mockConfigurationProvider, + ) + + // First call should fetch from provider + val config1 = cache.getConfiguration() + assertNotNull(config1) + assertEquals( + "REST", + config1.containerSettings?.preferredUploadMethod, + ) + assertEquals(1, callCount) + + // Wait less than refresh interval and call again, should return cached value + Thread.sleep(300) + val config2 = cache.getConfiguration() + assertNotNull(config2) + assertEquals( + "REST", + config2.containerSettings?.preferredUploadMethod, + ) + assertEquals( + 1, + callCount, + ) // callCount should not have increased + + // Wait for cache to expire + Thread.sleep(600) + val config3 = cache.getConfiguration() + assertNotNull(config3) + assertEquals( + "QUEUE", + config3.containerSettings?.preferredUploadMethod, + ) + assertEquals(2, callCount) // callCount should have increased + + cache.close() + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt deleted file mode 100644 index da120cac8..000000000 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensionsTest.kt +++ /dev/null @@ -1,33 +0,0 @@ -/* (C)2025 */ -package com.microsoft.azure.kusto.ingest.v2.common - -import kotlinx.coroutines.test.runTest -import org.junit.Test -import kotlin.test.assertEquals - -class TestIngestRetryPolicy : IngestRetryPolicy { - override fun moveNext( - retryNumber: UInt, - ): Pair { - // Allow up to 3 attempts, 100ms delay - return Pair(retryNumber < 3u, java.time.Duration.ofMillis(100)) - } -} - -class RetryPolicyExtensionsTest { - @Test - fun testRunWithRetrySuccessAfterRetry() = runTest { - val policy = TestIngestRetryPolicy() - var callCount = 0 - val result = - policy.runWithRetry( - action = { attempt -> - callCount++ - if (attempt < 2u) throw RuntimeException("Fail") - "Success" - }, - ) - assertEquals(2, callCount) - assertEquals("Success", result) - } -} From 828e5b97b86c714a5d31374f94656fce996ac711 Mon Sep 17 00:00:00 2001 From: ramacg Date: Tue, 9 Sep 2025 16:39:21 +0000 Subject: [PATCH 12/50] *Add changes to content negotiation --- ingest-v2/pom.xml | 17 +++++++---- .../ingest/v2/ConfigurationApiWrapper.kt | 3 +- .../kusto/ingest/v2/KustoBaseApiClient.kt | 16 ++++++++-- .../ingest/v2/common/ConfigurationCache.kt | 3 +- .../ingest/v2/common/IngestRetryPolicy.kt | 3 +- .../ingest/v2/common/RetryPolicyExtensions.kt | 3 +- .../auth/AzCliTokenCredentialsProvider.kt | 3 +- .../common/auth/TokenCredentialsProvider.kt | 3 +- .../v2/common/exceptions/IngestException.kt | 3 +- .../ingest/v2/common/models/ClientDetails.kt | 3 +- .../v2/common/models/KustoTokenCredentials.kt | 3 +- .../kusto/ingest/v2/common/utils/PathUtils.kt | 3 +- .../ingest/v2/container/ContainerBase.kt | 3 +- .../v2/container/UploadContainerBase.kt | 3 +- .../kusto/ingest/v2/source/BlobSource.kt | 3 +- .../kusto/ingest/v2/source/CompressionType.kt | 3 +- .../ingest/v2/source/DataSourceFormat.kt | 3 +- .../kusto/ingest/v2/source/IngestionSource.kt | 3 +- .../kusto/ingest/v2/source/LocalSource.kt | 3 +- .../ingest/v2/ConfigurationApiWrapperTest.kt | 29 +++++++++++++++---- .../common/DefaultConfigurationCacheTest.kt | 3 +- 21 files changed, 84 insertions(+), 32 deletions(-) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index ccae958cd..460f59e44 100644 --- 
a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -56,7 +56,7 @@ org.jetbrains.kotlin - kotlin-test-junit + kotlin-test-junit5 ${kotlin_version} test @@ -72,6 +72,12 @@ 1.14.5 test + + org.junit.jupiter + junit-jupiter-params + ${junit.version} + test + ${project.basedir}/src/main/kotlin @@ -155,7 +161,6 @@ - org.openapitools openapi-generator-maven-plugin @@ -177,13 +182,10 @@ jvm-ktor - - com.microsoft.azure.kusto.ingest.v2 true java8 - true kotlinx_serialization true @@ -227,7 +229,10 @@ - /* (C)$YEAR */ + +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 88e3b4628..51575f9cb 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index a44675ea0..26263c1bf 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider @@ -10,6 +11,7 @@ import io.ktor.client.plugins.auth.providers.bearer import io.ktor.client.plugins.contentnegotiation.ContentNegotiation import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json +import kotlinx.serialization.json.Json open class KustoBaseApiClient( open val dmUrl: String, @@ -53,6 +55,16 @@ open class KustoBaseApiClient( } } } - config.install(ContentNegotiation) { json() } + config.install(ContentNegotiation) { + json( + Json { + ignoreUnknownKeys = true + // Optionally add other settings if needed: + // isLenient = true + // allowSpecialFloatingPointValues = true + // useArrayPolymorphism = true + }, + ) + } } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 5441783d2..6e7448d96 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
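// Why ignoreUnknownKeys is set above: kotlinx.serialization's default Json is
// strict and throws on any property it does not recognize, so a field added on
// the service side would break older clients. A minimal sketch of the
// behavior (SketchContainer is an illustrative stand-in, not the generated
// model):

import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json

@Serializable
data class SketchContainer(val path: String? = null)

fun main() {
    val lenient = Json { ignoreUnknownKeys = true }
    val payload = """{"path":"https://acct.blob.core.windows.net/c1","newField":42}"""
    // Succeeds and silently drops "newField".
    println(lenient.decodeFromString<SketchContainer>(payload))
    // Json.Default.decodeFromString<SketchContainer>(payload) would throw a
    // SerializationException because "newField" is unknown.
}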
package com.microsoft.azure.kusto.ingest.v2.common import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index 73697108b..177c981ee 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common import java.time.Duration diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt index 7dc7a9cf5..eaaa97918 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common import kotlinx.coroutines.CancellationException diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt index b2830ed10..bbb335193 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.auth import com.azure.core.credential.TokenRequestContext diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt index 083890c27..533b3564a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.auth import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt index 6287663cb..e0527ef04 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
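// The unit test exercising runWithRetry was deleted in PATCH 11, so for
// reference this is how a policy drives it, reconstructed from that test.
// runWithRetrySketch is a simplified stand-in for the library extension, and
// the 1-based attempt numbering is inferred from the deleted test's
// assertions (two calls total: one failure, then one success):

import java.time.Duration
import kotlinx.coroutines.delay
import kotlinx.coroutines.runBlocking

interface RetryPolicySketch {
    // Returns (shouldRetry, delayBeforeNextAttempt) for a given attempt number.
    fun moveNext(retryNumber: UInt): Pair<Boolean, Duration>
}

suspend fun <T> RetryPolicySketch.runWithRetrySketch(action: suspend (UInt) -> T): T {
    var attempt = 1u
    while (true) {
        try {
            return action(attempt)
        } catch (e: Exception) {
            val (shouldRetry, pause) = moveNext(attempt)
            if (!shouldRetry) throw e
            delay(pause.toMillis())
            attempt++
        }
    }
}

fun main() = runBlocking {
    val policy = object : RetryPolicySketch {
        override fun moveNext(retryNumber: UInt) =
            Pair(retryNumber < 3u, Duration.ofMillis(100))
    }
    // Fails on attempt 1, succeeds on attempt 2.
    println(policy.runWithRetrySketch { attempt -> if (attempt < 2u) error("Fail") else "Success" })
}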
package com.microsoft.azure.kusto.ingest.v2.common.exceptions open class IngestException( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt index 6175b0a49..972bd9366 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.models data class ClientDetails(val name: String, val version: String) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt index bc8e9d377..bedb49131 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.models import java.time.OffsetDateTime diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index fdfe7adf4..04379f62b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.utils import java.net.URI diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt index ccc93a611..ee7997003 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.container interface ContainerBase { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt index 727665014..4f0f1e6e6 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
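// For tests that should not shell out to the Azure CLI, the provider contract
// can be satisfied by a fixed-token fake. A sketch against the library types
// shown earlier in this series (imports of TokenCredentialsProvider and
// KustoTokenCredentials assumed; not part of the patch):

import java.time.OffsetDateTime

class StaticTokenCredentialsProvider(
    private val token: String,
    private val expiresOn: OffsetDateTime = OffsetDateTime.now().plusHours(1),
) : TokenCredentialsProvider {
    override suspend fun getCredentialsAsync(targetResource: String) =
        KustoTokenCredentials("JWT", token, expiresOn)

    // Tenant selection is meaningless for a fixed token; delegate to the base overload.
    override suspend fun getCredentialsAsync(targetResource: String, tenantId: String) =
        getCredentialsAsync(targetResource)

    override suspend fun getCredentialsAsync(
        targetResource: String,
        retries: Int,
        tenantId: String?,
    ) = getCredentialsAsync(targetResource)
}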
package com.microsoft.azure.kusto.ingest.v2.container import java.io.InputStream diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index 144e3df43..3b877e2e4 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.source class BlobSource : IngestionSource { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt index 5dbd9328a..4f7e3b94a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.source enum class CompressionType { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt index 71af76202..2ed9702b6 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.source enum class DataFormat( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt index 7c585d3a9..17d160675 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.source import com.microsoft.azure.kusto.ingest.v2.common.utils.PathUtils diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index acab0ecca..c88bcb70c 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
package com.microsoft.azure.kusto.ingest.v2.source import java.io.InputStream diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index 93fdc2c76..54f1de90a 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -1,23 +1,40 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProvider import kotlinx.coroutines.runBlocking -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.assertNotNull +import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.MethodSource import org.slf4j.LoggerFactory +import java.util.stream.Stream +import kotlin.test.assertNotNull +@TestInstance(TestInstance.Lifecycle.PER_CLASS) class ConfigurationApiWrapperTest { private val logger = LoggerFactory.getLogger(ConfigurationApiWrapperTest::class.java) - @Test - fun `run e2e test with an actual cluster`(): Unit = runBlocking { + private fun endpointAndExceptionClause(): Stream { + return Stream.of( + Arguments.of(System.getenv("DM_CONNECTION_STRING"), false), + Arguments.of("https://help.kusto.windows.net", true), + ) + } + + @ParameterizedTest + @MethodSource("endpointAndExceptionClause") + fun `run e2e test with an actual cluster`( + cluster: String, + isException: Boolean, + ): Unit = runBlocking { val actualTokenProvider = AzCliTokenCredentialsProvider() // Replace with a real token provider - val cluster = System.getenv("DM_CONNECTION_STRING") + // val cluster = System.getenv("DM_CONNECTION_STRING") val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) try { diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt index e5e6f8e0f..f7e6a0259 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt @@ -1,4 +1,5 @@ -/* (C)2025 */ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
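// A detail in the test above worth calling out: @TestInstance(PER_CLASS) is
// what lets endpointAndExceptionClause() be an instance method; under JUnit 5's
// default PER_METHOD lifecycle a @MethodSource factory must be static (e.g.
// @JvmStatic in a companion object). Minimal sketch of the wiring, with
// illustrative endpoint values:

import java.util.stream.Stream
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.MethodSource

@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class EndpointMatrixSketch {
    fun endpoints(): Stream<Arguments> =
        Stream.of(
            Arguments.of("https://ingest-mycluster.kusto.windows.net", false),
            Arguments.of("https://help.kusto.windows.net", true),
        )

    @ParameterizedTest
    @MethodSource("endpoints")
    fun `dm endpoint succeeds, engine endpoint fails`(url: String, expectFailure: Boolean) {
        // Replace with real assertions; this sketch only demonstrates the wiring.
        println("$url -> expectFailure=$expectFailure")
    }
}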
package com.microsoft.azure.kusto.ingest.v2.common import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse From d2fac0ba64cdc108e19b9d73584218f2a3f4ce72 Mon Sep 17 00:00:00 2001 From: ramacg Date: Wed, 10 Sep 2025 01:57:32 +0000 Subject: [PATCH 13/50] *Update tests and POM --- ingest-v2/pom.xml | 52 ++++++++++++------- .../ingest/v2/ConfigurationApiWrapperTest.kt | 22 +++++--- 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 460f59e44..d001245c9 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -139,6 +139,12 @@ compile + + + ${project.basedir}/src/main/kotlin + ${project.build.directory}/generated-sources/openapi/src/main/kotlin + + test-compile @@ -146,6 +152,11 @@ test-compile + + + ${project.basedir}/src/test/kotlin + + @@ -188,7 +199,8 @@ java8 true kotlinx_serialization - true + false + true jvm-ktor @@ -237,25 +249,25 @@ - - org.codehaus.mojo - build-helper-maven-plugin - 3.6.1 - - - add-openapi-generated-sources - generate-sources - - add-source - - - - ${project.build.directory}/generated-sources/openapi/src/main/kotlin - - - - - + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index 54f1de90a..38ec80ce6 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -4,8 +4,10 @@ package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProvider +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.api.assertThrows import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource @@ -37,7 +39,17 @@ class ConfigurationApiWrapperTest { // val cluster = System.getenv("DM_CONNECTION_STRING") val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) - try { + if (isException) { + // assert the call to DefaultConfigurationCache throws + assertThrows { + DefaultConfigurationCache( + configurationProvider = { + actualWrapper.getConfigurationDetails() + }, + ) + .getConfiguration() + } + } else { val defaultCachedConfig = DefaultConfigurationCache( configurationProvider = { @@ -53,10 +65,7 @@ class ConfigurationApiWrapperTest { "DefaultConfiguration should not be null", ) val config = defaultCachedConfig.getConfiguration() - assertNotNull( - defaultCachedConfig, - "Configuration should not be null", - ) + assertNotNull(config, "Configuration should not be null") assertNotNull( config.containerSettings, "ContainerSettings should not be null", @@ -73,9 +82,6 @@ class ConfigurationApiWrapperTest { ) } } - } catch (ex: Exception) { - logger.error("E2E Test Failed", ex) - throw ex } } } From 8710f02307dcbc0c3ab0fe0b465a75cf0f6e8c2e Mon Sep 17 00:00:00 2001 From: ramacg Date: Wed, 10 Sep 2025 12:15:08 +0000 Subject: [PATCH 14/50] *Minor edits --- .../v2/container/BlobUploadContainer.kt | 30 +++++++++++++++++++ .../ingest/v2/container/ContainerBase.kt | 8 ----- 
.../v2/container/UploadContainerBase.kt | 2 +- 3 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt new file mode 100644 index 000000000..2554097c8 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt @@ -0,0 +1,30 @@ +package com.microsoft.azure.kusto.ingest.v2.container + +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import org.jetbrains.annotations.NotNull + +class BlobUploadContainer(val configResponse: @NotNull ConfigurationResponse) : UploadContainerBase { + + // choose a random container from the configResponse.containerSettings.containers + + override suspend fun uploadAsync(name: String, stream: java.io.InputStream): String { + // Placeholder for actual upload logic + // In a real implementation, this would upload the stream to the blob storage + // and return the URI of the uploaded blob. + // check if the configResponse has containerSettings + val noUploadLocation = configResponse.containerSettings == null || + (configResponse.containerSettings.containers?.isEmpty() == true + && configResponse.containerSettings.lakeFolders?.isEmpty() == true) + if (noUploadLocation) { + throw IngestException("No container settings available in the configuration response") + } + // check if containers is null or empty , if so use lakeFolders and choose one randomly + val targetPath = if (configResponse.containerSettings.containers.isNullOrEmpty()) { + configResponse.containerSettings.lakeFolders!!.random() + } else { + configResponse.containerSettings.containers.random() + } + return "${targetPath.path}/$name" + } +} \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt deleted file mode 100644 index ee7997003..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
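// An illustrative way the placeholder below might eventually be filled in,
// assuming the container path returned by the service carries a SAS token and
// that azure-storage-blob is added as a dependency (it is not in this pom).
// uploadSketch is a hypothetical helper, not part of the patch:

import com.azure.core.util.BinaryData
import com.azure.storage.blob.BlobClientBuilder
import java.io.InputStream

fun uploadSketch(containerPathWithSas: String, name: String, stream: InputStream): String {
    // Split "https://acct.../container?sv=..." into base path and SAS query.
    val parts = containerPathWithSas.split("?", limit = 2)
    val blobUrl = if (parts.size == 2) "${parts[0]}/$name?${parts[1]}" else "${parts[0]}/$name"
    val blobClient = BlobClientBuilder().endpoint(blobUrl).buildClient()
    // Upload the stream, overwriting any existing blob with the same name.
    blobClient.upload(BinaryData.fromStream(stream), true)
    return blobUrl
}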
-package com.microsoft.azure.kusto.ingest.v2.container - -interface ContainerBase { - val uri: String - val name: String -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt index 4f0f1e6e6..585a4c5a3 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt @@ -4,6 +4,6 @@ package com.microsoft.azure.kusto.ingest.v2.container import java.io.InputStream -interface UploadContainerBase : ContainerBase { +interface UploadContainerBase { suspend fun uploadAsync(name: String, stream: InputStream): String } From a7403f7e7f7634b8d0f52a69e30960008a1edc86 Mon Sep 17 00:00:00 2001 From: ramacg Date: Wed, 10 Sep 2025 12:19:42 +0000 Subject: [PATCH 15/50] *Reformat code --- .../v2/container/BlobUploadContainer.kt | 41 +++++++++++++------ .../v2/container/UploadContainerBase.kt | 2 +- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt index 2554097c8..2cec6cb21 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt @@ -1,30 +1,47 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.container import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import org.jetbrains.annotations.NotNull -class BlobUploadContainer(val configResponse: @NotNull ConfigurationResponse) : UploadContainerBase { +class BlobUploadContainer(val configResponse: @NotNull ConfigurationResponse) : + UploadContainerBase { // choose a random container from the configResponse.containerSettings.containers - override suspend fun uploadAsync(name: String, stream: java.io.InputStream): String { + override suspend fun uploadAsync( + name: String, + stream: java.io.InputStream, + ): String { // Placeholder for actual upload logic // In a real implementation, this would upload the stream to the blob storage // and return the URI of the uploaded blob. 
// check if the configResponse has containerSettings - val noUploadLocation = configResponse.containerSettings == null || - (configResponse.containerSettings.containers?.isEmpty() == true - && configResponse.containerSettings.lakeFolders?.isEmpty() == true) + val noUploadLocation = + configResponse.containerSettings == null || + ( + configResponse.containerSettings.containers + ?.isEmpty() == true && + configResponse.containerSettings.lakeFolders + ?.isEmpty() == true + ) if (noUploadLocation) { - throw IngestException("No container settings available in the configuration response") + throw IngestException( + "No container settings available in the configuration response", + ) } // check if containers is null or empty , if so use lakeFolders and choose one randomly - val targetPath = if (configResponse.containerSettings.containers.isNullOrEmpty()) { - configResponse.containerSettings.lakeFolders!!.random() - } else { - configResponse.containerSettings.containers.random() - } + val targetPath = + if ( + configResponse.containerSettings.containers + .isNullOrEmpty() + ) { + configResponse.containerSettings.lakeFolders!!.random() + } else { + configResponse.containerSettings.containers.random() + } return "${targetPath.path}/$name" } -} \ No newline at end of file +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt index 585a4c5a3..34c08d076 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt @@ -4,6 +4,6 @@ package com.microsoft.azure.kusto.ingest.v2.container import java.io.InputStream -interface UploadContainerBase { +interface UploadContainerBase { suspend fun uploadAsync(name: String, stream: InputStream): String } From 57a151ea43c927e27e1fafa7150fcd6a2dc0e4d8 Mon Sep 17 00:00:00 2001 From: ramacg Date: Thu, 18 Sep 2025 11:49:29 +0000 Subject: [PATCH 16/50] * Remove gitignore --- ingest-v2/.gitignore | 49 -------------------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 ingest-v2/.gitignore diff --git a/ingest-v2/.gitignore b/ingest-v2/.gitignore deleted file mode 100644 index 4eb2a1222..000000000 --- a/ingest-v2/.gitignore +++ /dev/null @@ -1,49 +0,0 @@ -# Created by https://www.toptal.com/developers/gitignore/api/kotlin,maven -# Edit at https://www.toptal.com/developers/gitignore?templates=kotlin,maven - -### Kotlin ### -# Compiled class file -*.class - -# Log file -*.log - -# BlueJ files -*.ctxt - -# Mobile Tools for Java (J2ME) -.mtj.tmp/ - -# Package Files # -*.jar -*.war -*.nar -*.ear -*.zip -*.tar.gz -*.rar - -# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml -hs_err_pid* -replay_pid* - -### Maven ### -target/ -pom.xml.tag -pom.xml.releaseBackup -pom.xml.versionsBackup -pom.xml.next -release.properties -dependency-reduced-pom.xml -buildNumber.properties -.mvn/timing.properties -# https://github.com/takari/maven-wrapper#usage-without-binary-jar -.mvn/wrapper/maven-wrapper.jar - -# Eclipse m2e generated files -# Eclipse Core -.project -# JDT-specific (Eclipse Java Development Tools) -.classpath - -# End of https://www.toptal.com/developers/gitignore/api/kotlin,maven \ No newline at end of file From 9ccfd3d17bffb4bb813f155d65da3a696f15fcdf Mon Sep 17 00:00:00 2001 From: ramacg Date: Thu, 18 Sep 2025 13:31:12 
+0000 Subject: [PATCH 17/50] *Address some of the review comments --- ingest-v2/README.md | 4 +- ingest-v2/pom.xml | 104 ++++-------------- .../ingest/v2/ConfigurationApiWrapper.kt | 2 +- .../kusto/ingest/v2/common/utils/PathUtils.kt | 2 +- ingest-v2/src/main/resources/application.yaml | 6 - ingest-v2/src/main/resources/logback.xml | 12 -- ingest-v2/src/main/resources/openapi.yaml | 12 +- 7 files changed, 35 insertions(+), 107 deletions(-) delete mode 100644 ingest-v2/src/main/resources/application.yaml delete mode 100644 ingest-v2/src/main/resources/logback.xml diff --git a/ingest-v2/README.md b/ingest-v2/README.md index d90c4540b..1bedd86ab 100644 --- a/ingest-v2/README.md +++ b/ingest-v2/README.md @@ -13,7 +13,7 @@ Here are some useful links to get you started: Here's a list of features included in this project: | Name | Description | -| ------------------------------------------------------------------------|------------------------------------------------------------------------------------ | +|------------------------------------------------------------------------|------------------------------------------------------------------------------------| | [Content Negotiation](https://start.ktor.io/p/content-negotiation) | Provides automatic content conversion according to Content-Type and Accept headers | | [Routing](https://start.ktor.io/p/routing) | Provides a structured routing DSL | | [kotlinx.serialization](https://start.ktor.io/p/kotlinx-serialization) | Handles JSON serialization using kotlinx.serialization library | @@ -24,7 +24,7 @@ Here's a list of features included in this project: To build or run the project, use one of the following tasks: | Task | Description | -| --------------------------------------------------------------|------------------- | +|--------------------------------------------------------------|-------------------| | `mvn test` | Run the tests | | `mvn package` | Build the project | | `java -jar target/ingest-v2-0.0.1-jar-with-dependencies.jar` | Run the server | diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index d001245c9..7d09c0f8b 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -8,15 +8,17 @@ ingest-v2 official - 2.1.21 - 3.2.3 - 3.1.1 - 1.4.14 - 2.0.9 + 2.2.20 + 3.3.0 + 3.1.1 + 1.10.2 + 1.4.14 + 5.10.0 + 7.15.0 + 2.0.9 2.46.1 UTF-8 true - io.ktor.server.netty.EngineMain kusto-client @@ -28,27 +30,27 @@ io.ktor ktor-client-auth-jvm - ${ktor_version} + ${ktor.version} io.ktor ktor-client-content-negotiation-jvm - ${ktor_version} + ${ktor.version} io.ktor ktor-serialization-kotlinx-json-jvm - ${ktor_version} + ${ktor.version} io.ktor ktor-client-java-jvm - ${ktor_version} + ${ktor.version} org.slf4j slf4j-simple - ${slf4j_version} + ${slf4j.version} com.azure @@ -57,13 +59,13 @@ org.jetbrains.kotlin kotlin-test-junit5 - ${kotlin_version} + ${kotlin.version} test org.jetbrains.kotlinx kotlinx-coroutines-debug - 1.6.4 + ${kotlinx.coroutines.debug.version} test @@ -88,50 +90,10 @@ - - org.codehaus.mojo - exec-maven-plugin - 1.2.1 - - - - java - - - - - ${main.class} - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.6 - - - jar-with-dependencies - - - - true - ${main.class} - - - - - - assemble-all - package - - single - - - - org.jetbrains.kotlin kotlin-maven-plugin - ${kotlin_version} + ${kotlin.version} compile @@ -152,23 +114,19 @@ test-compile - - - ${project.basedir}/src/test/kotlin - - kotlinx-serialization + 1.8 org.jetbrains.kotlin kotlin-maven-serialization - ${kotlin_version} + ${kotlin.version} @@ -176,7 +134,7 @@ 
org.openapitools openapi-generator-maven-plugin - 7.15.0 + ${openapi.generator.version} @@ -185,7 +143,10 @@ generate - + integer=java.lang.Long,int=java.lang.Long + + bearer + ${project.basedir}/src/main/resources/openapi.yaml true @@ -249,25 +210,6 @@ - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 51575f9cb..49b969c7a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -22,7 +22,7 @@ class ConfigurationApiWrapper( suspend fun getConfigurationDetails(): ConfigurationResponse { val configurationHttpResponse: HttpResponse = - api.v1RestIngestionConfigurationGet() + api.getIngestConfiguration() if (configurationHttpResponse.success) { logger.info( "Successfully retrieved configuration details from $dmUrl with status: ${configurationHttpResponse.status}", diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index 04379f62b..6c59d24da 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -7,7 +7,7 @@ import java.util.* import java.util.regex.Pattern object PathUtils { - private const val PREFIX = "Ingest.V2.Dotnet" + private const val PREFIX = "Ingest.V2.Java" private const val FILE_NAME_SEGMENT_MAX_LENGTH = 120 private const val TOTAL_TWO_SEGMENT_MAX_LENGTH = 160 private const val TRUNCATION_SUFFIX = "__trunc" diff --git a/ingest-v2/src/main/resources/application.yaml b/ingest-v2/src/main/resources/application.yaml deleted file mode 100644 index 88e6eff80..000000000 --- a/ingest-v2/src/main/resources/application.yaml +++ /dev/null @@ -1,6 +0,0 @@ -ktor: - application: - modules: - - com.microsoft.azure.kusto.ApplicationKt.module - deployment: - port: 8080 diff --git a/ingest-v2/src/main/resources/logback.xml b/ingest-v2/src/main/resources/logback.xml deleted file mode 100644 index aadef5d5b..000000000 --- a/ingest-v2/src/main/resources/logback.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - %d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - \ No newline at end of file diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml index 21b940357..484eb9d43 100644 --- a/ingest-v2/src/main/resources/openapi.yaml +++ b/ingest-v2/src/main/resources/openapi.yaml @@ -8,12 +8,13 @@ servers: url: https://ingest-{clusterName}.kusto.windows.net variables: clusterName: - default: "mycluster.swedencentral" + default: "ingest-mycluster.swedencentral" description: The name of the Kusto cluster, including the region, e.g., "mycluster.swedencentral". 
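# Example (illustrative): with clusterName set to "mycluster.swedencentral", the
# server template above resolves to https://ingest-mycluster.swedencentral.kusto.windows.net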
paths: /v1/rest/ingestion/queued/{database}/{table}: post: summary: Submit an ingest request + operationId: postQueuedIngest security: - BearerAuth: [ ] parameters: @@ -43,6 +44,7 @@ paths: /v1/rest/ingestion/configuration: get: summary: Get ingest configuration + operationId: getIngestConfiguration security: - BearerAuth: [ ] responses: @@ -55,6 +57,7 @@ paths: /v1/rest/ingestion/queued/{database}/{table}/{operationId}: get: summary: Get status of an ingest operation + operationId: getIngestStatus parameters: - name: database in: path @@ -88,13 +91,14 @@ paths: $ref: '#/components/schemas/StatusResponse' /v1/rest/ingest/{database}/{table}: servers: - - url: https://{clusterName}.kusto.windows.net + - url: https://ingest-{clusterName}.kusto.windows.net variables: clusterName: - default: "mycluster.swedencentral" + default: "ingest-mycluster.swedencentral" description: The name of the Kusto cluster, including the region, e.g., "mycluster.swedencentral". post: summary: Streaming Ingest + operationId: postStreamingIngest security: - BearerAuth: [ ] parameters: @@ -291,7 +295,7 @@ components: maxBlobsPerBatch: type: integer maxDataSize: - type: long + type: integer preferredIngestionMethod: type: string nullable: true From c472f9c09e21bc5ed80fdccfaece0399e8639f60 Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Fri, 26 Sep 2025 11:33:42 +0530 Subject: [PATCH 18/50] * Fix some more review comments --- .../ingest/v2/ConfigurationApiWrapper.kt | 6 +- .../kusto/ingest/v2/KustoBaseApiClient.kt | 49 +++++++++------ .../auth/AzCliTokenCredentialsProvider.kt | 62 ------------------- .../common/auth/TokenCredentialsProvider.kt | 42 ------------- .../kusto/ingest/v2/source/BlobSource.kt | 2 +- .../ingest/v2/ConfigurationApiWrapperTest.kt | 4 +- 6 files changed, 35 insertions(+), 130 deletions(-) delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt index 49b969c7a..edd0b1b3a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt @@ -2,8 +2,8 @@ // Licensed under the MIT License. 
package com.microsoft.azure.kusto.ingest.v2 +import com.azure.core.credential.TokenCredential import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi -import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse @@ -11,9 +11,9 @@ import org.slf4j.LoggerFactory class ConfigurationApiWrapper( override val dmUrl: String, - override val tokenCredentialsProvider: TokenCredentialsProvider, + override val tokenCredential: TokenCredential, override val skipSecurityChecks: Boolean = false, -) : KustoBaseApiClient(dmUrl, tokenCredentialsProvider, skipSecurityChecks) { +) : KustoBaseApiClient(dmUrl, tokenCredential, skipSecurityChecks) { private val logger = LoggerFactory.getLogger(ConfigurationApiWrapper::class.java) private val baseUrl = "$dmUrl/v1/rest/ingestion/configuration" diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 26263c1bf..ec2818cfe 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -2,7 +2,8 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2 -import com.microsoft.azure.kusto.ingest.v2.common.auth.TokenCredentialsProvider +import com.azure.core.credential.TokenCredential +import com.azure.core.credential.TokenRequestContext import io.ktor.client.HttpClientConfig import io.ktor.client.plugins.DefaultRequest import io.ktor.client.plugins.auth.Auth @@ -12,10 +13,13 @@ import io.ktor.client.plugins.contentnegotiation.ContentNegotiation import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json import kotlinx.serialization.json.Json +import kotlinx.coroutines.suspendCancellableCoroutine +import kotlin.coroutines.resume +import kotlin.coroutines.resumeWithException open class KustoBaseApiClient( open val dmUrl: String, - open val tokenCredentialsProvider: TokenCredentialsProvider, + open val tokenCredential: TokenCredential, open val skipSecurityChecks: Boolean = false, ) { @@ -27,31 +31,36 @@ open class KustoBaseApiClient( config.install(DefaultRequest) { header("Content-Type", "application/json") } + val trc = TokenRequestContext().addScopes("$dmUrl/.default") config.install(Auth) { bearer { loadTokens { // Always null so refreshTokens is always called - tokenCredentialsProvider - .getCredentialsAsync(dmUrl) - .tokenValue - ?.let { - BearerTokens( - accessToken = it, - refreshToken = null, - ) - } + null } refreshTokens { - // Always null so refreshTokens is always called - tokenCredentialsProvider - .getCredentialsAsync(dmUrl) - .tokenValue - ?.let { - BearerTokens( - accessToken = it, - refreshToken = null, - ) + try { + // Use suspendCancellableCoroutine to convert Mono to suspend function + suspendCancellableCoroutine { continuation -> + tokenCredential + .getToken(trc) + .subscribe( + { accessToken -> + val bearerTokens = BearerTokens( + accessToken = accessToken.token, + refreshToken = null, + ) + continuation.resume(bearerTokens) + }, + { error -> + continuation.resumeWithException(error) + } + ) } + } catch (e: Exception) { + // Handle token retrieval errors + null + } } } } diff --git 
a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt deleted file mode 100644 index bbb335193..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/AzCliTokenCredentialsProvider.kt +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.common.auth - -import com.azure.core.credential.TokenRequestContext -import com.azure.identity.AzureCliCredentialBuilder -import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials - -class AzCliTokenCredentialsProvider : TokenCredentialsProvider { - override suspend fun getCredentialsAsync( - targetResource: String, - ): KustoTokenCredentials { - val azureCliCredential = AzureCliCredentialBuilder().build() - val tokenRequestContext = - TokenRequestContext().addScopes("$targetResource/.default") - val token = - azureCliCredential.getToken(tokenRequestContext).block()?.token - val expiresOn = - azureCliCredential - .getToken(tokenRequestContext) - .block() - ?.expiresAt - return KustoTokenCredentials( - "JWT", - token ?: throw Exception("Failed to acquire token"), - expiresOn, - ) - } - - override suspend fun getCredentialsAsync( - targetResource: String, - tenantId: String, - ): KustoTokenCredentials { - val azureCliCredential = - AzureCliCredentialBuilder().tenantId(tenantId).build() - val tokenRequestContext = - TokenRequestContext() - .setTenantId(tenantId) - .addScopes("$targetResource/.default") - val token = - azureCliCredential.getToken(tokenRequestContext).block()?.token - val expiresOn = - azureCliCredential - .getToken(tokenRequestContext) - .block() - ?.expiresAt - return KustoTokenCredentials( - "JWT", - token ?: throw Exception("Failed to acquire token"), - expiresOn, - ) - } - - override suspend fun getCredentialsAsync( - targetResource: String, - retries: Int, - tenantId: String?, - ): KustoTokenCredentials { - // TODO: implement retries - return getCredentialsAsync(targetResource, tenantId ?: "") - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt deleted file mode 100644 index 533b3564a..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/auth/TokenCredentialsProvider.kt +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.common.auth - -import com.microsoft.azure.kusto.ingest.v2.common.models.KustoTokenCredentials - -interface TokenCredentialsProvider { - /** - * Retrieves (or creates) a [KustoTokenCredentials] object for - * [targetResource]. - * - * @param targetResource The target resource for which the credentials are - * needed. - * @return The [KustoTokenCredentials] concrete object to use when accessing - * the target resource. - */ - suspend fun getCredentialsAsync( - targetResource: String, - ): KustoTokenCredentials - - /** - * Retrieves (or creates) a [KustoTokenCredentials] object for the - * [targetResource] on a tenant [tenantId]. Note this API is NOT always - * supported. Make sure the implementation you use supports this API. 
- */ - suspend fun getCredentialsAsync( - targetResource: String, - tenantId: String, - ): KustoTokenCredentials - - /** - * Retrieves (or creates) a [KustoTokenCredentials] object for the - * [targetResource] on a tenant [tenantId] with retries. Note this API is - * NOT always supported. Make sure the implementation you use supports this - * API. - */ - suspend fun getCredentialsAsync( - targetResource: String, - retries: Int, - tenantId: String? = null, - ): KustoTokenCredentials -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index 3b877e2e4..52f486e21 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -40,7 +40,7 @@ class BlobSource : IngestionSource { return "$url SourceId: $sourceId" } + // No resources to close; method intentionally left empty. override fun close() { - TODO("Not yet implemented") } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index 38ec80ce6..9df787e7c 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -2,8 +2,8 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2 +import com.azure.identity.AzureCliCredentialBuilder import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache -import com.microsoft.azure.kusto.ingest.v2.common.auth.AzCliTokenCredentialsProvider import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.TestInstance @@ -35,7 +35,7 @@ class ConfigurationApiWrapperTest { isException: Boolean, ): Unit = runBlocking { val actualTokenProvider = - AzCliTokenCredentialsProvider() // Replace with a real token provider + AzureCliCredentialBuilder().build() // Replace with a real token provider // val cluster = System.getenv("DM_CONNECTION_STRING") val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) From 6b7416164e3fa965923c79b8761bf6782e6ccde1 Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Thu, 16 Oct 2025 10:40:33 +0530 Subject: [PATCH 19/50] * Fix comments and push this as the base branch for IngestV2 --- .../ingest/v2/common/IngestRetryPolicy.kt | 22 ++++++++++--------- .../kusto/ingest/v2/source/LocalSource.kt | 9 +++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index 177c981ee..af4e8d5ca 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -4,18 +4,20 @@ package com.microsoft.azure.kusto.ingest.v2.common import java.time.Duration +data class RetryDecision(val shouldRetry: Boolean, val interval: Duration) + interface IngestRetryPolicy { /** * Determines whether the operation should be retried based on the - * retryNumber. 
Returns a Pair indicating whether to + * retryNumber. Returns a RetryDecision indicating whether to * retry and the duration of the retry interval. */ - fun moveNext(retryNumber: UInt): Pair + fun moveNext(retryNumber: UInt): RetryDecision } object NoRetryPolicy : IngestRetryPolicy { - override fun moveNext(retryNumber: UInt): Pair { - return Pair(false, Duration.ZERO) + override fun moveNext(retryNumber: UInt): RetryDecision { + return RetryDecision(false, Duration.ZERO) } } @@ -27,12 +29,12 @@ class SimpleRetryPolicy( require(totalRetries > 0) { "totalRetries must be positive" } } - override fun moveNext(retryNumber: UInt): Pair { + override fun moveNext(retryNumber: UInt): RetryDecision { require(retryNumber > 0u) { "retryNumber must be positive" } if (retryNumber >= totalRetries.toUInt()) { - return Pair(false, Duration.ZERO) + return RetryDecision(false, Duration.ZERO) } - return Pair(true, intervalDuration) + return RetryDecision(true, intervalDuration) } } @@ -49,11 +51,11 @@ class CustomRetryPolicy(intervalDurations: Array? = null) : val intervals: List get() = intervalDurations.toList() - override fun moveNext(retryNumber: UInt): Pair { + override fun moveNext(retryNumber: UInt): RetryDecision { val idx = retryNumber.toInt() if (idx >= intervalDurations.size) { - return Pair(false, Duration.ZERO) + return RetryDecision(false, Duration.ZERO) } - return Pair(true, intervalDurations[idx]) + return RetryDecision(true, intervalDurations[idx]) } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index c88bcb70c..41b1f499b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -12,7 +12,8 @@ abstract class LocalSource( override val sourceId: String? = null, ) : IngestionSource(format, compressionType, baseName, sourceId) { - protected var mStream: InputStream? = null + // Lazily initialized input stream for ingestion source + protected lateinit var mStream: InputStream // Indicates whether the stream should be left open after ingestion. // val leaveOpen: Boolean // Already a constructor property @@ -30,7 +31,9 @@ abstract class LocalSource( override fun close() { if (!leaveOpen) { - mStream?.close() + if (this::mStream.isInitialized) { + mStream.close() + } } } } @@ -50,6 +53,6 @@ class StreamSource( } override fun data(): InputStream { - return mStream!! 
+ return mStream ?: throw IllegalStateException("Stream is not initialized") } } From 7686beff47ab207cea34c37bb1eacfce7931bf23 Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Thu, 16 Oct 2025 10:41:05 +0530 Subject: [PATCH 20/50] * Reformat code changes --- .../kusto/ingest/v2/KustoBaseApiClient.kt | 23 +++-- .../ingest/v2/common/IngestRetryPolicy.kt | 4 +- .../v2/container/BlobUploadContainer.kt | 94 +++++++++---------- .../kusto/ingest/v2/source/BlobSource.kt | 3 +- .../kusto/ingest/v2/source/LocalSource.kt | 3 +- .../ingest/v2/ConfigurationApiWrapperTest.kt | 3 +- 6 files changed, 70 insertions(+), 60 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index ec2818cfe..4e1e5c723 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -12,8 +12,8 @@ import io.ktor.client.plugins.auth.providers.bearer import io.ktor.client.plugins.contentnegotiation.ContentNegotiation import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json -import kotlinx.serialization.json.Json import kotlinx.coroutines.suspendCancellableCoroutine +import kotlinx.serialization.json.Json import kotlin.coroutines.resume import kotlin.coroutines.resumeWithException @@ -46,15 +46,24 @@ open class KustoBaseApiClient( .getToken(trc) .subscribe( { accessToken -> - val bearerTokens = BearerTokens( - accessToken = accessToken.token, - refreshToken = null, + val bearerTokens = + BearerTokens( + accessToken = + accessToken + .token, + refreshToken = + null, + ) + continuation.resume( + bearerTokens, ) - continuation.resume(bearerTokens) }, { error -> - continuation.resumeWithException(error) - } + continuation + .resumeWithException( + error, + ) + }, ) } } catch (e: Exception) { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index af4e8d5ca..6a4ad6f48 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -9,8 +9,8 @@ data class RetryDecision(val shouldRetry: Boolean, val interval: Duration) interface IngestRetryPolicy { /** * Determines whether the operation should be retried based on the - * retryNumber. Returns a RetryDecision indicating whether to - * retry and the duration of the retry interval. + * retryNumber. Returns a RetryDecision indicating whether to retry and the + * duration of the retry interval. */ fun moveNext(retryNumber: UInt): RetryDecision } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt index 2cec6cb21..649c9fd9a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt @@ -1,47 +1,47 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
-package com.microsoft.azure.kusto.ingest.v2.container - -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse -import org.jetbrains.annotations.NotNull - -class BlobUploadContainer(val configResponse: @NotNull ConfigurationResponse) : - UploadContainerBase { - - // choose a random container from the configResponse.containerSettings.containers - - override suspend fun uploadAsync( - name: String, - stream: java.io.InputStream, - ): String { - // Placeholder for actual upload logic - // In a real implementation, this would upload the stream to the blob storage - // and return the URI of the uploaded blob. - // check if the configResponse has containerSettings - val noUploadLocation = - configResponse.containerSettings == null || - ( - configResponse.containerSettings.containers - ?.isEmpty() == true && - configResponse.containerSettings.lakeFolders - ?.isEmpty() == true - ) - if (noUploadLocation) { - throw IngestException( - "No container settings available in the configuration response", - ) - } - // check if containers is null or empty , if so use lakeFolders and choose one randomly - val targetPath = - if ( - configResponse.containerSettings.containers - .isNullOrEmpty() - ) { - configResponse.containerSettings.lakeFolders!!.random() - } else { - configResponse.containerSettings.containers.random() - } - return "${targetPath.path}/$name" - } -} +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.container + +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import org.jetbrains.annotations.NotNull + +class BlobUploadContainer(val configResponse: @NotNull ConfigurationResponse) : + UploadContainerBase { + + // choose a random container from the configResponse.containerSettings.containers + + override suspend fun uploadAsync( + name: String, + stream: java.io.InputStream, + ): String { + // Placeholder for actual upload logic + // In a real implementation, this would upload the stream to the blob storage + // and return the URI of the uploaded blob. + // check if the configResponse has containerSettings + val noUploadLocation = + configResponse.containerSettings == null || + ( + configResponse.containerSettings.containers + ?.isEmpty() == true && + configResponse.containerSettings.lakeFolders + ?.isEmpty() == true + ) + if (noUploadLocation) { + throw IngestException( + "No container settings available in the configuration response", + ) + } + // check if containers is null or empty , if so use lakeFolders and choose one randomly + val targetPath = + if ( + configResponse.containerSettings.containers + .isNullOrEmpty() + ) { + configResponse.containerSettings.lakeFolders!!.random() + } else { + configResponse.containerSettings.containers.random() + } + return "${targetPath.path}/$name" + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index 52f486e21..403b5b762 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -41,6 +41,5 @@ class BlobSource : IngestionSource { } // No resources to close; method intentionally left empty. 
- override fun close() { - } + override fun close() {} } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index 41b1f499b..1ebb34009 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -53,6 +53,7 @@ class StreamSource( } override fun data(): InputStream { - return mStream ?: throw IllegalStateException("Stream is not initialized") + return mStream + ?: throw IllegalStateException("Stream is not initialized") } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt index 9df787e7c..6e2656fed 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt @@ -35,7 +35,8 @@ class ConfigurationApiWrapperTest { isException: Boolean, ): Unit = runBlocking { val actualTokenProvider = - AzureCliCredentialBuilder().build() // Replace with a real token provider + AzureCliCredentialBuilder() + .build() // Replace with a real token provider // val cluster = System.getenv("DM_CONNECTION_STRING") val actualWrapper = ConfigurationApiWrapper(cluster, actualTokenProvider, true) From 0d12babaad931acb5660b74c51ad1648ac9e5439 Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Thu, 16 Oct 2025 11:00:07 +0530 Subject: [PATCH 21/50] * Rename retry data class --- .../ingest/v2/common/IngestRetryPolicy.kt | 20 +++++++++---------- .../ingest/v2/common/RetryPolicyExtensions.kt | 12 +++++------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index 6a4ad6f48..d871f853a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -4,7 +4,7 @@ package com.microsoft.azure.kusto.ingest.v2.common import java.time.Duration -data class RetryDecision(val shouldRetry: Boolean, val interval: Duration) +data class Retry(val shouldRetry: Boolean, val interval: Duration) interface IngestRetryPolicy { /** @@ -12,12 +12,12 @@ interface IngestRetryPolicy { * retryNumber. Returns a RetryDecision indicating whether to retry and the * duration of the retry interval. 
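 * A minimal usage sketch (illustrative; the names below are the ones defined in
 * this file, plus java.time.Duration):
 * ```
 * val policy = SimpleRetryPolicy(totalRetries = 3, intervalDuration = Duration.ofSeconds(2))
 * val decision = policy.moveNext(1u)
 * if (decision.shouldRetry) Thread.sleep(decision.interval.toMillis())
 * ```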
*/ - fun moveNext(retryNumber: UInt): RetryDecision + fun moveNext(retryNumber: UInt): Retry } object NoRetryPolicy : IngestRetryPolicy { - override fun moveNext(retryNumber: UInt): RetryDecision { - return RetryDecision(false, Duration.ZERO) + override fun moveNext(retryNumber: UInt): Retry { + return Retry(false, Duration.ZERO) } } @@ -29,12 +29,12 @@ class SimpleRetryPolicy( require(totalRetries > 0) { "totalRetries must be positive" } } - override fun moveNext(retryNumber: UInt): RetryDecision { + override fun moveNext(retryNumber: UInt): Retry { require(retryNumber > 0u) { "retryNumber must be positive" } if (retryNumber >= totalRetries.toUInt()) { - return RetryDecision(false, Duration.ZERO) + return Retry(false, Duration.ZERO) } - return RetryDecision(true, intervalDuration) + return Retry(true, intervalDuration) } } @@ -51,11 +51,11 @@ class CustomRetryPolicy(intervalDurations: Array? = null) : val intervals: List get() = intervalDurations.toList() - override fun moveNext(retryNumber: UInt): RetryDecision { + override fun moveNext(retryNumber: UInt): Retry { val idx = retryNumber.toInt() if (idx >= intervalDurations.size) { - return RetryDecision(false, Duration.ZERO) + return Retry(false, Duration.ZERO) } - return RetryDecision(true, intervalDurations[idx]) + return Retry(true, intervalDurations[idx]) } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt index eaaa97918..a5e471707 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt @@ -18,7 +18,7 @@ suspend fun IngestRetryPolicy.runWithRetry( onRetry: ((UInt, Exception, Boolean) -> Unit)? = null, // retry attempt number, exception, isPermanent onError: ((UInt, Exception, Boolean) -> Unit)? = null, - shouldRetry: ((UInt, Exception, Boolean) -> RetryDecision)? = null, + shouldRetry: ((UInt, Exception, Boolean) -> Retry)? = null, throwOnExhaustedRetries: Boolean = true, tracer: ((String) -> Unit)? = null, cancellationChecker: (() -> Boolean)? = null, @@ -33,20 +33,20 @@ suspend fun IngestRetryPolicy.runWithRetry( val decision = shouldRetry?.invoke(attempt, ex, isPermanent) ?: if (isPermanent) { - RetryDecision.Throw + Retry.Throw } else { - RetryDecision.Continue + Retry.Continue } when (decision) { - RetryDecision.Throw -> { + Retry.Throw -> { tracer?.invoke( "Decision to throw on attempt $attempt. Is Permanent: $isPermanent. Exception: ${ex.message}", ) throw ex } - RetryDecision.Break -> { + Retry.Break -> { tracer?.invoke( "Breaking out of retry loop early, on attempt $attempt. Exception: ${ex.message}", ) @@ -65,7 +65,7 @@ suspend fun IngestRetryPolicy.runWithRetry( tracer?.invoke( "Transient error occurred: ${ex.message}. 
Retrying attempt $attempt.", ) - if (decision != RetryDecision.ContinueWithoutDelay) { + if (decision != Retry.ContinueWithoutDelay) { if (delayDuration.toMillis() > 0) { if (cancellationChecker?.invoke() == true) { throw CancellationException( From 5c9d007c93d1f51bdca636e057f1b184dda51223 Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Thu, 16 Oct 2025 11:01:46 +0530 Subject: [PATCH 22/50] * Rename retry data class --- .../kusto/ingest/v2/common/RetryPolicyExtensions.kt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt index a5e471707..e042b454c 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt @@ -33,20 +33,20 @@ suspend fun IngestRetryPolicy.runWithRetry( val decision = shouldRetry?.invoke(attempt, ex, isPermanent) ?: if (isPermanent) { - Retry.Throw + RetryDecision.Throw } else { - Retry.Continue + RetryDecision.Continue } when (decision) { - Retry.Throw -> { + RetryDecision.Throw -> { tracer?.invoke( "Decision to throw on attempt $attempt. Is Permanent: $isPermanent. Exception: ${ex.message}", ) throw ex } - Retry.Break -> { + RetryDecision.Break -> { tracer?.invoke( "Breaking out of retry loop early, on attempt $attempt. Exception: ${ex.message}", ) @@ -65,7 +65,7 @@ suspend fun IngestRetryPolicy.runWithRetry( tracer?.invoke( "Transient error occurred: ${ex.message}. Retrying attempt $attempt.", ) - if (decision != Retry.ContinueWithoutDelay) { + if (decision != RetryDecision.ContinueWithoutDelay) { if (delayDuration.toMillis() > 0) { if (cancellationChecker?.invoke() == true) { throw CancellationException( From 2b74d1c010e6c977d3559725834092d5db1da0b4 Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Thu, 30 Oct 2025 16:34:27 +0530 Subject: [PATCH 23/50] * Remove unused classes * Address review comments --- .../kusto/ingest/v2/common/utils/PathUtils.kt | 4 +- .../kusto/ingest/v2/source/BlobSource.kt | 45 -------------- .../ingest/v2/source/DataSourceFormat.kt | 61 ------------------- .../kusto/ingest/v2/source/IngestionSource.kt | 8 ++- .../kusto/ingest/v2/source/LocalSource.kt | 24 ++++++-- 5 files changed, 26 insertions(+), 116 deletions(-) delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index 6c59d24da..c093f5574 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -73,8 +73,8 @@ object PathUtils { } if (uriObj == null || !uriObj.isAbsolute) { // Not a valid absolute URI, treat as path - return uri.substringAfterLast('/', uri) - .substringAfterLast('\\', uri) + return uri.substringAfterLast('/') + .substringAfterLast("\\") } // For web URIs, extract last segment of the path, remove query/fragment val path = uriObj.path ?: "" diff --git 
a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt deleted file mode 100644 index 403b5b762..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.source - -class BlobSource : IngestionSource { - override val url: String - val exactSize: Int? - - constructor( - url: String, - format: DataFormat, - compression: CompressionType? = null, - sourceId: String? = null, - ) : super( - format, - compression ?: ExtendedDataSourceCompressionType.detectFromUri(url), - url, - sourceId, - ) { - this.url = url - this.exactSize = null - } - - internal constructor( - url: String, - localSource: LocalSource, - exactSize: Int? = null, - ) : super( - localSource.format, - localSource.compressionType, - url, - localSource.sourceId, - ) { - this.url = url - this.exactSize = exactSize - } - - override fun toString(): String { - // Assuming FormatWithInvariantCulture is replaced by Kotlin string interpolation - return "$url SourceId: $sourceId" - } - - // No resources to close; method intentionally left empty. - override fun close() {} -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt deleted file mode 100644 index 2ed9702b6..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/DataSourceFormat.kt +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
-package com.microsoft.azure.kusto.ingest.v2.source - -enum class DataFormat( - val kustoValue: String, - private val ingestionMappingKind: IngestionMappingKind, - compressible: Boolean, -) { - CSV("csv", IngestionMappingKind.CSV, true), - TSV("tsv", IngestionMappingKind.CSV, true), - SCSV("scsv", IngestionMappingKind.CSV, true), - SOHSV("sohsv", IngestionMappingKind.CSV, true), - PSV("psv", IngestionMappingKind.CSV, true), - TXT("txt", IngestionMappingKind.CSV, true), - TSVE("tsve", IngestionMappingKind.CSV, true), - JSON("json", IngestionMappingKind.JSON, true), - SINGLEJSON("singlejson", IngestionMappingKind.JSON, true), - MULTIJSON("multijson", IngestionMappingKind.JSON, true), - AVRO("avro", IngestionMappingKind.AVRO, false), - APACHEAVRO("apacheavro", IngestionMappingKind.APACHEAVRO, false), - PARQUET("parquet", IngestionMappingKind.PARQUET, false), - SSTREAM("sstream", IngestionMappingKind.SSTREAM, false), - ORC("orc", IngestionMappingKind.ORC, false), - RAW("raw", IngestionMappingKind.CSV, true), - W3CLOGFILE("w3clogfile", IngestionMappingKind.W3CLOGFILE, true), - ; - - val isCompressible: Boolean = compressible - - fun getIngestionMappingKind(): IngestionMappingKind { - return ingestionMappingKind - } - - fun isBinaryFormat(): Boolean { - return this == AVRO || - this == APACHEAVRO || - this == PARQUET || - this == SSTREAM || - this == ORC - } - - fun isJsonFormat(): Boolean { - return this == JSON || this == MULTIJSON || this == SINGLEJSON - } - - fun toKustoValue(): String { - return kustoValue - } -} - -enum class IngestionMappingKind(val kustoValue: String) { - CSV("Csv"), - JSON("Json"), - AVRO("Avro"), - PARQUET("Parquet"), - SSTREAM("SStream"), - ORC("Orc"), - APACHEAVRO("ApacheAvro"), - W3CLOGFILE("W3CLogFile"), -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt index 17d160675..049329421 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt @@ -3,13 +3,15 @@ package com.microsoft.azure.kusto.ingest.v2.source import com.microsoft.azure.kusto.ingest.v2.common.utils.PathUtils +import com.microsoft.azure.kusto.ingest.v2.models.Format import java.lang.AutoCloseable +import java.util.UUID abstract class IngestionSource( - open val format: DataFormat, + open val format: Format, open val compressionType: CompressionType?, open val url: String?, - open val sourceId: String?, + open val sourceId: UUID = UUID.randomUUID(), ) : AutoCloseable { var name: String? = null private set @@ -19,7 +21,7 @@ abstract class IngestionSource( this::class.simpleName?.lowercase()?.removeSuffix("source") ?: "unknown" name = - "${type}_${PathUtils.sanitizeFileName(baseName, sourceId)}${format.toKustoValue()}$compressionType" + "${type}_${PathUtils.sanitizeFileName(baseName, sourceId.toString())}${format.value}$compressionType" } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index 1ebb34009..c19e978c8 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -2,14 +2,16 @@ // Licensed under the MIT License. 
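// Usage sketch (illustrative; assumes the generated Format enum exposes a `csv`
// constant, mirroring the lowercase names used below):
//   val source = StreamSource(FileInputStream("data.csv"), Format.csv, CompressionType.NONE)
// Uncompressed, non-binary sources like this one report shouldCompress = true.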
package com.microsoft.azure.kusto.ingest.v2.source +import com.microsoft.azure.kusto.ingest.v2.models.Format import java.io.InputStream +import java.util.UUID abstract class LocalSource( - override val format: DataFormat, + override val format: Format, val leaveOpen: Boolean, override val compressionType: CompressionType = CompressionType.NONE, val baseName: String? = null, - override val sourceId: String? = null, + override val sourceId: UUID = UUID.randomUUID(), ) : IngestionSource(format, compressionType, baseName, sourceId) { // Lazily initialized input stream for ingestion source @@ -21,7 +23,7 @@ abstract class LocalSource( internal val shouldCompress: Boolean get() = (compressionType == CompressionType.NONE) && - !format.isBinaryFormat() + !isBinaryFormat(format) abstract fun data(): InputStream @@ -36,13 +38,23 @@ abstract class LocalSource( } } } + + fun isBinaryFormat(format: Format): Boolean { + return when (format) { + Format.avro, + Format.parquet, + Format.orc, + Format.apacheavro -> true + else -> false + } + } } class StreamSource( stream: InputStream, - format: DataFormat, + format: Format, sourceCompression: CompressionType, - sourceId: String? = null, + sourceId: UUID = UUID.randomUUID(), name: String? = null, leaveOpen: Boolean = false, ) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) { @@ -56,4 +68,6 @@ class StreamSource( return mStream ?: throw IllegalStateException("Stream is not initialized") } + + } From 3eab813c2fd22fea92cc7e874780ed45fb133a3e Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Thu, 30 Oct 2025 16:35:49 +0530 Subject: [PATCH 24/50] * Remove unused classes --- .../v2/container/BlobUploadContainer.kt | 47 ------------------- .../v2/container/UploadContainerBase.kt | 9 ---- 2 files changed, 56 deletions(-) delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt deleted file mode 100644 index 649c9fd9a..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.container - -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse -import org.jetbrains.annotations.NotNull - -class BlobUploadContainer(val configResponse: @NotNull ConfigurationResponse) : - UploadContainerBase { - - // choose a random container from the configResponse.containerSettings.containers - - override suspend fun uploadAsync( - name: String, - stream: java.io.InputStream, - ): String { - // Placeholder for actual upload logic - // In a real implementation, this would upload the stream to the blob storage - // and return the URI of the uploaded blob. 
- // check if the configResponse has containerSettings - val noUploadLocation = - configResponse.containerSettings == null || - ( - configResponse.containerSettings.containers - ?.isEmpty() == true && - configResponse.containerSettings.lakeFolders - ?.isEmpty() == true - ) - if (noUploadLocation) { - throw IngestException( - "No container settings available in the configuration response", - ) - } - // check if containers is null or empty , if so use lakeFolders and choose one randomly - val targetPath = - if ( - configResponse.containerSettings.containers - .isNullOrEmpty() - ) { - configResponse.containerSettings.lakeFolders!!.random() - } else { - configResponse.containerSettings.containers.random() - } - return "${targetPath.path}/$name" - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt deleted file mode 100644 index 34c08d076..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.container - -import java.io.InputStream - -interface UploadContainerBase { - suspend fun uploadAsync(name: String, stream: InputStream): String -} From 86d3f79956b5f241e66829c1c1cd7c9ecf17733b Mon Sep 17 00:00:00 2001 From: Ramachandran A G Date: Fri, 31 Oct 2025 10:39:10 +0530 Subject: [PATCH 25/50] * Fix comment on substring chaining --- .../azure/kusto/ingest/v2/common/utils/PathUtils.kt | 4 ++-- .../microsoft/azure/kusto/ingest/v2/source/LocalSource.kt | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index c093f5574..a645dbf2a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -73,8 +73,8 @@ object PathUtils { } if (uriObj == null || !uriObj.isAbsolute) { // Not a valid absolute URI, treat as path - return uri.substringAfterLast('/') - .substringAfterLast("\\") + // Chain substringAfterLast for both '/' and '\' correctly + return uri.substringAfterLast('/').substringAfterLast('\\') } // For web URIs, extract last segment of the path, remove query/fragment val path = uriObj.path ?: "" diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index c19e978c8..563eb6db2 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -44,7 +44,8 @@ abstract class LocalSource( Format.avro, Format.parquet, Format.orc, - Format.apacheavro -> true + Format.apacheavro, + -> true else -> false } } @@ -68,6 +69,4 @@ class StreamSource( return mStream ?: throw IllegalStateException("Stream is not initialized") } - - } From 00225778c0fe932a5dcfcb9f3fd672184bfe6246 Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:08:44 +0530 Subject: [PATCH 26/50] Feature/add ingestion source blob (#440) * Add 
IngestionSource Blob type and QueuedIngest * Merge code with IngestV2 branch * Reorganize code and remove DTO's * * Fix some review comments --------- Co-authored-by: ag-ramachandran --- ingest-v2/pom.xml | 68 +++- .../ingest/v2/ConfigurationApiWrapper.kt | 46 --- .../kusto/ingest/v2/ConfigurationClient.kt | 92 +++++ .../azure/kusto/ingest/v2/IngestClient.kt | 76 ++++ .../kusto/ingest/v2/KustoBaseApiClient.kt | 32 +- .../kusto/ingest/v2/QueuedIngestionClient.kt | 332 ++++++++++++++++++ .../kusto/ingest/v2/StreamingIngestClient.kt | 130 +++++++ .../v2/common/models/mapping/ColumnMapping.kt | 96 +++++ .../models/mapping/InlineIngestionMapping.kt | 35 ++ .../common/models/mapping/MappingConstants.kt | 23 ++ .../models/mapping/TransformationMethod.kt | 17 + .../serialization/OffsetDateTimeSerializer.kt | 34 ++ .../v2/common/utils/IngestionResultUtils.kt | 45 +++ .../ingest/v2/container/ContainerBase.kt | 8 + .../ingest/v2/source/AbstractSourceInfo.kt | 9 + .../kusto/ingest/v2/source/BlobSourceInfo.kt | 79 +++++ .../kusto/ingest/v2/source/FormatUtil.kt | 18 + .../kusto/ingest/v2/source/IngestionSource.kt | 31 -- .../kusto/ingest/v2/source/LocalSource.kt | 72 ---- .../kusto/ingest/v2/source/SourceInfo.kt | 12 + ingest-v2/src/main/resources/application.yaml | 6 + ingest-v2/src/main/resources/logback.xml | 12 + ingest-v2/src/main/resources/openapi.yaml | 34 +- ...pperTest.kt => ConfigurationClientTest.kt} | 64 ++-- .../azure/kusto/ingest/v2/IngestV2TestBase.kt | 90 +++++ .../ingest/v2/QueuedIngestionClientTest.kt | 260 ++++++++++++++ .../ingest/v2/StreamingIngestClientTest.kt | 207 +++++++++++ 27 files changed, 1746 insertions(+), 182 deletions(-) delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/ColumnMapping.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingConstants.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/TransformationMethod.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/serialization/OffsetDateTimeSerializer.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtils.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FormatUtil.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt delete mode 100644 
ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt create mode 100644 ingest-v2/src/main/resources/application.yaml create mode 100644 ingest-v2/src/main/resources/logback.xml rename ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/{ConfigurationApiWrapperTest.kt => ConfigurationClientTest.kt} (57%) create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 7d09c0f8b..1d6d3be73 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -8,7 +8,7 @@ ingest-v2 official - 2.2.20 + 2.2.10 3.3.0 3.1.1 1.10.2 @@ -47,6 +47,11 @@ ktor-client-java-jvm ${ktor.version} + + io.ktor + ktor-serialization-jackson + ${ktor.version} + org.slf4j slf4j-simple @@ -68,6 +73,12 @@ ${kotlinx.coroutines.debug.version} test + + org.junit.jupiter + junit-jupiter-params + ${junit.version} + test + io.mockk mockk-jvm @@ -75,9 +86,9 @@ test - org.junit.jupiter - junit-jupiter-params - ${junit.version} + ${project.groupId} + kusto-data + ${project.parent.version} test @@ -89,10 +100,11 @@ ${project.basedir}/src/main/resources + - org.jetbrains.kotlin kotlin-maven-plugin + org.jetbrains.kotlin ${kotlin.version} @@ -120,7 +132,6 @@ kotlinx-serialization - 1.8 @@ -130,6 +141,25 @@ + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + + + + java + + + + + + org.jetbrains.kotlin + kotlin-maven-serialization + ${kotlin.version} + + + org.openapitools openapi-generator-maven-plugin @@ -168,6 +198,25 @@ + + org.codehaus.mojo + build-helper-maven-plugin + 3.6.1 + + + add-openapi-generated-sources + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/openapi/src/main/kotlin + + + + + com.diffplug.spotless spotless-maven-plugin @@ -209,6 +258,13 @@ + + + org.jetbrains.kotlin + kotlin-maven-serialization + ${kotlin.version} + + diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt deleted file mode 100644 index edd0b1b3a..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapper.kt +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
-package com.microsoft.azure.kusto.ingest.v2 - -import com.azure.core.credential.TokenCredential -import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse -import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse -import org.slf4j.LoggerFactory - -class ConfigurationApiWrapper( - override val dmUrl: String, - override val tokenCredential: TokenCredential, - override val skipSecurityChecks: Boolean = false, -) : KustoBaseApiClient(dmUrl, tokenCredential, skipSecurityChecks) { - private val logger = - LoggerFactory.getLogger(ConfigurationApiWrapper::class.java) - private val baseUrl = "$dmUrl/v1/rest/ingestion/configuration" - private val api: DefaultApi = - DefaultApi(baseUrl = dmUrl, httpClientConfig = setupConfig) - - suspend fun getConfigurationDetails(): ConfigurationResponse { - val configurationHttpResponse: HttpResponse = - api.getIngestConfiguration() - if (configurationHttpResponse.success) { - logger.info( - "Successfully retrieved configuration details from $dmUrl with status: ${configurationHttpResponse.status}", - ) - logger.debug( - "Configuration details: {}", - configurationHttpResponse.body(), - ) - return configurationHttpResponse.body() - } else { - logger.error( - "Failed to retrieve configuration details from $baseUrl. Status: ${configurationHttpResponse.status}, " + - "Body: ${configurationHttpResponse.body()}", - ) - throw IngestException( - "Failed to retrieve configuration details from $baseUrl. Status: ${configurationHttpResponse.status}, " + - "Body: ${configurationHttpResponse.body()}", - ) - } - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt new file mode 100644 index 000000000..3d30e593e --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
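+// Usage sketch (illustrative; AzureCliCredential is one possible TokenCredential,
+// as used in the tests):
+//   val client = ConfigurationClient(dmUrl, AzureCliCredentialBuilder().build())
+//   val config = runBlocking { client.getConfigurationDetails() }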
+package com.microsoft.azure.kusto.ingest.v2 + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import io.ktor.http.HttpStatusCode +import org.slf4j.LoggerFactory +import java.net.ConnectException + +class ConfigurationClient( + override val dmUrl: String, + override val tokenCredential: TokenCredential, + override val skipSecurityChecks: Boolean = false, +) : KustoBaseApiClient(dmUrl, tokenCredential, skipSecurityChecks) { + private val logger = + LoggerFactory.getLogger(ConfigurationClient::class.java) + private val baseUrl = "$dmUrl/v1/rest/ingestion/configuration" + + suspend fun getConfigurationDetails(): ConfigurationResponse { + try { + val configurationHttpResponse: HttpResponse = + api.getIngestConfiguration() + if (configurationHttpResponse.success) { + logger.info( + "Successfully retrieved configuration details from $dmUrl with status: ${configurationHttpResponse.status}", + ) + logger.debug( + "Configuration details: {}", + configurationHttpResponse.body(), + ) + return configurationHttpResponse.body() + } else if ( + configurationHttpResponse.status == + HttpStatusCode.NotFound.value + ) { + /* + 404 is a special case - it indicates that the endpoint is not found. This may be a transient + network issue + */ + val message = + "Endpoint $dmUrl not found. Please ensure the cluster supports queued ingestion." + logger.error( + "{}. Status: {}", + message, + configurationHttpResponse.status, + ) + throw IngestException( + message = message, + cause = ConnectException(message), + failureCode = configurationHttpResponse.status, + failureSubCode = "", + isPermanent = false, + ) + } else { + val configurationResponseBody = configurationHttpResponse.body() + val message = + "Failed to retrieve configuration details from $baseUrl.Status: ${configurationHttpResponse.status}, " + + "Body: $configurationResponseBody" + logger.error("{}", message) + throw IngestException( + message = message, + failureCode = configurationHttpResponse.status, + ) + } + } catch (notAbleToReachHost: ConnectException) { + val message = + "Failed to reach $baseUrl. Please ensure the cluster address is correct and the cluster is reachable." + throw IngestException( + message = message, + cause = notAbleToReachHost, + failureCode = HttpStatusCode.NotFound.value, + failureSubCode = "", + isPermanent = false, + ) + } catch (ex: Exception) { + if (ex is IngestException) throw ex + val message = + "An unexpected error occurred while trying to reach $baseUrl" + throw IngestException( + message = message, + cause = ex, + // Mark this as a 5xx series error + failureCode = HttpStatusCode.InternalServerError.value, + failureSubCode = "", + isPermanent = true, + ) + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt new file mode 100644 index 000000000..a29762576 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt @@ -0,0 +1,76 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
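[Editor's note: a minimal caller for the ConfigurationClient above, shown as a sketch rather than patch content. It assumes an Azure CLI login (as the tests later in this patch do) and a DM endpoint in the DM_CONNECTION_STRING environment variable.]

import com.azure.identity.AzureCliCredentialBuilder
import kotlinx.coroutines.runBlocking

// Sketch only: fetch the ingestion configuration once and print it.
// getConfigurationDetails() throws IngestException with isPermanent = false
// on a 404 or an unreachable host, so a caller can decide whether to retry.
fun main() = runBlocking {
    val dmUrl = System.getenv("DM_CONNECTION_STRING")
        ?: error("DM_CONNECTION_STRING is not set")
    val client = ConfigurationClient(
        dmUrl = dmUrl,
        tokenCredential = AzureCliCredentialBuilder().build(),
    )
    println(client.getConfigurationDetails())
}
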
+package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import io.ktor.http.HttpStatusCode +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import java.net.ConnectException + +interface IngestClient { + val logger: Logger + get() = LoggerFactory.getLogger(IngestClient::class.java) + + // Common way to parse ingestion response for both Streaming and Queued ingestion + + suspend fun handleIngestResponse( + response: HttpResponse, + database: String, + table: String, + dmUrl: String, + endpointType: String, + ): T { + if (response.success) { + val ingestResponseBody = response.body() + return ingestResponseBody + } else { + if (response.status == HttpStatusCode.NotFound.value) { + val message = + "Endpoint $dmUrl not found. Please ensure the cluster supports $endpointType ingestion." + logger.error( + "$endpointType ingestion endpoint not found. Please ensure that the target cluster supports $endpointType ingestion and that the endpoint URL is correct.", + ) + throw IngestException( + message = message, + cause = ConnectException(message), + failureCode = response.status, + failureSubCode = "", + isPermanent = false, + ) + } + val nonSuccessResponseBody: T = response.body() + val ingestResponseOperationId = + if (nonSuccessResponseBody is IngestResponse) { + if ( + (nonSuccessResponseBody as IngestResponse) + .ingestionOperationId != null + ) { + logger.info( + "Ingestion Operation ID: ${(nonSuccessResponseBody as IngestResponse).ingestionOperationId}", + ) + nonSuccessResponseBody.ingestionOperationId + } else { + "N/A" + } + } else { + "N/A" + } + val errorMessage = + "Failed to submit $endpointType ingestion to $database.$table. " + + "Status: ${response.status}, Body: $nonSuccessResponseBody. 
" + + "OperationId $ingestResponseOperationId" + logger.error( + "$endpointType ingestion failed with response: {}", + errorMessage, + ) + throw IngestException( + message = errorMessage, + cause = RuntimeException(errorMessage), + isPermanent = true, + ) + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 4e1e5c723..8457a0aaf 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -4,8 +4,11 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential import com.azure.core.credential.TokenRequestContext +import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi +import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer import io.ktor.client.HttpClientConfig import io.ktor.client.plugins.DefaultRequest +import io.ktor.client.plugins.HttpTimeout import io.ktor.client.plugins.auth.Auth import io.ktor.client.plugins.auth.providers.BearerTokens import io.ktor.client.plugins.auth.providers.bearer @@ -14,6 +17,9 @@ import io.ktor.client.request.header import io.ktor.serialization.kotlinx.json.json import kotlinx.coroutines.suspendCancellableCoroutine import kotlinx.serialization.json.Json +import kotlinx.serialization.modules.SerializersModule +import org.slf4j.LoggerFactory +import java.time.OffsetDateTime import kotlin.coroutines.resume import kotlin.coroutines.resumeWithException @@ -22,11 +28,15 @@ open class KustoBaseApiClient( open val tokenCredential: TokenCredential, open val skipSecurityChecks: Boolean = false, ) { - + private val logger = LoggerFactory.getLogger(KustoBaseApiClient::class.java) protected val setupConfig: (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } + protected val api: DefaultApi by lazy { + DefaultApi(baseUrl = dmUrl, httpClientConfig = setupConfig) + } + private fun getClientConfig(config: HttpClientConfig<*>) { config.install(DefaultRequest) { header("Content-Type", "application/json") @@ -68,7 +78,11 @@ open class KustoBaseApiClient( } } catch (e: Exception) { // Handle token retrieval errors - null + logger.error( + "Error retrieving access token: ${e.message}", + e, + ) + throw e } } } @@ -77,12 +91,24 @@ open class KustoBaseApiClient( json( Json { ignoreUnknownKeys = true + serializersModule = SerializersModule { + contextual( + OffsetDateTime::class, + OffsetDateTimeSerializer, + ) + } // Optionally add other settings if needed: - // isLenient = true + isLenient = true // allowSpecialFloatingPointValues = true // useArrayPolymorphism = true }, ) } + /* TODO Check what these settings should be */ + config.install(HttpTimeout) { + requestTimeoutMillis = 60_000 + connectTimeoutMillis = 60_000 + socketTimeoutMillis = 60_000 + } } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt new file mode 100644 index 000000000..4d94d6045 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt @@ -0,0 +1,332 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package com.microsoft.azure.kusto.ingest.v2 + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionResultUtils +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.Blob +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequest +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse +import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import io.ktor.http.HttpStatusCode +import kotlinx.coroutines.delay +import kotlinx.coroutines.withTimeoutOrNull +import java.util.UUID +import kotlin.time.Duration + +class QueuedIngestionClient( + override val dmUrl: String, + override val tokenCredential: TokenCredential, + override val skipSecurityChecks: Boolean = false, +) : + KustoBaseApiClient(dmUrl, tokenCredential, skipSecurityChecks), + IngestClient { + + /** + * Submits a queued ingestion request. + * + * @param database The target database name + * @param table The target table name + * @param blobSources List of BlobSourceInfo objects to ingest + * @param format The data format + * @param ingestProperties Optional ingestion properties + * @return IngestionOperation for tracking the request + */ + suspend fun submitQueuedIngestion( + database: String, + table: String, + blobSources: List, + format: Format = Format.csv, + ingestProperties: IngestRequestProperties? = null, + ): IngestResponse { + logger.info( + "Submitting queued ingestion request for database: $database, table: $table, blobs: ${blobSources.size}", + ) + // Convert BlobSourceInfo objects to Blob objects + val blobs = + blobSources.mapIndexed { index, blobSource -> + val sourceId = + blobSource.sourceId?.toString() + ?: UUID.randomUUID().toString() + logger.debug( + "Preparing blob {} with sourceId {} for ingestion.", + index, + sourceId, + ) + Blob(url = blobSource.blobPath, sourceId = sourceId) + } + + val requestProperties = + ingestProperties ?: IngestRequestProperties(format = format) + + logger.debug( + "** Ingesting to {}.{} with the following properties with properties {}", + database, + table, + requestProperties, + ) + + val ingestRequest = + IngestRequest( + timestamp = java.time.OffsetDateTime.now(), + blobs = blobs, + properties = requestProperties, + ) + + try { + val response: HttpResponse = + api.postQueuedIngest( + database = database, + table = table, + ingestRequest = ingestRequest, + ) + + return handleIngestResponse( + response = response, + database = database, + table = table, + dmUrl = dmUrl, + endpointType = "queued", + ) + } catch (e: Exception) { + logger.error( + "Exception occurred during queued ingestion submission", + e, + ) + if (e is IngestException) throw e + throw IngestException( + message = + "Error submitting queued ingest request to $dmUrl", + cause = e, + isPermanent = true, + ) + } + } + + /** + * Gets a summary of the ingestion operation status (lightweight, fast). + * This method provides overall status counters without detailed blob + * information. Use this for quick status checks and polling scenarios. 
+ * + * @param database The target database name + * @param table The target table name + * @param operationId The operation ID returned from the ingestion request + * @return Updated IngestionOperation with status summary + */ + private suspend fun getIngestionDetails( + database: String, + table: String, + operationId: String, + details: Boolean, + ): StatusResponse { + logger.debug("Checking ingestion summary for operation: $operationId") + try { + val response: HttpResponse = + api.getIngestStatus( + database = database, + table = table, + operationId = operationId, + details = details, + ) + + if ( + response.success && + response.status == HttpStatusCode.OK.value + ) { + val ingestStatusResponse = response.body() + logger.debug( + "Successfully retrieved summary for operation: {} and details: {}", + operationId, + ingestStatusResponse, + ) + return ingestStatusResponse + } else { + logger.error(response.toString()) + val ingestStatusFailure: StatusResponse = response.body() + // check if it is a permanent failure from status + val transientFailures = + ingestStatusFailure.details?.filter { + it.failureStatus == + BlobStatus.FailureStatus.Transient + } + val hasTransientErrors = transientFailures.isNullOrEmpty() + + if ( + response.status == HttpStatusCode.NotFound.value || + hasTransientErrors + ) { + val message = + if (hasTransientErrors) { + printMessagesFromFailures(transientFailures) + } else { + "Error polling $dmUrl for operation $operationId." + } + logger.error(message) + throw IngestException( + message = message, + cause = RuntimeException(message), + failureCode = response.status, + failureSubCode = "", + isPermanent = false, + ) + } + // TODO: We need to eventually look at OneApiExceptions + val errorMessage = + printMessagesFromFailures(ingestStatusFailure.details) + ?: "Failed to get ingestion summary for operation $operationId. Status: ${response.status}, Body: $ingestStatusFailure" + logger.error(errorMessage) + throw IngestException(errorMessage, isPermanent = true) + } + } catch (e: Exception) { + logger.error( + "Exception occurred while getting ingestion summary for operation: $operationId", + e, + ) + if (e is IngestException) throw e + throw IngestException( + "Failed to get ingestion summary: ${e.message}", + e, + ) + } + } + + private fun printMessagesFromFailures( + failures: List?, + ): String? { + return failures?.joinToString { + ( + sourceId, + status, + startedAt, + lastUpdateTime, + errorCode, + failureStatus, + details, + ), + -> + "Error ingesting blob with $sourceId. ErrorDetails $details, ErrorCode $errorCode " + + ", Status ${status?.value}. Ingestion lastUpdated at $lastUpdateTime & started at $startedAt. " + + "FailureStatus ${failureStatus?.value}" + } + } + + /** + * Gets the status of a queued ingestion operation with intelligent API + * selection. For completed operations or when details are explicitly + * requested, uses the details API. For in-progress operations, uses the + * summary API for efficiency. 
+ * + * @param database The target database name + * @param table The target table name + * @param operationId The operation ID returned from the ingestion request + * @param forceDetails Force retrieval of detailed information regardless of + * operation status + * @return Updated IngestionOperation with current status + */ + suspend fun getIngestionStatus( + database: String, + table: String, + operationId: String, + forceDetails: Boolean = false, + ): StatusResponse { + // If details are explicitly requested, use the details API + if (forceDetails) { + val statusResponse = + getIngestionDetails(database, table, operationId, true) + logger.debug( + "Forcing detailed status retrieval for operation: {} returning {}", + operationId, + statusResponse, + ) + return statusResponse + } + // Start with summary for efficiency + val statusResponse = + getIngestionDetails(database, table, operationId, false) + // If operation has failures or is completed, get detailed information + return if ( + statusResponse.status?.failed?.let { it > 0 } == true || + IngestionResultUtils.isCompleted(statusResponse.details) + ) { + logger.debug( + "Operation $operationId has failures or is completed, retrieving details", + ) + getIngestionDetails(database, table, operationId, true) + } else { + statusResponse + } + } + + /** + * Polls the ingestion status until completion or timeout. + * + * @param database The target database name + * @param table The target table name + * @param operationId The operation ID to poll + * @param pollingInterval How often to check the status + * @param timeout Maximum time to wait before throwing timeout exception + * @return The final StatusResponse when ingestion is completed + * @throws IngestException if the operation times out or fails + */ + suspend fun pollUntilCompletion( + database: String, + table: String, + operationId: String, + pollingInterval: Duration = Duration.parse("PT30S"), + timeout: Duration = Duration.parse("PT5M"), + ): StatusResponse { + val result = + withTimeoutOrNull(timeout.inWholeMilliseconds) { + var currentStatus: StatusResponse + do { + currentStatus = + getIngestionStatus( + database, + table, + operationId, + forceDetails = true, + ) + logger.debug( + "Starting to poll ingestion status for operation: $operationId, timeout: $timeout", + ) + logger.debug( + "IngestionStatus: {}", + currentStatus.details, + ) + if ( + IngestionResultUtils.isCompleted( + currentStatus.details, + ) + ) { + logger.info( + "Ingestion operation $operationId completed", + ) + return@withTimeoutOrNull currentStatus + } + + logger.debug( + "Ingestion operation $operationId still in progress, waiting ${pollingInterval.inWholeSeconds}s before next check", + ) + delay(pollingInterval.inWholeMilliseconds) + } while ( + !IngestionResultUtils.isCompleted( + currentStatus.details, + ) + ) + + currentStatus + } + + return result + ?: throw IngestException( + "Ingestion operation $operationId timed out after $timeout. " + + "Consider increasing the timeout duration or check the operation status manually.", + ) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt new file mode 100644 index 000000000..e7b0745f9 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
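[Editor's note: putting QueuedIngestionClient together, a hedged end-to-end sketch, not patch content. Database, table, and blob URL are placeholders; it assumes an Azure CLI login as the tests do, and that the service returns an operation id.]

import com.azure.identity.AzureCliCredentialBuilder
import com.microsoft.azure.kusto.ingest.v2.models.Format
import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo
import kotlinx.coroutines.runBlocking
import kotlin.time.Duration

// Sketch: submit one blob for queued ingestion, then block until it completes.
fun main() = runBlocking {
    val client = QueuedIngestionClient(
        dmUrl = System.getenv("DM_CONNECTION_STRING")
            ?: error("DM_CONNECTION_STRING is not set"),
        tokenCredential = AzureCliCredentialBuilder().build(),
    )
    val response = client.submitQueuedIngestion(
        database = "MyDatabase", // placeholder
        table = "MyTable",       // placeholder
        blobSources = listOf(
            BlobSourceInfo("https://example.blob.core.windows.net/container/data.json"),
        ),
        format = Format.json,
    )
    val finalStatus = client.pollUntilCompletion(
        database = "MyDatabase",
        table = "MyTable",
        operationId = response.ingestionOperationId
            ?: error("no operation id returned"),
        pollingInterval = Duration.parse("PT5S"),
        timeout = Duration.parse("PT5M"),
    )
    println(finalStatus.status)
}
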
+package com.microsoft.azure.kusto.ingest.v2 + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import io.ktor.http.HttpStatusCode +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.json.Json +import java.net.ConnectException +import java.net.URI + +@Serializable +private data class StreamFromBlobRequestBody( + @SerialName("SourceUri") val sourceUri: String, +) + +class StreamingIngestClient( + val engineUrl: String, + override val tokenCredential: TokenCredential, + override val skipSecurityChecks: Boolean = false, +) : + KustoBaseApiClient(engineUrl, tokenCredential, skipSecurityChecks), + IngestClient { + + /** + * Submits a streaming ingestion request. + * + * @param database The target database name + * @param table The target table name + * @param data The data to ingest (as ByteArray) + * @param format The data format + * @param ingestProperties Optional ingestion properties + * @param blobUrl Optional blob URL for blob-based streaming ingestion (if + * provided, data is ignored) + * @return IngestResponse for tracking the request + */ + suspend fun submitStreamingIngestion( + database: String, + table: String, + data: ByteArray, + format: Format = Format.csv, + ingestProperties: IngestRequestProperties? = null, + blobUrl: String? = null, + ) { + val host = URI(engineUrl).host + + val bodyContent: Any + val sourceKind: String? + val contentType: String + + if (blobUrl != null) { + // Blob-based streaming + val requestBody = StreamFromBlobRequestBody(sourceUri = blobUrl) + bodyContent = Json.encodeToString(requestBody).toByteArray() + sourceKind = "uri" + contentType = "application/json" + logger.info( + "Submitting streaming ingestion from blob for database: {}, table: {}, blob: {}. Host {}", + database, + table, + blobUrl, + host, + ) + } else { + // Direct streaming using raw data + bodyContent = data + sourceKind = null + contentType = "application/octet-stream" + logger.info( + "Submitting streaming ingestion request for database: {}, table: {}, data size: {}. Host {}", + database, + table, + data.size, + host, + ) + } + + try { + val response: HttpResponse = + api.postStreamingIngest( + database = database, + table = table, + streamFormat = format, + body = bodyContent, + mappingName = + ingestProperties?.ingestionMappingReference, + sourceKind = sourceKind, + host = host, + acceptEncoding = "gzip", + connection = "Keep-Alive", + contentEncoding = null, + contentType = contentType, + ) + return handleIngestResponse( + response = response, + database = database, + table = table, + dmUrl = engineUrl, + endpointType = "streaming", + ) + } catch (notAbleToReachHost: ConnectException) { + val message = + "Failed to reach $engineUrl for streaming ingestion. Please ensure the cluster address is correct and the cluster is reachable." 
+ throw IngestException( + message = message, + cause = notAbleToReachHost, + failureCode = HttpStatusCode.NotFound.value, + failureSubCode = "", + isPermanent = false, + ) + } catch (e: Exception) { + logger.error( + "Exception occurred during streaming ingestion submission", + e, + ) + if (e is IngestException) throw e + throw IngestException( + message = + "Error submitting streaming ingest request to $engineUrl", + cause = e, + isPermanent = true, + ) + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/ColumnMapping.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/ColumnMapping.kt new file mode 100644 index 000000000..d0c7020b2 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/ColumnMapping.kt @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models.mapping + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import kotlinx.serialization.Serializable as KSerializable + +@KSerializable +data class ColumnMapping( + val columnName: String, + val columnType: String, + val properties: MutableMap = mutableMapOf(), +) { + + fun setPath(path: String) { + properties[MappingConstants.Path.name] = path + } + + fun getPath(): String? = properties[MappingConstants.Path.name] + + fun setTransform(transform: TransformationMethod) { + properties[MappingConstants.Transform.name] = transform.name + } + + fun getTransform(): TransformationMethod? { + val transform = properties[MappingConstants.Transform.name] + return if (transform.isNullOrBlank()) { + null + } else { + TransformationMethod.valueOf(transform) + } + } + + fun setOrdinal(ordinal: Int) { + properties[MappingConstants.Ordinal.name] = ordinal.toString() + } + + fun getOrdinal(): Int? { + val ordinal = properties[MappingConstants.Ordinal.name] + return if (ordinal.isNullOrBlank()) null else ordinal.toInt() + } + + fun setConstantValue(constValue: String) { + properties[MappingConstants.ConstValue.name] = constValue + } + + fun getConstantValue(): String? = + properties[MappingConstants.ConstValue.name] + + fun setField(field: String) { + properties[MappingConstants.Field.name] = field + } + + fun getField(): String? = properties[MappingConstants.Field.name] + + fun setColumns(columns: String) { + properties[MappingConstants.Columns.name] = columns + } + + fun getColumns(): String? = properties[MappingConstants.Columns.name] + + fun setStorageDataType(dataType: String) { + properties[MappingConstants.StorageDataType.name] = dataType + } + + fun getStorageDataType(): String? 
= + properties[MappingConstants.StorageDataType.name] + + fun isValid(mappingKind: Format): Boolean { + return when (mappingKind) { + Format.csv, + Format.sstream, + -> columnName.isNotBlank() + Format.json, + Format.parquet, + Format.orc, + Format.w3clogfile, + -> { + val transformationMethod = getTransform() + columnName.isNotBlank() && + ( + !getPath().isNullOrBlank() || + transformationMethod == + TransformationMethod.SourceLineNumber || + transformationMethod == + TransformationMethod.SourceLocation + ) + } + Format.avro, + Format.apacheavro, + -> + columnName.isNotBlank() && !getColumns().isNullOrBlank() + else -> false + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt new file mode 100644 index 000000000..d645b5566 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models.mapping + +import kotlinx.serialization.Serializable as KSerializable + +@KSerializable +data class InlineIngestionMapping( + var columnMappings: List? = null, + var ingestionMappingType: IngestionMappingType? = null, +) { + constructor( + other: InlineIngestionMapping, + ) : this( + other.columnMappings?.map { + ColumnMapping( + it.columnName, + columnType = it.columnType, + properties = it.properties, + ) + }, + other.ingestionMappingType, + ) + + enum class IngestionMappingType(val kustoValue: String) { + CSV("Csv"), + JSON("Json"), + AVRO("Avro"), + PARQUET("Parquet"), + SSTREAM("SStream"), + ORC("Orc"), + APACHEAVRO("ApacheAvro"), + W3CLOGFILE("W3CLogFile"), + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingConstants.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingConstants.kt new file mode 100644 index 000000000..90f0c45ec --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingConstants.kt @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models.mapping + +import kotlinx.serialization.Serializable as KSerializable + +@KSerializable +enum class MappingConstants(val value: String) { + // Json Mapping constants + Path("Path"), + Transform("Transform"), + + // csv Mapping constants + Ordinal("Ordinal"), + ConstValue("ConstValue"), + + // Avro Mapping constants + Field("Field"), + Columns("Columns"), + + // General Mapping constants + StorageDataType("StorageDataType"), +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/TransformationMethod.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/TransformationMethod.kt new file mode 100644 index 000000000..63f9d4d2f --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/TransformationMethod.kt @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
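[Editor's note: a usage illustration for the mapping model above — a sketch, not patch content. It also references the TransformationMethod enum defined in the next file.]

import com.microsoft.azure.kusto.ingest.v2.models.Format
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json

// Sketch: build two JSON column mappings, validate them, and serialize the
// column list (the tests in this patch pass the serialized list, not the
// InlineIngestionMapping wrapper, as IngestRequestProperties.ingestionMapping).
fun buildSampleMappingJson(): String {
    val mappings = listOf(
        ColumnMapping(columnName = "timestamp", columnType = "datetime")
            .apply { setPath("$.timestamp") },
        ColumnMapping(columnName = "line", columnType = "long")
            .apply { setTransform(TransformationMethod.SourceLineNumber) },
    )
    check(mappings.all { it.isValid(Format.json) }) { "invalid mapping" }
    return Json.encodeToString(mappings)
}
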
+package com.microsoft.azure.kusto.ingest.v2.common.models.mapping + +import kotlinx.serialization.Serializable as KSerializable + +@KSerializable +enum class TransformationMethod { + None, + PropertyBagArrayToDictionary, + SourceLocation, + SourceLineNumber, + GetPathElement, + UnknownMethod, + DateTimeFromUnixSeconds, + DateTimeFromUnixMilliseconds, +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/serialization/OffsetDateTimeSerializer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/serialization/OffsetDateTimeSerializer.kt new file mode 100644 index 000000000..d09a82764 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/serialization/OffsetDateTimeSerializer.kt @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.serialization + +import kotlinx.serialization.KSerializer +import kotlinx.serialization.descriptors.PrimitiveKind +import kotlinx.serialization.descriptors.PrimitiveSerialDescriptor +import kotlinx.serialization.descriptors.SerialDescriptor +import kotlinx.serialization.encoding.Decoder +import kotlinx.serialization.encoding.Encoder +import java.time.OffsetDateTime +import java.time.format.DateTimeFormatter + +/** + * Custom serializer for OffsetDateTime to handle JSON + * serialization/deserialization. + */ +object OffsetDateTimeSerializer : KSerializer { + override val descriptor: SerialDescriptor = + PrimitiveSerialDescriptor("OffsetDateTime", PrimitiveKind.STRING) + + override fun serialize(encoder: Encoder, value: OffsetDateTime) { + encoder.encodeString( + value.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME), + ) + } + + override fun deserialize(decoder: Decoder): OffsetDateTime { + return OffsetDateTime.parse( + decoder.decodeString(), + DateTimeFormatter.ISO_OFFSET_DATE_TIME, + ) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtils.kt new file mode 100644 index 000000000..ad9d13fed --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtils.kt @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
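[Editor's note: KustoBaseApiClient, earlier in this patch, registers this serializer contextually on its Json instance. A minimal standalone sketch of that registration and a round trip, assuming only kotlinx-serialization-json on the classpath.]

import kotlinx.serialization.json.Json
import kotlinx.serialization.modules.SerializersModule
import java.time.OffsetDateTime

// Sketch: the same contextual registration KustoBaseApiClient performs.
val kustoJson = Json {
    ignoreUnknownKeys = true
    serializersModule = SerializersModule {
        contextual(OffsetDateTime::class, OffsetDateTimeSerializer)
    }
}

// Round trip to show the ISO_OFFSET_DATE_TIME wire format is preserved.
fun demo() {
    val now = OffsetDateTime.now()
    val encoded = kustoJson.encodeToString(OffsetDateTimeSerializer, now)
    val decoded = kustoJson.decodeFromString(OffsetDateTimeSerializer, encoded)
    check(decoded.isEqual(now))
}
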
+package com.microsoft.azure.kusto.ingest.v2.common.utils + +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import kotlin.collections.contains + +object IngestionResultUtils { + + fun hasFailedResults(results: List): Boolean { + return results.any { it.status == BlobStatus.Status.Failed } + } + + fun isCompleted(results: List?): Boolean { + return results?.isNotEmpty() == true && + results.all { result -> + result.status in + listOf( + BlobStatus.Status.Succeeded, + BlobStatus.Status.Failed, + BlobStatus.Status.Canceled, + ) + } + } + + fun isInProgress(results: List?): Boolean { + return results?.any { result -> + result.status in + listOf( + BlobStatus.Status.Queued, + BlobStatus.Status.InProgress, + ) + } == true + } + + fun getFailedResults(results: List?): List { + return results?.filter { it.status == BlobStatus.Status.Failed } + ?: emptyList() + } + + fun getSucceededResults(results: List?): List { + return results?.filter { it.status == BlobStatus.Status.Succeeded } + ?: emptyList() + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt new file mode 100644 index 000000000..ee7997003 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt @@ -0,0 +1,8 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.container + +interface ContainerBase { + val uri: String + val name: String +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt new file mode 100644 index 000000000..aa5eb1974 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import java.util.UUID + +abstract class AbstractSourceInfo : SourceInfo { + override var sourceId: UUID? = null +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt new file mode 100644 index 000000000..59d35003a --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import java.io.File +import java.util.UUID + +class BlobSourceInfo : AbstractSourceInfo { + var blobPath: String + private set + + // For internal usage - only when we create the blob + var blobExactSize: Long? = null + private set + + var compressionType: CompressionType? = null + + constructor(blobPath: String) { + this.blobPath = blobPath + } + + constructor(blobPath: String, compressionType: CompressionType?) 
{ + this.blobPath = blobPath + this.compressionType = compressionType + } + + constructor( + blobPath: String, + compressionType: CompressionType?, + sourceId: UUID?, + ) { + this.blobPath = blobPath + this.compressionType = compressionType + this.sourceId = sourceId + } + + override fun validate() { + require(blobPath.isNotBlank()) { "blobPath cannot be blank" } + } + + companion object { + /** For internal usage, adding blobExactSize */ + fun fromFile( + blobPath: String, + filePath: String, + sourceId: UUID?, + sourceCompressionType: CompressionType?, + gotCompressed: Boolean, + ): BlobSourceInfo { + val blobSourceInfo = + BlobSourceInfo( + blobPath, + if (gotCompressed) { + CompressionType.GZIP + } else { + sourceCompressionType + }, + sourceId, + ) + if (sourceCompressionType == null) { + blobSourceInfo.blobExactSize = File(filePath).length() + } + return blobSourceInfo + } + + /** For internal usage, adding blobExactSize */ + fun fromStream( + blobPath: String, + size: Int, + sourceId: UUID?, + compressionType: CompressionType?, + ): BlobSourceInfo { + val blobSourceInfo = + BlobSourceInfo(blobPath, compressionType, sourceId) + blobSourceInfo.blobExactSize = size.toLong() + return blobSourceInfo + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FormatUtil.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FormatUtil.kt new file mode 100644 index 000000000..898e17c9e --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FormatUtil.kt @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.models.Format + +object FormatUtil { + fun isBinaryFormat(format: Format): Boolean { + return when (format) { + Format.avro, + Format.apacheavro, + Format.parquet, + Format.orc, + -> true + else -> false + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt deleted file mode 100644 index 049329421..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.source - -import com.microsoft.azure.kusto.ingest.v2.common.utils.PathUtils -import com.microsoft.azure.kusto.ingest.v2.models.Format -import java.lang.AutoCloseable -import java.util.UUID - -abstract class IngestionSource( - open val format: Format, - open val compressionType: CompressionType?, - open val url: String?, - open val sourceId: UUID = UUID.randomUUID(), -) : AutoCloseable { - var name: String? = null - private set - - fun initName(baseName: String? = null) { - val type = - this::class.simpleName?.lowercase()?.removeSuffix("source") - ?: "unknown" - name = - "${type}_${PathUtils.sanitizeFileName(baseName, sourceId.toString())}${format.value}$compressionType" - } -} - -// Placeholder classes for missing dependencies -object ExtendedDataSourceCompressionType { - fun detectFromUri(url: String): CompressionType? 
= null -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt deleted file mode 100644 index 563eb6db2..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.source - -import com.microsoft.azure.kusto.ingest.v2.models.Format -import java.io.InputStream -import java.util.UUID - -abstract class LocalSource( - override val format: Format, - val leaveOpen: Boolean, - override val compressionType: CompressionType = CompressionType.NONE, - val baseName: String? = null, - override val sourceId: UUID = UUID.randomUUID(), -) : IngestionSource(format, compressionType, baseName, sourceId) { - - // Lazily initialized input stream for ingestion source - protected lateinit var mStream: InputStream - - // Indicates whether the stream should be left open after ingestion. - // val leaveOpen: Boolean // Already a constructor property - - internal val shouldCompress: Boolean - get() = - (compressionType == CompressionType.NONE) && - !isBinaryFormat(format) - - abstract fun data(): InputStream - - fun reset() { - data().reset() - } - - override fun close() { - if (!leaveOpen) { - if (this::mStream.isInitialized) { - mStream.close() - } - } - } - - fun isBinaryFormat(format: Format): Boolean { - return when (format) { - Format.avro, - Format.parquet, - Format.orc, - Format.apacheavro, - -> true - else -> false - } - } -} - -class StreamSource( - stream: InputStream, - format: Format, - sourceCompression: CompressionType, - sourceId: UUID = UUID.randomUUID(), - name: String? = null, - leaveOpen: Boolean = false, -) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) { - - init { - mStream = stream - initName(name) - } - - override fun data(): InputStream { - return mStream - ?: throw IllegalStateException("Stream is not initialized") - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt new file mode 100644 index 000000000..f68226638 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import java.util.UUID + +interface SourceInfo { + /** Checks that this SourceInfo is defined appropriately. */ + fun validate() + + var sourceId: UUID? 
+} diff --git a/ingest-v2/src/main/resources/application.yaml b/ingest-v2/src/main/resources/application.yaml new file mode 100644 index 000000000..88e6eff80 --- /dev/null +++ b/ingest-v2/src/main/resources/application.yaml @@ -0,0 +1,6 @@ +ktor: + application: + modules: + - com.microsoft.azure.kusto.ApplicationKt.module + deployment: + port: 8080 diff --git a/ingest-v2/src/main/resources/logback.xml b/ingest-v2/src/main/resources/logback.xml new file mode 100644 index 000000000..aadef5d5b --- /dev/null +++ b/ingest-v2/src/main/resources/logback.xml @@ -0,0 +1,12 @@ + + + + %d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + \ No newline at end of file diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml index 484eb9d43..85eb2d893 100644 --- a/ingest-v2/src/main/resources/openapi.yaml +++ b/ingest-v2/src/main/resources/openapi.yaml @@ -129,6 +129,36 @@ paths: type: string enum: - uri + - name: Host + in: header + required: true + schema: + type: string + example: "ingest-mycluster.swedencentral.kusto.windows.net" + - name: Accept-Encoding + in: header + required: false + schema: + type: string + example: "gzip" + - name: Connection + in: header + required: false + schema: + type: string + example: "Keep-Alive" + - name: Content-Encoding + in: header + required: false + schema: + type: string + example: "gzip" + - name: Content-Type + in: header + required: false + schema: + type: string + example: "application/octet-stream" responses: '200': description: Ingestion mappings response @@ -207,11 +237,11 @@ components: type: boolean description: Delete blob after download nullable: true - mappingReference: + ingestionMappingReference: type: string description: Reference to a named mapping policy nullable: true - mapping: + ingestionMapping: type: string description: Ingestion mapping object nullable: true diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt similarity index 57% rename from ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt rename to ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt index 6e2656fed..b572cdfa5 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationApiWrapperTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt @@ -2,53 +2,73 @@ // Licensed under the MIT License. 
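[Editor's note: the openapi.yaml change above renames mappingReference/mapping to ingestionMappingReference/ingestionMapping, which is how they surface on the generated IngestRequestProperties model. A small sketch of the renamed fields in use; the table and mapping names are placeholders.]

import com.microsoft.azure.kusto.ingest.v2.models.Format
import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties

// Sketch: the renamed properties, as the tests later in this patch pass them.
val props = IngestRequestProperties(
    format = Format.json,
    ingestionMappingReference = "MyTable_mapping", // placeholder mapping name
    enableTracking = true,
)
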
package com.microsoft.azure.kusto.ingest.v2 -import com.azure.identity.AzureCliCredentialBuilder import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.api.parallel.Execution +import org.junit.jupiter.api.parallel.ExecutionMode import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource -import org.slf4j.LoggerFactory import java.util.stream.Stream import kotlin.test.assertNotNull @TestInstance(TestInstance.Lifecycle.PER_CLASS) -class ConfigurationApiWrapperTest { - - private val logger = - LoggerFactory.getLogger(ConfigurationApiWrapperTest::class.java) - +@Execution(ExecutionMode.CONCURRENT) +class ConfigurationClientTest : + IngestV2TestBase(ConfigurationClientTest::class.java) { private fun endpointAndExceptionClause(): Stream { return Stream.of( - Arguments.of(System.getenv("DM_CONNECTION_STRING"), false), - Arguments.of("https://help.kusto.windows.net", true), + Arguments.of( + "Success Scenario", + System.getenv("DM_CONNECTION_STRING"), + false, + false, + ), + // Note on the arg below when this is rolled out to all clusters, this test will + // start failing + Arguments.of( + "Cluster without ingest-v2", + "https://help.kusto.windows.net", + true, + false, + ), ) } - @ParameterizedTest + @ParameterizedTest(name = "{0}") @MethodSource("endpointAndExceptionClause") fun `run e2e test with an actual cluster`( + testName: String, cluster: String, isException: Boolean, + isUnreachableHost: Boolean, ): Unit = runBlocking { - val actualTokenProvider = - AzureCliCredentialBuilder() - .build() // Replace with a real token provider + logger.info("Running configuration test {}", testName) // val cluster = System.getenv("DM_CONNECTION_STRING") - val actualWrapper = - ConfigurationApiWrapper(cluster, actualTokenProvider, true) + val actualWrapper = ConfigurationClient(cluster, tokenProvider, true) if (isException) { // assert the call to DefaultConfigurationCache throws - assertThrows { - DefaultConfigurationCache( - configurationProvider = { - actualWrapper.getConfigurationDetails() - }, - ) - .getConfiguration() + val exception = + assertThrows { + DefaultConfigurationCache( + configurationProvider = { + actualWrapper + .getConfigurationDetails() + }, + ) + .getConfiguration() + } + assertNotNull(exception, "Exception should not be null") + if (isUnreachableHost) { + assert(exception.cause is java.net.ConnectException) + assert(exception.isPermanent == false) + } else { + // if the host is reachable, we expect a 404 + assert(exception.failureCode == 404) + assert(exception.isPermanent == false) } } else { val defaultCachedConfig = diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt new file mode 100644 index 000000000..ce58a8fca --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt @@ -0,0 +1,90 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package com.microsoft.azure.kusto.ingest.v2 + +import com.azure.core.credential.TokenCredential +import com.azure.identity.AzureCliCredentialBuilder +import com.microsoft.azure.kusto.data.Client +import com.microsoft.azure.kusto.data.ClientFactory +import com.microsoft.azure.kusto.data.auth.ConnectionStringBuilder +import com.microsoft.azure.kusto.ingest.v2.models.Format +import org.junit.jupiter.api.AfterAll +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.TestInstance +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import java.util.UUID + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +abstract class IngestV2TestBase(testClass: Class<*>) { + protected val logger: Logger = LoggerFactory.getLogger(testClass) + protected val tokenProvider: TokenCredential = + AzureCliCredentialBuilder().build() + protected val database = System.getenv("TEST_DATABASE") ?: "e2e" + protected val dmEndpoint: String = + System.getenv("DM_CONNECTION_STRING") + ?: throw IllegalArgumentException( + "DM_CONNECTION_STRING environment variable is not set", + ) + protected val targetTestFormat = Format.json + protected val engineEndpoint: String = + dmEndpoint.replace("https://ingest-", "https://") + protected open val targetTable: String = + "Sensor_${UUID.randomUUID().toString().replace("-", "").take(8)}" + protected val columnNamesToTypes: Map = + mapOf( + "timestamp" to "datetime", + "deviceId" to "guid", + "messageId" to "guid", + "temperature" to "real", + "humidity" to "real", + "SourceLocation" to "string", + "Type" to "string", + ) + protected lateinit var adminClusterClient: Client + + @BeforeAll + fun createTables() { + val createTableScript = + """ + .create-merge table $targetTable ( + ${columnNamesToTypes.entries.joinToString(",") { "['${it.key}']:${it.value}" }} + ) + """ + .trimIndent() + val mappingReference = + """ + .create-or-alter table $targetTable ingestion json mapping '${targetTable}_mapping' ```[ + ${ + columnNamesToTypes.keys.joinToString("\n") { col -> + when (col) { + "SourceLocation" -> " {\"column\":\"$col\", \"Properties\":{\"Transform\":\"SourceLocation\"}}," + "Type" -> " {\"column\":\"$col\", \"Properties\":{\"ConstValue\":\"MappingRef\"}}" + else -> " {\"column\":\"$col\", \"Properties\":{\"Path\":\"$.$col\"}}," + } + }.removeSuffix(",") + } + ]``` + """ + .trimIndent() + adminClusterClient = + ClientFactory.createClient( + ConnectionStringBuilder.createWithAzureCli( + engineEndpoint, + ), + ) + adminClusterClient.executeMgmt(database, createTableScript) + adminClusterClient.executeMgmt(database, mappingReference) + adminClusterClient.executeMgmt( + database, + ".clear database cache streamingingestion schema", + ) + } + + @AfterAll + fun dropTables() { + val dropTableScript = ".drop table $targetTable ifexists" + logger.info("Dropping table $targetTable") + adminClusterClient.executeMgmt(database, dropTableScript) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt new file mode 100644 index 000000000..4c267d7bd --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt @@ -0,0 +1,260 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.ColumnMapping +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.InlineIngestionMapping +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.TransformationMethod +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import kotlinx.coroutines.runBlocking +import kotlinx.serialization.json.Json +import org.junit.jupiter.api.Assumptions.assumeTrue +import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.api.parallel.Execution +import org.junit.jupiter.api.parallel.ExecutionMode +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import java.net.ConnectException +import kotlin.test.assertNotNull +import kotlin.time.Duration + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Execution(ExecutionMode.CONCURRENT) +class QueuedIngestionClientTest : + IngestV2TestBase(QueuedIngestionClientTest::class.java) { + + @ParameterizedTest(name = "[QueuedIngestion] {index} => TestName ={0}") + @CsvSource( + // Single JSON blob, no mapping + "QueuedIngestion-NoMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,false,0", + // Single JSON blob, with mapping reference + "QueuedIngestion-WithMappingReference,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,true,false,0", + // Single JSON blob, with inline mapping + "QueuedIngestion-WithInlineMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,true,0", + // TODO This test fails (failureStatus is not right) + // "QueuedIngestion-FailWithInvalidBlob,https://nonexistentaccount.blob.core.windows.net/samplefiles/StormEvents.json,false,false,0", + // "https://nonexistentaccount.blob.core.windows.net/samplefiles/StormEvents.json, 1", + + ) + fun `test queued ingestion with CSV blob`( + testName: String, + blobUrl: String, + useMappingReference: Boolean, + useInlineIngestionMapping: Boolean, + numberOfFailures: Int, + ): Unit = runBlocking { + // Skip test if no DM_CONNECTION_STRING is set + logger.info("Starting test: $testName") + val queuedIngestionClient = + QueuedIngestionClient( + dmUrl = dmEndpoint, + tokenCredential = tokenProvider, + skipSecurityChecks = true, + ) + val testBlobUrls = listOf(blobUrl) + val testBlobSources = testBlobUrls.map { url -> BlobSourceInfo(url) } + + val properties = + if (useMappingReference) { + IngestRequestProperties( + format = targetTestFormat, + ingestionMappingReference = + "${targetTable}_mapping", + enableTracking = true, + ) + } else if (useInlineIngestionMapping) { + val ingestionColumnMappings = + columnNamesToTypes.keys.map { col -> + when (col) { + "SourceLocation" -> + ColumnMapping( + columnName = col, + columnType = + "string", + ) + .apply { + setTransform( + TransformationMethod + .SourceLocation, + ) + } + "Type" -> + ColumnMapping( + columnName = col, + columnType = + "string", + ) + .apply { + setConstantValue( + "IngestionMapping", + ) + } + else -> + ColumnMapping( + columnName = col, + columnType = + columnNamesToTypes[ + col, + ]!!, + ) + .apply { setPath("$.$col") } + } + } + val inlineIngestionMappingInline = + InlineIngestionMapping( + columnMappings = ingestionColumnMappings, + ingestionMappingType = + InlineIngestionMapping + .IngestionMappingType + .JSON, + 
) + val ingestionMappingString = + Json.encodeToString( + inlineIngestionMappingInline.columnMappings, + ) + IngestRequestProperties( + format = targetTestFormat, + ingestionMapping = ingestionMappingString, + enableTracking = true, + ) + } else { + IngestRequestProperties( + format = targetTestFormat, + enableTracking = true, + ) + } + + try { + // Test successful ingestion submission + val ingestionResponse = + queuedIngestionClient.submitQueuedIngestion( + database = database, + table = targetTable, + blobSources = testBlobSources, + format = targetTestFormat, + ingestProperties = properties, + ) + + logger.info( + "E2E: Submitted queued ingestion with operation ID: {}", + ingestionResponse.ingestionOperationId, + ) + assertNotNull( + ingestionResponse, + "IngestionOperation should not be null", + ) + assertNotNull( + ingestionResponse.ingestionOperationId, + "Operation ID should not be null", + ) + // Test polling until completion with timeout + logger.info( + "Starting to poll for completion of operation: {}", + ingestionResponse.ingestionOperationId, + ) + + val finalStatus = + queuedIngestionClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = + ingestionResponse.ingestionOperationId, + // Poll every 5 seconds for testing + pollingInterval = Duration.parse("PT5S"), + // 5 minute timeout for testing + timeout = Duration.parse("PT5M"), + ) + + logger.info( + "Ingestion completed with final status: {}", + finalStatus.status, + ) + + // Verify the operation completed successfully + // Check if we have any results + if (finalStatus.details?.isNotEmpty() == true) { + val succeededCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Succeeded + } + val failedCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Failed + } + logger.info( + "Ingestion results - Succeeded: {}, Failed: {}", + succeededCount, + failedCount, + ) + // For this test, we expect at least some processing to have occurred + assert(succeededCount > 0 || failedCount > 0) { + "Expected at least some blobs to be processed" + } + + assert(failedCount == numberOfFailures) { + "Expected $numberOfFailures failed ingestions, but got $failedCount" + } + + if (failedCount > 0) { + finalStatus.details + .filter { blobStatus -> + blobStatus.status == BlobStatus.Status.Failed + } + .forEach { failedBlob -> + logger.error( + "Blob ingestion failed for sourceId: ${failedBlob.sourceId}, message: ${failedBlob.details}", + ) + } + logger.error( + "There are $failedCount blobs that failed ingestion.", + ) + } + val filterType = + when { + useMappingReference -> "MappingRef" + useInlineIngestionMapping -> "IngestionMapping" + else -> "None" + } + if (useMappingReference || useInlineIngestionMapping) { + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | where Type == '$filterType' | summarize count=count() by SourceLocation", + ) + .primaryResults + assertNotNull(results, "Query results should not be null") + results.next() + val count: Long = results.getLong("count") + assertNotNull(count, "Count should not be null") + assert(count > 0) { + "Expected some records in the table after ingestion" + } + val sourceLocation: String = + results.getString("SourceLocation") + assert(sourceLocation.isNotEmpty()) { + "SourceLocation should not be empty" + } + } + } + } catch (e: ConnectException) { + // Skip test if we can't connect to the test cluster due to network issues + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster 
due to network connectivity issues: ${e.message}", + ) + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster due to network connectivity issues: ${e.cause?.message}", + ) + } else { + throw e + } + } + } +} +// https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt new file mode 100644 index 000000000..fd03e426a --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.api.parallel.Execution +import org.junit.jupiter.api.parallel.ExecutionMode +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.MethodSource +import java.util.UUID +import java.util.stream.Stream +import kotlin.test.assertNotNull + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Execution(ExecutionMode.CONCURRENT) +class StreamingIngestClientTest : + IngestV2TestBase(StreamingIngestClientTest::class.java) { + + private val publicBlobUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" + + private val targetUuid = UUID.randomUUID().toString() + private val randomRow: String = + """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$targetUuid","messageId": "7f316225-839a-4593-92b5-1812949279b3","temperature": 31.0301639051317,"humidity": 62.0791099602725}""" + .trimIndent() + + private fun testParameters(): Stream { + return Stream.of( + Arguments.of( + "Direct ingest - success", + engineEndpoint, + // isException + false, + // isUnreachableHost + false, + // blobUrl + null, + ), + Arguments.of( + "Blob based ingest - success", + engineEndpoint, + // isException + false, + // isUnreachableHost + false, + publicBlobUrl, + ), + // Blob-based streaming - error case + Arguments.of( + "Blob based ingest- Invalid blob URL", + engineEndpoint, + // isException + true, + // isUnreachableHost + false, + "https://nonexistentaccount.blob.core.windows.net/container/file.json", + ), + ) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testParameters") + fun `run streaming ingest test with various clusters`( + testName: String, + cluster: String, + isException: Boolean, + isUnreachableHost: Boolean, + blobUrl: String?, + ) = runBlocking { + logger.info("Running streaming ingest test {}", testName) + val client = StreamingIngestClient(cluster, tokenProvider, true) + val ingestProps = IngestRequestProperties(format = targetTestFormat) + if (isException) { + if (blobUrl != null) { + logger.info( + "Testing error handling for invalid blob URL: {}", + blobUrl, + ) + val exception = + assertThrows { + client.submitStreamingIngestion( + database = database, + table = targetTable, + data = ByteArray(0), + format = targetTestFormat, + ingestProperties = ingestProps, + blobUrl = blobUrl, + ) + } + assertNotNull( + 
exception, + "Exception should not be null for invalid blob URL", + ) + logger.info( + "Expected exception caught for invalid blob URL: {}", + exception.message, + ) + logger.info( + "Failure code: {}, isPermanent: {}", + exception.failureCode, + exception.isPermanent, + ) + + assert(exception.failureCode != 0) { + "Expected non-zero failure code for invalid blob URL" + } + } else { + logger.info( + "Testing error handling for direct streaming ingestion", + ) + val table = "testtable" + val data = "col1,col2\nval1,val2".toByteArray() + val exception = + assertThrows { + client.submitStreamingIngestion( + database, + table, + data, + targetTestFormat, + ingestProps, + ) + } + assertNotNull(exception, "Exception should not be null") + if (isUnreachableHost) { + assert(exception.cause is java.net.ConnectException) + assert(exception.isPermanent == false) + } else { + assert(exception.failureCode == 404) + assert(exception.isPermanent == false) + } + } + } else { + if (blobUrl != null) { + logger.info( + "Blob-based streaming ingestion with URL: {}", + blobUrl, + ) + + client.submitStreamingIngestion( + database = database, + table = targetTable, + // Ignored when blobUrl is provided + data = ByteArray(0), + format = targetTestFormat, + ingestProperties = ingestProps, + blobUrl = blobUrl, + ) + + logger.info( + "Blob-based streaming ingestion submitted successfully", + ) + + kotlinx.coroutines.delay(3000) + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | summarize count=count()", + ) + .primaryResults + + assertNotNull(results, "Query results should not be null") + results.next() + val count: Long = results.getLong("count") + assertNotNull(count, "Count should not be null") + assert(count > 0) { + "Expected records in table after blob-based streaming ingestion, but got $count" + } + + logger.info( + "Blob-based streaming ingestion verified - {} records in table", + count, + ) + } else { + logger.info("Direct streaming ingestion - success case") + client.submitStreamingIngestion( + database = database, + table = targetTable, + data = randomRow.toByteArray(), + format = targetTestFormat, + ingestProperties = ingestProps, + blobUrl = null, + ) + + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | where deviceId == '$targetUuid' | summarize count=count() by deviceId", + ) + .primaryResults + assertNotNull(results, "Query results should not be null") + results.next() + val count: Long = results.getLong("count") + assertNotNull(count, "Count should not be null") + assert(count == 1L) { + "Expected 1 record for $targetUuid, but got $count" + } + } + } + } +} From 0bb128194133d40f436bd8396c1bcba9c14f6416 Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Tue, 2 Dec 2025 16:48:40 +0530 Subject: [PATCH 27/50] Feature/add local file source v2 (#443) * * Add boilerplate * Merge code with IngestV2 branch * * Add support for non-blob sources * * Add support for non-blob sources * * Add support for File and Stream uploads * managed streaming ingest client * * Minor edits - Make constants for ManagedStreamingIngestPolicy * Add a method called size for determining size in the AbstractSourceInfo interface --------- Co-authored-by: Ramachandran A G * * Fix some formatting * * Remove unused config --------- Co-authored-by: ag-ramachandran --- ingest-v2/pom.xml | 41 + .../azure/kusto/ingest/v2/IngestClient.kt | 66 + .../azure/kusto/ingest/v2/IngestV2.kt | 63 + 
.../kusto/ingest/v2/KustoBaseApiClient.kt | 27 +- .../ingest/v2/ManagedStreamingIngestClient.kt | 657 +++++++ .../kusto/ingest/v2/ManagedStreamingPolicy.kt | 331 ++++ .../kusto/ingest/v2/QueuedIngestionClient.kt | 257 ++- .../kusto/ingest/v2/StreamingIngestClient.kt | 121 +- .../v2/builders/BaseIngestClientBuilder.kt | 89 + .../builders/QueuedIngestionClientBuilder.kt | 59 + .../builders/StreamingIngestClientBuilder.kt | 31 + .../ingest/v2/common/BatchOperationResult.kt | 24 + .../kusto/ingest/v2/common/ClientDetails.kt | 184 ++ .../ingest/v2/common/ConfigurationCache.kt | 43 +- .../ingest/v2/common/IngestRetryPolicy.kt | 19 +- .../v2/common/exceptions/IngestException.kt | 2 +- .../v2/container/BlobUploadContainer.kt | 392 ++++ .../v2/container/UploadContainerBase.kt | 9 + .../ingest/v2/container/UploadErrorCode.kt | 43 + .../kusto/ingest/v2/container/UploadResult.kt | 35 + .../ingest/v2/source/AbstractSourceInfo.kt | 7 +- .../kusto/ingest/v2/source/BlobSourceInfo.kt | 195 +- .../kusto/ingest/v2/source/CompressionType.kt | 4 +- .../kusto/ingest/v2/source/LocalSource.kt | 192 ++ .../kusto/ingest/v2/source/SourceInfo.kt | 2 +- ingest-v2/src/main/resources/application.yaml | 6 - ingest-v2/src/main/resources/logback.xml | 12 - ingest-v2/src/main/resources/openapi.yaml | 2 +- .../azure/kusto/ingest/v2/IngestV2TestBase.kt | 2 +- .../v2/ManagedStreamingIngestClientTest.kt | 472 +++++ .../ingest/v2/QueuedIngestionClientTest.kt | 1632 ++++++++++++++++- .../ingest/v2/StreamingIngestClientTest.kt | 166 +- .../test/resources/compression/sample.avro | Bin 0 -> 414 bytes .../test/resources/compression/sample.json | 3 + .../test/resources/compression/sample.json.gz | Bin 0 -> 171 bytes .../test/resources/compression/sample.parquet | Bin 0 -> 3795 bytes 36 files changed, 4991 insertions(+), 197 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResult.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt delete mode 100644 ingest-v2/src/main/resources/application.yaml delete mode 100644 ingest-v2/src/main/resources/logback.xml create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt create mode 100644 
ingest-v2/src/test/resources/compression/sample.avro create mode 100644 ingest-v2/src/test/resources/compression/sample.json create mode 100644 ingest-v2/src/test/resources/compression/sample.json.gz create mode 100644 ingest-v2/src/test/resources/compression/sample.parquet diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 1d6d3be73..8bf208c01 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -52,6 +52,11 @@ ktor-serialization-jackson ${ktor.version} + + io.ktor + ktor-serialization-jackson + ${ktor.version} + org.slf4j slf4j-simple @@ -79,6 +84,12 @@ ${junit.version} test + + ${project.groupId} + kusto-data + ${project.parent.version} + test + io.mockk mockk-jvm @@ -91,6 +102,10 @@ ${project.parent.version} test + + com.azure + azure-storage-blob + ${project.basedir}/src/main/kotlin @@ -266,6 +281,32 @@ + + org.apache.maven.plugins + maven-surefire-plugin + + + methods + 4 + 1 + true + + + + junit.jupiter.execution.parallel.enabled=true + junit.jupiter.execution.parallel.mode.default=concurrent + junit.jupiter.execution.parallel.mode.classes.default=concurrent + junit.jupiter.execution.parallel.config.strategy=dynamic + junit.jupiter.execution.parallel.config.dynamic.factor=2 + + + + + **/*Test.kt + **/*Test.java + + + \ No newline at end of file diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt index a29762576..f1a4c6fd3 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt @@ -4,16 +4,82 @@ package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse +import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo import io.ktor.http.HttpStatusCode import org.slf4j.Logger import org.slf4j.LoggerFactory import java.net.ConnectException +/** + * Interface that provides core abstraction for ingesting data into Kusto. + * + * Supports multiple source types: + * - BlobSourceInfo: Ingest from Azure Blob Storage + * - FileSourceInfo: Ingest from local files + * - StreamSourceInfo: Ingest from in-memory streams + */ interface IngestClient { + val logger: Logger get() = LoggerFactory.getLogger(IngestClient::class.java) + /** + * Submits an ingestion request from any source type. + * + * @param database The target database name + * @param table The target table name + * @param sources List of sources to ingest (BlobSourceInfo, FileSourceInfo, + * or StreamSourceInfo) + * @param format The data format (CSV, JSON, others) + * @param ingestProperties Optional ingestion properties + * @return IngestResponse containing the operation ID for tracking + */ + suspend fun submitIngestion( + database: String, + table: String, + sources: List, + format: Format = Format.csv, + ingestProperties: IngestRequestProperties? = null, + ): IngestResponse + + /** + * Gets the status of an ingestion operation. 
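+     *
+     * A minimal status-check sketch (illustrative only: `client` is any
+     * [IngestClient] implementation, `opId` is an operation ID previously
+     * returned by [submitIngestion], and the call runs inside a coroutine):
+     * ```kotlin
+     * val status = client.getIngestionStatus("MyDb", "MyTable", opId)
+     * if (status.status?.failed?.let { it > 0 } == true) {
+     *     // drill into blob-level failure details
+     *     val details = client.getIngestionDetails("MyDb", "MyTable", opId)
+     * }
+     * ```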
+ * + * @param database The target database name + * @param table The target table name + * @param operationId The operation ID returned from submitIngestion + * @param forceDetails Whether to force retrieval of detailed information + * @return StatusResponse containing the current status + */ + suspend fun getIngestionStatus( + database: String, + table: String, + operationId: String, + forceDetails: Boolean = false, + ): StatusResponse + + /** + * Gets detailed information about an ingestion operation. + * + * @param database The target database name + * @param table The target table name + * @param operationId The operation ID returned from submitIngestion + * @param details Whether to retrieve detailed blob-level information + * @return StatusResponse containing operation details + * @throws UnsupportedOperationException if the implementation doesn't + * support operation tracking + */ + suspend fun getIngestionDetails( + database: String, + table: String, + operationId: String, + details: Boolean = true, + ): StatusResponse + // Common way to parse ingestion response for both Streaming and Queued ingestion suspend fun handleIngestResponse( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt new file mode 100644 index 000000000..1869e77ba --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2 + +// Size of each block to upload to Azure Blob Storage (4 MB) +const val UPLOAD_BLOCK_SIZE_BYTES: Long = 4 * 1024 * 1024 + +// Maximum number of concurrent upload operations for blob upload +const val UPLOAD_MAX_CONCURRENCY: Int = 8 + +// Maximum size for a single upload operation to Azure Blob Storage (256 MB) +const val UPLOAD_MAX_SINGLE_SIZE_BYTES: Long = 256 * 1024 * 1024 + +// Maximum number of retry attempts for blob upload operations +const val UPLOAD_RETRY_MAX_TRIES: Int = 3 + +// Timeout in seconds for each blob upload attempt +const val UPLOAD_RETRY_TIMEOUT_SECONDS: Int = 60 + +// Initial delay in milliseconds between blob upload retry attempts +const val UPLOAD_RETRY_DELAY_MS: Long = 100 + +// Maximum delay in milliseconds between blob upload retry attempts +const val UPLOAD_RETRY_MAX_DELAY_MS: Long = 300 + +// Request timeout in milliseconds for Kusto API HTTP requests +const val KUSTO_API_REQUEST_TIMEOUT_MS: Long = 60_000 + +// Connection timeout in milliseconds for Kusto API HTTP requests +const val KUSTO_API_CONNECT_TIMEOUT_MS: Long = 60_000 + +// Socket timeout in milliseconds for Kusto API HTTP requests +const val KUSTO_API_SOCKET_TIMEOUT_MS: Long = 60_000 + +// Kusto API version used in HTTP requests +const val KUSTO_API_VERSION = "2024-12-12" + +// Default refresh interval for configuration cache (1 hour) +const val CONFIG_CACHE_DEFAULT_REFRESH_INTERVAL_HOURS: Long = 1 + +// Default value for skipSecurityChecks if not provided +const val CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS: Boolean = false + +// Default interval between retries for SimpleRetryPolicy (10 seconds) +const val INGEST_RETRY_POLICY_DEFAULT_INTERVAL_SECONDS: Long = 10 + +// Default total number of retries for SimpleRetryPolicy +const val INGEST_RETRY_POLICY_DEFAULT_TOTAL_RETRIES: Int = 3 + +// Default timeout for blob upload operations (1 hour) +const val BLOB_UPLOAD_TIMEOUT_HOURS: Long = 1 + +// Default retry intervals for 
CustomRetryPolicy (1s, 3s, 7s) +val INGEST_RETRY_POLICY_CUSTOM_INTERVALS: Array = arrayOf(1, 3, 7) + +// Default maximum number of retry attempts for container upload operations +const val UPLOAD_CONTAINER_MAX_RETRIES: Int = 3 + +// Default maximum data size for blob upload operations (4GB) +const val UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES: Long = 4L * 1024 * 1024 * 1024 + +// Default maximum concurrency for blob upload operations +const val UPLOAD_CONTAINER_MAX_CONCURRENCY: Int = 4 diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 8457a0aaf..5168e54bb 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -5,6 +5,7 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential import com.azure.core.credential.TokenRequestContext import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi +import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer import io.ktor.client.HttpClientConfig import io.ktor.client.plugins.DefaultRequest @@ -20,6 +21,7 @@ import kotlinx.serialization.json.Json import kotlinx.serialization.modules.SerializersModule import org.slf4j.LoggerFactory import java.time.OffsetDateTime +import java.util.UUID import kotlin.coroutines.resume import kotlin.coroutines.resumeWithException @@ -27,8 +29,11 @@ open class KustoBaseApiClient( open val dmUrl: String, open val tokenCredential: TokenCredential, open val skipSecurityChecks: Boolean = false, + open val clientDetails: ClientDetails? 
= ClientDetails.createDefault(), + open val clientRequestIdPrefix: String = "KIC.execute", ) { private val logger = LoggerFactory.getLogger(KustoBaseApiClient::class.java) + protected val setupConfig: (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } @@ -40,6 +45,22 @@ open class KustoBaseApiClient( private fun getClientConfig(config: HttpClientConfig<*>) { config.install(DefaultRequest) { header("Content-Type", "application/json") + + clientDetails?.let { details -> + header("x-ms-app", details.getApplicationForTracing()) + header("x-ms-user", details.getUserNameForTracing()) + header( + "x-ms-client-version", + details.getClientVersionForTracing(), + ) + } + + // Generate unique client request ID for tracing (format: prefix;uuid) + val clientRequestId = "$clientRequestIdPrefix;${UUID.randomUUID()}" + header("x-ms-client-request-id", clientRequestId) + header("x-ms-version", KUSTO_API_VERSION) + header("Connection", "Keep-Alive") + header("Accept", "application/json") } val trc = TokenRequestContext().addScopes("$dmUrl/.default") config.install(Auth) { @@ -106,9 +127,9 @@ open class KustoBaseApiClient( } /* TODO Check what these settings should be */ config.install(HttpTimeout) { - requestTimeoutMillis = 60_000 - connectTimeoutMillis = 60_000 - socketTimeoutMillis = 60_000 + requestTimeoutMillis = KUSTO_API_REQUEST_TIMEOUT_MS + connectTimeoutMillis = KUSTO_API_CONNECT_TIMEOUT_MS + socketTimeoutMillis = KUSTO_API_SOCKET_TIMEOUT_MS } } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt new file mode 100644 index 000000000..833c40722 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt @@ -0,0 +1,657 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2 + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.Retry +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.runWithRetry +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse +import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo +import io.ktor.http.HttpStatusCode +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import java.io.InputStream +import java.time.Clock +import java.time.Duration +import java.time.Instant +import kotlin.time.Duration.Companion.milliseconds + +/** + * ManagedStreamingIngestClient chooses between streaming and queued ingestion + * based on data size, service availability, and error patterns. 
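+ *
+ * A minimal usage sketch (illustrative only: the cluster URL, database and
+ * table names, `credential`, and `blobSource` are assumed values):
+ * ```kotlin
+ * val client = ManagedStreamingIngestClient(
+ *     clusterUrl = "https://mycluster.kusto.windows.net",
+ *     tokenCredential = credential,
+ * )
+ * val response = client.submitManagedIngestion(
+ *     database = "MyDb",
+ *     table = "MyTable",
+ *     sources = listOf(blobSource),
+ *     format = Format.csv,
+ * )
+ * ```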
+ * + * This client: + * - Attempts streaming ingestion first for eligible data + * - Automatically falls back to queued ingestion on failures + * - Implements retry logic with exponential backoff + * - Tracks per-table error patterns to optimize future ingestion attempts + * - Respects streaming ingestion limits and policies + * + * @param clusterUrl The Kusto cluster URL (used for both streaming and queued + * ingestion) + * @param tokenCredential Azure credential for authentication + * @param managedStreamingPolicy Policy controlling fallback behavior and retry + * logic + * @param skipSecurityChecks Whether to skip security checks (for testing) + */ +class ManagedStreamingIngestClient( + private val clusterUrl: String, + private val tokenCredential: TokenCredential, + private val managedStreamingPolicy: ManagedStreamingPolicy = + DefaultManagedStreamingPolicy(), + private val skipSecurityChecks: Boolean = false, +) : IngestClient { + + override val logger: Logger = + LoggerFactory.getLogger(ManagedStreamingIngestClient::class.java) + + private val streamingIngestClient = + StreamingIngestClient( + engineUrl = clusterUrl, + tokenCredential = tokenCredential, + skipSecurityChecks = skipSecurityChecks, + ) + + private val queuedIngestionClient = + QueuedIngestionClient( + dmUrl = clusterUrl, + tokenCredential = tokenCredential, + skipSecurityChecks = skipSecurityChecks, + ) + + // Maximum size for streaming ingestion (4MB default, can be tuned with dataSizeFactor) + private val maxStreamingIngestSize: Long = + (4 * 1024 * 1024 * managedStreamingPolicy.dataSizeFactor).toLong() + + /** + * Submits an ingestion request, intelligently choosing between streaming + * and queued ingestion. + * + * @param database The target database name + * @param table The target table name + * @param sources List of SourceInfo objects (BlobSourceInfo, + * FileSourceInfo, or StreamSourceInfo) + * @param format The data format + * @param ingestProperties Optional ingestion properties + * @return IngestResponse for tracking the request + */ + suspend fun submitManagedIngestion( + database: String, + table: String, + sources: List, + format: Format = Format.csv, + ingestProperties: IngestRequestProperties? 
= null, + ): IngestResponse { + require(database.isNotBlank()) { "Database name cannot be blank" } + require(table.isNotBlank()) { "Table name cannot be blank" } + require(sources.isNotEmpty()) { "Sources list cannot be empty" } + + val props = ingestProperties ?: IngestRequestProperties(format = format) + + logger.info( + "Starting managed ingestion for database: $database, table: $table, sources: ${sources.size}", + ) + + // Process each source + for (source in sources) { + when (source) { + is BlobSourceInfo -> ingestBlob(source, database, table, props) + is LocalSource -> ingestLocal(source, database, table, props) + else -> + throw IngestException( + "Unsupported source type: ${source::class.simpleName}", + isPermanent = true, + ) + } + } + + // Return a combined response (for now, return success) + return IngestResponse( + ingestionOperationId = + "managed-${Instant.now(Clock.systemUTC())}", + ) + } + + private suspend fun ingestBlob( + source: BlobSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + ): IngestResponse { + if (shouldUseQueuedIngestByPolicy(source, database, table, props)) { + logger.info( + "Policy dictates using queued ingestion for blob: ${source.blobPath}", + ) + return invokeQueuedIngestion(source, database, table, props) + } + + return invokeStreamingIngestion(source, database, table, props) + } + + private suspend fun ingestLocal( + source: LocalSource, + database: String, + table: String, + props: IngestRequestProperties, + ): IngestResponse { + val stream = source.data() + + if (!isStreamValid(stream)) { + throw IngestException( + "Stream is not valid for ingest. Ensure the stream is not null, has data, and is seekable.", + isPermanent = true, + ) + } + + if (shouldUseQueuedIngestBySize(source)) { + logger.info( + "Data size exceeds streaming limit, using queued ingestion", + ) + return invokeQueuedIngestion(source, database, table, props) + } + + if (shouldUseQueuedIngestByPolicy(source, database, table, props)) { + logger.info( + "Policy dictates using queued ingestion for local source: ${source.name}", + ) + return invokeQueuedIngestion(source, database, table, props) + } + + return invokeStreamingIngestion(source, database, table, props) + } + + private fun isStreamValid(stream: InputStream): Boolean { + return try { + // Mark the current position if supported + if (stream.markSupported()) { + stream.mark(1) + val hasData = stream.read() != -1 + stream.reset() // Reset to marked position + hasData + } else { + // For non-markable streams, check available bytes + stream.available() > 0 + } + } catch (e: Exception) { + logger.warn("Stream validation failed: ${e.message}") + false + } + } + + private fun shouldUseQueuedIngestBySize(source: LocalSource): Boolean { + val size = source.size() + + if (size == null) { + logger.warn( + "Could not determine data size for ${source::class.simpleName}", + ) + return false + } + + if (size > maxStreamingIngestSize) { + logger.info( + "Data size '$size' exceeds streaming limit '$maxStreamingIngestSize'. " + + "DataSizeFactor used: ${managedStreamingPolicy.dataSizeFactor}. 
Using queued ingestion.", + ) + return true + } + + return false + } + + private fun shouldUseQueuedIngestByPolicy( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + ): Boolean { + if ( + managedStreamingPolicy.shouldDefaultToQueuedIngestion( + source, + database, + table, + props, + ) + ) { + logger.info( + "ManagedStreamingPolicy indicates fallback to queued ingestion", + ) + return true + } + return false + } + + private suspend fun invokeStreamingIngestion( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + ): IngestResponse { + val startTime = Instant.now(Clock.systemUTC()).toEpochMilli() + + val result = + managedStreamingPolicy.retryPolicy.runWithRetry( + action = { attempt -> + val attemptStartTime = + Instant.now(Clock.systemUTC()) + .toEpochMilli() + + try { + val response = + when (source) { + is BlobSourceInfo -> { + streamingIngestClient + .submitStreamingIngestion( + database = + database, + table = table, + // Not used for blob-based streaming + data = + ByteArray( + 0, + ), + format = + props + .format, + ingestProperties = + props, + blobUrl = + source + .blobPath, + ) + IngestResponse( + ingestionOperationId = + source.sourceId + .toString(), + ) + } + + is LocalSource -> { + val data = + source.data() + .readBytes() + streamingIngestClient + .submitStreamingIngestion( + database = + database, + table = table, + data = data, + format = + props + .format, + ingestProperties = + props, + ) + IngestResponse( + ingestionOperationId = + source.sourceId + .toString(), + ) + } + + else -> + throw IngestException( + "Unsupported source type for streaming: ${source::class.simpleName}", + isPermanent = true, + ) + } + + val duration = + Duration.ofMillis( + Instant.now(Clock.systemUTC()) + .toEpochMilli() - + attemptStartTime, + ) + managedStreamingPolicy.streamingSuccessCallback( + source, + database, + table, + props, + ManagedStreamingRequestSuccessDetails( + duration, + ), + ) + + logger.info( + "Streaming ingestion succeeded for ${source::class.simpleName} on attempt $attempt. 
Duration: ${duration.toMillis()}ms", + ) + response + } catch (e: Exception) { + logger.warn( + "Streaming ingestion attempt $attempt failed: ${e.message}", + ) + throw e + } + }, + onRetry = { attempt, ex, _ -> + logger.debug( + "Retrying streaming ingestion after attempt $attempt due to: ${ex.message}", + ) + if (source is LocalSource) { + try { + source.reset() + } catch (e: Exception) { + logger.warn( + "Failed to reset source stream: ${e.message}", + ) + } + } + }, + shouldRetry = { attempt, ex, isPermanent -> + decideOnException( + source, + database, + table, + props, + startTime, + isPermanent, + ex, + ) + }, + throwOnExhaustedRetries = false, + tracer = { msg -> logger.debug(msg) }, + ) + + if (result == null) { + logger.info( + "Streaming ingestion failed after retries, falling back to queued ingestion for ${source::class.simpleName}", + ) + + if (source is LocalSource) { + try { + source.reset() + } catch (e: Exception) { + logger.warn( + "Failed to reset source stream before queued ingestion: ${e.message}", + ) + } + } + return invokeQueuedIngestion(source, database, table, props) + } + + return result + } + + /** Decides whether to retry, throw, or break based on the exception */ + private fun decideOnException( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + startTime: Long, + isPermanent: Boolean, + ex: Exception, + ): Retry { + val duration = + Duration.ofMillis( + Instant.now(Clock.systemUTC()).toEpochMilli() - + startTime, + ) + + // Handle transient errors + if (!isPermanent) { + reportTransientException( + source, + database, + table, + props, + ex, + duration, + ) + return Retry(shouldRetry = true, interval = Duration.ZERO) + } + + // Handle permanent errors + if (ex !is IngestException) { + reportUnknownException(source, database, table, props, ex, duration) + return Retry(shouldRetry = false, interval = Duration.ZERO) + } + + // Check if we should fallback to queued ingestion + if ( + shouldFallbackToQueuedOnPermanentError( + ex, + source, + database, + table, + props, + duration, + ) + ) { + return Retry(shouldRetry = false, interval = Duration.ZERO) + } + + logger.error( + "Permanent error occurred while trying streaming ingest, not switching to queued according to policy. 
Error: ${ex.message}", + ) + return Retry(shouldRetry = false, interval = Duration.ZERO) + } + + /** Reports a transient exception to the policy */ + private fun reportTransientException( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + ex: Exception, + duration: Duration, + ) { + val errorCategory = + if ( + ex is IngestException && + ex.failureCode == + HttpStatusCode.TooManyRequests.value + ) { + ManagedStreamingErrorCategory.THROTTLED + } else { + ManagedStreamingErrorCategory.OTHER_ERRORS + } + + logger.warn( + "Streaming ingestion transient error: ${ex.message}, category: $errorCategory", + ) + + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + ManagedStreamingRequestFailureDetails( + duration = duration, + isPermanent = false, + errorCategory = errorCategory, + exception = ex, + ), + ) + } + + private fun reportUnknownException( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + ex: Exception, + duration: Duration, + ) { + logger.error( + "Unexpected error occurred during streaming ingestion: ${ex.message}", + ex, + ) + + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + ManagedStreamingRequestFailureDetails( + duration = duration, + isPermanent = true, + errorCategory = + ManagedStreamingErrorCategory.UNKNOWN_ERRORS, + exception = ex, + ), + ) + } + + private fun shouldFallbackToQueuedOnPermanentError( + ex: IngestException, + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties, + duration: Duration, + ): Boolean { + val failureSubCode = ex.failureSubCode + + val errorCategory: ManagedStreamingErrorCategory + val shouldFallback: Boolean + + when { + // Streaming ingestion policy turned off + failureSubCode?.contains( + "StreamingIngestionPolicyNotEnabled", + ignoreCase = true, + ) == true || + failureSubCode?.contains( + "StreamingIngestionDisabledForCluster", + ignoreCase = true, + ) == true -> { + errorCategory = + ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF + shouldFallback = + managedStreamingPolicy + .continueWhenStreamingIngestionUnavailable + logger.info( + "Streaming ingestion is off, fallback to queued ingestion is " + + "${if (shouldFallback) "enabled" else "disabled"}. Error: ${ex.message}", + ) + } + + // Table configuration prevents streaming + failureSubCode?.contains( + "UpdatePolicyIncompatible", + ignoreCase = true, + ) == true || + failureSubCode?.contains( + "QuerySchemaDoesNotMatchTableSchema", + ignoreCase = true, + ) == true -> { + errorCategory = + ManagedStreamingErrorCategory + .TABLE_CONFIGURATION_PREVENTS_STREAMING + shouldFallback = true + logger.info( + "Fallback to queued ingestion due to table configuration. Error: ${ex.message}", + ) + } + + // Request properties prevent streaming (e.g., file too large) + failureSubCode?.contains("FileTooLarge", ignoreCase = true) == + true || + failureSubCode?.contains( + "InputStreamTooLarge", + ignoreCase = true, + ) == true || + ex.failureCode == 413 -> { // 413 Payload Too Large + errorCategory = + ManagedStreamingErrorCategory + .REQUEST_PROPERTIES_PREVENT_STREAMING + shouldFallback = true + logger.info( + "Fallback to queued ingestion due to request properties. 
Error: ${ex.message}",
+                )
+            }
+
+            else -> {
+                errorCategory = ManagedStreamingErrorCategory.OTHER_ERRORS
+                shouldFallback = false
+                logger.info(
+                    "Not falling back to queued ingestion for this exception: ${ex.message}",
+                )
+            }
+        }
+
+        managedStreamingPolicy.streamingErrorCallback(
+            source,
+            database,
+            table,
+            props,
+            ManagedStreamingRequestFailureDetails(
+                duration = duration,
+                isPermanent = true,
+                errorCategory = errorCategory,
+                exception = ex,
+            ),
+        )
+
+        return shouldFallback
+    }
+
+    private suspend fun invokeQueuedIngestion(
+        source: AbstractSourceInfo,
+        database: String,
+        table: String,
+        props: IngestRequestProperties,
+    ): IngestResponse {
+        logger.info("Invoking queued ingestion for ${source::class.simpleName}")
+
+        return queuedIngestionClient.submitQueuedIngestion(
+            database = database,
+            table = table,
+            sources = listOf(source),
+            format = props.format,
+            ingestProperties = props,
+        )
+    }
+
+    suspend fun pollUntilCompletion(
+        database: String,
+        table: String,
+        operationId: String,
+        // Poll every 30 seconds with a 5 minute overall timeout by default
+        pollingInterval: kotlin.time.Duration = 30_000.milliseconds,
+        timeout: kotlin.time.Duration = 300_000.milliseconds,
+    ): StatusResponse {
+        return queuedIngestionClient.pollUntilCompletion(
+            database = database,
+            table = table,
+            operationId = operationId,
+            pollingInterval = pollingInterval,
+            timeout = timeout,
+        )
+    }
+
+    override suspend fun submitIngestion(
+        database: String,
+        table: String,
+        sources: List<SourceInfo>,
+        format: Format,
+        ingestProperties: IngestRequestProperties?,
+    ): IngestResponse {
+        TODO("Not yet implemented")
+    }
+
+    override suspend fun getIngestionStatus(
+        database: String,
+        table: String,
+        operationId: String,
+        forceDetails: Boolean,
+    ): StatusResponse {
+        TODO("Not yet implemented")
+    }
+
+    override suspend fun getIngestionDetails(
+        database: String,
+        table: String,
+        operationId: String,
+        details: Boolean,
+    ): StatusResponse {
+        TODO("Not yet implemented")
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt
new file mode 100644
index 000000000..624bfcdd5
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt
@@ -0,0 +1,331 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2
+
+import com.microsoft.azure.kusto.ingest.v2.common.CustomRetryPolicy
+import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy
+import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties
+import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo
+import java.time.Duration
+import java.time.Instant
+import java.util.concurrent.ConcurrentHashMap
+import kotlin.random.Random
+
+/** Error categories for managed streaming ingestion failures */
+enum class ManagedStreamingErrorCategory {
+    /**
+     * Indicates that streaming cannot be performed due to the properties of the
+     * request itself but would likely succeed if queued. These errors are
+     * request-specific and do not imply anything about following requests.
+     */
+    REQUEST_PROPERTIES_PREVENT_STREAMING,
+
+    /**
+     * Indicates streaming cannot be performed due to a conflicting table
+     * configuration, but may succeed if queued. These errors are table-specific
+     * and following requests will behave similarly until the conflict is
+     * resolved on the service side.
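+     * For example, an incompatible update policy on the target table
+     * ("UpdatePolicyIncompatible") is reported in this category.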
+ */ + TABLE_CONFIGURATION_PREVENTS_STREAMING, + + /** + * Indicates streaming cannot be performed due to some service + * configuration. To resolve these errors, a service side change is required + * to use streaming. + */ + STREAMING_INGESTION_OFF, + + /** + * Indicates streaming ingestion endpoint is throttled and returns HTTP + * TooManyRequests error code (429) + */ + THROTTLED, + + /** Reported for all other types of streaming errors */ + OTHER_ERRORS, + + /** Reported when an unexpected error type occurred */ + UNKNOWN_ERRORS, +} + +/** Details about a successful streaming ingestion request */ +data class ManagedStreamingRequestSuccessDetails(val duration: Duration) + +/** Details about a failed streaming ingestion request */ +data class ManagedStreamingRequestFailureDetails( + val duration: Duration, + val isPermanent: Boolean, + val errorCategory: ManagedStreamingErrorCategory, + val exception: Exception, +) + +/** + * A policy which controls the way the managed streaming ingest client behaves + * when there are errors. + */ +interface ManagedStreamingPolicy { + /** + * When streaming ingestion is disabled for the table, database or cluster, + * determine if the client will fallback to queued ingestion. When set to + * false managed streaming client will fail ingestions for tables where + * streaming policy is not enabled. Enabling this property means the client + * might use queued ingestion exclusively without the caller knowing. + * Permanent errors in streaming ingestion that are not errors in queued + * ingestion, will fallback to queued ingestion regardless of this setting. + */ + val continueWhenStreamingIngestionUnavailable: Boolean + + /** + * The retry policy for transient failures before falling back to queued + * ingestion + */ + val retryPolicy: IngestRetryPolicy + + /** + * A size factor that enables tuning up and down the upper limit of data + * sent to streaming. Default value is 1.0. 
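+     *
+     * For example, with the client's default 4 MB streaming cap, a factor of
+     * 0.5 halves the effective limit (a sketch of the same computation the
+     * managed client performs):
+     * ```kotlin
+     * val maxStreamingIngestSize = (4 * 1024 * 1024 * 0.5).toLong() // 2_097_152 bytes
+     * ```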
+ */ + val dataSizeFactor: Double + + /** + * Should this ingestion attempt skip streaming and go directly to queued + * ingestion + * + * @param source The ingestion source + * @param database The target database name + * @param table The target table name + * @param props The ingestion properties + * @return false if streaming should be attempted, true if streaming should + * be skipped + */ + fun shouldDefaultToQueuedIngestion( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties?, + ): Boolean + + /** + * This callback will be called when a streaming error occurs + * + * @param source The ingestion source + * @param database The target database name + * @param table The target table name + * @param props The ingestion properties + * @param failureDetails Details about the failure + */ + fun streamingErrorCallback( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties?, + failureDetails: ManagedStreamingRequestFailureDetails, + ) + + /** + * This callback will be called when streaming succeeds + * + * @param source The ingestion source + * @param database The target database name + * @param table The target table name + * @param props The ingestion properties + * @param successDetails Details about the success + */ + fun streamingSuccessCallback( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties?, + successDetails: ManagedStreamingRequestSuccessDetails, + ) +} + +/** + * This is the default policy used by the managed streaming ingestion client. + * Whenever there is a permanent streaming error, it defaults to queued + * ingestion for a time period defined by timeUntilResumingStreamingIngest. + */ +class DefaultManagedStreamingPolicy( + override val continueWhenStreamingIngestionUnavailable: Boolean = false, + override val retryPolicy: IngestRetryPolicy = + CustomRetryPolicy( + arrayOf( + Duration.ofSeconds( + DEFAULT_RETRY_FIRST_DELAY_SECONDS, + ) + .plusMillis( + Random.nextLong( + DEFAULT_JITTER_MIN_MS, + DEFAULT_JITTER_MAX_MS, + ), + ), + Duration.ofSeconds( + DEFAULT_RETRY_SECOND_DELAY_SECONDS, + ) + .plusMillis( + Random.nextLong( + DEFAULT_JITTER_MIN_MS, + DEFAULT_JITTER_MAX_MS, + ), + ), + Duration.ofSeconds( + DEFAULT_RETRY_THIRD_DELAY_SECONDS, + ) + .plusMillis( + Random.nextLong( + DEFAULT_JITTER_MIN_MS, + DEFAULT_JITTER_MAX_MS, + ), + ), + ), + ), + override val dataSizeFactor: Double = DEFAULT_DATA_SIZE_FACTOR, + /** + * When streaming is throttled, the client will fallback to queued + * ingestion. This property controls how long the client will use queued + * ingestion in the case of streaming is throttled before trying to + * resume streaming ingestion again. + */ + val throttleBackoffPeriod: Duration = + Duration.ofSeconds(DEFAULT_THROTTLE_BACKOFF_SECONDS), + /** + * When streaming ingestion is unavailable, the client will fallback to + * queued ingestion. This property controls how long the client will use + * queued ingestion before trying to resume streaming ingestion again. + */ + val timeUntilResumingStreamingIngest: Duration = + Duration.ofMinutes(DEFAULT_RESUME_STREAMING_MINUTES), +) : ManagedStreamingPolicy { + + private val defaultToQueuedUntilTimeByTable = + ConcurrentHashMap< + Pair, + Pair, + >() + + companion object { + /** + * Default data size factor for tuning the upper limit of data sent to + * streaming. A value of 1.0 means no adjustment to the default limit. 
+ */ + private const val DEFAULT_DATA_SIZE_FACTOR = 1.0 + + /** + * Default delay in seconds for the first retry attempt when streaming + * ingestion fails transiently. + */ + private const val DEFAULT_RETRY_FIRST_DELAY_SECONDS = 1L + + /** + * Default delay in seconds for the second retry attempt when streaming + * ingestion fails transiently. + */ + private const val DEFAULT_RETRY_SECOND_DELAY_SECONDS = 2L + + /** + * Default delay in seconds for the third retry attempt when streaming + * ingestion fails transiently. + */ + private const val DEFAULT_RETRY_THIRD_DELAY_SECONDS = 4L + + /** + * Minimum jitter value in milliseconds added to retry delays to avoid + * thundering herd problems. + */ + private const val DEFAULT_JITTER_MIN_MS = 0L + + /** + * Maximum jitter value in milliseconds added to retry delays to avoid + * thundering herd problems. Adds up to 1 second of random delay to each + * retry attempt. + */ + private const val DEFAULT_JITTER_MAX_MS = 1000L + + /** + * Default backoff period in seconds when streaming ingestion is + * throttled (HTTP 429). The client will use queued ingestion for this + * duration before attempting streaming again. + */ + private const val DEFAULT_THROTTLE_BACKOFF_SECONDS = 10L + + /** + * Default time in minutes to wait before resuming streaming ingestion + * attempts after streaming becomes unavailable due to configuration or + * policy issues. + */ + private const val DEFAULT_RESUME_STREAMING_MINUTES = 15L + } + + override fun shouldDefaultToQueuedIngestion( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties?, + ): Boolean { + val key = Pair(database, table) + + val useQueuedUntilTime = defaultToQueuedUntilTimeByTable[key] + if (useQueuedUntilTime != null) { + val (dateTime, errorCategory) = useQueuedUntilTime + if (dateTime.isAfter(Instant.now())) { + if ( + errorCategory == + ManagedStreamingErrorCategory + .STREAMING_INGESTION_OFF && + !continueWhenStreamingIngestionUnavailable + ) { + return false + } + return true + } + defaultToQueuedUntilTimeByTable.remove(key) + } + + return false + } + + override fun streamingErrorCallback( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties?, + failureDetails: ManagedStreamingRequestFailureDetails, + ) { + val key = Pair(database, table) + when (failureDetails.errorCategory) { + ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF, + ManagedStreamingErrorCategory + .TABLE_CONFIGURATION_PREVENTS_STREAMING, + -> { + defaultToQueuedUntilTimeByTable[key] = + Pair( + Instant.now() + .plus(timeUntilResumingStreamingIngest), + failureDetails.errorCategory, + ) + } + + ManagedStreamingErrorCategory.THROTTLED -> { + defaultToQueuedUntilTimeByTable[key] = + Pair( + Instant.now().plus(throttleBackoffPeriod), + failureDetails.errorCategory, + ) + } + + else -> { + // No action for other error categories + } + } + } + + override fun streamingSuccessCallback( + source: AbstractSourceInfo, + database: String, + table: String, + props: IngestRequestProperties?, + successDetails: ManagedStreamingRequestSuccessDetails, + ) { + // Default implementation does nothing + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt index 4d94d6045..b775b29cb 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt +++ 
b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt @@ -3,8 +3,11 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionResultUtils +import com.microsoft.azure.kusto.ingest.v2.container.BlobUploadContainer import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.Blob import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus @@ -14,50 +17,194 @@ import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo import io.ktor.http.HttpStatusCode import kotlinx.coroutines.delay import kotlinx.coroutines.withTimeoutOrNull -import java.util.UUID +import java.lang.Long +import java.time.Clock +import java.time.OffsetDateTime import kotlin.time.Duration class QueuedIngestionClient( override val dmUrl: String, override val tokenCredential: TokenCredential, override val skipSecurityChecks: Boolean = false, + override val clientDetails: ClientDetails? = null, + private val maxConcurrency: Int? = null, + private val maxDataSize: kotlin.Long? = null, + private val ignoreFileSize: Boolean = false, ) : - KustoBaseApiClient(dmUrl, tokenCredential, skipSecurityChecks), + KustoBaseApiClient( + dmUrl, + tokenCredential, + skipSecurityChecks, + clientDetails, + ), IngestClient { + override suspend fun submitIngestion( + database: String, + table: String, + sources: List, + format: Format, + ingestProperties: IngestRequestProperties?, + ): IngestResponse { + return submitQueuedIngestion( + database = database, + table = table, + sources = sources, + format = format, + ingestProperties = ingestProperties, + failOnPartialUploadError = true, + ) + } + + override suspend fun getIngestionStatus( + database: String, + table: String, + operationId: String, + forceDetails: Boolean, + ): StatusResponse { + // If details are explicitly requested, use the details API + if (forceDetails) { + val statusResponse = + getIngestionDetails(database, table, operationId, true) + logger.debug( + "Forcing detailed status retrieval for operation: {} returning {}", + operationId, + statusResponse, + ) + return statusResponse + } + // Start with summary for efficiency + val statusResponse = + getIngestionDetails(database, table, operationId, false) + // If operation has failures or is completed, get detailed information + return if ( + statusResponse.status?.failed?.let { it > 0 } == true || + IngestionResultUtils.isCompleted(statusResponse.details) + ) { + logger.debug( + "Operation $operationId has failures or is completed, retrieving details", + ) + getIngestionDetails(database, table, operationId, true) + } else { + statusResponse + } + } + + private val defaultConfigurationCache = + DefaultConfigurationCache( + dmUrl = dmUrl, + tokenCredential = tokenCredential, + skipSecurityChecks = skipSecurityChecks, + ) + + private val blobUploadContainer = + BlobUploadContainer( + 
configurationCache = defaultConfigurationCache,
+            maxConcurrency =
+                maxConcurrency ?: UPLOAD_CONTAINER_MAX_CONCURRENCY,
+            maxDataSize =
+                maxDataSize ?: UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES,
+            ignoreSizeLimit = ignoreFileSize,
+        )
+
     /**
-     * Submits a queued ingestion request.
+     * Submits a queued ingestion request with support for all source types.
+     * Local sources (FileSourceInfo, StreamSourceInfo) will be automatically
+     * uploaded to blob storage before ingestion using parallel batch uploads.
      *
      * @param database The target database name
      * @param table The target table name
-     * @param blobSources List of BlobSourceInfo objects to ingest
+     * @param sources List of SourceInfo objects (BlobSourceInfo,
+     *   FileSourceInfo, or StreamSourceInfo)
      * @param format The data format
      * @param ingestProperties Optional ingestion properties
+     * @param failOnPartialUploadError If true, fails the entire operation if
+     *   any uploads fail
      * @return IngestionOperation for tracking the request
      */
     suspend fun submitQueuedIngestion(
         database: String,
         table: String,
-        blobSources: List<BlobSourceInfo>,
+        sources: List<SourceInfo>,
         format: Format = Format.csv,
         ingestProperties: IngestRequestProperties? = null,
+        failOnPartialUploadError: Boolean = true,
     ): IngestResponse {
         logger.info(
-            "Submitting queued ingestion request for database: $database, table: $table, blobs: ${blobSources.size}",
+            "Submitting queued ingestion request for database: $database, table: $table, sources: ${sources.size}",
         )
+
+        // Separate sources by type
+        val blobSources = sources.filterIsInstance<BlobSourceInfo>()
+        val localSources = sources.filterIsInstance<LocalSource>()
+
+        // Convert local sources to blob sources
+        val allBlobSources =
+            if (localSources.isNotEmpty()) {
+                logger.info(
+                    "Uploading ${localSources.size} local sources to blob storage in parallel",
+                )
+
+                // Use batch upload for efficiency
+                val batchResult =
+                    BlobSourceInfo.fromLocalSourcesBatch(
+                        localSources,
+                        blobUploadContainer,
+                    )
+
+                // Log batch results
+                logger.info(
+                    "Batch upload completed: ${batchResult.successes.size} succeeded, " +
+                        "${batchResult.failures.size} failed out of ${localSources.size} total",
+                )
+
+                // Handle failures based on policy
+                if (batchResult.hasFailures) {
+                    val failureDetails =
+                        batchResult.failures.joinToString("\n") {
+                            failure ->
+                            " - ${failure.source.name}: ${failure.errorCode} - ${failure.errorMessage}"
+                        }
+
+                    if (failOnPartialUploadError) {
+                        throw IngestException(
+                            "Failed to upload ${batchResult.failures.size} out of ${localSources.size} sources:\n$failureDetails",
+                            isPermanent =
+                                batchResult.failures.all {
+                                    it.isPermanent
+                                },
+                        )
+                    } else {
+                        logger.warn(
+                            "Some uploads failed but continuing with successful uploads:\n$failureDetails",
+                        )
+                    }
+                }
+
+                blobSources + batchResult.successes
+            } else {
+                blobSources
+            }
+
+        if (allBlobSources.isEmpty()) {
+            throw IngestException(
+                "No sources available for ingestion after upload processing",
+                isPermanent = true,
+            )
+        }
         // Convert BlobSourceInfo objects to Blob objects
         val blobs =
-            blobSources.mapIndexed { index, blobSource ->
-                val sourceId =
-                    blobSource.sourceId?.toString()
-                        ?: UUID.randomUUID().toString()
-                logger.debug(
-                    "Preparing blob {} with sourceId {} for ingestion.",
-                    index,
-                    sourceId,
-                )
-                Blob(url = blobSource.blobPath, sourceId = sourceId)
+            allBlobSources.map { blobSource ->
+                val sourceId = blobSource.sourceId.toString()
+                // Build the Blob with its known source ID and raw size
+                Blob(
+                    url = blobSource.blobPath,
+                    sourceId = sourceId,
+                    rawSize = blobSource.blobExactSize as Long?,
+                )
             }
@@ -66,7 +213,7 @@ class QueuedIngestionClient(
             ingestProperties ?:
IngestRequestProperties(format = format)
         logger.debug(
-            "** Ingesting to {}.{} with the following properties with properties {}",
+            "Ingesting to {}.{} with the following properties: {}",
             database,
             table,
             requestProperties,
@@ -74,7 +221,7 @@
         val ingestRequest =
             IngestRequest(
-                timestamp = java.time.OffsetDateTime.now(),
+                timestamp = OffsetDateTime.now(Clock.systemUTC()),
                 blobs = blobs,
                 properties = requestProperties,
             )
@@ -110,16 +257,15 @@
     }
 
     /**
-     * Gets a summary of the ingestion operation status (lightweight, fast).
-     * This method provides overall status counters without detailed blob
-     * information. Use this for quick status checks and polling scenarios.
+     * Gets detailed information about an ingestion operation.
      *
      * @param database The target database name
      * @param table The target table name
      * @param operationId The operation ID returned from the ingestion request
-     * @return Updated IngestionOperation with status summary
+     * @param details Whether to retrieve detailed blob-level information
+     * @return StatusResponse with operation details
      */
-    private suspend fun getIngestionDetails(
+    override suspend fun getIngestionDetails(
@@ -162,11 +308,10 @@
                 hasTransientErrors
             ) {
                 val message =
-                    if (hasTransientErrors) {
-                        printMessagesFromFailures(transientFailures)
-                    } else {
-                        "Error polling $dmUrl for operation $operationId."
-                    }
+                    printMessagesFromFailures(
+                        transientFailures,
+                        isTransientFailure = true,
+                    )
                 logger.error(message)
                 throw IngestException(
                     message = message,
@@ -178,8 +323,10 @@
             }
             // TODO: We need to eventually look at OneApiExceptions
             val errorMessage =
-                printMessagesFromFailures(ingestStatusFailure.details)
-                    ?: "Failed to get ingestion summary for operation $operationId. Status: ${response.status}, Body: $ingestStatusFailure"
+                printMessagesFromFailures(
+                    ingestStatusFailure.details,
+                    isTransientFailure = false,
+                )
             logger.error(errorMessage)
             throw IngestException(errorMessage, isPermanent = true)
         }
@@ -198,6 +345,7 @@
     private fun printMessagesFromFailures(
         failures: List<BlobStatus>?,
+        isTransientFailure: Boolean,
     ): String? {
         return failures?.joinToString {
             (
@@ -212,54 +360,7 @@
             ->
             "Error ingesting blob with $sourceId. ErrorDetails $details, ErrorCode $errorCode " +
                 ", Status ${status?.value}. Ingestion lastUpdated at $lastUpdateTime & started at $startedAt. " +
-                "FailureStatus ${failureStatus?.value}"
-        }
-    }
-
-    /**
-     * Gets the status of a queued ingestion operation with intelligent API
-     * selection. For completed operations or when details are explicitly
-     * requested, uses the details API. For in-progress operations, uses the
-     * summary API for efficiency.
- * - * @param database The target database name - * @param table The target table name - * @param operationId The operation ID returned from the ingestion request - * @param forceDetails Force retrieval of detailed information regardless of - * operation status - * @return Updated IngestionOperation with current status - */ - suspend fun getIngestionStatus( - database: String, - table: String, - operationId: String, - forceDetails: Boolean = false, - ): StatusResponse { - // If details are explicitly requested, use the details API - if (forceDetails) { - val statusResponse = - getIngestionDetails(database, table, operationId, true) - logger.debug( - "Forcing detailed status retrieval for operation: {} returning {}", - operationId, - statusResponse, - ) - return statusResponse - } - // Start with summary for efficiency - val statusResponse = - getIngestionDetails(database, table, operationId, false) - // If operation has failures or is completed, get detailed information - return if ( - statusResponse.status?.failed?.let { it > 0 } == true || - IngestionResultUtils.isCompleted(statusResponse.details) - ) { - logger.debug( - "Operation $operationId has failures or is completed, retrieving details", - ) - getIngestionDetails(database, table, operationId, true) - } else { - statusResponse + "FailureStatus ${failureStatus?.value}. Is transient failure: $isTransientFailure" } } @@ -295,10 +396,6 @@ class QueuedIngestionClient( logger.debug( "Starting to poll ingestion status for operation: $operationId, timeout: $timeout", ) - logger.debug( - "IngestionStatus: {}", - currentStatus.details, - ) if ( IngestionResultUtils.isCompleted( currentStatus.details, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt index e7b0745f9..1bf40b572 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt @@ -3,16 +3,24 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse +import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.FileSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo import io.ktor.http.HttpStatusCode import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.json.Json import java.net.ConnectException import java.net.URI +import java.util.UUID @Serializable private data class StreamFromBlobRequestBody( @@ -23,10 +31,121 @@ class StreamingIngestClient( val engineUrl: String, override val tokenCredential: TokenCredential, override val skipSecurityChecks: Boolean = false, + override val clientDetails: ClientDetails? 
= null,
 ) :
-    KustoBaseApiClient(engineUrl, tokenCredential, skipSecurityChecks),
+    KustoBaseApiClient(
+        engineUrl,
+        tokenCredential,
+        skipSecurityChecks,
+        clientDetails,
+    ),
     IngestClient {
 
+    /** Handles multiple source types for streaming ingestion. */
+    override suspend fun submitIngestion(
+        database: String,
+        table: String,
+        sources: List<SourceInfo>,
+        format: Format,
+        ingestProperties: IngestRequestProperties?,
+    ): IngestResponse {
+        require(sources.isNotEmpty()) { "At least one source is required" }
+
+        // Streaming ingestion processes one source at a time; only the first source is ingested
+        val source = sources.first()
+        val operationId = UUID.randomUUID().toString()
+
+        when (source) {
+            is BlobSourceInfo -> {
+                logger.info(
+                    "Streaming ingestion from BlobSource: ${source.blobPath}",
+                )
+                submitStreamingIngestion(
+                    database = database,
+                    table = table,
+                    // Not used for blob-based streaming
+                    data = ByteArray(0),
+                    format = format,
+                    ingestProperties = ingestProperties,
+                    blobUrl = source.blobPath,
+                )
+            }
+            is FileSourceInfo -> {
+                logger.info(
+                    "Streaming ingestion from FileSource: ${source.name}",
+                )
+                val data = source.data().readBytes()
+                submitStreamingIngestion(
+                    database = database,
+                    table = table,
+                    data = data,
+                    format = format,
+                    ingestProperties = ingestProperties,
+                    blobUrl = null,
+                )
+                source.close()
+            }
+            is StreamSourceInfo -> {
+                logger.info(
+                    "Streaming ingestion from StreamSource: ${source.name}",
+                )
+                val data = source.data().readBytes()
+                submitStreamingIngestion(
+                    database = database,
+                    table = table,
+                    data = data,
+                    format = format,
+                    ingestProperties = ingestProperties,
+                    blobUrl = null,
+                )
+                source.close()
+            }
+            else -> {
+                throw IngestException(
+                    message =
+                        "Unsupported source type for streaming ingestion: ${source::class.simpleName}",
+                    isPermanent = true,
+                )
+            }
+        }
+
+        // Streaming ingestion doesn't return an operation ID from the server
+        // We generate one locally for consistency with the IngestClient interface
+        return IngestResponse(ingestionOperationId = operationId)
+    }
+
+    /**
+     * Note: Streaming ingestion doesn't support operation tracking. Throws
+     * UnsupportedOperationException.
+     */
+    override suspend fun getIngestionStatus(
+        database: String,
+        table: String,
+        operationId: String,
+        forceDetails: Boolean,
+    ): StatusResponse {
+        throw UnsupportedOperationException(
+            "Streaming ingestion does not support operation status tracking. " +
+                "Operation ID: $operationId cannot be tracked. ",
+        )
+    }
+
+    /**
+     * Note: Streaming ingestion doesn't support operation tracking. Throws
+     * UnsupportedOperationException.
+     */
+    override suspend fun getIngestionDetails(
+        database: String,
+        table: String,
+        operationId: String,
+        details: Boolean,
+    ): StatusResponse {
+        throw UnsupportedOperationException(
+            "Streaming ingestion does not support detailed operation tracking. " +
+                "Operation ID: $operationId cannot be tracked. ",
+        )
+    }
+
     /**
      * Submits a streaming ingestion request.
      *
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt
new file mode 100644
index 000000000..6f8007f7a
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt
@@ -0,0 +1,89 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
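// A minimal usage sketch of the StreamingIngestClient defined above, assuming
// azure-identity's DefaultAzureCredentialBuilder for the credential; the
// cluster URL, database, and table names are placeholders.
import com.azure.identity.DefaultAzureCredentialBuilder
import java.io.ByteArrayInputStream

suspend fun streamingIngestSketch() {
    val client = StreamingIngestClient(
        engineUrl = "https://mycluster.kusto.windows.net", // placeholder
        tokenCredential = DefaultAzureCredentialBuilder().build(),
    )
    val source = StreamSourceInfo(
        stream = ByteArrayInputStream("""{"x": 1}""".toByteArray()),
        format = Format.multijson,
        sourceCompression = CompressionType.NONE,
    )
    // Only the first source is ingested; streaming returns a locally generated
    // operation id, and status tracking is unsupported (see the overrides above).
    val response = client.submitIngestion(
        database = "MyDatabase",
        table = "MyTable",
        sources = listOf(source),
        format = Format.multijson,
        ingestProperties = null,
    )
    println(response.ingestionOperationId)
}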
+package com.microsoft.azure.kusto.ingest.v2.builders
+
+import com.azure.core.credential.TokenCredential
+import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails
+
+abstract class BaseIngestClientBuilder<B : BaseIngestClientBuilder<B>> {
+    protected var tokenCredential: TokenCredential? = null
+    protected var skipSecurityChecks: Boolean = false
+    protected var clientDetails: ClientDetails? = null
+
+    @Suppress("UNCHECKED_CAST")
+    protected fun self(): B = this as B
+
+    fun withAuthentication(credential: TokenCredential): B {
+        this.tokenCredential = credential
+        return self()
+    }
+
+    fun skipSecurityChecks(): B {
+        this.skipSecurityChecks = true
+        return self()
+    }
+
+    /**
+     * Sets the client details for tracing purposes, using defaults for any
+     * unprovided fields.
+     *
+     * @param applicationName The client application name
+     * @param version The client application version
+     * @param userName The username for tracing (optional). If null, system
+     *   default will be used.
+     * @return This builder instance for method chaining
+     */
+    fun withClientDetails(
+        applicationName: String,
+        version: String,
+        userName: String? = null,
+    ): B {
+        this.clientDetails =
+            ClientDetails(
+                applicationForTracing = applicationName,
+                userNameForTracing = userName,
+                clientVersionForTracing = version,
+            )
+        return self()
+    }
+
+    /**
+     * Sets the client details with the format for connectors. Example output:
+     * "Kusto.MyConnector:{1.0.0}|App.{MyApp}:{0.5.3}|CustomField:{CustomValue}"
+     *
+     * This method is useful for connectors and frameworks that want to identify
+     * themselves in Kusto tracing with additional metadata.
+     *
+     * @param name Name of the connector (will be prefixed with "Kusto.")
+     * @param version Version of the connector
+     * @param sendUser Whether to send the username in tracing (default: false)
+     * @param overrideUser Override username (used when sendUser is true)
+     * @param appName Name of the application (optional, defaults to process
+     *   name)
+     * @param appVersion Version of the application (optional)
+     * @param additionalFields Additional key-value pairs to include in the
+     *   connector details
+     * @return This builder instance for method chaining
+     */
+    fun withConnectorClientDetails(
+        name: String,
+        version: String,
+        sendUser: Boolean = false,
+        overrideUser: String? = null,
+        appName: String? = null,
+        appVersion: String? = null,
+        additionalFields: Map<String, String>? = null,
+    ): B {
+        this.clientDetails =
+            ClientDetails.fromConnectorDetails(
+                name = name,
+                version = version,
+                sendUser = sendUser,
+                overrideUser = overrideUser,
+                appName = appName,
+                appVersion = appVersion,
+                additionalFields = additionalFields,
+            )
+        return self()
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt
new file mode 100644
index 000000000..bbe5f6ada
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
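// A short sketch of the connector-details header produced by the builder method
// above; the connector and app names/versions are placeholders, and the expected
// strings follow the formatHeader/escapeField logic in ClientDetails.
fun connectorHeaderSketch(): ClientDetails {
    val connectorDetails = ClientDetails.fromConnectorDetails(
        name = "MyConnector",
        version = "1.0.0",
        appName = "MyApp",
        appVersion = "0.5.3",
    )
    // connectorDetails.applicationForTracing == "Kusto.MyConnector:{1.0.0}|App.{MyApp}:{0.5.3}"
    // connectorDetails.userNameForTracing == "[none]" (sendUser defaults to false)
    return connectorDetails
}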
+package com.microsoft.azure.kusto.ingest.v2.builders
+
+import com.microsoft.azure.kusto.ingest.v2.QueuedIngestionClient
+import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY
+import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES
+
+class QueuedIngestionClientBuilder
+private constructor(private val dmUrl: String) :
+    BaseIngestClientBuilder<QueuedIngestionClientBuilder>() {
+
+    private var maxConcurrency: Int = UPLOAD_CONTAINER_MAX_CONCURRENCY
+    private var maxDataSize: Long = UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES
+    private var ignoreFileSize: Boolean = false
+
+    companion object {
+        @JvmStatic
+        fun create(dmUrl: String): QueuedIngestionClientBuilder {
+            require(dmUrl.isNotBlank()) { "Data Ingestion URI cannot be blank" }
+            return QueuedIngestionClientBuilder(dmUrl)
+        }
+    }
+
+    fun withMaxConcurrency(concurrency: Int): QueuedIngestionClientBuilder {
+        require(concurrency > 0) {
+            "Max concurrency must be positive, got: $concurrency"
+        }
+        this.maxConcurrency = concurrency
+        return this
+    }
+
+    fun withMaxDataSize(bytes: Long): QueuedIngestionClientBuilder {
+        require(bytes > 0) { "Max data size must be positive, got: $bytes" }
+        this.maxDataSize = bytes
+        return this
+    }
+
+    fun withIgnoreFileSize(ignore: Boolean): QueuedIngestionClientBuilder {
+        this.ignoreFileSize = ignore
+        return this
+    }
+
+    fun build(): QueuedIngestionClient {
+        requireNotNull(tokenCredential) {
+            "Authentication is required. Call withAuthentication() before build()"
+        }
+
+        return QueuedIngestionClient(
+            dmUrl = dmUrl,
+            tokenCredential = tokenCredential!!,
+            skipSecurityChecks = skipSecurityChecks,
+            clientDetails = clientDetails,
+            maxConcurrency = maxConcurrency,
+            maxDataSize = maxDataSize,
+            ignoreFileSize = ignoreFileSize,
+        )
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt
new file mode 100644
index 000000000..47978a33d
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt
@@ -0,0 +1,31 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.builders
+
+import com.microsoft.azure.kusto.ingest.v2.StreamingIngestClient
+
+class StreamingIngestClientBuilder
+private constructor(private val engineUrl: String) :
+    BaseIngestClientBuilder<StreamingIngestClientBuilder>() {
+
+    companion object {
+        @JvmStatic
+        fun create(engineUrl: String): StreamingIngestClientBuilder {
+            require(engineUrl.isNotBlank()) { "Engine URL cannot be blank" }
+            return StreamingIngestClientBuilder(engineUrl)
+        }
+    }
+
+    fun build(): StreamingIngestClient {
+        requireNotNull(tokenCredential) {
+            "Authentication is required. Call withAuthentication() before build()"
+        }
+
+        return StreamingIngestClient(
+            engineUrl = engineUrl,
+            tokenCredential = tokenCredential!!,
+            skipSecurityChecks = skipSecurityChecks,
+            clientDetails = clientDetails,
+        )
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResult.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResult.kt
new file mode 100644
index 000000000..b89fd285d
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResult.kt
@@ -0,0 +1,24 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
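// A usage sketch for the two builders above; the endpoints are placeholders
// and the credential is assumed to be an existing TokenCredential.
import com.azure.core.credential.TokenCredential

fun builderSketch(credential: TokenCredential) {
    val queuedClient = QueuedIngestionClientBuilder
        .create("https://ingest-mycluster.kusto.windows.net")
        .withAuthentication(credential)
        .withMaxConcurrency(4)
        .build()
    val streamingClient = StreamingIngestClientBuilder
        .create("https://mycluster.kusto.windows.net")
        .withAuthentication(credential)
        .build()
}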
+package com.microsoft.azure.kusto.ingest.v2.common
+
+/**
+ * Common interface for batch operation results that track successes and
+ * failures.
+ */
+interface BatchOperationResult<out S, out F> {
+    val successes: List<S>
+    val failures: List<F>
+
+    /** Returns true if any operations failed. */
+    val hasFailures: Boolean
+        get() = failures.isNotEmpty()
+
+    /** Returns true if all operations succeeded (no failures). */
+    val allSucceeded: Boolean
+        get() = failures.isEmpty()
+
+    /** Returns the total count of operations (successes + failures). */
+    val totalCount: Int
+        get() = successes.size + failures.size
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt
new file mode 100644
index 000000000..c5d575b0a
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt
@@ -0,0 +1,184 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.common
+
+import java.util.concurrent.ConcurrentHashMap
+
+data class ClientDetails(
+    val applicationForTracing: String?,
+    val userNameForTracing: String?,
+    val clientVersionForTracing: String?,
+) {
+    companion object {
+        const val NONE = "[none]"
+
+        // Cache for default values to avoid recomputing on every call
+        private val defaultValuesCache = ConcurrentHashMap<String, String>()
+
+        /**
+         * Escapes special characters in header field values by wrapping in
+         * curly braces and replacing problematic characters with underscores.
+         */
+        private fun escapeField(field: String): String {
+            val escaped = field.replace(Regex("[\\r\\n\\s{}|]+"), "_")
+            return "{$escaped}"
+        }
+
+        /**
+         * Formats the given fields into a string that can be used as a header.
+         * Format: "field1:{value1}|field2:{value2}"
+         */
+        private fun formatHeader(args: Map<String, String>): String {
+            return args.entries
+                .filter { it.key.isNotBlank() && it.value.isNotBlank() }
+                .joinToString("|") { (key, value) ->
+                    "$key:${escapeField(value)}"
+                }
+        }
+
+        /** Gets the process name from system properties with caching. */
+        private fun getProcessName(): String {
+            return defaultValuesCache.computeIfAbsent("processName") {
+                val command = System.getProperty("sun.java.command")
+                if (!command.isNullOrBlank()) {
+                    // Strip file name from command line (matches
+                    // UriUtils.stripFileNameFromCommandLine)
+                    command.split(" ").firstOrNull() ?: "JavaProcess"
+                } else {
+                    "JavaProcess"
+                }
+            }
+        }
+
+        private fun getUserName(): String {
+            return defaultValuesCache.computeIfAbsent("userName") {
+                var user = System.getProperty("user.name")
+                if (user.isNullOrBlank()) {
+                    user = System.getenv("USERNAME")
+                    val domain = System.getenv("USERDOMAIN")
+                    if (!domain.isNullOrBlank() && !user.isNullOrBlank()) {
+                        user = "$domain\\$user"
+                    }
+                }
+                if (!user.isNullOrBlank()) user else NONE
+            }
+        }
+
+        private fun getRuntime(): String {
+            return defaultValuesCache.computeIfAbsent("runtime") {
+                System.getProperty("java.runtime.name")
+                    ?: System.getProperty("java.vm.name")
+                    ?: System.getProperty("java.vendor")
+                    ?: "UnknownRuntime"
+            }
+        }
+
+        private fun getJavaVersion(): String {
+            return defaultValuesCache.computeIfAbsent("javaVersion") {
+                System.getProperty("java.version") ?: "UnknownVersion"
+            }
+        }
+
+        /**
+         * Gets the default client version string with caching.
Format: + * "Kusto.Java.Client:{version}|Runtime.{runtime}:{javaVersion}" + */ + private fun getDefaultVersion(): String { + return defaultValuesCache.computeIfAbsent("defaultVersion") { + val baseMap = + linkedMapOf( + "Kusto.Java.Client" to getPackageVersion(), + "Runtime.${escapeField(getRuntime())}" to + getJavaVersion(), + ) + formatHeader(baseMap) + } + } + + /** Gets the package version from the manifest or returns a default. */ + private fun getPackageVersion(): String { + return try { + ClientDetails::class.java.`package`.implementationVersion + ?: "Unknown" + } catch (e: Exception) { + "Unknown" + } + } + + /** + * Creates a ClientDetails from connector details Example output: + * "Kusto.MyConnector:{1.0.0}|App.{MyApp}:{0.5.3}|CustomField:{CustomValue}" + * + * @param name The name of the connector (will be prefixed with + * "Kusto.") + * @param version The version of the connector + * @param sendUser True if the user should be sent to Kusto, otherwise + * "[none]" will be sent + * @param overrideUser The user to send to Kusto, or null to use the + * current user + * @param appName The app hosting the connector, or null to use the + * current process name + * @param appVersion The version of the app hosting the connector, or + * null to use "[none]" + * @param additionalFields Additional fields to trace as key-value pairs + * @return ClientDetails instance with formatted connector information + */ + fun fromConnectorDetails( + name: String, + version: String, + sendUser: Boolean = false, + overrideUser: String? = null, + appName: String? = null, + appVersion: String? = null, + additionalFields: Map? = null, + ): ClientDetails { + val fieldsMap = linkedMapOf() + fieldsMap["Kusto.$name"] = version + + val finalAppName = appName ?: getProcessName() + val finalAppVersion = appVersion ?: NONE + fieldsMap["App.${escapeField(finalAppName)}"] = finalAppVersion + + additionalFields?.let { fieldsMap.putAll(it) } + + val app = formatHeader(fieldsMap) + + val user = + if (sendUser) { + overrideUser ?: getUserName() + } else { + NONE + } + + return ClientDetails(app, user, null) + } + + fun createDefault(): ClientDetails { + return ClientDetails( + applicationForTracing = getProcessName(), + userNameForTracing = getUserName(), + clientVersionForTracing = getDefaultVersion(), + ) + } + } + + @JvmName("getApplicationForTracingOrDefault") + fun getApplicationForTracing(): String { + return applicationForTracing ?: getProcessName() + } + + @JvmName("getUserNameForTracingOrDefault") + fun getUserNameForTracing(): String { + return userNameForTracing ?: getUserName() + } + + @JvmName("getClientVersionForTracingOrDefault") + fun getClientVersionForTracing(): String { + val defaultVersion = getDefaultVersion() + return if (clientVersionForTracing != null) { + "$defaultVersion|$clientVersionForTracing" + } else { + defaultVersion + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 6e7448d96..07e88e0cb 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -2,10 +2,13 @@ // Licensed under the MIT License. 
package com.microsoft.azure.kusto.ingest.v2.common +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.CONFIG_CACHE_DEFAULT_REFRESH_INTERVAL_HOURS +import com.microsoft.azure.kusto.ingest.v2.CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS +import com.microsoft.azure.kusto.ingest.v2.ConfigurationClient import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import java.lang.AutoCloseable import java.time.Duration -import kotlin.text.compareTo interface ConfigurationCache : AutoCloseable { val refreshInterval: Duration @@ -16,9 +19,41 @@ interface ConfigurationCache : AutoCloseable { } class DefaultConfigurationCache( - override val refreshInterval: Duration = Duration.ofHours(1), - private val configurationProvider: suspend () -> ConfigurationResponse, + override val refreshInterval: Duration = + Duration.ofHours(CONFIG_CACHE_DEFAULT_REFRESH_INTERVAL_HOURS), + val dmUrl: String? = null, + val tokenCredential: TokenCredential? = null, + val skipSecurityChecks: Boolean? = + CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS, + val configurationProvider: (suspend () -> ConfigurationResponse)? = null, ) : ConfigurationCache { + + init { + if ( + configurationProvider == null && + ( + dmUrl == null || + tokenCredential == null || + skipSecurityChecks == null + ) + ) { + throw IllegalArgumentException( + "Either configurationProvider or all of dmUrl, tokenCredential, and skipSecurityChecks must be provided", + ) + } + } + + private val provider: suspend () -> ConfigurationResponse = + configurationProvider + ?: { + ConfigurationClient( + dmUrl!!, + tokenCredential!!, + skipSecurityChecks!!, + ) + .getConfigurationDetails() + } + @Volatile private var cachedConfiguration: ConfigurationResponse? = null private var lastRefreshTime: Long = 0 @@ -30,7 +65,7 @@ class DefaultConfigurationCache( refreshInterval.toMillis() if (needsRefresh) { val newConfig = - runCatching { configurationProvider() } + runCatching { provider() } .getOrElse { cachedConfiguration ?: throw it } synchronized(this) { // Double-check in case another thread refreshed while we were waiting diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index d871f853a..0ebb84448 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -2,6 +2,9 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common +import com.microsoft.azure.kusto.ingest.v2.INGEST_RETRY_POLICY_CUSTOM_INTERVALS +import com.microsoft.azure.kusto.ingest.v2.INGEST_RETRY_POLICY_DEFAULT_INTERVAL_SECONDS +import com.microsoft.azure.kusto.ingest.v2.INGEST_RETRY_POLICY_DEFAULT_TOTAL_RETRIES import java.time.Duration data class Retry(val shouldRetry: Boolean, val interval: Duration) @@ -22,8 +25,11 @@ object NoRetryPolicy : IngestRetryPolicy { } class SimpleRetryPolicy( - val intervalDuration: Duration = Duration.ofSeconds(10), - val totalRetries: Int = 3, + val intervalDuration: Duration = + Duration.ofSeconds( + INGEST_RETRY_POLICY_DEFAULT_INTERVAL_SECONDS, + ), + val totalRetries: Int = INGEST_RETRY_POLICY_DEFAULT_TOTAL_RETRIES, ) : IngestRetryPolicy { init { require(totalRetries > 0) { "totalRetries must be positive" } @@ -42,11 +48,10 @@ class CustomRetryPolicy(intervalDurations: Array? 
= null) : IngestRetryPolicy { private val intervalDurations: Array = intervalDurations - ?: arrayOf( - Duration.ofSeconds(1), - Duration.ofSeconds(3), - Duration.ofSeconds(7), - ) + ?: INGEST_RETRY_POLICY_CUSTOM_INTERVALS.map { + Duration.ofSeconds(it) + } + .toTypedArray() val intervals: List get() = intervalDurations.toList() diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt index e0527ef04..55f3643c4 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt @@ -15,7 +15,7 @@ open class IngestException( override val message: String get() = creationMessage - ?: "Something went wrong calling into a Kusto client library (fallback message)." + ?: "Something went wrong calling Kusto client library (fallback message)." override fun toString(): String = message } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt new file mode 100644 index 000000000..d4ced8da8 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt @@ -0,0 +1,392 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.container + +import com.azure.core.util.Context +import com.azure.storage.blob.BlobClientBuilder +import com.azure.storage.blob.models.BlockBlobItem +import com.azure.storage.blob.models.ParallelTransferOptions +import com.azure.storage.blob.options.BlobParallelUploadOptions +import com.microsoft.azure.kusto.ingest.v2.BLOB_UPLOAD_TIMEOUT_HOURS +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_BLOCK_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_RETRIES +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_MAX_SINGLE_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope +import org.slf4j.LoggerFactory +import java.io.InputStream +import java.time.Clock +import java.time.Duration +import java.time.Instant +import java.util.concurrent.atomic.AtomicInteger + +enum class UploadMethod { + // Use server preference or Storage as fallback + DEFAULT, + + // Use Storage blob + STORAGE, + + // Use OneLake + LAKE, +} + +data class UploadSource( + val name: String, + val stream: InputStream, + val sizeBytes: Long = -1, +) + +class BlobUploadContainer( + val configurationCache: ConfigurationCache, + private val uploadMethod: UploadMethod = UploadMethod.DEFAULT, + private val maxRetries: Int = UPLOAD_CONTAINER_MAX_RETRIES, + private val maxDataSize: Long = UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES, + private val ignoreSizeLimit: Boolean = false, + private val maxConcurrency: Int = UPLOAD_CONTAINER_MAX_CONCURRENCY, +) : UploadContainerBase { + private val logger = + 
LoggerFactory.getLogger(BlobUploadContainer::class.java) + private val containerIndex = AtomicInteger(0) + + override suspend fun uploadAsync( + name: String, + stream: InputStream, + ): String { + val errorCode = validateStream(stream, name) + if (errorCode != null) { + logger.error( + "Stream validation failed for {}: {}", + name, + errorCode.description, + ) + throw IngestException(errorCode.description, isPermanent = true) + } + + if (!ignoreSizeLimit && stream.available() > 0) { + val availableSize = stream.available().toLong() + if (availableSize > maxDataSize) { + logger.error( + "Stream size {} exceeds max allowed size {} for: {}", + availableSize, + maxDataSize, + name, + ) + throw IngestException( + "Upload source exceeds maximum allowed size: $availableSize > $maxDataSize", + isPermanent = true, + ) + } + } + + val containers = selectContainers() + require(containers.isNotEmpty()) { + "No containers available for upload" + } + + var lastException: Exception? = null + + repeat(maxRetries) { attempt -> + val container = + containers[ + containerIndex.getAndIncrement() % containers.size, + ] + try { + return uploadToContainer(name, stream, container) + } catch (e: Exception) { + logger.warn( + "Upload attempt ${attempt + 1} failed for container ${container.path}", + e, + ) + lastException = e + + if (stream.markSupported()) { + try { + stream.reset() + } catch (resetEx: Exception) { + logger.warn("Failed to reset stream for retry", resetEx) + throw IngestException( + "Upload failed and stream cannot be reset for retry", + cause = e, + isPermanent = true, + ) + } + } + } + } + + throw IngestException( + "Failed to upload after $maxRetries attempts", + cause = lastException, + isPermanent = false, + ) + } + + suspend fun uploadManyAsync(sources: List): UploadResults = + coroutineScope { + logger.info( + "Starting batch upload of {} sources with max concurrency {}", + sources.size, + maxConcurrency, + ) + + // Process sources in chunks to respect maxConcurrency at file level + val results = + sources.chunked(maxConcurrency).flatMap { chunk -> + chunk.map { source -> + async { + val startedAt = + Instant.now( + Clock.systemUTC(), + ) + try { + val blobUrl = + uploadAsync( + source.name, + source.stream, + ) + val completedAt = + Instant.now( + Clock + .systemUTC(), + ) + UploadResult.Success( + sourceName = + source.name, + startedAt = startedAt, + completedAt = + completedAt, + blobUrl = blobUrl, + sizeBytes = + source.sizeBytes, + ) + } catch (e: Exception) { + val completedAt = + Instant.now( + Clock + .systemUTC(), + ) + val errorCode = + when { + e.message?.contains( + "size", + ) == true -> + UploadErrorCode + .SOURCE_SIZE_LIMIT_EXCEEDED + e.message?.contains( + "readable", + ) == true -> + UploadErrorCode + .SOURCE_NOT_READABLE + e.message?.contains( + "empty", + ) == true -> + UploadErrorCode + .SOURCE_IS_EMPTY + e.message?.contains( + "container", + ) == true -> + UploadErrorCode + .NO_CONTAINERS_AVAILABLE + else -> + UploadErrorCode + .UPLOAD_FAILED + } + + UploadResult.Failure( + sourceName = + source.name, + startedAt = startedAt, + completedAt = + completedAt, + errorCode = errorCode, + errorMessage = + e.message + ?: "Upload failed", + exception = e, + isPermanent = + e is + IngestException && + e + .isPermanent == + true, + ) + } + } + } + .awaitAll() + } + + val successes = results.filterIsInstance() + val failures = results.filterIsInstance() + + logger.info( + "Batch upload completed: {} successes, {} failures out of {} total", + successes.size, + failures.size, + 
sources.size,
+            )
+
+            UploadResults(successes, failures)
+        }
+
+    private fun validateStream(
+        stream: InputStream,
+        name: String,
+    ): UploadErrorCode? {
+        return try {
+            if (stream.available() < 0) {
+                UploadErrorCode.SOURCE_NOT_READABLE
+            } else if (stream.markSupported() && stream.available() == 0) {
+                UploadErrorCode.SOURCE_IS_EMPTY
+            } else {
+                null
+            }
+        } catch (e: Exception) {
+            logger.warn("Error validating stream for {}", name, e)
+            UploadErrorCode.SOURCE_NOT_READABLE
+        }
+    }
+
+    private fun uploadToContainer(
+        name: String,
+        stream: InputStream,
+        container: ContainerInfo,
+    ): String {
+        val (url, sas) = container.path!!.split("?", limit = 2)
+
+        val blobClient =
+            BlobClientBuilder()
+                .endpoint(container.path)
+                .blobName(name)
+                .buildClient()
+
+        logger.debug(
+            "Uploading blob {} to container url {}",
+            name,
+            url,
+        )
+
+        val parallelTransferOptions =
+            ParallelTransferOptions()
+                .setBlockSizeLong(UPLOAD_BLOCK_SIZE_BYTES)
+                .setMaxConcurrency(maxConcurrency)
+                .setMaxSingleUploadSizeLong(
+                    UPLOAD_MAX_SINGLE_SIZE_BYTES,
+                )
+
+        val blobUploadOptions =
+            BlobParallelUploadOptions(stream)
+                .setParallelTransferOptions(parallelTransferOptions)
+
+        val blobUploadResult =
+            blobClient.uploadWithResponse(
+                blobUploadOptions,
+                Duration.ofHours(BLOB_UPLOAD_TIMEOUT_HOURS),
+                Context.NONE,
+            )
+
+        return if (
+            blobUploadResult.statusCode in 200..299 &&
+                blobUploadResult.value != null
+        ) {
+            val blockBlobItem: BlockBlobItem = blobUploadResult.value
+            logger.debug(
+                "Upload succeeded to blob url: {} with eTag: {}",
+                url,
+                blockBlobItem.eTag,
+            )
+            "$url/$name?$sas"
+        } else {
+            throw IngestException(
+                "Upload failed with status: ${blobUploadResult.statusCode}",
+                isPermanent = blobUploadResult.statusCode in 400..<500,
+            )
+        }
+    }
+
+    private suspend fun selectContainers(): List<ContainerInfo> {
+        val configResponse = configurationCache.getConfiguration()
+        val containerSettings =
+            configResponse.containerSettings
+                ?: throw IngestException(
+                    "No container settings available",
+                    isPermanent = true,
+                )
+        val hasStorage = !containerSettings.containers.isNullOrEmpty()
+        val hasLake = !containerSettings.lakeFolders.isNullOrEmpty()
+
+        if (!hasStorage && !hasLake) {
+            throw IngestException("No containers available", isPermanent = true)
+        }
+
+        // Determine effective upload method
+        val effectiveMethod =
+            when (uploadMethod) {
+                UploadMethod.DEFAULT -> {
+                    // Use server's preferred upload method if available
+                    val serverPreference =
+                        containerSettings.preferredUploadMethod
+                    when {
+                        serverPreference.equals(
+                            "Storage",
+                            ignoreCase = true,
+                        ) && hasStorage -> {
+                            logger.debug(
+                                "Using server preferred upload method: Storage",
+                            )
+                            UploadMethod.STORAGE
+                        }
+                        serverPreference.equals(
+                            "Lake",
+                            ignoreCase = true,
+                        ) && hasLake -> {
+                            logger.debug(
+                                "Using server preferred upload method: Lake",
+                            )
+                            UploadMethod.LAKE
+                        }
+                        // Fallback: prefer Storage if available, otherwise Lake
+                        hasStorage -> {
+                            logger.debug(
+                                "No server preference or unavailable, defaulting to Storage",
+                            )
+                            UploadMethod.STORAGE
+                        }
+                        else -> {
+                            logger.debug(
+                                "No server preference or unavailable, defaulting to Lake",
+                            )
+                            UploadMethod.LAKE
+                        }
+                    }
+                }
+                UploadMethod.LAKE ->
+                    if (hasLake) {
+                        UploadMethod.LAKE
+                    } else {
+                        UploadMethod.STORAGE
+                    }
+                UploadMethod.STORAGE ->
+                    if (hasStorage) {
+                        UploadMethod.STORAGE
+                    } else {
+                        UploadMethod.LAKE
+                    }
+            }
+        return when {
+            effectiveMethod == UploadMethod.LAKE && hasLake ->
+                containerSettings.lakeFolders
+            effectiveMethod ==
UploadMethod.STORAGE && hasStorage -> + containerSettings.containers + hasStorage -> containerSettings.containers + else -> containerSettings.lakeFolders!! + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt new file mode 100644 index 000000000..34c08d076 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.container + +import java.io.InputStream + +interface UploadContainerBase { + suspend fun uploadAsync(name: String, stream: InputStream): String +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt new file mode 100644 index 000000000..02bbb91ff --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.container + +enum class UploadErrorCode(val code: String, val description: String) { + // Stream validation errors + SOURCE_IS_NULL("UploadError_SourceIsNull", "Upload source is null"), + SOURCE_NOT_READABLE( + "UploadError_SourceNotReadable", + "Upload source is not readable", + ), + SOURCE_IS_EMPTY("UploadError_SourceIsEmpty", "Upload source is empty"), + + // Size validation errors + SOURCE_SIZE_LIMIT_EXCEEDED( + "UploadError_SourceSizeLimitExceeded", + "Upload source exceeds maximum allowed size", + ), + + // Upload errors + UPLOAD_FAILED("UploadError_Failed", "Upload operation failed"), + NO_CONTAINERS_AVAILABLE( + "UploadError_NoContainersAvailable", + "No upload containers available", + ), + CONTAINER_UNAVAILABLE( + "UploadError_ContainerUnavailable", + "Upload container is unavailable", + ), + + // Network/Azure errors + NETWORK_ERROR("UploadError_NetworkError", "Network error during upload"), + AUTHENTICATION_FAILED( + "UploadError_AuthenticationFailed", + "Authentication failed for upload", + ), + + // General + UNKNOWN("UploadError_Unknown", "Unknown upload error"), + ; + + override fun toString(): String = code +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt new file mode 100644 index 000000000..bb2d18ca5 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
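// A sketch of consuming the batch-upload result type defined below; the
// BlobUploadContainer and its List<UploadSource> are assumed to exist, and the
// call must run inside a coroutine since uploadManyAsync is suspending.
suspend fun uploadBatchSketch(
    container: BlobUploadContainer,
    sources: List<UploadSource>,
) {
    val results = container.uploadManyAsync(sources)
    results.successes.forEach { ok ->
        println("${ok.sourceName} -> ${ok.blobUrl} (${ok.sizeBytes} bytes)")
    }
    if (results.hasFailures) {
        results.failures.forEach { failed ->
            println(
                "${failed.sourceName}: ${failed.errorCode} - " +
                    "${failed.errorMessage} (permanent=${failed.isPermanent})",
            )
        }
    }
}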
+package com.microsoft.azure.kusto.ingest.v2.container
+
+import com.microsoft.azure.kusto.ingest.v2.common.BatchOperationResult
+import java.time.Instant
+
+sealed class UploadResult {
+    abstract val sourceName: String
+    abstract val startedAt: Instant
+    abstract val completedAt: Instant
+
+    data class Success(
+        override val sourceName: String,
+        override val startedAt: Instant,
+        override val completedAt: Instant,
+        val blobUrl: String,
+        val sizeBytes: Long,
+    ) : UploadResult()
+
+    data class Failure(
+        override val sourceName: String,
+        override val startedAt: Instant,
+        override val completedAt: Instant,
+        val errorCode: UploadErrorCode,
+        val errorMessage: String,
+        val exception: Exception?,
+        val isPermanent: Boolean = false,
+    ) : UploadResult()
+}
+
+data class UploadResults(
+    override val successes: List<UploadResult.Success>,
+    override val failures: List<UploadResult.Failure>,
+) : BatchOperationResult<UploadResult.Success, UploadResult.Failure>
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt
index aa5eb1974..fada85ddd 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt
@@ -2,8 +2,13 @@
 // Licensed under the MIT License.
 package com.microsoft.azure.kusto.ingest.v2.source
 
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
 import java.util.UUID
 
 abstract class AbstractSourceInfo : SourceInfo {
-    override var sourceId: UUID? = null
+    val logger: Logger
+        get() = LoggerFactory.getLogger(SourceInfo::class.java)
+
+    override var sourceId: UUID = UUID.randomUUID()
 }
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt
index 59d35003a..3b374b0eb 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt
@@ -2,7 +2,12 @@
 // Licensed under the MIT License.
 package com.microsoft.azure.kusto.ingest.v2.source
 
-import java.io.File
+import com.microsoft.azure.kusto.ingest.v2.common.BatchOperationResult
+import com.microsoft.azure.kusto.ingest.v2.container.BlobUploadContainer
+import com.microsoft.azure.kusto.ingest.v2.container.UploadErrorCode
+import com.microsoft.azure.kusto.ingest.v2.container.UploadSource
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
 import java.util.UUID
 
 class BlobSourceInfo : AbstractSourceInfo {
@@ -19,15 +24,10 @@ class BlobSourceInfo : AbstractSourceInfo {
         this.blobPath = blobPath
     }
 
-    constructor(blobPath: String, compressionType: CompressionType?) {
-        this.blobPath = blobPath
-        this.compressionType = compressionType
-    }
-
     constructor(
         blobPath: String,
         compressionType: CompressionType?,
-        sourceId: UUID?,
+        sourceId: UUID,
     ) {
         this.blobPath = blobPath
         this.compressionType = compressionType
@@ -38,42 +38,159 @@ class BlobSourceInfo : AbstractSourceInfo {
         require(blobPath.isNotBlank()) { "blobPath cannot be blank" }
     }
 
+    /**
+     * Returns the exact size of the blob in bytes if available. This is only
+     * set when the blob was created by uploading a local source. Returns null
+     * if size is not available (e.g., for external blob URLs).
+     */
+    fun size(): Long?
{
+        return blobExactSize
+    }
+
     companion object {
-        /** For internal usage, adding blobExactSize */
-        fun fromFile(
-            blobPath: String,
-            filePath: String,
-            sourceId: UUID?,
-            sourceCompressionType: CompressionType?,
-            gotCompressed: Boolean,
+        val logger: Logger
+            get() = LoggerFactory.getLogger(BlobSourceInfo::class.java)
+
+        /**
+         * Create BlobSourceInfo from LocalSource (FileSourceInfo or
+         * StreamSourceInfo) using BlobUploadContainer
+         */
+        private suspend fun fromLocalSource(
+            localSource: LocalSource,
+            blobUploadContainer: BlobUploadContainer,
         ): BlobSourceInfo {
-            val blobSourceInfo =
-                BlobSourceInfo(
-                    blobPath,
-                    if (gotCompressed) {
-                        CompressionType.GZIP
-                    } else {
-                        sourceCompressionType
-                    },
-                    sourceId,
-                )
-            if (sourceCompressionType == null) {
-                blobSourceInfo.blobExactSize = File(filePath).length()
-            }
-            return blobSourceInfo
+            val (inputStream, size, effectiveCompression) =
+                localSource.prepareForUpload()
+            val blobName = localSource.generateBlobName()
+            val blobPath =
+                blobUploadContainer.uploadAsync(blobName, inputStream)
+            logger.info(
+                "Uploaded blob to path {} with blob name {}",
+                blobPath.split("?").first(),
+                blobName,
+            )
+            return BlobSourceInfo(
+                blobPath,
+                effectiveCompression,
+                localSource.sourceId,
+            )
+                .apply { blobExactSize = size }
         }
 
-        /** For internal usage, adding blobExactSize */
-        fun fromStream(
-            blobPath: String,
-            size: Int,
-            sourceId: UUID?,
-            compressionType: CompressionType?,
-        ): BlobSourceInfo {
-            val blobSourceInfo =
-                BlobSourceInfo(blobPath, compressionType, sourceId)
-            blobSourceInfo.blobExactSize = size.toLong()
-            return blobSourceInfo
+        /**
+         * Create BlobSourceInfo from FileSourceInfo using BlobUploadContainer
+         */
+        suspend fun fromFileSourceInfo(
+            fileSourceInfo: FileSourceInfo,
+            blobUploadContainer: BlobUploadContainer,
+        ): BlobSourceInfo = fromLocalSource(fileSourceInfo, blobUploadContainer)
+
+        /**
+         * Create BlobSourceInfo from StreamSourceInfo using BlobUploadContainer
+         */
+        suspend fun fromStreamSourceInfo(
+            streamSourceInfo: StreamSourceInfo,
+            blobUploadContainer: BlobUploadContainer,
+        ): BlobSourceInfo =
+            fromLocalSource(streamSourceInfo, blobUploadContainer)
+
+        /** Batch converts multiple LocalSource objects to BlobSourceInfo using parallel uploads. */
+        suspend fun fromLocalSourcesBatch(
+            localSources: List<LocalSource>,
+            blobUploadContainer: BlobUploadContainer,
+        ): BatchConversionResult {
+            if (localSources.isEmpty()) {
+                return BatchConversionResult(emptyList(), emptyList())
+            }
+
+            logger.info(
+                "Starting batch conversion of {} local sources",
+                localSources.size,
+            )
+
+            val uploadSources =
+                localSources.map { source ->
+                    val (inputStream, size, effectiveCompression) =
+                        source.prepareForUpload()
+                    val blobName = source.generateBlobName()
+                    UploadSource(
+                        name = blobName,
+                        stream = inputStream,
+                        sizeBytes = size ?: -1,
+                    )
+                }
+
+            val uploadResults =
+                blobUploadContainer.uploadManyAsync(uploadSources)
+
+            val blobSources = mutableListOf<BlobSourceInfo>()
+            val failures = mutableListOf<SourceConversionFailure>()
+
+            val sourceMap = localSources.associateBy { it.generateBlobName() }
+
+            uploadResults.successes.forEach { success ->
+                val originalSource = sourceMap[success.sourceName]
+                if (originalSource != null) {
+                    blobSources.add(
+                        BlobSourceInfo(
+                            blobPath = success.blobUrl,
+                            compressionType =
+                                if (originalSource.compressionType == CompressionType.NONE) {
+                                    CompressionType.GZIP // auto-compressed during upload
+                                } else {
+                                    originalSource.compressionType
+                                },
+                            sourceId = originalSource.sourceId,
+                        )
+                            .apply {
blobExactSize = success.sizeBytes },
+                    )
+                }
+            }
+
+            uploadResults.failures.forEach { failure ->
+                val originalSource = sourceMap[failure.sourceName]
+                if (originalSource != null) {
+                    failures.add(
+                        SourceConversionFailure(
+                            source = originalSource,
+                            errorCode = failure.errorCode,
+                            errorMessage = failure.errorMessage,
+                            exception = failure.exception,
+                            isPermanent = failure.isPermanent,
+                        ),
+                    )
+                }
+            }
+
+            logger.info(
+                "Batch conversion completed: {} successes, {} failures",
+                blobSources.size,
+                failures.size,
+            )
+
+            return BatchConversionResult(blobSources, failures)
         }
     }
 }
+
+/** Represents a failure during source conversion to blob. */
+data class SourceConversionFailure(
+    val source: LocalSource,
+    val errorCode: UploadErrorCode,
+    val errorMessage: String,
+    val exception: Exception?,
+    val isPermanent: Boolean,
+)
+
+data class BatchConversionResult(
+    override val successes: List<BlobSourceInfo>,
+    override val failures: List<SourceConversionFailure>,
+) : BatchOperationResult<BlobSourceInfo, SourceConversionFailure>
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt
index 4f7e3b94a..0c81a65ee 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/CompressionType.kt
@@ -9,6 +9,8 @@ enum class CompressionType {
     ;
 
     override fun toString(): String {
-        return if (this == NONE) "" else name
+        return when (this) {
+            NONE -> ""
+            GZIP -> "gz"
+            else -> name.lowercase()
+        }
     }
 }
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt
new file mode 100644
index 000000000..ec1c80544
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt
@@ -0,0 +1,192 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.source
+
+import com.microsoft.azure.kusto.ingest.v2.models.Format
+import java.io.ByteArrayOutputStream
+import java.io.InputStream
+import java.nio.file.Files
+import java.nio.file.Path
+import java.util.UUID
+import java.util.zip.GZIPInputStream
+import java.util.zip.GZIPOutputStream
+import java.util.zip.ZipInputStream
+
+abstract class LocalSource(
+    val format: Format,
+    val leaveOpen: Boolean,
+    val compressionType: CompressionType = CompressionType.NONE,
+    baseName: String? = null,
+    override var sourceId: UUID = UUID.randomUUID(),
+) : AbstractSourceInfo() {
+
+    init {
+        initName(baseName)
+    }
+
+    // Lazily initialized input stream for ingestion source
+    protected lateinit var mStream: InputStream
+
+    lateinit var name: String
+        private set
+
+    fun initName(baseName: String? = null) {
+        name = "${baseName ?: sourceId.toString()}_$format.$compressionType"
+    }
+
+    // Returns the stream of data to ingest; implementations initialize it lazily.
+    abstract fun data(): InputStream
+
+    /**
+     * Returns the approximate size of the data in bytes. For files, returns the
+     * exact file size. For streams, attempts to determine available bytes (may
+     * not be accurate for all stream types). Returns null if size cannot be
+     * determined.
+     */
+    abstract fun size(): Long?
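    // Illustrative example of the naming scheme implemented by generateBlobName()
    // below: a CSV source with no explicit compression is gzipped on upload, so a
    // sourceId of 3fa85f64-5717-4562-b3fc-2c963f66afa6 would produce
    // "3fa85f64-5717-4562-b3fc-2c963f66afa6_csv.gz" (GZIP renders as "gz" via
    // CompressionType.toString); binary formats keep their original compression.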
+
+    fun reset() {
+        data().reset()
+    }
+
+    open fun close() {
+        if (!leaveOpen) {
+            if (this::mStream.isInitialized) {
+                mStream.close()
+            }
+        }
+    }
+
+    /**
+     * Prepares the source data for blob upload, handling compression if needed.
+     * Returns a triple of (InputStream, size, effectiveCompressionType)
+     */
+    fun prepareForUpload(): Triple<InputStream, Long?, CompressionType> {
+        // Binary formats (Parquet, AVRO, ORC) already have internal compression
+        // and should not be compressed again
+        val shouldCompress =
+            (compressionType == CompressionType.NONE) &&
+                !FormatUtil.isBinaryFormat(format)
+
+        return if (shouldCompress) {
+            // Compress using GZIP for non-binary formats
+            val byteStream = ByteArrayOutputStream()
+            GZIPOutputStream(byteStream).use { gzipOut ->
+                data().copyTo(gzipOut)
+            }
+            val bytes = byteStream.toByteArray()
+            Triple(
+                bytes.inputStream(),
+                bytes.size.toLong(),
+                CompressionType.GZIP,
+            )
+        } else {
+            val stream = data()
+            val size =
+                when (this) {
+                    is FileSourceInfo -> Files.size(path)
+                    is StreamSourceInfo ->
+                        try {
+                            stream.available().toLong()
+                        } catch (_: Exception) {
+                            null
+                        }
+                    else -> null
+                }
+            Triple(stream, size, compressionType)
+        }
+    }
+
+    /** Generates a unique blob name for upload */
+    fun generateBlobName(): String {
+        // Binary formats should not be compressed, so effective compression stays NONE
+        val shouldCompress =
+            (compressionType == CompressionType.NONE) &&
+                !FormatUtil.isBinaryFormat(format)
+        val effectiveCompression =
+            if (shouldCompress) {
+                CompressionType.GZIP
+            } else {
+                compressionType
+            }
+        return "${sourceId}_${format.value}.$effectiveCompression"
+    }
+
+    override fun validate() {
+        // Basic validation - subclasses can override for specific validation
+    }
+}
+
+class StreamSourceInfo(
+    stream: InputStream,
+    format: Format,
+    sourceCompression: CompressionType,
+    sourceId: UUID = UUID.randomUUID(),
+    name: String? = null,
+    leaveOpen: Boolean = false,
+) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) {
+
+    init {
+        mStream = stream
+    }
+
+    override fun data(): InputStream {
+        return mStream
+    }
+
+    override fun size(): Long? {
+        return try {
+            mStream.available().toLong()
+        } catch (e: Exception) {
+            logger.warn("Could not determine stream size: ${e.message}")
+            null
+        }
+    }
+}
+
+class FileSourceInfo(
+    val path: Path,
+    format: Format,
+    compressionType: CompressionType = CompressionType.NONE,
+    name: String? = null,
+    sourceId: UUID = UUID.randomUUID(),
+    leaveOpen: Boolean = false,
+) : LocalSource(format, leaveOpen, compressionType, name, sourceId) {
+
+    // Stream over the file contents, transparently unwrapping gzip/zip input
+    private val fileStream: InputStream =
+        when (compressionType) {
+            CompressionType.GZIP ->
+                GZIPInputStream(Files.newInputStream(path))
+            CompressionType.ZIP -> {
+                val zipStream = ZipInputStream(Files.newInputStream(path))
+                zipStream.nextEntry // move to the first entry
+                zipStream
+            }
+            else -> Files.newInputStream(path)
+        }
+
+    init {
+        mStream = fileStream
+    }
+
+    override fun data(): InputStream {
+        return mStream
+    }
+
+    override fun size(): Long?
{ + return try { + Files.size(path) + } catch (e: Exception) { + logger.warn("Could not determine file size for $path: ${e.message}") + null + } + } + + override fun close() { + if (!leaveOpen) { + fileStream.close() + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt index f68226638..c8179e993 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt @@ -8,5 +8,5 @@ interface SourceInfo { /** Checks that this SourceInfo is defined appropriately. */ fun validate() - var sourceId: UUID? + val sourceId: UUID } diff --git a/ingest-v2/src/main/resources/application.yaml b/ingest-v2/src/main/resources/application.yaml deleted file mode 100644 index 88e6eff80..000000000 --- a/ingest-v2/src/main/resources/application.yaml +++ /dev/null @@ -1,6 +0,0 @@ -ktor: - application: - modules: - - com.microsoft.azure.kusto.ApplicationKt.module - deployment: - port: 8080 diff --git a/ingest-v2/src/main/resources/logback.xml b/ingest-v2/src/main/resources/logback.xml deleted file mode 100644 index aadef5d5b..000000000 --- a/ingest-v2/src/main/resources/logback.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - %d{YYYY-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - \ No newline at end of file diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml index 85eb2d893..d3b73853a 100644 --- a/ingest-v2/src/main/resources/openapi.yaml +++ b/ingest-v2/src/main/resources/openapi.yaml @@ -134,7 +134,7 @@ paths: required: true schema: type: string - example: "ingest-mycluster.swedencentral.kusto.windows.net" + example: "gzip" - name: Accept-Encoding in: header required: false diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt index ce58a8fca..e7fd12b21 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt @@ -84,7 +84,7 @@ abstract class IngestV2TestBase(testClass: Class<*>) { @AfterAll fun dropTables() { val dropTableScript = ".drop table $targetTable ifexists" - logger.info("Dropping table $targetTable") + logger.error("Dropping table $targetTable") adminClusterClient.executeMgmt(database, dropTableScript) } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt new file mode 100644 index 000000000..2d609e6e0 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt @@ -0,0 +1,472 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
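// A small sketch of FileSourceInfo from LocalSource.kt above: gzip and zip
// inputs are transparently unwrapped when read. The path and format below are
// placeholders.
import java.nio.file.Paths

fun fileSourceSketch() {
    val src = FileSourceInfo(
        path = Paths.get("/tmp/sample.json.gz"),
        format = Format.multijson,
        compressionType = CompressionType.GZIP,
    )
    // data() returns the decompressing stream; use{} closes it when done.
    src.data().use { stream ->
        println("decompressed bytes: ${stream.readBytes().size}")
    }
}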
+package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import com.microsoft.azure.kusto.ingest.v2.source.FileSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Assumptions.assumeTrue +import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.api.parallel.Execution +import org.junit.jupiter.api.parallel.ExecutionMode +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.MethodSource +import java.io.ByteArrayInputStream +import java.net.ConnectException +import java.nio.file.Files +import java.util.UUID +import java.util.stream.Stream +import kotlin.test.assertNotNull +import kotlin.time.Duration + +/** + * End-to-end tests for ManagedStreamingIngestClient. + * + * These tests verify that the client correctly: + * 1. Attempts streaming ingestion for small data + * 2. Falls back to queued ingestion for large data or when streaming fails + * 3. Handles various error scenarios + * 4. Respects the managed streaming policy settings + */ +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Execution(ExecutionMode.CONCURRENT) +class ManagedStreamingIngestClientTest : + IngestV2TestBase(ManagedStreamingIngestClientTest::class.java) { + + private val publicBlobUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" + + private val targetUuid = UUID.randomUUID().toString() + private val randomRow: String = + """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$targetUuid","messageId": "7f316225-839a-4593-92b5-1812949279b3","temperature": 31.0301639051317,"humidity": 62.0791099602725}""" + .trimIndent() + + /** Test managed streaming ingestion with small blob data */ + @ParameterizedTest( + name = "[ManagedStreaming-SmallData] {index} => TestName={0}", + ) + @CsvSource( + "ManagedStreaming-SmallBlob,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json", + "ManagedStreaming-SmallMultilineBlob,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", + ) + fun `test managed streaming ingestion with small blob data`( + testName: String, + blobUrl: String, + ): Unit = runBlocking { + logger.info("Starting test: $testName") + val managedClient = + ManagedStreamingIngestClient( + clusterUrl = engineEndpoint, + tokenCredential = tokenProvider, + managedStreamingPolicy = + DefaultManagedStreamingPolicy(), + skipSecurityChecks = true, + ) + + val testSources = listOf(BlobSourceInfo(blobUrl)) + val properties = + IngestRequestProperties( + format = targetTestFormat, + enableTracking = true, + ) + + try { + // Ingest data - should attempt streaming first + val ingestionResponse = + managedClient.submitManagedIngestion( + database = database, + table = targetTable, + sources = testSources, + format = targetTestFormat, + ingestProperties = properties, + ) + + logger.info( + "E2E: Submitted managed streaming ingestion with operation ID: {}", + ingestionResponse.ingestionOperationId, + ) + assertNotNull( + 
ingestionResponse, + "IngestionOperation should not be null", + ) + assertNotNull( + ingestionResponse.ingestionOperationId, + "Operation ID should not be null", + ) + + // If it fell back to queued ingestion, poll for status + if ( + !ingestionResponse.ingestionOperationId.startsWith( + "managed-", + ) + ) { + logger.info( + "Ingestion fell back to queued mode. Polling for completion...", + ) + val finalStatus = + managedClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = + ingestionResponse.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + logger.info( + "Ingestion completed with final status: {}", + finalStatus.status, + ) + + assert( + finalStatus.details?.any { + it.status == BlobStatus.Status.Succeeded + } == true, + ) { + "Expected at least one successful ingestion" + } + } else { + // Streaming ingestion - verify data was ingested + logger.info("Ingestion used streaming mode. Verifying data...") + kotlinx.coroutines.delay(3000) + + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | summarize count=count()", + ) + .primaryResults + + assertNotNull(results, "Query results should not be null") + results.next() + val count: Long = results.getLong("count") + assertNotNull(count, "Count should not be null") + assert(count > 0) { + "Expected records in table after streaming ingestion, but got $count" + } + logger.info( + "Streaming ingestion verified - {} records in table", + count, + ) + } + } catch (e: ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.message}", + ) + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.cause?.message}", + ) + } else { + throw e + } + } + } + + /** Test managed streaming with small streaming data */ + @ParameterizedTest( + name = "[ManagedStreaming-DirectData] {index} => TestName={0}", + ) + @MethodSource("directDataTestParameters") + fun `test managed streaming with small stream data`( + testName: String, + data: String, + deviceId: String, + ) = runBlocking { + logger.info( + "Starting managed streaming with small stream data: $testName", + ) + + val managedClient = + ManagedStreamingIngestClient( + clusterUrl = engineEndpoint, + tokenCredential = tokenProvider, + managedStreamingPolicy = + DefaultManagedStreamingPolicy(), + skipSecurityChecks = true, + ) + + val source = + StreamSourceInfo( + stream = ByteArrayInputStream(data.toByteArray()), + format = targetTestFormat, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "test-stream", + ) + + val properties = + IngestRequestProperties( + format = targetTestFormat, + enableTracking = true, + ) + + try { + val ingestionResponse = + managedClient.submitManagedIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = targetTestFormat, + ingestProperties = properties, + ) + + kotlinx.coroutines.delay(5000) + + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | where deviceId == '$deviceId' | summarize count=count() by deviceId", + ) + .primaryResults + assertNotNull( + ingestionResponse, + "IngestionOperation should not be null", + ) + assertNotNull(results, "Query results should not be null") + results.next() + val count: Long = results.getLong("count") + assertNotNull(count, "Count should not be null") + assert(count == 1L) { + "Expected 
1 record for $deviceId, but got $count"
            }
            logger.debug("{} verified successfully", testName)
        } catch (e: ConnectException) {
            assumeTrue(
                false,
                "Skipping test: Unable to connect to test cluster: ${e.message}",
            )
        }
    }

    private fun directDataTestParameters(): Stream<Arguments> {
        val directDataId = UUID.randomUUID().toString()
        val directData =
            """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$directDataId","messageId": "test-message-1","temperature": 25.5,"humidity": 60.0}"""
        return Stream.of(
            Arguments.of("DirectData-SingleRow", directData, directDataId),
        )
    }

    /** Test managed streaming with multiple sources (file and stream) */
    @ParameterizedTest(
        name =
            "[ManagedStreaming-LocalSource] {index} => SourceType={0}, TestName={1}",
    )
    @CsvSource(
        "file,ManagedStreaming-FileSource,SampleFileSource.json",
        "stream,ManagedStreaming-StreamSource,SampleStreamSource.json",
    )
    fun `test managed streaming with multiple sources`(
        sourceType: String,
        testName: String,
        fileName: String,
    ) = runBlocking {
        logger.info("Starting multiple sources test: $testName")

        // Download test data
        val deviceDataUrl =
            "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json"
        val deviceData = java.net.URL(deviceDataUrl).readText()
        val targetFormat = Format.multijson

        val source: AbstractSourceInfo =
            when (sourceType) {
                "file" -> {
                    val tempFile = Files.createTempFile(fileName, null)
                    Files.write(tempFile, deviceData.toByteArray())
                    FileSourceInfo(
                        path = tempFile,
                        format = targetFormat,
                        compressionType = CompressionType.NONE,
                        name = fileName,
                        sourceId = UUID.randomUUID(),
                    )
                        .also {
                            Runtime.getRuntime()
                                .addShutdownHook(
                                    Thread {
                                        Files.deleteIfExists(
                                            tempFile,
                                        )
                                    },
                                )
                        }
                }
                "stream" ->
                    StreamSourceInfo(
                        stream =
                            ByteArrayInputStream(
                                deviceData.toByteArray(),
                            ),
                        format = targetFormat,
                        sourceCompression = CompressionType.NONE,
                        sourceId = UUID.randomUUID(),
                        name = fileName,
                    )
                else -> error("Unknown sourceType: $sourceType")
            }

        val managedClient =
            ManagedStreamingIngestClient(
                clusterUrl = engineEndpoint,
                tokenCredential = tokenProvider,
                managedStreamingPolicy =
                    DefaultManagedStreamingPolicy(),
                skipSecurityChecks = true,
            )

        val properties =
            IngestRequestProperties(
                format = targetFormat,
                enableTracking = true,
            )

        val ingestionResponse =
            managedClient.submitManagedIngestion(
                database = database,
                table = targetTable,
                sources = listOf(source),
                format = targetFormat,
                ingestProperties = properties,
            )

        assertNotNull(
            ingestionResponse,
            "IngestionOperation should not be null",
        )
        assertNotNull(
            ingestionResponse.ingestionOperationId,
            "Operation ID should not be null",
        )

        // If it used queued ingestion, poll for status
        if (!ingestionResponse.ingestionOperationId.startsWith("managed-")) {
            val finalStatus =
                managedClient.pollUntilCompletion(
                    database = database,
                    table = targetTable,
                    operationId =
                        ingestionResponse.ingestionOperationId,
                    pollingInterval = Duration.parse("PT5S"),
                    timeout = Duration.parse("PT5M"),
                )

            logger.info(
                "{} ingestion completed with final status: {}",
                testName,
                finalStatus.status,
            )

            assert(
                finalStatus.details?.any {
                    it.status == BlobStatus.Status.Succeeded
                } == true,
            ) {
                "Expected at least one successful ingestion for $testName"
            }
        }
    }

    /** Test managed streaming with custom policy */
    @ParameterizedTest(
        name =
"[ManagedStreaming-CustomPolicy] {index} => TestName={0}", + ) + @CsvSource( + "CustomPolicy-ContinueWhenUnavailable,true,1.0", + "CustomPolicy-ReducedSizeLimit,false,0.5", + ) + fun `test managed streaming with custom policy`( + testName: String, + continueWhenUnavailable: Boolean, + dataSizeFactor: Double, + ) = runBlocking { + logger.info("Starting custom policy test: $testName") + + val customPolicy = + DefaultManagedStreamingPolicy( + continueWhenStreamingIngestionUnavailable = + continueWhenUnavailable, + dataSizeFactor = dataSizeFactor, + ) + + val managedClient = + ManagedStreamingIngestClient( + clusterUrl = engineEndpoint, + tokenCredential = tokenProvider, + managedStreamingPolicy = customPolicy, + skipSecurityChecks = true, + ) + + val testData = randomRow + val source = + StreamSourceInfo( + stream = ByteArrayInputStream(testData.toByteArray()), + format = targetTestFormat, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "test-custom-policy", + ) + + val properties = + IngestRequestProperties( + format = targetTestFormat, + enableTracking = true, + ) + + try { + val ingestionResponse = + managedClient.submitManagedIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = targetTestFormat, + ingestProperties = properties, + ) + + assertNotNull( + ingestionResponse, + "Ingestion response should not be null", + ) + + // Verify data was ingested (either via streaming or queued) + kotlinx.coroutines.delay(5000) + + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | where deviceId == '$targetUuid' | summarize count=count()", + ) + .primaryResults + + assertNotNull(results, "Query results should not be null") + if (results.next()) { + val count: Long = results.getLong("count") + logger.info("{} ingested {} records", testName, count) + // We verify data was ingested regardless of method + assert(count > 0) { + "Expected data to be ingested with custom policy" + } + } + } catch (e: ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.message}", + ) + } + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt index 4c267d7bd..bcf16cb83 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt @@ -2,21 +2,33 @@ // Licensed under the MIT License. 
package com.microsoft.azure.kusto.ingest.v2 +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestionClientBuilder +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.ColumnMapping import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.InlineIngestionMapping import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.TransformationMethod import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import com.microsoft.azure.kusto.ingest.v2.source.FileSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo import kotlinx.coroutines.runBlocking import kotlinx.serialization.json.Json import org.junit.jupiter.api.Assumptions.assumeTrue +import org.junit.jupiter.api.Test import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.parallel.Execution import org.junit.jupiter.api.parallel.ExecutionMode +import org.junit.jupiter.api.parallel.ResourceLock import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource +import java.io.ByteArrayInputStream import java.net.ConnectException +import java.nio.file.Files +import java.util.UUID import kotlin.test.assertNotNull import kotlin.time.Duration @@ -25,7 +37,158 @@ import kotlin.time.Duration class QueuedIngestionClientTest : IngestV2TestBase(QueuedIngestionClientTest::class.java) { + @Test + fun `test builder with optional parameters`() { + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withClientDetails("TestClient", "1.0") + .withMaxConcurrency(10) + .build() + + assertNotNull(client, "Client should not be null") + } + + @Test + fun `test builder with connector client details`() { + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withConnectorClientDetails( + name = "TestConnector", + version = "2.0", + appName = "MyApp", + appVersion = "1.5", + additionalFields = + mapOf( + "JobId" to "job-123", + "RunId" to "run-456", + ), + ) + .build() + + assertNotNull(client, "Client should not be null") + } + + @Test + fun `test builder with connector client details and user`() { + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withConnectorClientDetails( + name = "TestConnector", + version = "2.0", + sendUser = true, + overrideUser = "test-user@example.com", + ) + .build() + + assertNotNull(client, "Client should not be null") + } + + @Test + @ResourceLock("blob-ingestion") + fun `test queued ingestion with builder pattern`(): Unit = runBlocking { + logger.info("Starting builder pattern test") + + val queuedIngestionClient: IngestClient = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + val blobUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" + val testSources = listOf(BlobSourceInfo(blobUrl)) + val properties = + IngestRequestProperties( + format = targetTestFormat, + ingestionMappingReference = "${targetTable}_mapping", + enableTracking = true, + ) + + try { + val 
ingestionResponse = + queuedIngestionClient.submitIngestion( + database = database, + table = targetTable, + sources = testSources, + format = targetTestFormat, + ingestProperties = properties, + ) + + logger.info( + "Builder pattern test: Submitted queued ingestion with operation ID: {}", + ingestionResponse.ingestionOperationId, + ) + assertNotNull( + ingestionResponse, + "IngestionOperation should not be null", + ) + assertNotNull( + ingestionResponse.ingestionOperationId, + "Operation ID should not be null", + ) + + val finalStatus = + (queuedIngestionClient as QueuedIngestionClient) + .pollUntilCompletion( + database = database, + table = targetTable, + operationId = + ingestionResponse + .ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + logger.info( + "Builder pattern test: Ingestion completed with final status: {}", + finalStatus.status, + ) + + if (finalStatus.details?.isNotEmpty() == true) { + val succeededCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Succeeded + } + val failedCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Failed + } + logger.info( + "Builder pattern test: Succeeded: {}, Failed: {}", + succeededCount, + failedCount, + ) + + assert(succeededCount > 0 || failedCount > 0) { + "Expected at least some blobs to be processed" + } + } else { + logger.info( + "Builder pattern test: No details available, but operation was submitted successfully", + ) + } + } catch (e: ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.message}", + ) + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.cause?.message}", + ) + } else { + throw e + } + } + } + @ParameterizedTest(name = "[QueuedIngestion] {index} => TestName ={0}") + @ResourceLock("blob-ingestion") @CsvSource( // Single JSON blob, no mapping "QueuedIngestion-NoMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,false,0", @@ -47,14 +210,13 @@ class QueuedIngestionClientTest : ): Unit = runBlocking { // Skip test if no DM_CONNECTION_STRING is set logger.info("Starting test: $testName") - val queuedIngestionClient = + val queuedIngestionClient: IngestClient = QueuedIngestionClient( dmUrl = dmEndpoint, tokenCredential = tokenProvider, skipSecurityChecks = true, ) - val testBlobUrls = listOf(blobUrl) - val testBlobSources = testBlobUrls.map { url -> BlobSourceInfo(url) } + val testSources = listOf(BlobSourceInfo(blobUrl)) val properties = if (useMappingReference) { @@ -129,10 +291,10 @@ class QueuedIngestionClientTest : try { // Test successful ingestion submission val ingestionResponse = - queuedIngestionClient.submitQueuedIngestion( + queuedIngestionClient.submitIngestion( database = database, table = targetTable, - blobSources = testBlobSources, + sources = testSources, format = targetTestFormat, ingestProperties = properties, ) @@ -156,16 +318,18 @@ class QueuedIngestionClientTest : ) val finalStatus = - queuedIngestionClient.pollUntilCompletion( - database = database, - table = targetTable, - operationId = - ingestionResponse.ingestionOperationId, - // Poll every 5 seconds for testing - pollingInterval = Duration.parse("PT5S"), - // 5 minute timeout for testing - timeout = Duration.parse("PT5M"), - ) + (queuedIngestionClient as QueuedIngestionClient) + .pollUntilCompletion( + database = database, + table = targetTable, + operationId = + 
ingestionResponse + .ingestionOperationId, + // Poll every 5 seconds for testing + pollingInterval = Duration.parse("PT5S"), + // 5 minute timeout for testing + timeout = Duration.parse("PT5M"), + ) logger.info( "Ingestion completed with final status: {}", @@ -256,5 +420,1443 @@ class QueuedIngestionClientTest : } } } + + private fun createTestStreamSource( + sizeInBytes: Int, + name: String, + ): StreamSourceInfo { + val jsonLine = + """{"testField":"value","size":$sizeInBytes,"name":"$name"}""" + + "\n" + val jsonLineBytes = jsonLine.toByteArray() + + val numLines = (sizeInBytes / jsonLineBytes.size).coerceAtLeast(1) + val data = ByteArray(numLines * jsonLineBytes.size) + + for (i in 0 until numLines) { + System.arraycopy( + jsonLineBytes, + 0, + data, + i * jsonLineBytes.size, + jsonLineBytes.size, + ) + } + + return StreamSourceInfo( + stream = ByteArrayInputStream(data), + format = Format.multijson, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = name, + ) + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - single small file upload`() = runBlocking { + logger.info("E2E: Testing single upload with small file") + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + val source = createTestStreamSource(1024, "e2e_single_small.json") + + try { + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.multijson, + ingestProperties = + IngestRequestProperties( + format = Format.multijson, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: Single small file submitted: ${response.ingestionOperationId}", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + assert(succeededCount > 0) { "Expected successful ingestion" } + logger.info("E2E: Single small file upload completed successfully") + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - single large file upload`() = runBlocking { + logger.info("E2E: Testing single upload with large file (10MB)") + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + val source = + createTestStreamSource( + 10 * 1024 * 1024, + "e2e_single_large.json", + ) + + try { + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.multijson, + ingestProperties = + IngestRequestProperties( + format = Format.multijson, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: Large file submitted: ${response.ingestionOperationId}", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val 
succeededCount =
                finalStatus.details?.count {
                    it.status == BlobStatus.Status.Succeeded
                } ?: 0
            assert(succeededCount > 0) {
                "Expected successful large file ingestion"
            }
            logger.info("E2E: Large file upload completed successfully")
        } catch (e: ConnectException) {
            assumeTrue(false, "Skipping test: ${e.message}")
        } catch (e: Exception) {
            if (e.cause is ConnectException) {
                assumeTrue(false, "Skipping test: ${e.cause?.message}")
            } else {
                throw e
            }
        }
    }

    @Test
    @ResourceLock("blob-ingestion")
    fun `E2E - batch upload multiple files`() = runBlocking {
        logger.info("E2E: Testing batch upload with multiple files")

        val client =
            QueuedIngestionClientBuilder.create(dmEndpoint)
                .withAuthentication(tokenProvider)
                .skipSecurityChecks()
                .build()

        val sources =
            (1..5).map { index ->
                createTestStreamSource(
                    1024 * index,
                    "e2e_batch_$index.json",
                )
            }

        try {
            val response =
                client.submitIngestion(
                    database = database,
                    table = targetTable,
                    sources = sources,
                    format = Format.multijson,
                    ingestProperties =
                        IngestRequestProperties(
                            format = Format.multijson,
                            enableTracking = true,
                        ),
                )

            assertNotNull(response.ingestionOperationId)
            logger.info(
                "E2E: Batch submitted: ${response.ingestionOperationId}",
            )

            val finalStatus =
                client.pollUntilCompletion(
                    database = database,
                    table = targetTable,
                    operationId = response.ingestionOperationId,
                    pollingInterval = Duration.parse("PT5S"),
                    timeout = Duration.parse("PT5M"),
                )

            val succeededCount =
                finalStatus.details?.count {
                    it.status == BlobStatus.Status.Succeeded
                } ?: 0
            val failedCount =
                finalStatus.details?.count {
                    it.status == BlobStatus.Status.Failed
                } ?: 0

            logger.info(
                "E2E: Batch results - Success: $succeededCount, Failure: $failedCount",
            )
            assert(succeededCount == sources.size) {
                "Expected successful uploads"
            }
        } catch (e: ConnectException) {
            assumeTrue(false, "Skipping test: ${e.message}")
        } catch (e: Exception) {
            if (e.cause is ConnectException) {
                assumeTrue(false, "Skipping test: ${e.cause?.message}")
            } else {
                throw e
            }
        }
    }

    @Test
    @ResourceLock("blob-ingestion")
    fun `E2E - parallel processing with maxConcurrency`() = runBlocking {
        logger.info("E2E: Testing parallel processing with maxConcurrency=5")

        val client =
            QueuedIngestionClientBuilder.create(dmEndpoint)
                .withAuthentication(tokenProvider)
                .withMaxConcurrency(5)
                .skipSecurityChecks()
                .build()

        val sources =
            (1..10).map { index ->
                createTestStreamSource(
                    512 * 1024,
                    "e2e_parallel_$index.json",
                )
            }

        try {
            val startTime = System.currentTimeMillis()

            val response =
                client.submitIngestion(
                    database = database,
                    table = targetTable,
                    sources = sources,
                    format = Format.multijson,
                    ingestProperties =
                        IngestRequestProperties(
                            format = Format.multijson,
                            enableTracking = true,
                        ),
                )

            val uploadDuration = System.currentTimeMillis() - startTime

            assertNotNull(response.ingestionOperationId)
            logger.info(
                "E2E: Parallel upload submitted in ${uploadDuration}ms: ${response.ingestionOperationId}",
            )

            val finalStatus =
                client.pollUntilCompletion(
                    database = database,
                    table = targetTable,
                    operationId = response.ingestionOperationId,
                    pollingInterval = Duration.parse("PT5S"),
                    timeout = Duration.parse("PT5M"),
                )

            val succeededCount =
                finalStatus.details?.count {
                    it.status == BlobStatus.Status.Succeeded
                } ?: 0
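`withMaxConcurrency(5)` caps how many of the ten 512 KB sources above are uploaded at once. A minimal sketch of how such a cap is typically enforced with coroutines, under the assumption of a `Semaphore`-bounded `async` fan-out (this says nothing about the builder's actual internals; the helper name is hypothetical):

```kotlin
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.sync.Semaphore
import kotlinx.coroutines.sync.withPermit

// Hypothetical helper: runs one upload per source, with at most
// maxConcurrency uploads in flight at any moment.
suspend fun <T, R> uploadAllBounded(
    sources: List<T>,
    maxConcurrency: Int,
    upload: suspend (T) -> R,
): List<R> = coroutineScope {
    val permits = Semaphore(maxConcurrency)
    sources
        .map { source -> async { permits.withPermit { upload(source) } } }
        .awaitAll()
}
```

This shape explains the timing the test logs: total wall time is roughly the slowest "wave" of uploads rather than the sum of all ten.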
            logger.info(
                "E2E: Parallel upload: $succeededCount/${sources.size} succeeded",
            )
            logger.info(
                "E2E: Average time per upload: ${uploadDuration / sources.size}ms",
            )

            assert(succeededCount == sources.size) {
                "Expected parallel uploads to succeed"
            }
        } catch (e: ConnectException) {
            assumeTrue(false, "Skipping test: ${e.message}")
        } catch (e: Exception) {
            if (e.cause is ConnectException) {
                assumeTrue(false, "Skipping test: ${e.cause?.message}")
            } else {
                throw e
            }
        }
    }

    @Test
    @ResourceLock("blob-ingestion")
    fun `E2E - size validation within limit`() = runBlocking {
        logger.info("E2E: Testing size validation with file within limit")

        val client =
            QueuedIngestionClientBuilder.create(dmEndpoint)
                .withAuthentication(tokenProvider)
                // 10MB limit
                .withMaxDataSize(10L * 1024 * 1024)
                .skipSecurityChecks()
                .build()

        val source =
            createTestStreamSource(5 * 1024 * 1024, "e2e_size_valid.json")

        try {
            val response =
                client.submitIngestion(
                    database = database,
                    table = targetTable,
                    sources = listOf(source),
                    format = Format.multijson,
                    ingestProperties =
                        IngestRequestProperties(
                            format = Format.multijson,
                            enableTracking = true,
                        ),
                )

            assertNotNull(response.ingestionOperationId)
            logger.info("E2E: Size validation passed for file within limit")

            val finalStatus =
                client.pollUntilCompletion(
                    database = database,
                    table = targetTable,
                    operationId = response.ingestionOperationId,
                    pollingInterval = Duration.parse("PT5S"),
                    timeout = Duration.parse("PT5M"),
                )

            val succeededCount =
                finalStatus.details?.count {
                    it.status == BlobStatus.Status.Succeeded
                } ?: 0
            assert(succeededCount > 0) {
                "Expected successful upload for file within size limit"
            }
        } catch (e: ConnectException) {
            assumeTrue(false, "Skipping test: ${e.message}")
        } catch (e: Exception) {
            if (e.cause is ConnectException) {
                assumeTrue(false, "Skipping test: ${e.cause?.message}")
            } else {
                throw e
            }
        }
    }

    @Test
    @ResourceLock("blob-ingestion")
    fun `E2E - size validation exceeds limit`() = runBlocking {
        logger.info("E2E: Testing size validation with file exceeding limit")

        val client =
            QueuedIngestionClientBuilder.create(dmEndpoint)
                .withAuthentication(tokenProvider)
                // 1MB limit
                .withMaxDataSize(1L * 1024 * 1024)
                .skipSecurityChecks()
                .build()

        val source =
            createTestStreamSource(2 * 1024 * 1024, "e2e_size_exceed.json")

        try {
            try {
                client.submitIngestion(
                    database = database,
                    table = targetTable,
                    sources = listOf(source),
                    format = Format.multijson,
                    ingestProperties =
                        IngestRequestProperties(
                            format = Format.multijson,
                            enableTracking = true,
                        ),
                )
                throw AssertionError(
                    "Expected size validation to reject the file",
                )
            } catch (e: IngestException) {
                logger.info(
                    "E2E: Size validation correctly rejected: ${e.message}",
                )
            }
            logger.info(
                "E2E: Size validation correctly rejected file exceeding limit",
            )
        } catch (e: AssertionError) {
            // Rethrow: swallowing this sentinel would turn a missing size
            // rejection into a passing test.
            throw e
        } catch (e: ConnectException) {
            assumeTrue(false, "Skipping test: ${e.message}")
        } catch (e: Exception) {
            if (e.cause is ConnectException) {
                assumeTrue(false, "Skipping test: ${e.cause?.message}")
            } else if (e.message?.contains("size", ignoreCase = true) == true) {
                logger.info(
                    "E2E: Size validation correctly rejected: ${e.message}",
                )
            } else {
                throw e
            }
        }
    }
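The two tests above pin the client-side size gate from opposite directions: `withMaxDataSize` should reject an oversized source before any bytes are uploaded, while `withIgnoreFileSize(true)` (exercised next) bypasses the gate entirely. A minimal sketch of such a pre-upload check, assuming illustrative names and exception type rather than the SDK's actual internals:

```kotlin
// Hypothetical pre-upload size gate mirroring withMaxDataSize / withIgnoreFileSize.
// Function name and exception type are illustrative, not taken from the SDK.
fun validateSourceSize(
    sizeBytes: Long,
    maxDataSizeBytes: Long?,
    ignoreFileSize: Boolean,
) {
    // No configured limit, or the limit is explicitly ignored: nothing to check.
    if (ignoreFileSize || maxDataSizeBytes == null) return
    if (sizeBytes > maxDataSizeBytes) {
        throw IllegalArgumentException(
            "Source size $sizeBytes exceeds configured limit of $maxDataSizeBytes bytes",
        )
    }
}
```

    @Test
    @ResourceLock("blob-ingestion")
    fun `E2E - ignore size limit flag`() = runBlocking {
        logger.info("E2E: 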
Testing size validation with ignore limit flag") + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + // 1MB limit + .withMaxDataSize(1L * 1024 * 1024) + // But ignore it + .withIgnoreFileSize(true) + .skipSecurityChecks() + .build() + + val source = + createTestStreamSource(2 * 1024 * 1024, "e2e_size_ignore.json") + + try { + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.multijson, + ingestProperties = + IngestRequestProperties( + format = Format.multijson, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: Size limit successfully bypassed with ignoreFileSize flag", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + assert(succeededCount > 0) { + "Expected successful upload with ignore flag" + } + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - combined all features scenario`() = runBlocking { + logger.info( + "E2E: Testing combined features (parallel + size validation + ignore flag)", + ) + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withMaxConcurrency(8) + // 10MB standard limit + .withMaxDataSize(10L * 1024 * 1024) + .withIgnoreFileSize(true) + .skipSecurityChecks() + .build() + + // Mix of file sizes: small (1-5MB), medium (5-10MB), large (10-20MB) + val sources = mutableListOf() + + // small files + (1..7).forEach { i -> + sources.add( + createTestStreamSource( + 1024 * 1024 * (1 + (i % 5)), + "e2e_combined_small_$i.json", + ), + ) + } + + // medium files + (1..2).forEach { i -> + sources.add( + createTestStreamSource( + 1024 * 1024 * (5 + (i % 5)), + "e2e_combined_medium_$i.json", + ), + ) + } + + // large files (need ignore flag) + sources.add( + createTestStreamSource( + 15 * 1024 * 1024, + "e2e_combined_large_1.json", + ), + ) + + logger.info( + "E2E: Testing combined batch: ${sources.size} files, sizes 1MB-15MB", + ) + + try { + val startTime = System.currentTimeMillis() + + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = sources, + format = Format.multijson, + ingestProperties = + IngestRequestProperties( + format = Format.multijson, + enableTracking = true, + ), + ) + + val uploadDuration = System.currentTimeMillis() - startTime + + assertNotNull(response.ingestionOperationId) + logger.info("E2E: combined batch uploaded in ${uploadDuration}ms") + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT10S"), + timeout = Duration.parse("PT15M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + + logger.info( + "E2E: combined scenario: $succeededCount/${sources.size} succeeded", + ) + assert(succeededCount == sources.size) { + "Combined scenario: ingestion 
succeeded" + } + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `test parallel upload with multiple files`() = runBlocking { + logger.info("Starting parallel upload test with multiple files") + + val deviceDataUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" + val deviceData = java.net.URL(deviceDataUrl).readText() + val targetFormat = Format.multijson + + val sources = + (1..5).map { index -> + StreamSourceInfo( + stream = + ByteArrayInputStream( + deviceData.toByteArray(), + ), + format = targetFormat, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "parallel_test_$index.json", + ) + } + + val queuedIngestionClient: IngestClient = + QueuedIngestionClient( + dmUrl = dmEndpoint, + tokenCredential = tokenProvider, + skipSecurityChecks = true, + ) + + val properties = + IngestRequestProperties( + format = targetFormat, + enableTracking = true, + ) + + try { + val startTime = System.currentTimeMillis() + + val ingestionResponse = + queuedIngestionClient.submitIngestion( + database = database, + table = targetTable, + sources = sources, + format = targetFormat, + ingestProperties = properties, + ) + + val uploadTime = System.currentTimeMillis() - startTime + + logger.info( + "Parallel upload test: Submitted {} files in {}ms with operation ID: {}", + sources.size, + uploadTime, + ingestionResponse.ingestionOperationId, + ) + + assertNotNull( + ingestionResponse, + "IngestionOperation should not be null", + ) + assertNotNull( + ingestionResponse.ingestionOperationId, + "Operation ID should not be null", + ) + + val finalStatus = + (queuedIngestionClient as QueuedIngestionClient) + .pollUntilCompletion( + database = database, + table = targetTable, + operationId = + ingestionResponse + .ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + logger.info( + "Parallel upload test: Ingestion completed with final status: {}", + finalStatus.status, + ) + + if (finalStatus.details?.isNotEmpty() == true) { + val succeededCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Succeeded + } + val failedCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Failed + } + + logger.info( + "Parallel upload results - Total: {}, Succeeded: {}, Failed: {}", + finalStatus.details.size, + succeededCount, + failedCount, + ) + + assert(succeededCount > 0) { + "Expected at least some successful uploads in parallel test" + } + + logger.info( + "Parallel upload performance: {} files uploaded in {}ms (avg {}ms per file)", + sources.size, + uploadTime, + uploadTime / sources.size, + ) + } + } catch (e: ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.message}", + ) + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue( + false, + "Skipping test: Unable to connect to test cluster: ${e.cause?.message}", + ) + } else { + throw e + } + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - format mismatch rejection - mixed formats in batch`() = + runBlocking { + logger.info( + "E2E: Testing format mismatch rejection with mixed format sources", + ) + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + 
.withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + // Create JSON content + val jsonContent = + """{"name":"test","value":123,"timestamp":"2024-01-01"}""" + + // Create CSV content + val csvContent = + """name,value,timestamp +test,123,2024-01-01 +test2,456,2024-01-02""" + + // Create sources with different formats + val sources = + listOf( + // JSON source + StreamSourceInfo( + stream = + ByteArrayInputStream( + jsonContent + .toByteArray(), + ), + format = Format.json, + sourceCompression = + CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "format_test_json.json", + ), + // CSV source - This will cause format mismatch + StreamSourceInfo( + stream = + ByteArrayInputStream( + csvContent.toByteArray(), + ), + format = Format.csv, + sourceCompression = + CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "format_test_csv.csv", + ), + // Another JSON source + StreamSourceInfo( + stream = + ByteArrayInputStream( + jsonContent + .toByteArray(), + ), + format = Format.json, + sourceCompression = + CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "format_test_json2.json", + ), + ) + + try { + logger.info( + "Uploading ${sources.size} sources with mixed formats (JSON and CSV)", + ) + + // Submit ingestion declaring all as JSON (but one is actually CSV) + // Upload will succeed, but ingestion will fail on server side + // Declaring ALL as JSON + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = sources, + format = Format.json, + ingestProperties = + IngestRequestProperties( + format = Format.json, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: Mixed format batch submitted successfully: ${response.ingestionOperationId}", + ) + logger.info( + "E2E: Uploads succeeded - format mismatch will be detected server-side", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + val failedCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Failed + } ?: 0 + + logger.info( + "E2E: Format mismatch results - Success: $succeededCount, Failed: $failedCount", + ) + + if (failedCount > 0) { + finalStatus.details + ?.filter { + it.status == BlobStatus.Status.Failed + } + ?.forEach { failedBlob -> + logger.error( + "E2E: Blob ingestion failed - sourceId: ${failedBlob.sourceId}, " + + "errorCode: ${failedBlob.errorCode}, " + + "failureStatus: ${failedBlob.failureStatus?.value}, " + + "details: ${failedBlob.details}", + ) + } + } + + // We expect at least one failure due to format mismatch + // The CSV file should fail when server tries to parse it as JSON + assert(failedCount >= 1) { + "Expected at least one failure due to format mismatch (CSV parsed as JSON), " + + "but got: succeeded=$succeededCount, failed=$failedCount" + } + + logger.info( + "E2E: Format mismatch correctly detected by Kusto server during ingestion processing", + ) + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - compression format test 
- GZIP pre-compressed file`() = + runBlocking { + logger.info( + "E2E: Testing GZIP pre-compressed file ingestion (NO double compression)", + ) + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + // Create test JSON data matching table schema + val jsonData = + """{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000002","temperature":25.5,"humidity":60.0}""" + + // Create a GZIP compressed file + val tempFile = Files.createTempFile("test_gzip", ".json.gz") + java.util.zip + .GZIPOutputStream(Files.newOutputStream(tempFile)) + .use { gzipOut -> + gzipOut.write(jsonData.toByteArray()) + } + + // Already GZIP compressed + val source = + FileSourceInfo( + path = tempFile, + format = Format.multijson, + compressionType = CompressionType.GZIP, + name = "pre_compressed.json.gz", + sourceId = UUID.randomUUID(), + ) + + try { + logger.info( + "Uploading GZIP pre-compressed file - already compressed, will NOT be compressed again during upload", + ) + + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.multijson, + ingestProperties = + IngestRequestProperties( + format = Format.multijson, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: GZIP file submitted (pre-compressed, no additional compression): ${response.ingestionOperationId}", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + val failedCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Failed + } ?: 0 + + logger.info( + "E2E: GZIP pre-compressed test - Success: $succeededCount, Failed: $failedCount", + ) + + if (failedCount > 0) { + finalStatus.details + ?.filter { + it.status == BlobStatus.Status.Failed + } + ?.forEach { failedBlob -> + logger.error( + "Failed blob details - sourceId: ${failedBlob.sourceId}, " + + "errorCode: ${failedBlob.errorCode}, " + + "details: ${failedBlob.details}", + ) + } + } + + // GZIP file is already compressed, so it should NOT be compressed again during + // upload + logger.info( + "E2E: GZIP test completed - verifies NO double compression for pre-compressed files", + ) + assert(succeededCount > 0) { + "Expected successful GZIP ingestion without double compression. 
" + + "Succeeded: $succeededCount, Failed: $failedCount" + } + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } finally { + Files.deleteIfExists(tempFile) + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - compression format test - Parquet format with compression`() = + runBlocking { + logger.info("E2E: Testing Parquet format file ingestion") + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + // Parquet files are internally compressed, and the upload will compress again + val parquetFile = + this::class + .java + .classLoader + .getResource("compression/sample.parquet") + + if (parquetFile == null) { + logger.warn( + "sample.parquet not found in test resources, skipping Parquet test", + ) + assumeTrue( + false, + "sample.parquet not found - skipping test", + ) + return@runBlocking + } + + val tempFile = Files.createTempFile("test_parquet", ".parquet") + Files.copy( + parquetFile.openStream(), + tempFile, + java.nio.file.StandardCopyOption.REPLACE_EXISTING, + ) + + // Parquet has internal Snappy compression, no transport compression needed + val source = + FileSourceInfo( + path = tempFile, + format = Format.parquet, + compressionType = CompressionType.NONE, + name = "test.parquet", + sourceId = UUID.randomUUID(), + ) + + try { + logger.info( + "Uploading Parquet file - binary format with internal compression, will NOT be compressed during upload", + ) + + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.parquet, + ingestProperties = + IngestRequestProperties( + format = Format.parquet, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: Parquet file submitted (binary format, no additional compression): ${response.ingestionOperationId}", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + val failedCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Failed + } ?: 0 + + logger.info( + "E2E: Parquet binary format test - Success: $succeededCount, Failed: $failedCount", + ) + + // Log failures for debugging + if (failedCount > 0) { + finalStatus.details + ?.filter { + it.status == BlobStatus.Status.Failed + } + ?.forEach { failedBlob -> + logger.error( + "Failed blob details - sourceId: ${failedBlob.sourceId}, " + + "errorCode: ${failedBlob.errorCode}, " + + "details: ${failedBlob.details}", + ) + } + } + + // Parquet format has internal compression, upload should NOT compress again + // (fixed!) 
+ logger.info( + "E2E: Parquet test completed - verifies NO double compression for binary formats", + ) + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + logger.warn( + "Parquet test failed (may be due to schema mismatch): ${e.message}", + ) + // Don't fail the test - schema mismatch is expected with sample Parquet + // file + } + } finally { + Files.deleteIfExists(tempFile) + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - compression format test - AVRO format with compression`() = + runBlocking { + logger.info("E2E: Testing AVRO format file ingestion") + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + // AVRO files are internally compressed, similar to Parquet + val avroFile = + this::class + .java + .classLoader + .getResource("compression/sample.avro") + + if (avroFile == null) { + logger.warn( + "sample.avro not found in test resources, skipping AVRO test", + ) + assumeTrue(false, "sample.avro not found - skipping test") + return@runBlocking + } + + val tempFile = Files.createTempFile("test_avro", ".avro") + Files.copy( + avroFile.openStream(), + tempFile, + java.nio.file.StandardCopyOption.REPLACE_EXISTING, + ) + + // AVRO has internal Deflate compression + val source = + FileSourceInfo( + path = tempFile, + format = Format.avro, + compressionType = CompressionType.NONE, + name = "test.avro", + sourceId = UUID.randomUUID(), + ) + + try { + logger.info( + "Uploading AVRO file - binary format with internal compression, will NOT be compressed during upload", + ) + + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.avro, + ingestProperties = + IngestRequestProperties( + format = Format.avro, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: AVRO file submitted (binary format, no additional compression): ${response.ingestionOperationId}", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + val failedCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Failed + } ?: 0 + + logger.info( + "E2E: AVRO binary format test - Success: $succeededCount, Failed: $failedCount", + ) + + if (failedCount > 0) { + finalStatus.details + ?.filter { + it.status == BlobStatus.Status.Failed + } + ?.forEach { failedBlob -> + logger.error( + "Failed blob details - sourceId: ${failedBlob.sourceId}, " + + "errorCode: ${failedBlob.errorCode}, " + + "details: ${failedBlob.details}", + ) + } + } + + // AVRO format has internal compression, upload should NOT compress again + logger.info( + "E2E: AVRO test completed - verifies NO double compression for binary formats", + ) + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + logger.warn( + "AVRO test failed (may be due to schema mismatch): ${e.message}", + ) + } + } finally { + 
Files.deleteIfExists(tempFile) + } + } + + @Test + @ResourceLock("blob-ingestion") + fun `E2E - compression format test - JSON file gets compressed during upload`() = + runBlocking { + logger.info("E2E: Testing JSON file compression during upload") + + val client = + QueuedIngestionClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .build() + + // Create test JSON data matching table schema + val jsonData = + """{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000002","temperature":25.5,"humidity":60.0}""" + + val tempFile = Files.createTempFile("test_json", ".json") + Files.write(tempFile, jsonData.toByteArray()) + + // Not pre-compressed + val source = + FileSourceInfo( + path = tempFile, + format = Format.multijson, + compressionType = CompressionType.NONE, + name = "test_json.json", + sourceId = UUID.randomUUID(), + ) + + try { + logger.info( + "Uploading JSON file - will be compressed during blob upload", + ) + + val response = + client.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = Format.multijson, + ingestProperties = + IngestRequestProperties( + format = Format.multijson, + enableTracking = true, + ), + ) + + assertNotNull(response.ingestionOperationId) + logger.info( + "E2E: JSON file submitted (compressed during upload): ${response.ingestionOperationId}", + ) + + val finalStatus = + client.pollUntilCompletion( + database = database, + table = targetTable, + operationId = response.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + + val succeededCount = + finalStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + + logger.info( + "E2E: JSON compression test result - Success: $succeededCount", + ) + + // Uncompressed JSON gets compressed during upload + assert(succeededCount > 0) { + "Expected successful JSON ingestion with compression during upload" + } + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } finally { + Files.deleteIfExists(tempFile) + } + } + + @ParameterizedTest( + name = + "[QueuedIngestion-LocalSource] {index} => SourceType={0}, TestName={1}", + ) + @CsvSource( + "file,QueuedIngestion-FileSource,SampleFileSource.json", + "stream,QueuedIngestion-StreamSource,SampleStreamSource.json", + ) + fun `test queued ingestion with LocalSource`( + sourceType: String, + testName: String, + fileName: String, + ) = runBlocking { + logger.info("Starting LocalSource test: $testName") + val deviceDataUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" + val deviceData = java.net.URL(deviceDataUrl).readText() + val targetFormat = Format.multijson + val source: AbstractSourceInfo = + when (sourceType) { + "file" -> { + val tempFile = Files.createTempFile(fileName, null) + Files.write(tempFile, deviceData.toByteArray()) + FileSourceInfo( + path = tempFile, + format = targetFormat, + compressionType = CompressionType.NONE, + name = fileName, + sourceId = UUID.randomUUID(), + ) + .also { + Runtime.getRuntime() + .addShutdownHook( + Thread { + Files.deleteIfExists( + tempFile, + ) + }, + ) + } + } + "stream" -> + StreamSourceInfo( + stream = + ByteArrayInputStream( + deviceData.toByteArray(), + ), + format = 
targetFormat, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = fileName, + ) + else -> error("Unknown sourceType: $sourceType") + } + + val queuedIngestionClient: IngestClient = + QueuedIngestionClient( + dmUrl = dmEndpoint, + tokenCredential = tokenProvider, + skipSecurityChecks = true, + ) + val properties = + IngestRequestProperties( + format = targetFormat, + enableTracking = true, + ) + + val ingestionResponse = + queuedIngestionClient.submitIngestion( + database = database, + table = targetTable, + sources = listOf(source), + format = targetFormat, + ingestProperties = properties, + ) + logger.info( + "{}: Submitted queued ingestion with operation ID: {}", + testName, + ingestionResponse.ingestionOperationId, + ) + assertNotNull( + ingestionResponse, + "IngestionOperation should not be null", + ) + assertNotNull( + ingestionResponse.ingestionOperationId, + "Operation ID should not be null", + ) + val finalStatus = + (queuedIngestionClient as QueuedIngestionClient) + .pollUntilCompletion( + database = database, + table = targetTable, + operationId = + ingestionResponse.ingestionOperationId, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), + ) + logger.info( + "{} ingestion completed with final status: {}", + testName, + finalStatus.status, + ) + assert( + finalStatus.details?.any { + it.status == BlobStatus.Status.Succeeded + } == true, + ) { + "Expected at least one successful ingestion for $testName" + } + } } // https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index fd03e426a..cafcb5611 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -4,6 +4,9 @@ package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.assertThrows @@ -12,6 +15,8 @@ import org.junit.jupiter.api.parallel.ExecutionMode import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource +import java.io.ByteArrayInputStream +import java.net.ConnectException import java.util.UUID import java.util.stream.Stream import kotlin.test.assertNotNull @@ -23,7 +28,6 @@ class StreamingIngestClientTest : private val publicBlobUrl = "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" - private val targetUuid = UUID.randomUUID().toString() private val randomRow: String = """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$targetUuid","messageId": "7f316225-839a-4593-92b5-1812949279b3","temperature": 31.0301639051317,"humidity": 62.0791099602725}""" @@ -35,6 +39,7 @@ class StreamingIngestClientTest : "Direct ingest - success", engineEndpoint, // isException + // isUnreachableHost false, // isUnreachableHost false, @@ -50,7 +55,6 @@ 
class StreamingIngestClientTest : false, publicBlobUrl, ), - // Blob-based streaming - error case Arguments.of( "Blob based ingest- Invalid blob URL", engineEndpoint, @@ -63,6 +67,102 @@ class StreamingIngestClientTest : ) } + @ParameterizedTest(name = "{0}") + @MethodSource("testParameters") + fun `run streaming ingest test using builder pattern`( + testName: String, + cluster: String, + isException: Boolean, + isUnreachableHost: Boolean, + blobUrl: String?, + ) = runBlocking { + logger.info("Running streaming ingest builder test {}", testName) + + // Create client using builder + val client: IngestClient = + com.microsoft.azure.kusto.ingest.v2.builders + .StreamingIngestClientBuilder + .create(cluster) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .withClientDetails("BuilderStreamingE2ETest", "1.0") + .build() + + val ingestProps = IngestRequestProperties(format = targetTestFormat) + if (isException) { + if (blobUrl != null) { + logger.info( + "Testing error handling for invalid blob URL with builder: {}", + blobUrl, + ) + val exception = + assertThrows { + val sources = listOf(BlobSourceInfo(blobUrl)) + client.submitIngestion( + database = database, + table = targetTable, + sources = sources, + format = targetTestFormat, + ingestProperties = ingestProps, + ) + } + assertNotNull( + exception, + "Exception should not be null for invalid blob URL", + ) + logger.info( + "Expected exception caught (builder test): {}", + exception.message, + ) + assert(exception.failureCode != 0) { + "Expected non-zero failure code for invalid blob URL" + } + } + } else { + if (blobUrl != null) { + logger.info( + "Blob-based streaming ingestion with builder: {}", + blobUrl, + ) + + val sources = listOf(BlobSourceInfo(blobUrl)) + client.submitIngestion( + database = database, + table = targetTable, + sources = sources, + format = targetTestFormat, + ingestProperties = ingestProps, + ) + + logger.info( + "Blob-based streaming ingestion submitted successfully (builder)", + ) + + kotlinx.coroutines.delay(3000) + val results = + adminClusterClient + .executeQuery( + database, + "$targetTable | summarize count=count()", + ) + .primaryResults + + assertNotNull(results, "Query results should not be null") + results.next() + val count: Long = results.getLong("count") + assertNotNull(count, "Count should not be null") + assert(count > 0) { + "Expected records in table after builder streaming ingestion" + } + + logger.info( + "Builder streaming ingestion verified - {} records", + count, + ) + } + } + } + @ParameterizedTest(name = "{0}") @MethodSource("testParameters") fun `run streaming ingest test with various clusters`( @@ -73,23 +173,24 @@ class StreamingIngestClientTest : blobUrl: String?, ) = runBlocking { logger.info("Running streaming ingest test {}", testName) - val client = StreamingIngestClient(cluster, tokenProvider, true) + val client: IngestClient = + StreamingIngestClient(cluster, tokenProvider, true) val ingestProps = IngestRequestProperties(format = targetTestFormat) if (isException) { if (blobUrl != null) { logger.info( - "Testing error handling for invalid blob URL: {}", + "Testing error handling for invalid blob URL: {} (using interface method)", blobUrl, ) val exception = assertThrows { - client.submitStreamingIngestion( + val sources = listOf(BlobSourceInfo(blobUrl)) + client.submitIngestion( database = database, table = targetTable, - data = ByteArray(0), + sources = sources, format = targetTestFormat, ingestProperties = ingestProps, - blobUrl = blobUrl, ) } assertNotNull( @@ 
-105,7 +206,6 @@ class StreamingIngestClientTest : exception.failureCode, exception.isPermanent, ) - assert(exception.failureCode != 0) { "Expected non-zero failure code for invalid blob URL" } @@ -117,12 +217,21 @@ class StreamingIngestClientTest : val data = "col1,col2\nval1,val2".toByteArray() val exception = assertThrows { - client.submitStreamingIngestion( - database, - table, - data, - targetTestFormat, - ingestProps, + val streamSource = + StreamSourceInfo( + stream = ByteArrayInputStream(data), + format = targetTestFormat, + sourceCompression = + CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "error-test-stream", + ) + client.submitIngestion( + database = database, + table = table, + sources = listOf(streamSource), + format = targetTestFormat, + ingestProperties = ingestProps, ) } assertNotNull(exception, "Exception should not be null") @@ -140,21 +249,18 @@ class StreamingIngestClientTest : "Blob-based streaming ingestion with URL: {}", blobUrl, ) - - client.submitStreamingIngestion( + val sources = listOf(BlobSourceInfo(blobUrl)) + client.submitIngestion( database = database, table = targetTable, - // Ignored when blobUrl is provided - data = ByteArray(0), + sources = sources, format = targetTestFormat, ingestProperties = ingestProps, - blobUrl = blobUrl, ) logger.info( "Blob-based streaming ingestion submitted successfully", ) - kotlinx.coroutines.delay(3000) val results = adminClusterClient @@ -163,7 +269,6 @@ class StreamingIngestClientTest : "$targetTable | summarize count=count()", ) .primaryResults - assertNotNull(results, "Query results should not be null") results.next() val count: Long = results.getLong("count") @@ -178,13 +283,24 @@ class StreamingIngestClientTest : ) } else { logger.info("Direct streaming ingestion - success case") - client.submitStreamingIngestion( + val streamSource = + StreamSourceInfo( + stream = + ByteArrayInputStream( + randomRow.toByteArray(), + ), + format = targetTestFormat, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "direct-stream-$targetUuid", + ) + + client.submitIngestion( database = database, table = targetTable, - data = randomRow.toByteArray(), + sources = listOf(streamSource), format = targetTestFormat, ingestProperties = ingestProps, - blobUrl = null, ) val results = @@ -203,5 +319,7 @@ class StreamingIngestClientTest : } } } + + logger.info("Blob streaming test '{}' completed successfully", testName) } } diff --git a/ingest-v2/src/test/resources/compression/sample.avro b/ingest-v2/src/test/resources/compression/sample.avro new file mode 100644 index 0000000000000000000000000000000000000000..866b12c0e2348fbbd8d2a6dbd8d42a88647f582f GIT binary patch literal 414 zcmeZI%3@>@Nh~YM*GtY%NloTUNlnX1EJ+mu3l%44q~<0zu~aLSR2HNvSt%$Lr6%VW zr6}nrDCH&Qf<;17i%WvwqG_3_IVr_JmC*=2#U(|VdFjY{N-}eSx)O5>lxlSp@F`45 zEz3+!^-Lk84rp9)VmgMxl>E}9oKzH>Q*#SaixNvpi&9a|#8jG5nwy!DSyHJ~8(Yg@ z8pm68xN_A*=iisE-`S-k%(jg2vVoC-(M3}OmPG>0%-RNBp9~t9Vs%(VB!m{9=wM`# z(vdx2(mcPJZBuERN<_L(kCJe*!G@-Y=MTEG<_dCNn+a6*=8dGlh5`>}M$>wBhK_tE GbXx!)UyT(2 literal 0 HcmV?d00001 diff --git a/ingest-v2/src/test/resources/compression/sample.json b/ingest-v2/src/test/resources/compression/sample.json new file mode 100644 index 000000000..05acc4567 --- /dev/null +++ b/ingest-v2/src/test/resources/compression/sample.json @@ -0,0 +1,3 @@ +{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000001","temperature":25.5,"humidity":60.0} 
+{"timestamp":"2024-01-01T01:00:00Z","deviceId":"00000000-0000-0000-0000-000000000002","messageId":"00000000-0000-0000-0000-000000000002","temperature":26.3,"humidity":62.5} +{"timestamp":"2024-01-01T02:00:00Z","deviceId":"00000000-0000-0000-0000-000000000003","messageId":"00000000-0000-0000-0000-000000000003","temperature":24.8,"humidity":58.2} diff --git a/ingest-v2/src/test/resources/compression/sample.json.gz b/ingest-v2/src/test/resources/compression/sample.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc1a3461fb34da2fa539cb71e67fc44e829bf64a GIT binary patch literal 171 zcmV;c095}UiwFok2qtL&19M?*aBO8RYIARH0G-ak3WG2V2H?Aok+X(0wIY3k-FMop z%&_%P5Um3d-+g6x*l-)v@RL*i@GTI1kTJT@g9Rq8D^e+EBKyh(O6cU_uX5_r1#MG( zS=*@8NH0fs%c?{Py$mi+gMej&riA+xeCOi~wjrdoSbk#FQ7q?Ud53LKe_$ESKP(@` Z>U^x;VQXyPv8F{{izg7}cV`Cz0050yP0auR literal 0 HcmV?d00001 diff --git a/ingest-v2/src/test/resources/compression/sample.parquet b/ingest-v2/src/test/resources/compression/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0053d6d784963feec08205fbce71bf8c6a9b8f05 GIT binary patch literal 3795 zcmdT{&2Jh<6dxR$5XY6Av`av8B?~#om27tb;{-R6Jm9cEZEV&FyKE9!`(?9u7dEgM zgB2;~s((P6Q>98d^$+N|rykmC)l*JYRrOHyRQ1?5vtX|c&WDt$>dMT{ypQ?4`Moy} zyp8!7GtRKg@&@B)&~=1Dj}X#*H>O|QKwo_rfBZhY*M3sI4!*d-Am#%mv;cO~@bS-t z0s{@q`aVH4KSFx#8bWAtZVX1hZ|u?>0?0po%$)(|(exas*!O&B(HB^qKro3tpGPQ8 zhQxW?6*ND`_Mb)aJ~#ALiO`!^#r|)xu}pkGTL>YaFZV39_TvxmY^=KPA8xOmXDk#L zSoCLLLozii9mNLAEO-zneB;HniOchATWin1geSZ?0uHi?5Lko)0~=ZxL(&m&VKa$B z{+3$REt#2wO1U`}y5bm?-g0EiHb1{UbBFHEOo1Yx>4%1@=e3(?V+O=jcm-@*^1eqp z38q&a-Lmzj?6jLYVu&PAZCi$BI9(LXI z*_G&bv;f0Xjmg}0_3jH;495L#eezdDL%(hi;MOJ4Y zK38K?Jg7UO-K*{8B}`i#Cqgy7THXFBWd8b-Pc)#owp zVDN)C-Q*rD$r|JBN(N@P@|M|_xbm_XFCHqnvZK^DjV&Xsh&!GAyw#VPDO<64Q|HRP ze66uB#Mo9oQ>ZENqnIKjU0I@KC|1^ycJeErV`U*`stLZTh}hW3U_-J@Q{ni%SJ|js z7GlZv!~~yIxd#vj_;FcQ5_ww@9?*T1df8EP=AM=_JAJH!ipZN9XFAgT3M<5uJ#i;b z;&Z^SO8hG%CN3FMxTDlYhMYNBCHuMim*cWfGGLzppJW{!R>V@g_!cqBP|vc!+gk46qTDus3tQq$ zR(HAi*J5+;Q_GBT|rn#%-yBjs!-pmxBwy|-Vf5}wqMO%q4(>*qg<=#eZ)7XRF0DT4HT)GN( zx~h_XK>EcNX$l#Yj0zdn`~3o@{5JF_(oRa7w6o-h{-jlb7g=&u=mqA1Eb>XR#On$; z-G)gXYp?Ssd*^r)?u@Bgz>!!mF?Z^3pZGr*jZdT;={LoyMD8gpaq&LsswFbODxKzu z&~1XW|IsDsix@-y8}tAw1wHgt3RNQH!1*|gRZJQzX`ZA})2Gy+MsyP|A4HlXJ*(V< zd+q?K;J;6Jhc|s-tGjnmXx5gSPujY(sM_`-{X&`Te__nJUnSv3@O$6;hd!A?XdC{W F{Rc`5`m_K5 literal 0 HcmV?d00001 From ba3f817faaaa68ebd4d3a7cc2ada7934bd844c0e Mon Sep 17 00:00:00 2001 From: Tanmaya Panda Date: Wed, 3 Dec 2025 12:01:42 +0530 Subject: [PATCH 28/50] disabled serialized execution --- .../ingest/v2/QueuedIngestionClientTest.kt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt index bcf16cb83..baaaaef30 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt @@ -18,6 +18,7 @@ import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo import kotlinx.coroutines.runBlocking import kotlinx.serialization.json.Json import org.junit.jupiter.api.Assumptions.assumeTrue +import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.Test import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.parallel.Execution @@ -87,6 +88,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial 
execution test - disabled") @ResourceLock("blob-ingestion") fun `test queued ingestion with builder pattern`(): Unit = runBlocking { logger.info("Starting builder pattern test") @@ -188,6 +190,7 @@ class QueuedIngestionClientTest : } @ParameterizedTest(name = "[QueuedIngestion] {index} => TestName ={0}") + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") @CsvSource( // Single JSON blob, no mapping @@ -453,6 +456,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - single small file upload`() = runBlocking { logger.info("E2E: Testing single upload with small file") @@ -511,6 +515,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - single large file upload`() = runBlocking { logger.info("E2E: Testing single upload with large file (10MB)") @@ -575,6 +580,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - batch upload multiple files`() = runBlocking { logger.info("E2E: Testing batch upload with multiple files") @@ -648,6 +654,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - parallel processing with maxConcurrency`() = runBlocking { logger.info("E2E: Testing parallel processing with maxConcurrency=3") @@ -725,6 +732,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - size validation within limit`() = runBlocking { logger.info("E2E: Testing size validation with file within limit") @@ -785,6 +793,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - size validation exceeds limit`() = runBlocking { logger.info("E2E: Testing size validation with file exceeding limit") @@ -842,6 +851,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - ignore size limit flag`() = runBlocking { logger.info("E2E: Testing size validation with ignore limit flag") @@ -906,6 +916,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - combined all features scenario`() = runBlocking { logger.info( @@ -1010,6 +1021,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `test parallel upload with multiple files`() = runBlocking { logger.info("Starting parallel upload test with multiple files") @@ -1139,6 +1151,7 @@ class QueuedIngestionClientTest : } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - format mismatch rejection - mixed formats in batch`() = runBlocking { @@ -1293,6 +1306,7 @@ test2,456,2024-01-02""" } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - compression format test - GZIP pre-compressed file`() = runBlocking { @@ -1410,6 +1424,7 @@ test2,456,2024-01-02""" } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - compression format test - Parquet format with compression`() = runBlocking { @@ -1539,6 +1554,7 @@ test2,456,2024-01-02""" } @Test + @Disabled("Serial execution test - disabled") 
@ResourceLock("blob-ingestion") fun `E2E - compression format test - AVRO format with compression`() = runBlocking { @@ -1661,6 +1677,7 @@ test2,456,2024-01-02""" } @Test + @Disabled("Serial execution test - disabled") @ResourceLock("blob-ingestion") fun `E2E - compression format test - JSON file gets compressed during upload`() = runBlocking { @@ -1751,6 +1768,7 @@ test2,456,2024-01-02""" name = "[QueuedIngestion-LocalSource] {index} => SourceType={0}, TestName={1}", ) + @Disabled("Serial execution test - disabled") @CsvSource( "file,QueuedIngestion-FileSource,SampleFileSource.json", "stream,QueuedIngestion-StreamSource,SampleStreamSource.json", From ab64a1bc5723049c7f61a9ec1e07a3f162eb67dc Mon Sep 17 00:00:00 2001 From: Tanmaya Panda Date: Fri, 5 Dec 2025 01:54:15 +0530 Subject: [PATCH 29/50] optimized QueuedIngestionClientTests execution --- .../ingest/v2/QueuedIngestionClientTest.kt | 1754 +++-------------- 1 file changed, 277 insertions(+), 1477 deletions(-) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt index baaaaef30..4981e05fb 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt @@ -18,12 +18,10 @@ import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo import kotlinx.coroutines.runBlocking import kotlinx.serialization.json.Json import org.junit.jupiter.api.Assumptions.assumeTrue -import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.Test import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.parallel.Execution import org.junit.jupiter.api.parallel.ExecutionMode -import org.junit.jupiter.api.parallel.ResourceLock import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource import java.io.ByteArrayInputStream @@ -38,21 +36,22 @@ import kotlin.time.Duration class QueuedIngestionClientTest : IngestV2TestBase(QueuedIngestionClientTest::class.java) { + private val POLLING_INTERVAL = Duration.parse("PT2S") + private val POLLING_TIMEOUT = Duration.parse("PT2M") + @Test - fun `test builder with optional parameters`() { - val client = + fun `test builder variations`() { + // builder with optional parameters + val client1 = QueuedIngestionClientBuilder.create(dmEndpoint) .withAuthentication(tokenProvider) .withClientDetails("TestClient", "1.0") .withMaxConcurrency(10) .build() + assertNotNull(client1, "Client with optional parameters should not be null") - assertNotNull(client, "Client should not be null") - } - - @Test - fun `test builder with connector client details`() { - val client = + // builder with connector client details + val client2 = QueuedIngestionClientBuilder.create(dmEndpoint) .withAuthentication(tokenProvider) .withConnectorClientDetails( @@ -67,13 +66,10 @@ class QueuedIngestionClientTest : ), ) .build() + assertNotNull(client2, "Client with connector details should not be null") - assertNotNull(client, "Client should not be null") - } - - @Test - fun `test builder with connector client details and user`() { - val client = + // builder with connector client details and user + val client3 = QueuedIngestionClientBuilder.create(dmEndpoint) .withAuthentication(tokenProvider) .withConnectorClientDetails( @@ -83,115 +79,10 @@ class QueuedIngestionClientTest : overrideUser = "test-user@example.com", ) 
.build() - - assertNotNull(client, "Client should not be null") - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `test queued ingestion with builder pattern`(): Unit = runBlocking { - logger.info("Starting builder pattern test") - - val queuedIngestionClient: IngestClient = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - val blobUrl = - "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" - val testSources = listOf(BlobSourceInfo(blobUrl)) - val properties = - IngestRequestProperties( - format = targetTestFormat, - ingestionMappingReference = "${targetTable}_mapping", - enableTracking = true, - ) - - try { - val ingestionResponse = - queuedIngestionClient.submitIngestion( - database = database, - table = targetTable, - sources = testSources, - format = targetTestFormat, - ingestProperties = properties, - ) - - logger.info( - "Builder pattern test: Submitted queued ingestion with operation ID: {}", - ingestionResponse.ingestionOperationId, - ) - assertNotNull( - ingestionResponse, - "IngestionOperation should not be null", - ) - assertNotNull( - ingestionResponse.ingestionOperationId, - "Operation ID should not be null", - ) - - val finalStatus = - (queuedIngestionClient as QueuedIngestionClient) - .pollUntilCompletion( - database = database, - table = targetTable, - operationId = - ingestionResponse - .ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - logger.info( - "Builder pattern test: Ingestion completed with final status: {}", - finalStatus.status, - ) - - if (finalStatus.details?.isNotEmpty() == true) { - val succeededCount = - finalStatus.details.count { - it.status == BlobStatus.Status.Succeeded - } - val failedCount = - finalStatus.details.count { - it.status == BlobStatus.Status.Failed - } - logger.info( - "Builder pattern test: Succeeded: {}, Failed: {}", - succeededCount, - failedCount, - ) - - assert(succeededCount > 0 || failedCount > 0) { - "Expected at least some blobs to be processed" - } - } else { - logger.info( - "Builder pattern test: No details available, but operation was submitted successfully", - ) - } - } catch (e: ConnectException) { - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster: ${e.message}", - ) - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster: ${e.cause?.message}", - ) - } else { - throw e - } - } + assertNotNull(client3, "Client with connector details and user should not be null") } @ParameterizedTest(name = "[QueuedIngestion] {index} => TestName ={0}") - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") @CsvSource( // Single JSON blob, no mapping "QueuedIngestion-NoMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,false,0", @@ -199,19 +90,14 @@ class QueuedIngestionClientTest : "QueuedIngestion-WithMappingReference,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,true,false,0", // Single JSON blob, with inline mapping "QueuedIngestion-WithInlineMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,true,0", - // TODO This test fails (failureStatus is not right) - // "QueuedIngestion-FailWithInvalidBlob,https://nonexistentaccount.blob.core.windows.net/samplefiles/StormEvents.json,false,false,0", - // 
"https://nonexistentaccount.blob.core.windows.net/samplefiles/StormEvents.json, 1", - ) - fun `test queued ingestion with CSV blob`( + fun `test queued ingestion with blob variations`( testName: String, blobUrl: String, useMappingReference: Boolean, useInlineIngestionMapping: Boolean, numberOfFailures: Int, ): Unit = runBlocking { - // Skip test if no DM_CONNECTION_STRING is set logger.info("Starting test: $testName") val queuedIngestionClient: IngestClient = QueuedIngestionClient( @@ -225,8 +111,7 @@ class QueuedIngestionClientTest : if (useMappingReference) { IngestRequestProperties( format = targetTestFormat, - ingestionMappingReference = - "${targetTable}_mapping", + ingestionMappingReference = "${targetTable}_mapping", enableTracking = true, ) } else if (useInlineIngestionMapping) { @@ -234,51 +119,23 @@ class QueuedIngestionClientTest : columnNamesToTypes.keys.map { col -> when (col) { "SourceLocation" -> - ColumnMapping( - columnName = col, - columnType = - "string", - ) - .apply { - setTransform( - TransformationMethod - .SourceLocation, - ) - } + ColumnMapping(columnName = col, columnType = "string") + .apply { setTransform(TransformationMethod.SourceLocation) } "Type" -> - ColumnMapping( - columnName = col, - columnType = - "string", - ) - .apply { - setConstantValue( - "IngestionMapping", - ) - } + ColumnMapping(columnName = col, columnType = "string") + .apply { setConstantValue("IngestionMapping") } else -> - ColumnMapping( - columnName = col, - columnType = - columnNamesToTypes[ - col, - ]!!, - ) + ColumnMapping(columnName = col, columnType = columnNamesToTypes[col]!!) .apply { setPath("$.$col") } } } val inlineIngestionMappingInline = InlineIngestionMapping( columnMappings = ingestionColumnMappings, - ingestionMappingType = - InlineIngestionMapping - .IngestionMappingType - .JSON, + ingestionMappingType = InlineIngestionMapping.IngestionMappingType.JSON, ) val ingestionMappingString = - Json.encodeToString( - inlineIngestionMappingInline.columnMappings, - ) + Json.encodeToString(inlineIngestionMappingInline.columnMappings) IngestRequestProperties( format = targetTestFormat, ingestionMapping = ingestionMappingString, @@ -292,7 +149,6 @@ class QueuedIngestionClientTest : } try { - // Test successful ingestion submission val ingestionResponse = queuedIngestionClient.submitIngestion( database = database, @@ -302,148 +158,76 @@ class QueuedIngestionClientTest : ingestProperties = properties, ) - logger.info( - "E2E: Submitted queued ingestion with operation ID: {}", - ingestionResponse.ingestionOperationId, - ) - assertNotNull( - ingestionResponse, - "IngestionOperation should not be null", - ) - assertNotNull( - ingestionResponse.ingestionOperationId, - "Operation ID should not be null", - ) - // Test polling until completion with timeout - logger.info( - "Starting to poll for completion of operation: {}", - ingestionResponse.ingestionOperationId, - ) + logger.info("$testName: Submitted with operation ID: ${ingestionResponse.ingestionOperationId}") + assertNotNull(ingestionResponse, "IngestionOperation should not be null") + assertNotNull(ingestionResponse.ingestionOperationId, "Operation ID should not be null") val finalStatus = (queuedIngestionClient as QueuedIngestionClient) .pollUntilCompletion( database = database, table = targetTable, - operationId = - ingestionResponse - .ingestionOperationId, - // Poll every 5 seconds for testing - pollingInterval = Duration.parse("PT5S"), - // 5 minute timeout for testing - timeout = Duration.parse("PT5M"), + operationId = 
ingestionResponse.ingestionOperationId, + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, ) - logger.info( - "Ingestion completed with final status: {}", - finalStatus.status, - ) + logger.info("$testName: Completed with status: ${finalStatus.status}") - // Verify the operation completed successfully - // Check if we have any results if (finalStatus.details?.isNotEmpty() == true) { - val succeededCount = - finalStatus.details.count { - it.status == BlobStatus.Status.Succeeded - } - val failedCount = - finalStatus.details.count { - it.status == BlobStatus.Status.Failed - } - logger.info( - "Ingestion results - Succeeded: {}, Failed: {}", - succeededCount, - failedCount, - ) - // For this test, we expect at least some processing to have occurred + val succeededCount = finalStatus.details.count { it.status == BlobStatus.Status.Succeeded } + val failedCount = finalStatus.details.count { it.status == BlobStatus.Status.Failed } + logger.info("$testName: Succeeded: $succeededCount, Failed: $failedCount") + assert(succeededCount > 0 || failedCount > 0) { "Expected at least some blobs to be processed" } - assert(failedCount == numberOfFailures) { "Expected $numberOfFailures failed ingestions, but got $failedCount" } if (failedCount > 0) { finalStatus.details - .filter { blobStatus -> - blobStatus.status == BlobStatus.Status.Failed - } - .forEach { failedBlob -> - logger.error( - "Blob ingestion failed for sourceId: ${failedBlob.sourceId}, message: ${failedBlob.details}", - ) - } - logger.error( - "There are $failedCount blobs that failed ingestion.", - ) + .filter { it.status == BlobStatus.Status.Failed } + .forEach { logger.error("Failed blob: ${it.sourceId}, message: ${it.details}") } + } + + val filterType = when { + useMappingReference -> "MappingRef" + useInlineIngestionMapping -> "IngestionMapping" + else -> "None" } - val filterType = - when { - useMappingReference -> "MappingRef" - useInlineIngestionMapping -> "IngestionMapping" - else -> "None" - } if (useMappingReference || useInlineIngestionMapping) { val results = adminClusterClient - .executeQuery( - database, - "$targetTable | where Type == '$filterType' | summarize count=count() by SourceLocation", - ) + .executeQuery(database, "$targetTable | where Type == '$filterType' | summarize count=count() by SourceLocation") .primaryResults assertNotNull(results, "Query results should not be null") results.next() val count: Long = results.getLong("count") assertNotNull(count, "Count should not be null") - assert(count > 0) { - "Expected some records in the table after ingestion" - } - val sourceLocation: String = - results.getString("SourceLocation") - assert(sourceLocation.isNotEmpty()) { - "SourceLocation should not be empty" - } + assert(count > 0) { "Expected some records in the table after ingestion" } } } } catch (e: ConnectException) { - // Skip test if we can't connect to the test cluster due to network issues - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster due to network connectivity issues: ${e.message}", - ) + assumeTrue(false, "Skipping test: ${e.message}") } catch (e: Exception) { if (e.cause is ConnectException) { - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster due to network connectivity issues: ${e.cause?.message}", - ) + assumeTrue(false, "Skipping test: ${e.cause?.message}") } else { throw e } } } - private fun createTestStreamSource( - sizeInBytes: Int, - name: String, - ): StreamSourceInfo { - val jsonLine = - 
"""{"testField":"value","size":$sizeInBytes,"name":"$name"}""" + - "\n" + private fun createTestStreamSource(sizeInBytes: Int, name: String): StreamSourceInfo { + val jsonLine = """{"testField":"value","size":$sizeInBytes,"name":"$name"}""" + "\n" val jsonLineBytes = jsonLine.toByteArray() - val numLines = (sizeInBytes / jsonLineBytes.size).coerceAtLeast(1) val data = ByteArray(numLines * jsonLineBytes.size) for (i in 0 until numLines) { - System.arraycopy( - jsonLineBytes, - 0, - data, - i * jsonLineBytes.size, - jsonLineBytes.size, - ) + System.arraycopy(jsonLineBytes, 0, data, i * jsonLineBytes.size, jsonLineBytes.size) } return StreamSourceInfo( @@ -456,10 +240,8 @@ class QueuedIngestionClientTest : } @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - single small file upload`() = runBlocking { - logger.info("E2E: Testing single upload with small file") + fun `E2E - file size variations and batch uploads`() = runBlocking { + logger.info("E2E: Testing combined file sizes (small, large, batch)") val client = QueuedIngestionClientBuilder.create(dmEndpoint) @@ -467,181 +249,79 @@ class QueuedIngestionClientTest : .skipSecurityChecks() .build() - val source = createTestStreamSource(1024, "e2e_single_small.json") - try { - val response = + // Small file (1KB) + logger.info("Testing small file upload (1KB)") + val smallSource = createTestStreamSource(1024, "combined_small.json") + val smallResponse = client.submitIngestion( database = database, table = targetTable, - sources = listOf(source), + sources = listOf(smallSource), format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), + ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Single small file submitted: ${response.ingestionOperationId}", - ) - - val finalStatus = + assertNotNull(smallResponse.ingestionOperationId) + val smallStatus = client.pollUntilCompletion( database = database, table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - assert(succeededCount > 0) { "Expected successful ingestion" } - logger.info("E2E: Single small file upload completed successfully") - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - single large file upload`() = runBlocking { - logger.info("E2E: Testing single upload with large file (10MB)") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - val source = - createTestStreamSource( - 10 * 1024 * 1024, - "e2e_single_large.json", - ) - - try { - val response = + operationId = smallResponse.ingestionOperationId, + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, + ) + val smallSucceeded = smallStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 + assert(smallSucceeded > 0) { "Expected successful small file ingestion" } + logger.info("Small file 
upload completed: $smallSucceeded succeeded") + + // Large file (10MB) + logger.info("Testing large file upload (10MB)") + val largeSource = createTestStreamSource(10 * 1024 * 1024, "combined_large.json") + val largeResponse = client.submitIngestion( database = database, table = targetTable, - sources = listOf(source), + sources = listOf(largeSource), format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), + ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Large file submitted: ${response.ingestionOperationId}", - ) - - val finalStatus = + assertNotNull(largeResponse.ingestionOperationId) + val largeStatus = client.pollUntilCompletion( database = database, table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - assert(succeededCount > 0) { - "Expected successful large file ingestion" - } - logger.info("E2E: Large file upload completed successfully") - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - batch upload multiple files`() = runBlocking { - logger.info("E2E: Testing batch upload with multiple files") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - val sources = - (1..5).map { index -> - createTestStreamSource( - 1024 * index, - "e2e_batch_$index.json", - ) - } - - try { - val response = + operationId = largeResponse.ingestionOperationId, + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, + ) + val largeSucceeded = largeStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 + assert(largeSucceeded > 0) { "Expected successful large file ingestion" } + logger.info("Large file upload completed: $largeSucceeded succeeded") + + // Batch upload (5 files) + logger.info("Testing batch upload (5 files)") + val batchSources = (1..5).map { i -> createTestStreamSource(1024 * i, "combined_batch_$i.json") } + val batchResponse = client.submitIngestion( database = database, table = targetTable, - sources = sources, + sources = batchSources, format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), + ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Batch submitted: ${response.ingestionOperationId}", - ) - - val finalStatus = + assertNotNull(batchResponse.ingestionOperationId) + val batchStatus = client.pollUntilCompletion( database = database, table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), + operationId = batchResponse.ingestionOperationId, + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, ) + val batchSucceeded = batchStatus.details?.count { it.status == BlobStatus.Status.Succeeded 
} ?: 0 + assert(batchSucceeded == batchSources.size) { "Expected all batch files to succeed" } + logger.info("Batch upload completed: $batchSucceeded/${batchSources.size} succeeded") - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - val failedCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Failed - } ?: 0 - - logger.info( - "E2E: Batch results - Success: $succeededCount, Failure: $failedCount", - ) - assert(succeededCount == sources.size) { - "Expected successful uploads" - } } catch (e: ConnectException) { assumeTrue(false, "Skipping test: ${e.message}") } catch (e: Exception) { @@ -654,10 +334,8 @@ class QueuedIngestionClientTest : } @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") fun `E2E - parallel processing with maxConcurrency`() = runBlocking { - logger.info("E2E: Testing parallel processing with maxConcurrency=3") + logger.info("E2E: Testing parallel processing with maxConcurrency=5") val client = QueuedIngestionClientBuilder.create(dmEndpoint) @@ -666,60 +344,36 @@ class QueuedIngestionClientTest : .skipSecurityChecks() .build() - val sources = - (1..10).map { index -> - createTestStreamSource( - 512 * 1024, - "e2e_parallel_$index.json", - ) - } + val sources = (1..10).map { i -> createTestStreamSource(512 * 1024, "parallel_$i.json") } try { val startTime = System.currentTimeMillis() - val response = client.submitIngestion( database = database, table = targetTable, sources = sources, format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), + ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), ) - val uploadDuration = System.currentTimeMillis() - startTime assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Parallel upload submitted in ${uploadDuration}ms: ${response.ingestionOperationId}", - ) + logger.info("Parallel upload submitted in ${uploadDuration}ms") val finalStatus = client.pollUntilCompletion( database = database, table = targetTable, operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, ) - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - logger.info( - "E2E: Parallel upload: $succeededCount/${sources.size} succeeded", - ) - logger.info( - "E2E: Average time per upload: ${uploadDuration / sources.size}ms", - ) + val succeededCount = finalStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 + logger.info("Parallel upload: $succeededCount/${sources.size} succeeded (avg ${uploadDuration / sources.size}ms per file)") + assert(succeededCount == sources.size) { "Expected parallel uploads to succeed" } - assert(succeededCount == sources.size) { - "Expected parallel uploads to succeed" - } } catch (e: ConnectException) { assumeTrue(false, "Skipping test: ${e.message}") } catch (e: Exception) { @@ -731,61 +385,86 @@ class QueuedIngestionClientTest : } } - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - size validation within limit`() = runBlocking { - logger.info("E2E: Testing size validation with file within limit") + @ParameterizedTest(name = "[SizeValidation] {index} => Scenario={0}") + @CsvSource( + "within-limit,10,5,false,true", // 5MB file, 10MB limit, no 
ignore, expect success
+        "exceeds-limit,1,2,false,false", // 2MB file, 1MB limit, no ignore, expect rejection
+        "ignore-flag,1,2,true,true", // 2MB file, 1MB limit, with ignore, expect success
+    )
+    fun `E2E - size validation scenarios`(
+        scenario: String,
+        maxSizeMB: Long,
+        fileSizeMB: Int,
+        ignoreSize: Boolean,
+        expectSuccess: Boolean,
+    ) = runBlocking {
+        logger.info("E2E: Testing size validation scenario: $scenario")

-        val client =
-            QueuedIngestionClientBuilder.create(dmEndpoint)
-                .withAuthentication(tokenProvider)
-                // 10MB limit
-                .withMaxDataSize(10L * 1024 * 1024)
-                .skipSecurityChecks()
-                .build()
+        val clientBuilder = QueuedIngestionClientBuilder.create(dmEndpoint)
+            .withAuthentication(tokenProvider)
+            .withMaxDataSize(maxSizeMB * 1024 * 1024)
+            .skipSecurityChecks()
+
+        val client = if (ignoreSize) {
+            clientBuilder.withIgnoreFileSize(true).build()
+        } else {
+            clientBuilder.build()
+        }

-        val source =
-            createTestStreamSource(5 * 1024 * 1024, "e2e_size_valid.json")
+        val source = createTestStreamSource(fileSizeMB * 1024 * 1024, "size_${scenario}.json")

         try {
-            val response =
-                client.submitIngestion(
+            if (expectSuccess) {
+                val response = client.submitIngestion(
                     database = database,
                     table = targetTable,
                     sources = listOf(source),
                     format = Format.multijson,
-                    ingestProperties =
-                        IngestRequestProperties(
-                            format = Format.multijson,
-                            enableTracking = true,
-                        ),
+                    ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true),
                 )

-            assertNotNull(response.ingestionOperationId)
-            logger.info("E2E: Size validation passed for file within limit")
+                assertNotNull(response.ingestionOperationId)
+                logger.info("E2E: $scenario - Submitted successfully: ${response.ingestionOperationId}")

-            val finalStatus =
-                client.pollUntilCompletion(
+                val finalStatus = client.pollUntilCompletion(
                     database = database,
                     table = targetTable,
                     operationId = response.ingestionOperationId,
-                    pollingInterval = Duration.parse("PT5S"),
-                    timeout = Duration.parse("PT5M"),
+                    pollingInterval = POLLING_INTERVAL,
+                    timeout = POLLING_TIMEOUT,
                 )

-            val succeededCount =
-                finalStatus.details?.count {
-                    it.status == BlobStatus.Status.Succeeded
-                } ?: 0
-            assert(succeededCount > 0) {
-                "Expected successful upload for file within size limit"
+                val succeededCount = finalStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0
+                assert(succeededCount > 0) { "Expected successful upload for scenario: $scenario" }
+                logger.info("E2E: $scenario - Completed successfully")
+            } else {
+                try {
+                    client.submitIngestion(
+                        database = database,
+                        table = targetTable,
+                        sources = listOf(source),
+                        format = Format.multijson,
+                        ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true),
+                    )
+                    throw AssertionError("Expected size validation to reject the file for scenario: $scenario")
+                } catch (e: IngestException) {
+                    logger.info("E2E: $scenario - Size validation correctly rejected: ${e.message}")
+                }
+                logger.info("E2E: $scenario - Correctly rejected file exceeding limit")
+            }
+        } catch (e: AssertionError) {
+            // A missed rejection must fail the test; swallowing the
+            // AssertionError here would report a false pass.
+            logger.error("E2E: $scenario - {}", e.message)
+            throw e
         } catch (e: ConnectException) {
             assumeTrue(false, "Skipping test: ${e.message}")
         } catch (e: Exception) {
             if (e.cause is ConnectException) {
                 assumeTrue(false, "Skipping test: ${e.cause?.message}")
+            } else if (!expectSuccess && e.message?.contains("size", ignoreCase = true) == true) {
+                logger.info("E2E: $scenario - Size validation correctly
rejected: ${e.message}") } else { throw e } @@ -793,57 +472,81 @@ class QueuedIngestionClientTest : } @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - size validation exceeds limit`() = runBlocking { - logger.info("E2E: Testing size validation with file exceeding limit") + fun `E2E - compression format tests`() = runBlocking { + logger.info("E2E: Testing compression formats (JSON, GZIP, Parquet, AVRO)") val client = QueuedIngestionClientBuilder.create(dmEndpoint) .withAuthentication(tokenProvider) - // 1MB limit - .withMaxDataSize(1L * 1024 * 1024) .skipSecurityChecks() .build() - val source = - createTestStreamSource(2 * 1024 * 1024, "e2e_size_exceed.json") - try { - try { - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), - ) - throw AssertionError( - "Expected size validation to reject the file", - ) - } catch (e: IngestException) { - logger.info( - "E2E: Size validation correctly rejected: ${e.message}", - ) + // JSON file (uncompressed, gets compressed during upload) + logger.info("Testing JSON file compression during upload") + val jsonData = """{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000002","temperature":25.5,"humidity":60.0}""" + val jsonFile = Files.createTempFile("test_json", ".json") + Files.write(jsonFile, jsonData.toByteArray()) + val jsonSource = FileSourceInfo(path = jsonFile, format = Format.multijson, compressionType = CompressionType.NONE, name = "test_json.json", sourceId = UUID.randomUUID()) + val jsonResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(jsonSource), format = Format.multijson, ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true)) + assertNotNull(jsonResponse.ingestionOperationId) + val jsonStatus = client.pollUntilCompletion(database = database, table = targetTable, operationId = jsonResponse.ingestionOperationId, pollingInterval = POLLING_INTERVAL, timeout = POLLING_TIMEOUT) + val jsonSucceeded = jsonStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 + assert(jsonSucceeded > 0) { "Expected successful JSON ingestion" } + logger.info("JSON file compression test: passed") + Files.deleteIfExists(jsonFile) + + // GZIP pre-compressed file + logger.info("Testing GZIP pre-compressed file") + val gzipFile = Files.createTempFile("test_gzip", ".json.gz") + java.util.zip.GZIPOutputStream(Files.newOutputStream(gzipFile)).use { it.write(jsonData.toByteArray()) } + val gzipSource = FileSourceInfo(path = gzipFile, format = Format.multijson, compressionType = CompressionType.GZIP, name = "pre_compressed.json.gz", sourceId = UUID.randomUUID()) + val gzipResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(gzipSource), format = Format.multijson, ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true)) + assertNotNull(gzipResponse.ingestionOperationId) + val gzipStatus = client.pollUntilCompletion(database = database, table = targetTable, operationId = gzipResponse.ingestionOperationId, pollingInterval = POLLING_INTERVAL, timeout = POLLING_TIMEOUT) + val gzipSucceeded = gzipStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 + assert(gzipSucceeded > 0) { 
"Expected successful GZIP ingestion" } + logger.info("GZIP pre-compressed test: passed") + Files.deleteIfExists(gzipFile) + + // Parquet and AVRO (skip if resources not found) + val parquetFile = this::class.java.classLoader.getResource("compression/sample.parquet") + if (parquetFile != null) { + logger.info("Testing Parquet format") + val tempParquet = Files.createTempFile("test_parquet", ".parquet") + Files.copy(parquetFile.openStream(), tempParquet, java.nio.file.StandardCopyOption.REPLACE_EXISTING) + val parquetSource = FileSourceInfo(path = tempParquet, format = Format.parquet, compressionType = CompressionType.NONE, name = "test.parquet", sourceId = UUID.randomUUID()) + try { + val parquetResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(parquetSource), format = Format.parquet, ingestProperties = IngestRequestProperties(format = Format.parquet, enableTracking = true)) + assertNotNull(parquetResponse.ingestionOperationId) + logger.info("Parquet format test: submitted (schema may not match)") + } catch (e: Exception) { + logger.warn("Parquet test skipped (may be due to schema mismatch): ${e.message}") + } + Files.deleteIfExists(tempParquet) + } + + val avroFile = this::class.java.classLoader.getResource("compression/sample.avro") + if (avroFile != null) { + logger.info("Testing AVRO format") + val tempAvro = Files.createTempFile("test_avro", ".avro") + Files.copy(avroFile.openStream(), tempAvro, java.nio.file.StandardCopyOption.REPLACE_EXISTING) + val avroSource = FileSourceInfo(path = tempAvro, format = Format.avro, compressionType = CompressionType.NONE, name = "test.avro", sourceId = UUID.randomUUID()) + try { + val avroResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(avroSource), format = Format.avro, ingestProperties = IngestRequestProperties(format = Format.avro, enableTracking = true)) + assertNotNull(avroResponse.ingestionOperationId) + logger.info("AVRO format test: submitted (schema may not match)") + } catch (e: Exception) { + logger.warn("AVRO test skipped (may be due to schema mismatch): ${e.message}") + } + Files.deleteIfExists(tempAvro) } - logger.info( - "E2E: Size validation correctly rejected file exceeding limit", - ) - } catch (e: AssertionError) { - logger.info("E2E: Size limit enforced as expected") + } catch (e: ConnectException) { assumeTrue(false, "Skipping test: ${e.message}") } catch (e: Exception) { if (e.cause is ConnectException) { assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else if (e.message?.contains("size", ignoreCase = true) == true) { - logger.info( - "E2E: Size validation correctly rejected: ${e.message}", - ) } else { throw e } @@ -851,59 +554,66 @@ class QueuedIngestionClientTest : } @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - ignore size limit flag`() = runBlocking { - logger.info("E2E: Testing size validation with ignore limit flag") + fun `E2E - format mismatch and mixed format batch`() = runBlocking { + logger.info("E2E: Testing format mismatch detection with mixed formats") val client = QueuedIngestionClientBuilder.create(dmEndpoint) .withAuthentication(tokenProvider) - // 1MB limit - .withMaxDataSize(1L * 1024 * 1024) - // But ignore it - .withIgnoreFileSize(true) .skipSecurityChecks() .build() - val source = - createTestStreamSource(2 * 1024 * 1024, "e2e_size_ignore.json") + val jsonContent = """{"name":"test","value":123,"timestamp":"2024-01-01"}""" + val csvContent = 
"""name,value,timestamp +test,123,2024-01-01 +test2,456,2024-01-02""" + + val sources = + listOf( + StreamSourceInfo(stream = ByteArrayInputStream(jsonContent.toByteArray()), format = Format.json, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = "format_json.json"), + StreamSourceInfo(stream = ByteArrayInputStream(csvContent.toByteArray()), format = Format.csv, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = "format_csv.csv"), + StreamSourceInfo(stream = ByteArrayInputStream(jsonContent.toByteArray()), format = Format.json, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = "format_json2.json"), + ) try { + logger.info("Uploading ${sources.size} sources with mixed formats (JSON and CSV)") val response = client.submitIngestion( database = database, table = targetTable, - sources = listOf(source), - format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), + sources = sources, + format = Format.json, + ingestProperties = IngestRequestProperties(format = Format.json, enableTracking = true), ) assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Size limit successfully bypassed with ignoreFileSize flag", - ) + logger.info("Mixed format batch submitted: ${response.ingestionOperationId}") val finalStatus = client.pollUntilCompletion( database = database, table = targetTable, operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, ) - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - assert(succeededCount > 0) { - "Expected successful upload with ignore flag" + val succeededCount = finalStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 + val failedCount = finalStatus.details?.count { it.status == BlobStatus.Status.Failed } ?: 0 + + logger.info("Format mismatch results - Success: $succeededCount, Failed: $failedCount") + + if (failedCount > 0) { + finalStatus.details + ?.filter { it.status == BlobStatus.Status.Failed } + ?.forEach { logger.error("Failed: ${it.sourceId}, errorCode: ${it.errorCode}, details: ${it.details}") } + } + + assert(failedCount >= 1) { + "Expected at least one failure due to format mismatch, but got: succeeded=$succeededCount, failed=$failedCount" } + logger.info("Format mismatch correctly detected by server") + } catch (e: ConnectException) { assumeTrue(false, "Skipping test: ${e.message}") } catch (e: Exception) { @@ -915,872 +625,18 @@ class QueuedIngestionClientTest : } } - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - combined all features scenario`() = runBlocking { - logger.info( - "E2E: Testing combined features (parallel + size validation + ignore flag)", - ) - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .withMaxConcurrency(8) - // 10MB standard limit - .withMaxDataSize(10L * 1024 * 1024) - .withIgnoreFileSize(true) - .skipSecurityChecks() - .build() - - // Mix of file sizes: small (1-5MB), medium (5-10MB), large (10-20MB) - val sources = mutableListOf() - - // small files - (1..7).forEach { i -> - sources.add( - createTestStreamSource( - 1024 * 1024 * (1 + (i % 5)), - "e2e_combined_small_$i.json", - ), - ) - } - - // medium files - (1..2).forEach { i -> - 
sources.add( - createTestStreamSource( - 1024 * 1024 * (5 + (i % 5)), - "e2e_combined_medium_$i.json", - ), - ) - } - - // large files (need ignore flag) - sources.add( - createTestStreamSource( - 15 * 1024 * 1024, - "e2e_combined_large_1.json", - ), - ) - - logger.info( - "E2E: Testing combined batch: ${sources.size} files, sizes 1MB-15MB", - ) - - try { - val startTime = System.currentTimeMillis() - - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), - ) - - val uploadDuration = System.currentTimeMillis() - startTime - - assertNotNull(response.ingestionOperationId) - logger.info("E2E: combined batch uploaded in ${uploadDuration}ms") - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT10S"), - timeout = Duration.parse("PT15M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - - logger.info( - "E2E: combined scenario: $succeededCount/${sources.size} succeeded", - ) - assert(succeededCount == sources.size) { - "Combined scenario: ingestion succeeded" - } - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `test parallel upload with multiple files`() = runBlocking { - logger.info("Starting parallel upload test with multiple files") - - val deviceDataUrl = - "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" - val deviceData = java.net.URL(deviceDataUrl).readText() - val targetFormat = Format.multijson - - val sources = - (1..5).map { index -> - StreamSourceInfo( - stream = - ByteArrayInputStream( - deviceData.toByteArray(), - ), - format = targetFormat, - sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "parallel_test_$index.json", - ) - } - - val queuedIngestionClient: IngestClient = - QueuedIngestionClient( - dmUrl = dmEndpoint, - tokenCredential = tokenProvider, - skipSecurityChecks = true, - ) - - val properties = - IngestRequestProperties( - format = targetFormat, - enableTracking = true, - ) - - try { - val startTime = System.currentTimeMillis() - - val ingestionResponse = - queuedIngestionClient.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = targetFormat, - ingestProperties = properties, - ) - - val uploadTime = System.currentTimeMillis() - startTime - - logger.info( - "Parallel upload test: Submitted {} files in {}ms with operation ID: {}", - sources.size, - uploadTime, - ingestionResponse.ingestionOperationId, - ) - - assertNotNull( - ingestionResponse, - "IngestionOperation should not be null", - ) - assertNotNull( - ingestionResponse.ingestionOperationId, - "Operation ID should not be null", - ) - - val finalStatus = - (queuedIngestionClient as QueuedIngestionClient) - .pollUntilCompletion( - database = database, - table = targetTable, - operationId = - ingestionResponse - .ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - logger.info( - "Parallel 
upload test: Ingestion completed with final status: {}", - finalStatus.status, - ) - - if (finalStatus.details?.isNotEmpty() == true) { - val succeededCount = - finalStatus.details.count { - it.status == BlobStatus.Status.Succeeded - } - val failedCount = - finalStatus.details.count { - it.status == BlobStatus.Status.Failed - } - - logger.info( - "Parallel upload results - Total: {}, Succeeded: {}, Failed: {}", - finalStatus.details.size, - succeededCount, - failedCount, - ) - - assert(succeededCount > 0) { - "Expected at least some successful uploads in parallel test" - } - - logger.info( - "Parallel upload performance: {} files uploaded in {}ms (avg {}ms per file)", - sources.size, - uploadTime, - uploadTime / sources.size, - ) - } - } catch (e: ConnectException) { - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster: ${e.message}", - ) - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster: ${e.cause?.message}", - ) - } else { - throw e - } - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - format mismatch rejection - mixed formats in batch`() = - runBlocking { - logger.info( - "E2E: Testing format mismatch rejection with mixed format sources", - ) - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - // Create JSON content - val jsonContent = - """{"name":"test","value":123,"timestamp":"2024-01-01"}""" - - // Create CSV content - val csvContent = - """name,value,timestamp -test,123,2024-01-01 -test2,456,2024-01-02""" - - // Create sources with different formats - val sources = - listOf( - // JSON source - StreamSourceInfo( - stream = - ByteArrayInputStream( - jsonContent - .toByteArray(), - ), - format = Format.json, - sourceCompression = - CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "format_test_json.json", - ), - // CSV source - This will cause format mismatch - StreamSourceInfo( - stream = - ByteArrayInputStream( - csvContent.toByteArray(), - ), - format = Format.csv, - sourceCompression = - CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "format_test_csv.csv", - ), - // Another JSON source - StreamSourceInfo( - stream = - ByteArrayInputStream( - jsonContent - .toByteArray(), - ), - format = Format.json, - sourceCompression = - CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "format_test_json2.json", - ), - ) - - try { - logger.info( - "Uploading ${sources.size} sources with mixed formats (JSON and CSV)", - ) - - // Submit ingestion declaring all as JSON (but one is actually CSV) - // Upload will succeed, but ingestion will fail on server side - // Declaring ALL as JSON - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = Format.json, - ingestProperties = - IngestRequestProperties( - format = Format.json, - enableTracking = true, - ), - ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Mixed format batch submitted successfully: ${response.ingestionOperationId}", - ) - logger.info( - "E2E: Uploads succeeded - format mismatch will be detected server-side", - ) - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val 
succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - val failedCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Failed - } ?: 0 - - logger.info( - "E2E: Format mismatch results - Success: $succeededCount, Failed: $failedCount", - ) - - if (failedCount > 0) { - finalStatus.details - ?.filter { - it.status == BlobStatus.Status.Failed - } - ?.forEach { failedBlob -> - logger.error( - "E2E: Blob ingestion failed - sourceId: ${failedBlob.sourceId}, " + - "errorCode: ${failedBlob.errorCode}, " + - "failureStatus: ${failedBlob.failureStatus?.value}, " + - "details: ${failedBlob.details}", - ) - } - } - - // We expect at least one failure due to format mismatch - // The CSV file should fail when server tries to parse it as JSON - assert(failedCount >= 1) { - "Expected at least one failure due to format mismatch (CSV parsed as JSON), " + - "but got: succeeded=$succeededCount, failed=$failedCount" - } - - logger.info( - "E2E: Format mismatch correctly detected by Kusto server during ingestion processing", - ) - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - compression format test - GZIP pre-compressed file`() = - runBlocking { - logger.info( - "E2E: Testing GZIP pre-compressed file ingestion (NO double compression)", - ) - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - // Create test JSON data matching table schema - val jsonData = - """{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000002","temperature":25.5,"humidity":60.0}""" - - // Create a GZIP compressed file - val tempFile = Files.createTempFile("test_gzip", ".json.gz") - java.util.zip - .GZIPOutputStream(Files.newOutputStream(tempFile)) - .use { gzipOut -> - gzipOut.write(jsonData.toByteArray()) - } - - // Already GZIP compressed - val source = - FileSourceInfo( - path = tempFile, - format = Format.multijson, - compressionType = CompressionType.GZIP, - name = "pre_compressed.json.gz", - sourceId = UUID.randomUUID(), - ) - - try { - logger.info( - "Uploading GZIP pre-compressed file - already compressed, will NOT be compressed again during upload", - ) - - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), - ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: GZIP file submitted (pre-compressed, no additional compression): ${response.ingestionOperationId}", - ) - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - val failedCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Failed - } ?: 0 - - logger.info( - "E2E: GZIP pre-compressed test - Success: $succeededCount, Failed: $failedCount", - 
) - - if (failedCount > 0) { - finalStatus.details - ?.filter { - it.status == BlobStatus.Status.Failed - } - ?.forEach { failedBlob -> - logger.error( - "Failed blob details - sourceId: ${failedBlob.sourceId}, " + - "errorCode: ${failedBlob.errorCode}, " + - "details: ${failedBlob.details}", - ) - } - } - - // GZIP file is already compressed, so it should NOT be compressed again during - // upload - logger.info( - "E2E: GZIP test completed - verifies NO double compression for pre-compressed files", - ) - assert(succeededCount > 0) { - "Expected successful GZIP ingestion without double compression. " + - "Succeeded: $succeededCount, Failed: $failedCount" - } - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } finally { - Files.deleteIfExists(tempFile) - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - compression format test - Parquet format with compression`() = - runBlocking { - logger.info("E2E: Testing Parquet format file ingestion") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - // Parquet files are internally compressed, and the upload will compress again - val parquetFile = - this::class - .java - .classLoader - .getResource("compression/sample.parquet") - - if (parquetFile == null) { - logger.warn( - "sample.parquet not found in test resources, skipping Parquet test", - ) - assumeTrue( - false, - "sample.parquet not found - skipping test", - ) - return@runBlocking - } - - val tempFile = Files.createTempFile("test_parquet", ".parquet") - Files.copy( - parquetFile.openStream(), - tempFile, - java.nio.file.StandardCopyOption.REPLACE_EXISTING, - ) - - // Parquet has internal Snappy compression, no transport compression needed - val source = - FileSourceInfo( - path = tempFile, - format = Format.parquet, - compressionType = CompressionType.NONE, - name = "test.parquet", - sourceId = UUID.randomUUID(), - ) - - try { - logger.info( - "Uploading Parquet file - binary format with internal compression, will NOT be compressed during upload", - ) - - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = Format.parquet, - ingestProperties = - IngestRequestProperties( - format = Format.parquet, - enableTracking = true, - ), - ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: Parquet file submitted (binary format, no additional compression): ${response.ingestionOperationId}", - ) - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - val failedCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Failed - } ?: 0 - - logger.info( - "E2E: Parquet binary format test - Success: $succeededCount, Failed: $failedCount", - ) - - // Log failures for debugging - if (failedCount > 0) { - finalStatus.details - ?.filter { - it.status == BlobStatus.Status.Failed - } - ?.forEach { failedBlob -> - logger.error( - "Failed blob details - sourceId: ${failedBlob.sourceId}, " + - "errorCode: 
${failedBlob.errorCode}, " + - "details: ${failedBlob.details}", - ) - } - } - - // Parquet format has internal compression, upload should NOT compress again - // (fixed!) - logger.info( - "E2E: Parquet test completed - verifies NO double compression for binary formats", - ) - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - logger.warn( - "Parquet test failed (may be due to schema mismatch): ${e.message}", - ) - // Don't fail the test - schema mismatch is expected with sample Parquet - // file - } - } finally { - Files.deleteIfExists(tempFile) - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - compression format test - AVRO format with compression`() = - runBlocking { - logger.info("E2E: Testing AVRO format file ingestion") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - // AVRO files are internally compressed, similar to Parquet - val avroFile = - this::class - .java - .classLoader - .getResource("compression/sample.avro") - - if (avroFile == null) { - logger.warn( - "sample.avro not found in test resources, skipping AVRO test", - ) - assumeTrue(false, "sample.avro not found - skipping test") - return@runBlocking - } - - val tempFile = Files.createTempFile("test_avro", ".avro") - Files.copy( - avroFile.openStream(), - tempFile, - java.nio.file.StandardCopyOption.REPLACE_EXISTING, - ) - - // AVRO has internal Deflate compression - val source = - FileSourceInfo( - path = tempFile, - format = Format.avro, - compressionType = CompressionType.NONE, - name = "test.avro", - sourceId = UUID.randomUUID(), - ) - - try { - logger.info( - "Uploading AVRO file - binary format with internal compression, will NOT be compressed during upload", - ) - - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = Format.avro, - ingestProperties = - IngestRequestProperties( - format = Format.avro, - enableTracking = true, - ), - ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: AVRO file submitted (binary format, no additional compression): ${response.ingestionOperationId}", - ) - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - val failedCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Failed - } ?: 0 - - logger.info( - "E2E: AVRO binary format test - Success: $succeededCount, Failed: $failedCount", - ) - - if (failedCount > 0) { - finalStatus.details - ?.filter { - it.status == BlobStatus.Status.Failed - } - ?.forEach { failedBlob -> - logger.error( - "Failed blob details - sourceId: ${failedBlob.sourceId}, " + - "errorCode: ${failedBlob.errorCode}, " + - "details: ${failedBlob.details}", - ) - } - } - - // AVRO format has internal compression, upload should NOT compress again - logger.info( - "E2E: AVRO test completed - verifies NO double compression for binary formats", - ) - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if 
(e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - logger.warn( - "AVRO test failed (may be due to schema mismatch): ${e.message}", - ) - } - } finally { - Files.deleteIfExists(tempFile) - } - } - - @Test - @Disabled("Serial execution test - disabled") - @ResourceLock("blob-ingestion") - fun `E2E - compression format test - JSON file gets compressed during upload`() = - runBlocking { - logger.info("E2E: Testing JSON file compression during upload") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - // Create test JSON data matching table schema - val jsonData = - """{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000002","temperature":25.5,"humidity":60.0}""" - - val tempFile = Files.createTempFile("test_json", ".json") - Files.write(tempFile, jsonData.toByteArray()) - - // Not pre-compressed - val source = - FileSourceInfo( - path = tempFile, - format = Format.multijson, - compressionType = CompressionType.NONE, - name = "test_json.json", - sourceId = UUID.randomUUID(), - ) - - try { - logger.info( - "Uploading JSON file - will be compressed during blob upload", - ) - - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = Format.multijson, - ingestProperties = - IngestRequestProperties( - format = Format.multijson, - enableTracking = true, - ), - ) - - assertNotNull(response.ingestionOperationId) - logger.info( - "E2E: JSON file submitted (compressed during upload): ${response.ingestionOperationId}", - ) - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - val succeededCount = - finalStatus.details?.count { - it.status == BlobStatus.Status.Succeeded - } ?: 0 - - logger.info( - "E2E: JSON compression test result - Success: $succeededCount", - ) - - // Uncompressed JSON gets compressed during upload - assert(succeededCount > 0) { - "Expected successful JSON ingestion with compression during upload" - } - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } finally { - Files.deleteIfExists(tempFile) - } - } - - @ParameterizedTest( - name = - "[QueuedIngestion-LocalSource] {index} => SourceType={0}, TestName={1}", - ) - @Disabled("Serial execution test - disabled") + @ParameterizedTest(name = "[LocalSource] {index} => SourceType={0}, TestName={1}") @CsvSource( "file,QueuedIngestion-FileSource,SampleFileSource.json", "stream,QueuedIngestion-StreamSource,SampleStreamSource.json", ) - fun `test queued ingestion with LocalSource`( + fun `test queued ingestion with local sources`( sourceType: String, testName: String, fileName: String, ) = runBlocking { logger.info("Starting LocalSource test: $testName") - val deviceDataUrl = - "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" + val deviceDataUrl = "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" val deviceData = java.net.URL(deviceDataUrl).readText() val targetFormat = Format.multijson val source: AbstractSourceInfo = @@ -1788,93 +644,37 
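Each of these E2E paths ends the same way: submit, then poll `pollUntilCompletion` with a PT5S interval and PT5M timeout until the operation reaches a terminal state. A self-contained sketch of that loop; the status type and terminal check are stand-ins, not the client's `StatusResponse` API:

```kotlin
import kotlinx.coroutines.delay
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withTimeout
import kotlin.time.Duration
import kotlin.time.Duration.Companion.minutes
import kotlin.time.Duration.Companion.seconds

// Re-fetch status on a fixed interval until a terminal state, bounded by a timeout.
suspend fun <S> pollUntilDone(
    pollingInterval: Duration = 5.seconds, // tests pass PT5S
    timeout: Duration = 5.minutes,         // tests pass PT5M
    isTerminal: (S) -> Boolean,
    fetchStatus: suspend () -> S,
): S = withTimeout(timeout) {
    var status = fetchStatus()
    while (!isTerminal(status)) {
        delay(pollingInterval)
        status = fetchStatus()
    }
    status
}

fun main() = runBlocking {
    var polls = 0
    val result = pollUntilDone(pollingInterval = 1.seconds, isTerminal = { it == "Done" }) {
        if (++polls < 3) "Pending" else "Done"
    }
    println("finished after $polls polls: $result")
}
```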
@@ test2,456,2024-01-02""" "file" -> { val tempFile = Files.createTempFile(fileName, null) Files.write(tempFile, deviceData.toByteArray()) - FileSourceInfo( - path = tempFile, - format = targetFormat, - compressionType = CompressionType.NONE, - name = fileName, - sourceId = UUID.randomUUID(), - ) + FileSourceInfo(path = tempFile, format = targetFormat, compressionType = CompressionType.NONE, name = fileName, sourceId = UUID.randomUUID()) .also { - Runtime.getRuntime() - .addShutdownHook( - Thread { - Files.deleteIfExists( - tempFile, - ) - }, - ) + Runtime.getRuntime().addShutdownHook(Thread { Files.deleteIfExists(tempFile) }) } } "stream" -> - StreamSourceInfo( - stream = - ByteArrayInputStream( - deviceData.toByteArray(), - ), - format = targetFormat, - sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = fileName, - ) + StreamSourceInfo(stream = ByteArrayInputStream(deviceData.toByteArray()), format = targetFormat, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = fileName) else -> error("Unknown sourceType: $sourceType") } val queuedIngestionClient: IngestClient = - QueuedIngestionClient( - dmUrl = dmEndpoint, - tokenCredential = tokenProvider, - skipSecurityChecks = true, - ) - val properties = - IngestRequestProperties( - format = targetFormat, - enableTracking = true, - ) + QueuedIngestionClient(dmUrl = dmEndpoint, tokenCredential = tokenProvider, skipSecurityChecks = true) + val properties = IngestRequestProperties(format = targetFormat, enableTracking = true) + + val ingestionResponse = queuedIngestionClient.submitIngestion(database = database, table = targetTable, sources = listOf(source), format = targetFormat, ingestProperties = properties) + logger.info("$testName: Submitted with operation ID: ${ingestionResponse.ingestionOperationId}") + assertNotNull(ingestionResponse, "IngestionOperation should not be null") + assertNotNull(ingestionResponse.ingestionOperationId, "Operation ID should not be null") - val ingestionResponse = - queuedIngestionClient.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = targetFormat, - ingestProperties = properties, - ) - logger.info( - "{}: Submitted queued ingestion with operation ID: {}", - testName, - ingestionResponse.ingestionOperationId, - ) - assertNotNull( - ingestionResponse, - "IngestionOperation should not be null", - ) - assertNotNull( - ingestionResponse.ingestionOperationId, - "Operation ID should not be null", - ) val finalStatus = (queuedIngestionClient as QueuedIngestionClient) .pollUntilCompletion( database = database, table = targetTable, - operationId = - ingestionResponse.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), + operationId = ingestionResponse.ingestionOperationId, + pollingInterval = POLLING_INTERVAL, + timeout = POLLING_TIMEOUT, ) - logger.info( - "{} ingestion completed with final status: {}", - testName, - finalStatus.status, - ) - assert( - finalStatus.details?.any { - it.status == BlobStatus.Status.Succeeded - } == true, - ) { + logger.info("$testName: Completed with status: ${finalStatus.status}") + assert(finalStatus.details?.any { it.status == BlobStatus.Status.Succeeded } == true) { "Expected at least one successful ingestion for $testName" } } } -// https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json From 8b6315030d2701d1c1491fb0f75448374989e1d4 Mon Sep 17 00:00:00 2001 From: Ramachandran A G 
<106139410+ag-ramachandran@users.noreply.github.com> Date: Wed, 17 Dec 2025 09:38:32 +0530 Subject: [PATCH 30/50] Feature/add uploader (#446) * added private link access context * Some refactor and add changes for ONE_LAKE_UPLOAD test * Fix uploader using DFS * Add additional test context for failures * Add error details * Add some info on ClientDetails --------- Co-authored-by: Tanmaya Panda --- ingest-v2/pom.xml | 37 +- .../kusto/ingest/v2/ConfigurationClient.kt | 10 +- .../azure/kusto/ingest/v2/IngestClient.kt | 142 --- .../azure/kusto/ingest/v2/IngestV2.kt | 45 +- .../kusto/ingest/v2/KustoBaseApiClient.kt | 56 +- .../ingest/v2/ManagedStreamingIngestClient.kt | 657 ------------ .../kusto/ingest/v2/ManagedStreamingPolicy.kt | 331 ------ .../kusto/ingest/v2/QueuedIngestionClient.kt | 429 -------- .../kusto/ingest/v2/StreamingIngestClient.kt | 249 ----- .../v2/builders/BaseIngestClientBuilder.kt | 133 ++- .../ManagedStreamingIngestClientBuilder.kt | 123 +++ .../v2/builders/QueuedIngestClientBuilder.kt | 100 ++ .../builders/QueuedIngestionClientBuilder.kt | 59 -- .../builders/StreamingIngestClientBuilder.kt | 40 +- .../kusto/ingest/v2/client/IngestClient.kt | 112 +++ .../ingest/v2/client/IngestionOperation.kt | 21 + .../v2/client/ManagedStreamingIngestClient.kt | 626 ++++++++++++ .../ingest/v2/client/QueuedIngestClient.kt | 602 +++++++++++ .../ingest/v2/client/StreamingIngestClient.kt | 344 +++++++ .../policy/DefaultManagedStreamingPolicy.kt | 155 +++ .../policy/ManagedStreamingErrorState.kt | 10 + .../client/policy/ManagedStreamingPolicy.kt | 118 +++ .../kusto/ingest/v2/common/ClientDetails.kt | 184 ---- .../ingest/v2/common/ConfigurationCache.kt | 43 +- .../ingest/v2/common/IngestRetryPolicy.kt | 2 +- .../ingest/v2/common/RetryPolicyExtensions.kt | 4 +- .../v2/common/exceptions/IngestException.kt | 35 +- .../ingest/v2/common/models/ClientDetails.kt | 193 +++- .../v2/common/models/ExtendedResponseTypes.kt | 15 + .../models/IngestRequestPropertiesBuilder.kt | 205 ++++ .../v2/common/models/KustoTokenCredentials.kt | 22 - .../ingest/v2/common/utils/IngestionUtils.kt | 30 + .../kusto/ingest/v2/common/utils/PathUtils.kt | 4 +- .../v2/container/BlobUploadContainer.kt | 392 -------- .../ingest/v2/container/ContainerBase.kt | 8 - .../v2/container/UploadContainerBase.kt | 9 - .../ingest/v2/source/AbstractSourceInfo.kt | 14 - .../kusto/ingest/v2/source/BlobSource.kt | 43 + .../kusto/ingest/v2/source/BlobSourceInfo.kt | 196 ---- .../kusto/ingest/v2/source/FileSource.kt | 105 ++ .../kusto/ingest/v2/source/IngestionSource.kt | 38 + .../kusto/ingest/v2/source/LocalSource.kt | 177 +--- .../kusto/ingest/v2/source/SourceInfo.kt | 12 - .../kusto/ingest/v2/source/StreamSource.kt | 34 + .../v2/uploader/ContainerUploaderBase.kt | 580 +++++++++++ .../v2/uploader/ExtendedContainerInfo.kt | 10 + .../kusto/ingest/v2/uploader/IUploader.kt | 36 + .../ingest/v2/uploader/ManagedUploader.kt | 137 +++ .../v2/uploader/ManagedUploaderBuilder.kt | 140 +++ .../kusto/ingest/v2/uploader/UploadMethod.kt | 15 + .../models}/UploadErrorCode.kt | 3 +- .../models}/UploadResult.kt | 2 +- ingest-v2/src/main/resources/app.properties | 1 + .../ingest/v2/ConfigurationClientTest.kt | 55 +- .../azure/kusto/ingest/v2/IngestV2TestBase.kt | 75 +- .../v2/ManagedStreamingIngestClientTest.kt | 442 +++----- .../kusto/ingest/v2/QueuedIngestClientTest.kt | 952 ++++++++++++++++++ .../ingest/v2/QueuedIngestionClientTest.kt | 680 ------------- .../ingest/v2/StreamingIngestClientTest.kt | 229 +---- .../common/DefaultConfigurationCacheTest.kt | 169 
++++ .../test/resources/compression/sample.avro | Bin 414 -> 441 bytes .../test/resources/compression/sample.json | 6 +- .../test/resources/compression/sample.json.gz | Bin 171 -> 0 bytes .../resources/compression/sample.multijson | 8 + .../resources/compression/sample.multijson.gz | Bin 0 -> 155 bytes .../compression/sample.multijson.zip | Bin 0 -> 286 bytes .../test/resources/compression/sample.parquet | Bin 3795 -> 2423 bytes 67 files changed, 5533 insertions(+), 4171 deletions(-) delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingErrorState.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypes.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtils.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt delete mode 100644 
ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ExtendedContainerInfo.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/IUploader.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploadMethod.kt rename ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/{container => uploader/models}/UploadErrorCode.kt (90%) rename ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/{container => uploader/models}/UploadResult.kt (94%) create mode 100644 ingest-v2/src/main/resources/app.properties create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt delete mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt delete mode 100644 ingest-v2/src/test/resources/compression/sample.json.gz create mode 100644 ingest-v2/src/test/resources/compression/sample.multijson create mode 100644 ingest-v2/src/test/resources/compression/sample.multijson.gz create mode 100644 ingest-v2/src/test/resources/compression/sample.multijson.zip diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 8bf208c01..f1f313cfd 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -3,13 +3,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 ingest-v2 - ${revision} ingest-v2 ingest-v2 + 4.3.0 official 2.2.10 - 3.3.0 + 3.3.3 3.1.1 1.10.2 1.4.14 @@ -47,16 +47,6 @@ ktor-client-java-jvm ${ktor.version} - - io.ktor - ktor-serialization-jackson - ${ktor.version} - - - io.ktor - ktor-serialization-jackson - ${ktor.version} - org.slf4j slf4j-simple @@ -66,6 +56,10 @@ com.azure azure-identity + + com.azure + azure-storage-file-datalake + org.jetbrains.kotlin kotlin-test-junit5 @@ -84,18 +78,18 @@ ${junit.version} test - - ${project.groupId} - kusto-data - ${project.parent.version} - test - io.mockk mockk-jvm 1.14.5 test + + org.awaitility + awaitility + ${awaitility.version} + test + ${project.groupId} kusto-data @@ -188,7 +182,7 @@ generate - integer=java.lang.Long,int=java.lang.Long + integer=kotlin.Long,int=kotlin.Long bearer @@ -284,10 +278,11 @@ org.apache.maven.plugins maven-surefire-plugin + ${maven-surefire-plugin.version} - methods - 4 + classesAndMethods + 8 1 true diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt index 3d30e593e..98b2f322e 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt +++ 
b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt @@ -4,6 +4,7 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import io.ktor.http.HttpStatusCode @@ -14,7 +15,14 @@ class ConfigurationClient( override val dmUrl: String, override val tokenCredential: TokenCredential, override val skipSecurityChecks: Boolean = false, -) : KustoBaseApiClient(dmUrl, tokenCredential, skipSecurityChecks) { + override val clientDetails: ClientDetails, +) : + KustoBaseApiClient( + dmUrl, + tokenCredential, + skipSecurityChecks, + clientDetails, + ) { private val logger = LoggerFactory.getLogger(ConfigurationClient::class.java) private val baseUrl = "$dmUrl/v1/rest/ingestion/configuration" diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt deleted file mode 100644 index f1a4c6fd3..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestClient.kt +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2 - -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse -import com.microsoft.azure.kusto.ingest.v2.models.Format -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse -import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse -import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo -import io.ktor.http.HttpStatusCode -import org.slf4j.Logger -import org.slf4j.LoggerFactory -import java.net.ConnectException - -/** - * Interface that provides core abstraction for ingesting data into Kusto. - * - * Supports multiple source types: - * - BlobSourceInfo: Ingest from Azure Blob Storage - * - FileSourceInfo: Ingest from local files - * - StreamSourceInfo: Ingest from in-memory streams - */ -interface IngestClient { - - val logger: Logger - get() = LoggerFactory.getLogger(IngestClient::class.java) - - /** - * Submits an ingestion request from any source type. - * - * @param database The target database name - * @param table The target table name - * @param sources List of sources to ingest (BlobSourceInfo, FileSourceInfo, - * or StreamSourceInfo) - * @param format The data format (CSV, JSON, others) - * @param ingestProperties Optional ingestion properties - * @return IngestResponse containing the operation ID for tracking - */ - suspend fun submitIngestion( - database: String, - table: String, - sources: List, - format: Format = Format.csv, - ingestProperties: IngestRequestProperties? = null, - ): IngestResponse - - /** - * Gets the status of an ingestion operation. 
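With this change, `ClientDetails` becomes a required constructor argument on `ConfigurationClient` rather than a nullable default. A construction sketch; it assumes `ClientDetails.createDefault()` is still available after the move to `common.models`, as it was on the pre-change client, and the cluster URL is a placeholder:

```kotlin
import com.azure.identity.DefaultAzureCredentialBuilder
import com.microsoft.azure.kusto.ingest.v2.ConfigurationClient
import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails

fun buildConfigurationClient(dmUrl: String): ConfigurationClient =
    ConfigurationClient(
        dmUrl = dmUrl, // e.g. "https://ingest-<cluster>.kusto.windows.net" (placeholder)
        tokenCredential = DefaultAzureCredentialBuilder().build(),
        skipSecurityChecks = false,
        // clientDetails is now mandatory; createDefault() is assumed to survive
        // the package move, mirroring the default the old client used.
        clientDetails = ClientDetails.createDefault(),
    )
```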
- * - * @param database The target database name - * @param table The target table name - * @param operationId The operation ID returned from submitIngestion - * @param forceDetails Whether to force retrieval of detailed information - * @return StatusResponse containing the current status - */ - suspend fun getIngestionStatus( - database: String, - table: String, - operationId: String, - forceDetails: Boolean = false, - ): StatusResponse - - /** - * Gets detailed information about an ingestion operation. - * - * @param database The target database name - * @param table The target table name - * @param operationId The operation ID returned from submitIngestion - * @param details Whether to retrieve detailed blob-level information - * @return StatusResponse containing operation details - * @throws UnsupportedOperationException if the implementation doesn't - * support operation tracking - */ - suspend fun getIngestionDetails( - database: String, - table: String, - operationId: String, - details: Boolean = true, - ): StatusResponse - - // Common way to parse ingestion response for both Streaming and Queued ingestion - - suspend fun handleIngestResponse( - response: HttpResponse, - database: String, - table: String, - dmUrl: String, - endpointType: String, - ): T { - if (response.success) { - val ingestResponseBody = response.body() - return ingestResponseBody - } else { - if (response.status == HttpStatusCode.NotFound.value) { - val message = - "Endpoint $dmUrl not found. Please ensure the cluster supports $endpointType ingestion." - logger.error( - "$endpointType ingestion endpoint not found. Please ensure that the target cluster supports $endpointType ingestion and that the endpoint URL is correct.", - ) - throw IngestException( - message = message, - cause = ConnectException(message), - failureCode = response.status, - failureSubCode = "", - isPermanent = false, - ) - } - val nonSuccessResponseBody: T = response.body() - val ingestResponseOperationId = - if (nonSuccessResponseBody is IngestResponse) { - if ( - (nonSuccessResponseBody as IngestResponse) - .ingestionOperationId != null - ) { - logger.info( - "Ingestion Operation ID: ${(nonSuccessResponseBody as IngestResponse).ingestionOperationId}", - ) - nonSuccessResponseBody.ingestionOperationId - } else { - "N/A" - } - } else { - "N/A" - } - val errorMessage = - "Failed to submit $endpointType ingestion to $database.$table. " + - "Status: ${response.status}, Body: $nonSuccessResponseBody. 
" + - "OperationId $ingestResponseOperationId" - logger.error( - "$endpointType ingestion failed with response: {}", - errorMessage, - ) - throw IngestException( - message = errorMessage, - cause = RuntimeException(errorMessage), - isPermanent = true, - ) - } - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt index 1869e77ba..654630195 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt @@ -5,24 +5,9 @@ package com.microsoft.azure.kusto.ingest.v2 // Size of each block to upload to Azure Blob Storage (4 MB) const val UPLOAD_BLOCK_SIZE_BYTES: Long = 4 * 1024 * 1024 -// Maximum number of concurrent upload operations for blob upload -const val UPLOAD_MAX_CONCURRENCY: Int = 8 - // Maximum size for a single upload operation to Azure Blob Storage (256 MB) const val UPLOAD_MAX_SINGLE_SIZE_BYTES: Long = 256 * 1024 * 1024 -// Maximum number of retry attempts for blob upload operations -const val UPLOAD_RETRY_MAX_TRIES: Int = 3 - -// Timeout in seconds for each blob upload attempt -const val UPLOAD_RETRY_TIMEOUT_SECONDS: Int = 60 - -// Initial delay in milliseconds between blob upload retry attempts -const val UPLOAD_RETRY_DELAY_MS: Long = 100 - -// Maximum delay in milliseconds between blob upload retry attempts -const val UPLOAD_RETRY_MAX_DELAY_MS: Long = 300 - // Request timeout in milliseconds for Kusto API HTTP requests const val KUSTO_API_REQUEST_TIMEOUT_MS: Long = 60_000 @@ -53,11 +38,37 @@ const val BLOB_UPLOAD_TIMEOUT_HOURS: Long = 1 // Default retry intervals for CustomRetryPolicy (1s, 3s, 7s) val INGEST_RETRY_POLICY_CUSTOM_INTERVALS: Array = arrayOf(1, 3, 7) -// Default maximum number of retry attempts for container upload operations -const val UPLOAD_CONTAINER_MAX_RETRIES: Int = 3 +// Number of blobs to upload in a single batch +const val MAX_BLOBS_PER_BATCH: Int = 70 // Default maximum data size for blob upload operations (4GB) const val UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES: Long = 4L * 1024 * 1024 * 1024 // Default maximum concurrency for blob upload operations const val UPLOAD_CONTAINER_MAX_CONCURRENCY: Int = 4 + +const val STREAMING_MAX_REQ_BODY_SIZE = 10 * 1024 * 1024 // 10 MB + +// Managed Streaming Policy Defaults + +// Default value for continueWhenStreamingIngestionUnavailable in ManagedStreamingPolicy +// When false, the client will fail if streaming ingestion is unavailable +const val MANAGED_STREAMING_CONTINUE_WHEN_UNAVAILABLE_DEFAULT: Boolean = false + +// Default data size factor for ManagedStreamingPolicy +// Factor used to determine size threshold for queued ingestion (1.0 = no adjustment) +const val MANAGED_STREAMING_DATA_SIZE_FACTOR_DEFAULT: Double = 1.0 + +// Default throttle backoff period in seconds for ManagedStreamingPolicy +// How long to use queued ingestion after streaming is throttled +const val MANAGED_STREAMING_THROTTLE_BACKOFF_SECONDS: Long = 10 + +// Default time until resuming streaming ingestion in minutes for ManagedStreamingPolicy +// How long to use queued ingestion after streaming becomes unavailable +const val MANAGED_STREAMING_RESUME_TIME_MINUTES: Long = 15 + +// Default retry delays for ManagedStreamingPolicy (in seconds): 1s, 2s, 4s +val MANAGED_STREAMING_RETRY_DELAYS_SECONDS: Array = arrayOf(1, 2, 4) + +// Maximum jitter to add to retry delays in milliseconds +const val MANAGED_STREAMING_RETRY_JITTER_MS: Long = 1000 
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt
index 5168e54bb..5e1830bdf 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt
@@ -5,7 +5,7 @@ package com.microsoft.azure.kusto.ingest.v2
 import com.azure.core.credential.TokenCredential
 import com.azure.core.credential.TokenRequestContext
 import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi
-import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails
+import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails
 import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer
 import io.ktor.client.HttpClientConfig
 import io.ktor.client.plugins.DefaultRequest
@@ -15,6 +15,7 @@ import io.ktor.client.plugins.auth.providers.BearerTokens
 import io.ktor.client.plugins.auth.providers.bearer
 import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
 import io.ktor.client.request.header
+import io.ktor.http.ContentType
 import io.ktor.serialization.kotlinx.json.json
 import kotlinx.coroutines.suspendCancellableCoroutine
 import kotlinx.serialization.json.Json
@@ -29,8 +30,10 @@ open class KustoBaseApiClient(
     open val dmUrl: String,
     open val tokenCredential: TokenCredential,
     open val skipSecurityChecks: Boolean = false,
-    open val clientDetails: ClientDetails? = ClientDetails.createDefault(),
+    open val clientDetails: ClientDetails,
     open val clientRequestIdPrefix: String = "KIC.execute",
+    open val s2sTokenProvider: (suspend () -> Pair<String, String>)? = null,
+    open val s2sFabricPrivateLinkAccessContext: String?
= null, ) { private val logger = LoggerFactory.getLogger(KustoBaseApiClient::class.java) @@ -38,15 +41,17 @@ open class KustoBaseApiClient( getClientConfig(config) } - protected val api: DefaultApi by lazy { + val engineUrl: String + get() = dmUrl.replace(Regex("https://ingest-"), "https://") + + val api: DefaultApi by lazy { DefaultApi(baseUrl = dmUrl, httpClientConfig = setupConfig) } private fun getClientConfig(config: HttpClientConfig<*>) { config.install(DefaultRequest) { - header("Content-Type", "application/json") - - clientDetails?.let { details -> + header("Content-Type", ContentType.Application.Json.toString()) + clientDetails.let { details -> header("x-ms-app", details.getApplicationForTracing()) header("x-ms-user", details.getUserNameForTracing()) header( @@ -59,8 +64,8 @@ open class KustoBaseApiClient( val clientRequestId = "$clientRequestIdPrefix;${UUID.randomUUID()}" header("x-ms-client-request-id", clientRequestId) header("x-ms-version", KUSTO_API_VERSION) - header("Connection", "Keep-Alive") - header("Accept", "application/json") + header("Connection", "keep-alive") + header("Accept", ContentType.Application.Json.toString()) } val trc = TokenRequestContext().addScopes("$dmUrl/.default") config.install(Auth) { @@ -108,6 +113,41 @@ open class KustoBaseApiClient( } } } + + // Add S2S authorization and Fabric Private Link headers using request interceptor + s2sTokenProvider?.let { provider -> + config.install( + io.ktor.client.plugins.api.createClientPlugin( + "S2SAuthPlugin", + ) { + onRequest { request, _ -> + try { + // Get S2S token + val (token, scheme) = provider() + request.headers.append( + "x-ms-s2s-actor-authorization", + "$scheme $token", + ) + + // Add Fabric Private Link access context header + s2sFabricPrivateLinkAccessContext?.let { context, + -> + request.headers.append( + "x-ms-fabric-s2s-access-context", + context, + ) + } + } catch (e: Exception) { + logger.error( + "Error retrieving S2S token: ${e.message}", + e, + ) + throw e + } + } + }, + ) + } config.install(ContentNegotiation) { json( Json { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt deleted file mode 100644 index 833c40722..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClient.kt +++ /dev/null @@ -1,657 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
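The plugin installed above is a small, reusable pattern: a Ktor `createClientPlugin` whose `onRequest` hook resolves a `(token, scheme)` pair from a suspending provider and appends the S2S headers. A standalone sketch, assuming only the Ktor client API already used in this hunk; the provider lambda and builder function are stand-ins:

```kotlin
import io.ktor.client.HttpClient
import io.ktor.client.plugins.api.createClientPlugin

// Build a client that attaches S2S actor-authorization and (optionally) the
// Fabric private-link access context header to every outgoing request.
fun s2sHeadersClient(
    tokenProvider: suspend () -> Pair<String, String>, // stand-in for s2sTokenProvider
    accessContext: String? = null,
): HttpClient = HttpClient {
    install(
        createClientPlugin("S2SAuthPlugin") {
            onRequest { request, _ ->
                val (token, scheme) = tokenProvider()
                request.headers.append("x-ms-s2s-actor-authorization", "$scheme $token")
                accessContext?.let {
                    request.headers.append("x-ms-fabric-s2s-access-context", it)
                }
            }
        },
    )
}
```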
-package com.microsoft.azure.kusto.ingest.v2 - -import com.azure.core.credential.TokenCredential -import com.microsoft.azure.kusto.ingest.v2.common.Retry -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.common.runWithRetry -import com.microsoft.azure.kusto.ingest.v2.models.Format -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse -import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse -import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.LocalSource -import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo -import io.ktor.http.HttpStatusCode -import org.slf4j.Logger -import org.slf4j.LoggerFactory -import java.io.InputStream -import java.time.Clock -import java.time.Duration -import java.time.Instant -import kotlin.time.Duration.Companion.milliseconds - -/** - * ManagedStreamingIngestClient chooses between streaming and queued ingestion - * based on data size, service availability, and error patterns. - * - * This client: - * - Attempts streaming ingestion first for eligible data - * - Automatically falls back to queued ingestion on failures - * - Implements retry logic with exponential backoff - * - Tracks per-table error patterns to optimize future ingestion attempts - * - Respects streaming ingestion limits and policies - * - * @param clusterUrl The Kusto cluster URL (used for both streaming and queued - * ingestion) - * @param tokenCredential Azure credential for authentication - * @param managedStreamingPolicy Policy controlling fallback behavior and retry - * logic - * @param skipSecurityChecks Whether to skip security checks (for testing) - */ -class ManagedStreamingIngestClient( - private val clusterUrl: String, - private val tokenCredential: TokenCredential, - private val managedStreamingPolicy: ManagedStreamingPolicy = - DefaultManagedStreamingPolicy(), - private val skipSecurityChecks: Boolean = false, -) : IngestClient { - - override val logger: Logger = - LoggerFactory.getLogger(ManagedStreamingIngestClient::class.java) - - private val streamingIngestClient = - StreamingIngestClient( - engineUrl = clusterUrl, - tokenCredential = tokenCredential, - skipSecurityChecks = skipSecurityChecks, - ) - - private val queuedIngestionClient = - QueuedIngestionClient( - dmUrl = clusterUrl, - tokenCredential = tokenCredential, - skipSecurityChecks = skipSecurityChecks, - ) - - // Maximum size for streaming ingestion (4MB default, can be tuned with dataSizeFactor) - private val maxStreamingIngestSize: Long = - (4 * 1024 * 1024 * managedStreamingPolicy.dataSizeFactor).toLong() - - /** - * Submits an ingestion request, intelligently choosing between streaming - * and queued ingestion. - * - * @param database The target database name - * @param table The target table name - * @param sources List of SourceInfo objects (BlobSourceInfo, - * FileSourceInfo, or StreamSourceInfo) - * @param format The data format - * @param ingestProperties Optional ingestion properties - * @return IngestResponse for tracking the request - */ - suspend fun submitManagedIngestion( - database: String, - table: String, - sources: List, - format: Format = Format.csv, - ingestProperties: IngestRequestProperties? 
= null, - ): IngestResponse { - require(database.isNotBlank()) { "Database name cannot be blank" } - require(table.isNotBlank()) { "Table name cannot be blank" } - require(sources.isNotEmpty()) { "Sources list cannot be empty" } - - val props = ingestProperties ?: IngestRequestProperties(format = format) - - logger.info( - "Starting managed ingestion for database: $database, table: $table, sources: ${sources.size}", - ) - - // Process each source - for (source in sources) { - when (source) { - is BlobSourceInfo -> ingestBlob(source, database, table, props) - is LocalSource -> ingestLocal(source, database, table, props) - else -> - throw IngestException( - "Unsupported source type: ${source::class.simpleName}", - isPermanent = true, - ) - } - } - - // Return a combined response (for now, return success) - return IngestResponse( - ingestionOperationId = - "managed-${Instant.now(Clock.systemUTC())}", - ) - } - - private suspend fun ingestBlob( - source: BlobSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - ): IngestResponse { - if (shouldUseQueuedIngestByPolicy(source, database, table, props)) { - logger.info( - "Policy dictates using queued ingestion for blob: ${source.blobPath}", - ) - return invokeQueuedIngestion(source, database, table, props) - } - - return invokeStreamingIngestion(source, database, table, props) - } - - private suspend fun ingestLocal( - source: LocalSource, - database: String, - table: String, - props: IngestRequestProperties, - ): IngestResponse { - val stream = source.data() - - if (!isStreamValid(stream)) { - throw IngestException( - "Stream is not valid for ingest. Ensure the stream is not null, has data, and is seekable.", - isPermanent = true, - ) - } - - if (shouldUseQueuedIngestBySize(source)) { - logger.info( - "Data size exceeds streaming limit, using queued ingestion", - ) - return invokeQueuedIngestion(source, database, table, props) - } - - if (shouldUseQueuedIngestByPolicy(source, database, table, props)) { - logger.info( - "Policy dictates using queued ingestion for local source: ${source.name}", - ) - return invokeQueuedIngestion(source, database, table, props) - } - - return invokeStreamingIngestion(source, database, table, props) - } - - private fun isStreamValid(stream: InputStream): Boolean { - return try { - // Mark the current position if supported - if (stream.markSupported()) { - stream.mark(1) - val hasData = stream.read() != -1 - stream.reset() // Reset to marked position - hasData - } else { - // For non-markable streams, check available bytes - stream.available() > 0 - } - } catch (e: Exception) { - logger.warn("Stream validation failed: ${e.message}") - false - } - } - - private fun shouldUseQueuedIngestBySize(source: LocalSource): Boolean { - val size = source.size() - - if (size == null) { - logger.warn( - "Could not determine data size for ${source::class.simpleName}", - ) - return false - } - - if (size > maxStreamingIngestSize) { - logger.info( - "Data size '$size' exceeds streaming limit '$maxStreamingIngestSize'. " + - "DataSizeFactor used: ${managedStreamingPolicy.dataSizeFactor}. 
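The size gate in the removed `ingestLocal`/`shouldUseQueuedIngestBySize` pair reduces to a couple of pure functions: scale the 4 MB streaming cap by the policy's `dataSizeFactor`, and queue anything larger, while sources of unknown size optimistically try streaming. A sketch, with illustrative constant and function names:

```kotlin
// Base streaming cap, mirroring the 4 MB default described above.
const val BASE_STREAMING_LIMIT_BYTES: Long = 4L * 1024 * 1024

fun effectiveStreamingLimit(dataSizeFactor: Double): Long =
    (BASE_STREAMING_LIMIT_BYTES * dataSizeFactor).toLong()

fun shouldQueueBySize(sizeBytes: Long?, dataSizeFactor: Double = 1.0): Boolean =
    // Unknown size: try streaming first, matching the client's optimistic path.
    sizeBytes != null && sizeBytes > effectiveStreamingLimit(dataSizeFactor)

fun main() {
    check(!shouldQueueBySize(1024))                  // 1 KB streams
    check(shouldQueueBySize(8L * 1024 * 1024))       // 8 MB is queued
    check(!shouldQueueBySize(8L * 1024 * 1024, 4.0)) // unless the factor raises the cap
}
```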
Using queued ingestion.", - ) - return true - } - - return false - } - - private fun shouldUseQueuedIngestByPolicy( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - ): Boolean { - if ( - managedStreamingPolicy.shouldDefaultToQueuedIngestion( - source, - database, - table, - props, - ) - ) { - logger.info( - "ManagedStreamingPolicy indicates fallback to queued ingestion", - ) - return true - } - return false - } - - private suspend fun invokeStreamingIngestion( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - ): IngestResponse { - val startTime = Instant.now(Clock.systemUTC()).toEpochMilli() - - val result = - managedStreamingPolicy.retryPolicy.runWithRetry( - action = { attempt -> - val attemptStartTime = - Instant.now(Clock.systemUTC()) - .toEpochMilli() - - try { - val response = - when (source) { - is BlobSourceInfo -> { - streamingIngestClient - .submitStreamingIngestion( - database = - database, - table = table, - // Not used for blob-based streaming - data = - ByteArray( - 0, - ), - format = - props - .format, - ingestProperties = - props, - blobUrl = - source - .blobPath, - ) - IngestResponse( - ingestionOperationId = - source.sourceId - .toString(), - ) - } - - is LocalSource -> { - val data = - source.data() - .readBytes() - streamingIngestClient - .submitStreamingIngestion( - database = - database, - table = table, - data = data, - format = - props - .format, - ingestProperties = - props, - ) - IngestResponse( - ingestionOperationId = - source.sourceId - .toString(), - ) - } - - else -> - throw IngestException( - "Unsupported source type for streaming: ${source::class.simpleName}", - isPermanent = true, - ) - } - - val duration = - Duration.ofMillis( - Instant.now(Clock.systemUTC()) - .toEpochMilli() - - attemptStartTime, - ) - managedStreamingPolicy.streamingSuccessCallback( - source, - database, - table, - props, - ManagedStreamingRequestSuccessDetails( - duration, - ), - ) - - logger.info( - "Streaming ingestion succeeded for ${source::class.simpleName} on attempt $attempt. 
Duration: ${duration.toMillis()}ms", - ) - response - } catch (e: Exception) { - logger.warn( - "Streaming ingestion attempt $attempt failed: ${e.message}", - ) - throw e - } - }, - onRetry = { attempt, ex, _ -> - logger.debug( - "Retrying streaming ingestion after attempt $attempt due to: ${ex.message}", - ) - if (source is LocalSource) { - try { - source.reset() - } catch (e: Exception) { - logger.warn( - "Failed to reset source stream: ${e.message}", - ) - } - } - }, - shouldRetry = { attempt, ex, isPermanent -> - decideOnException( - source, - database, - table, - props, - startTime, - isPermanent, - ex, - ) - }, - throwOnExhaustedRetries = false, - tracer = { msg -> logger.debug(msg) }, - ) - - if (result == null) { - logger.info( - "Streaming ingestion failed after retries, falling back to queued ingestion for ${source::class.simpleName}", - ) - - if (source is LocalSource) { - try { - source.reset() - } catch (e: Exception) { - logger.warn( - "Failed to reset source stream before queued ingestion: ${e.message}", - ) - } - } - return invokeQueuedIngestion(source, database, table, props) - } - - return result - } - - /** Decides whether to retry, throw, or break based on the exception */ - private fun decideOnException( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - startTime: Long, - isPermanent: Boolean, - ex: Exception, - ): Retry { - val duration = - Duration.ofMillis( - Instant.now(Clock.systemUTC()).toEpochMilli() - - startTime, - ) - - // Handle transient errors - if (!isPermanent) { - reportTransientException( - source, - database, - table, - props, - ex, - duration, - ) - return Retry(shouldRetry = true, interval = Duration.ZERO) - } - - // Handle permanent errors - if (ex !is IngestException) { - reportUnknownException(source, database, table, props, ex, duration) - return Retry(shouldRetry = false, interval = Duration.ZERO) - } - - // Check if we should fallback to queued ingestion - if ( - shouldFallbackToQueuedOnPermanentError( - ex, - source, - database, - table, - props, - duration, - ) - ) { - return Retry(shouldRetry = false, interval = Duration.ZERO) - } - - logger.error( - "Permanent error occurred while trying streaming ingest, not switching to queued according to policy. 
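Stripped of logging and policy callbacks, the control flow removed here is: retry streaming a bounded number of times, rewind the source between attempts, and hand off to queued ingestion once retries are exhausted. A sketch under those assumptions, with all hooks as stand-in lambdas:

```kotlin
// Bounded streaming retries with a reset hook between attempts (the client
// rewinds LocalSource streams in onRetry), falling back to queued ingestion.
suspend fun <R> streamThenQueue(
    maxAttempts: Int,
    resetSource: () -> Unit,
    tryStreaming: suspend (attempt: Int) -> R,
    queuedFallback: suspend () -> R,
): R {
    repeat(maxAttempts) { i ->
        try {
            return tryStreaming(i + 1)
        } catch (e: Exception) {
            // Swallow and retry; real code logs e and consults the retry policy.
            resetSource()
        }
    }
    return queuedFallback()
}
```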
Error: ${ex.message}", - ) - return Retry(shouldRetry = false, interval = Duration.ZERO) - } - - /** Reports a transient exception to the policy */ - private fun reportTransientException( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - ex: Exception, - duration: Duration, - ) { - val errorCategory = - if ( - ex is IngestException && - ex.failureCode == - HttpStatusCode.TooManyRequests.value - ) { - ManagedStreamingErrorCategory.THROTTLED - } else { - ManagedStreamingErrorCategory.OTHER_ERRORS - } - - logger.warn( - "Streaming ingestion transient error: ${ex.message}, category: $errorCategory", - ) - - managedStreamingPolicy.streamingErrorCallback( - source, - database, - table, - props, - ManagedStreamingRequestFailureDetails( - duration = duration, - isPermanent = false, - errorCategory = errorCategory, - exception = ex, - ), - ) - } - - private fun reportUnknownException( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - ex: Exception, - duration: Duration, - ) { - logger.error( - "Unexpected error occurred during streaming ingestion: ${ex.message}", - ex, - ) - - managedStreamingPolicy.streamingErrorCallback( - source, - database, - table, - props, - ManagedStreamingRequestFailureDetails( - duration = duration, - isPermanent = true, - errorCategory = - ManagedStreamingErrorCategory.UNKNOWN_ERRORS, - exception = ex, - ), - ) - } - - private fun shouldFallbackToQueuedOnPermanentError( - ex: IngestException, - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - duration: Duration, - ): Boolean { - val failureSubCode = ex.failureSubCode - - val errorCategory: ManagedStreamingErrorCategory - val shouldFallback: Boolean - - when { - // Streaming ingestion policy turned off - failureSubCode?.contains( - "StreamingIngestionPolicyNotEnabled", - ignoreCase = true, - ) == true || - failureSubCode?.contains( - "StreamingIngestionDisabledForCluster", - ignoreCase = true, - ) == true -> { - errorCategory = - ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF - shouldFallback = - managedStreamingPolicy - .continueWhenStreamingIngestionUnavailable - logger.info( - "Streaming ingestion is off, fallback to queued ingestion is " + - "${if (shouldFallback) "enabled" else "disabled"}. Error: ${ex.message}", - ) - } - - // Table configuration prevents streaming - failureSubCode?.contains( - "UpdatePolicyIncompatible", - ignoreCase = true, - ) == true || - failureSubCode?.contains( - "QuerySchemaDoesNotMatchTableSchema", - ignoreCase = true, - ) == true -> { - errorCategory = - ManagedStreamingErrorCategory - .TABLE_CONFIGURATION_PREVENTS_STREAMING - shouldFallback = true - logger.info( - "Fallback to queued ingestion due to table configuration. Error: ${ex.message}", - ) - } - - // Request properties prevent streaming (e.g., file too large) - failureSubCode?.contains("FileTooLarge", ignoreCase = true) == - true || - failureSubCode?.contains( - "InputStreamTooLarge", - ignoreCase = true, - ) == true || - ex.failureCode == 413 -> { // 413 Payload Too Large - errorCategory = - ManagedStreamingErrorCategory - .REQUEST_PROPERTIES_PREVENT_STREAMING - shouldFallback = true - logger.info( - "Fallback to queued ingestion due to request properties. 
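The deleted `shouldFallbackToQueuedOnPermanentError` is essentially a decision table over the service's failure sub-code and HTTP status. A condensed, self-contained restatement (enum names shortened; the sub-code strings and the 413 case are taken from the code above):

```kotlin
enum class StreamingErrorCategory { STREAMING_OFF, TABLE_CONFIG, REQUEST_PROPERTIES, OTHER }

// Map a permanent streaming failure to (category, fallback-to-queued?).
fun fallbackDecision(
    failureSubCode: String?,
    failureCode: Int?,
    continueWhenStreamingUnavailable: Boolean,
): Pair<StreamingErrorCategory, Boolean> = when {
    failureSubCode?.contains("StreamingIngestionPolicyNotEnabled", ignoreCase = true) == true ||
        failureSubCode?.contains("StreamingIngestionDisabledForCluster", ignoreCase = true) == true ->
        StreamingErrorCategory.STREAMING_OFF to continueWhenStreamingUnavailable
    failureSubCode?.contains("UpdatePolicyIncompatible", ignoreCase = true) == true ||
        failureSubCode?.contains("QuerySchemaDoesNotMatchTableSchema", ignoreCase = true) == true ->
        StreamingErrorCategory.TABLE_CONFIG to true
    failureSubCode?.contains("FileTooLarge", ignoreCase = true) == true ||
        failureSubCode?.contains("InputStreamTooLarge", ignoreCase = true) == true ||
        failureCode == 413 -> // Payload Too Large
        StreamingErrorCategory.REQUEST_PROPERTIES to true
    else -> StreamingErrorCategory.OTHER to false
}
```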
Error: ${ex.message}", - ) - } - - else -> { - errorCategory = ManagedStreamingErrorCategory.OTHER_ERRORS - shouldFallback = false - logger.info( - "Not falling back to queued ingestion for this exception: ${ex.message}", - ) - } - } - - managedStreamingPolicy.streamingErrorCallback( - source, - database, - table, - props, - ManagedStreamingRequestFailureDetails( - duration = duration, - isPermanent = true, - errorCategory = errorCategory, - exception = ex, - ), - ) - - return shouldFallback - } - - private suspend fun invokeQueuedIngestion( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties, - ): IngestResponse { - logger.info("Invoking queued ingestion for ${source::class.simpleName}") - - return queuedIngestionClient.submitQueuedIngestion( - database = database, - table = table, - sources = listOf(source), - format = props.format, - ingestProperties = props, - ) - } - - suspend fun pollUntilCompletion( - database: String, - table: String, - operationId: String, - pollingInterval: kotlin.time.Duration = 30.milliseconds, - timeout: kotlin.time.Duration = 300.milliseconds, - ): StatusResponse { - return queuedIngestionClient.pollUntilCompletion( - database = database, - table = table, - operationId = operationId, - pollingInterval = pollingInterval, - timeout = timeout, - ) - } - - override suspend fun submitIngestion( - database: String, - table: String, - sources: List, - format: Format, - ingestProperties: IngestRequestProperties?, - ): IngestResponse { - TODO("Not yet implemented") - } - - override suspend fun getIngestionStatus( - database: String, - table: String, - operationId: String, - forceDetails: Boolean, - ): StatusResponse { - TODO("Not yet implemented") - } - - override suspend fun getIngestionDetails( - database: String, - table: String, - operationId: String, - details: Boolean, - ): StatusResponse { - TODO("Not yet implemented") - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt deleted file mode 100644 index 624bfcdd5..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingPolicy.kt +++ /dev/null @@ -1,331 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2 - -import com.microsoft.azure.kusto.ingest.v2.common.CustomRetryPolicy -import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo -import java.time.Duration -import java.time.Instant -import java.util.concurrent.ConcurrentHashMap -import kotlin.random.Random - -/** Error categories for managed streaming ingestion failures */ -enum class ManagedStreamingErrorCategory { - /** - * Indicates that streaming cannot be performed due to the properties of the - * request itself but would likely succeed if queued. These errors are - * request-specific and do not imply anything about following requests. - */ - REQUEST_PROPERTIES_PREVENT_STREAMING, - - /** - * Indicates streaming cannot be performed due to a conflicting table - * configuration, but may succeed if queued. These errors are table-specific - * and following requests will behave similarly until the conflict is - * resolved on the service side. 
- */ - TABLE_CONFIGURATION_PREVENTS_STREAMING, - - /** - * Indicates streaming cannot be performed due to some service - * configuration. To resolve these errors, a service side change is required - * to use streaming. - */ - STREAMING_INGESTION_OFF, - - /** - * Indicates streaming ingestion endpoint is throttled and returns HTTP - * TooManyRequests error code (429) - */ - THROTTLED, - - /** Reported for all other types of streaming errors */ - OTHER_ERRORS, - - /** Reported when an unexpected error type occurred */ - UNKNOWN_ERRORS, -} - -/** Details about a successful streaming ingestion request */ -data class ManagedStreamingRequestSuccessDetails(val duration: Duration) - -/** Details about a failed streaming ingestion request */ -data class ManagedStreamingRequestFailureDetails( - val duration: Duration, - val isPermanent: Boolean, - val errorCategory: ManagedStreamingErrorCategory, - val exception: Exception, -) - -/** - * A policy which controls the way the managed streaming ingest client behaves - * when there are errors. - */ -interface ManagedStreamingPolicy { - /** - * When streaming ingestion is disabled for the table, database or cluster, - * determine if the client will fallback to queued ingestion. When set to - * false managed streaming client will fail ingestions for tables where - * streaming policy is not enabled. Enabling this property means the client - * might use queued ingestion exclusively without the caller knowing. - * Permanent errors in streaming ingestion that are not errors in queued - * ingestion, will fallback to queued ingestion regardless of this setting. - */ - val continueWhenStreamingIngestionUnavailable: Boolean - - /** - * The retry policy for transient failures before falling back to queued - * ingestion - */ - val retryPolicy: IngestRetryPolicy - - /** - * A size factor that enables tuning up and down the upper limit of data - * sent to streaming. Default value is 1.0. 
- */ - val dataSizeFactor: Double - - /** - * Should this ingestion attempt skip streaming and go directly to queued - * ingestion - * - * @param source The ingestion source - * @param database The target database name - * @param table The target table name - * @param props The ingestion properties - * @return false if streaming should be attempted, true if streaming should - * be skipped - */ - fun shouldDefaultToQueuedIngestion( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties?, - ): Boolean - - /** - * This callback will be called when a streaming error occurs - * - * @param source The ingestion source - * @param database The target database name - * @param table The target table name - * @param props The ingestion properties - * @param failureDetails Details about the failure - */ - fun streamingErrorCallback( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties?, - failureDetails: ManagedStreamingRequestFailureDetails, - ) - - /** - * This callback will be called when streaming succeeds - * - * @param source The ingestion source - * @param database The target database name - * @param table The target table name - * @param props The ingestion properties - * @param successDetails Details about the success - */ - fun streamingSuccessCallback( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties?, - successDetails: ManagedStreamingRequestSuccessDetails, - ) -} - -/** - * This is the default policy used by the managed streaming ingestion client. - * Whenever there is a permanent streaming error, it defaults to queued - * ingestion for a time period defined by timeUntilResumingStreamingIngest. - */ -class DefaultManagedStreamingPolicy( - override val continueWhenStreamingIngestionUnavailable: Boolean = false, - override val retryPolicy: IngestRetryPolicy = - CustomRetryPolicy( - arrayOf( - Duration.ofSeconds( - DEFAULT_RETRY_FIRST_DELAY_SECONDS, - ) - .plusMillis( - Random.nextLong( - DEFAULT_JITTER_MIN_MS, - DEFAULT_JITTER_MAX_MS, - ), - ), - Duration.ofSeconds( - DEFAULT_RETRY_SECOND_DELAY_SECONDS, - ) - .plusMillis( - Random.nextLong( - DEFAULT_JITTER_MIN_MS, - DEFAULT_JITTER_MAX_MS, - ), - ), - Duration.ofSeconds( - DEFAULT_RETRY_THIRD_DELAY_SECONDS, - ) - .plusMillis( - Random.nextLong( - DEFAULT_JITTER_MIN_MS, - DEFAULT_JITTER_MAX_MS, - ), - ), - ), - ), - override val dataSizeFactor: Double = DEFAULT_DATA_SIZE_FACTOR, - /** - * When streaming is throttled, the client will fallback to queued - * ingestion. This property controls how long the client will use queued - * ingestion in the case of streaming is throttled before trying to - * resume streaming ingestion again. - */ - val throttleBackoffPeriod: Duration = - Duration.ofSeconds(DEFAULT_THROTTLE_BACKOFF_SECONDS), - /** - * When streaming ingestion is unavailable, the client will fallback to - * queued ingestion. This property controls how long the client will use - * queued ingestion before trying to resume streaming ingestion again. - */ - val timeUntilResumingStreamingIngest: Duration = - Duration.ofMinutes(DEFAULT_RESUME_STREAMING_MINUTES), -) : ManagedStreamingPolicy { - - private val defaultToQueuedUntilTimeByTable = - ConcurrentHashMap< - Pair, - Pair, - >() - - companion object { - /** - * Default data size factor for tuning the upper limit of data sent to - * streaming. A value of 1.0 means no adjustment to the default limit. 
- */ - private const val DEFAULT_DATA_SIZE_FACTOR = 1.0 - - /** - * Default delay in seconds for the first retry attempt when streaming - * ingestion fails transiently. - */ - private const val DEFAULT_RETRY_FIRST_DELAY_SECONDS = 1L - - /** - * Default delay in seconds for the second retry attempt when streaming - * ingestion fails transiently. - */ - private const val DEFAULT_RETRY_SECOND_DELAY_SECONDS = 2L - - /** - * Default delay in seconds for the third retry attempt when streaming - * ingestion fails transiently. - */ - private const val DEFAULT_RETRY_THIRD_DELAY_SECONDS = 4L - - /** - * Minimum jitter value in milliseconds added to retry delays to avoid - * thundering herd problems. - */ - private const val DEFAULT_JITTER_MIN_MS = 0L - - /** - * Maximum jitter value in milliseconds added to retry delays to avoid - * thundering herd problems. Adds up to 1 second of random delay to each - * retry attempt. - */ - private const val DEFAULT_JITTER_MAX_MS = 1000L - - /** - * Default backoff period in seconds when streaming ingestion is - * throttled (HTTP 429). The client will use queued ingestion for this - * duration before attempting streaming again. - */ - private const val DEFAULT_THROTTLE_BACKOFF_SECONDS = 10L - - /** - * Default time in minutes to wait before resuming streaming ingestion - * attempts after streaming becomes unavailable due to configuration or - * policy issues. - */ - private const val DEFAULT_RESUME_STREAMING_MINUTES = 15L - } - - override fun shouldDefaultToQueuedIngestion( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties?, - ): Boolean { - val key = Pair(database, table) - - val useQueuedUntilTime = defaultToQueuedUntilTimeByTable[key] - if (useQueuedUntilTime != null) { - val (dateTime, errorCategory) = useQueuedUntilTime - if (dateTime.isAfter(Instant.now())) { - if ( - errorCategory == - ManagedStreamingErrorCategory - .STREAMING_INGESTION_OFF && - !continueWhenStreamingIngestionUnavailable - ) { - return false - } - return true - } - defaultToQueuedUntilTimeByTable.remove(key) - } - - return false - } - - override fun streamingErrorCallback( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties?, - failureDetails: ManagedStreamingRequestFailureDetails, - ) { - val key = Pair(database, table) - when (failureDetails.errorCategory) { - ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF, - ManagedStreamingErrorCategory - .TABLE_CONFIGURATION_PREVENTS_STREAMING, - -> { - defaultToQueuedUntilTimeByTable[key] = - Pair( - Instant.now() - .plus(timeUntilResumingStreamingIngest), - failureDetails.errorCategory, - ) - } - - ManagedStreamingErrorCategory.THROTTLED -> { - defaultToQueuedUntilTimeByTable[key] = - Pair( - Instant.now().plus(throttleBackoffPeriod), - failureDetails.errorCategory, - ) - } - - else -> { - // No action for other error categories - } - } - } - - override fun streamingSuccessCallback( - source: AbstractSourceInfo, - database: String, - table: String, - props: IngestRequestProperties?, - successDetails: ManagedStreamingRequestSuccessDetails, - ) { - // Default implementation does nothing - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt deleted file mode 100644 index b775b29cb..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClient.kt +++ /dev/null @@ -1,429 
+0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2 - -import com.azure.core.credential.TokenCredential -import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails -import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionResultUtils -import com.microsoft.azure.kusto.ingest.v2.container.BlobUploadContainer -import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse -import com.microsoft.azure.kusto.ingest.v2.models.Blob -import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus -import com.microsoft.azure.kusto.ingest.v2.models.Format -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequest -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse -import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse -import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.LocalSource -import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo -import io.ktor.http.HttpStatusCode -import kotlinx.coroutines.delay -import kotlinx.coroutines.withTimeoutOrNull -import java.lang.Long -import java.time.Clock -import java.time.OffsetDateTime -import kotlin.time.Duration - -class QueuedIngestionClient( - override val dmUrl: String, - override val tokenCredential: TokenCredential, - override val skipSecurityChecks: Boolean = false, - override val clientDetails: ClientDetails? = null, - private val maxConcurrency: Int? = null, - private val maxDataSize: kotlin.Long? 
= null, - private val ignoreFileSize: Boolean = false, -) : - KustoBaseApiClient( - dmUrl, - tokenCredential, - skipSecurityChecks, - clientDetails, - ), - IngestClient { - - override suspend fun submitIngestion( - database: String, - table: String, - sources: List, - format: Format, - ingestProperties: IngestRequestProperties?, - ): IngestResponse { - return submitQueuedIngestion( - database = database, - table = table, - sources = sources, - format = format, - ingestProperties = ingestProperties, - failOnPartialUploadError = true, - ) - } - - override suspend fun getIngestionStatus( - database: String, - table: String, - operationId: String, - forceDetails: Boolean, - ): StatusResponse { - // If details are explicitly requested, use the details API - if (forceDetails) { - val statusResponse = - getIngestionDetails(database, table, operationId, true) - logger.debug( - "Forcing detailed status retrieval for operation: {} returning {}", - operationId, - statusResponse, - ) - return statusResponse - } - // Start with summary for efficiency - val statusResponse = - getIngestionDetails(database, table, operationId, false) - // If operation has failures or is completed, get detailed information - return if ( - statusResponse.status?.failed?.let { it > 0 } == true || - IngestionResultUtils.isCompleted(statusResponse.details) - ) { - logger.debug( - "Operation $operationId has failures or is completed, retrieving details", - ) - getIngestionDetails(database, table, operationId, true) - } else { - statusResponse - } - } - - private val defaultConfigurationCache = - DefaultConfigurationCache( - dmUrl = dmUrl, - tokenCredential = tokenCredential, - skipSecurityChecks = skipSecurityChecks, - ) - - private val blobUploadContainer = - BlobUploadContainer( - configurationCache = defaultConfigurationCache, - maxConcurrency = - maxConcurrency ?: UPLOAD_CONTAINER_MAX_CONCURRENCY, - maxDataSize = - maxDataSize ?: UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES, - ignoreSizeLimit = ignoreFileSize, - ) - - /** - * Submits a queued ingestion request with support for all source types. - * Local sources (FileSourceInfo, StreamSourceInfo) will be automatically - * uploaded to blob storage before ingestion using parallel batch uploads. - * - * @param database The target database name - * @param table The target table name - * @param sources List of SourceInfo objects (BlobSourceInfo, - * FileSourceInfo, or StreamSourceInfo) - * @param format The data format - * @param ingestProperties Optional ingestion properties - * @param failOnPartialUploadError If true, fails the entire operation if - * any uploads fail - * @return IngestionOperation for tracking the request - */ - suspend fun submitQueuedIngestion( - database: String, - table: String, - sources: List, - format: Format = Format.csv, - ingestProperties: IngestRequestProperties? 
= null, - failOnPartialUploadError: Boolean = true, - ): IngestResponse { - logger.info( - "Submitting queued ingestion request for database: $database, table: $table, sources: ${sources.size}", - ) - - // Separate sources by type - val blobSources = sources.filterIsInstance() - val localSources = sources.filterIsInstance() - - // Convert local sources to blob sources - val allBlobSources = - if (localSources.isNotEmpty()) { - logger.info( - "Uploading ${localSources.size} local sources to blob storage in parallel", - ) - - // Use batch upload for efficiency - val batchResult = - BlobSourceInfo.fromLocalSourcesBatch( - localSources, - blobUploadContainer, - ) - - // Log batch results - logger.info( - "Batch upload completed: ${batchResult.successes.size} succeeded, " + - "${batchResult.failures.size} failed out of ${localSources.size} total", - ) - - // Handle failures based on policy - if (batchResult.hasFailures) { - val failureDetails = - batchResult.failures.joinToString("\n") { - failure -> - " - ${failure.source.name}: ${failure.errorCode} - ${failure.errorMessage}" - } - - if (failOnPartialUploadError) { - throw IngestException( - "Failed to upload ${batchResult.failures.size} out of ${localSources.size} sources:\n$failureDetails", - isPermanent = - batchResult.failures.all { - it.isPermanent - }, - ) - } else { - logger.warn( - "Some uploads failed but continuing with successful uploads:\n$failureDetails", - ) - } - } - - blobSources + batchResult.successes - } else { - blobSources - } - - if (allBlobSources.isEmpty()) { - throw IngestException( - "No sources available for ingestion after upload processing", - isPermanent = true, - ) - } - // Convert BlobSourceInfo objects to Blob objects - val blobs = - allBlobSources.map { blobSource -> - val sourceId = blobSource.sourceId.toString() - Blob( - url = blobSource.blobPath, - sourceId = sourceId, - rawSize = blobSource.blobExactSize as Long?, - ) - Blob(url = blobSource.blobPath, sourceId = sourceId) - } - - val requestProperties = - ingestProperties ?: IngestRequestProperties(format = format) - - logger.debug( - "Ingesting to {}.{} with the following properties with properties {}", - database, - table, - requestProperties, - ) - - val ingestRequest = - IngestRequest( - timestamp = OffsetDateTime.now(Clock.systemUTC()), - blobs = blobs, - properties = requestProperties, - ) - - try { - val response: HttpResponse = - api.postQueuedIngest( - database = database, - table = table, - ingestRequest = ingestRequest, - ) - - return handleIngestResponse( - response = response, - database = database, - table = table, - dmUrl = dmUrl, - endpointType = "queued", - ) - } catch (e: Exception) { - logger.error( - "Exception occurred during queued ingestion submission", - e, - ) - if (e is IngestException) throw e - throw IngestException( - message = - "Error submitting queued ingest request to $dmUrl", - cause = e, - isPermanent = true, - ) - } - } - - /** - * Gets detailed information about an ingestion operation. 
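For orientation, a sketch of how a caller might use this method on the v1 client being removed here, for example to count transient blob failures (names such as `client` are illustrative; `BlobStatus` is the model type used elsewhere in this file):

```kotlin
// Sketch: count transient blob failures in a detailed status response.
suspend fun transientFailures(
    client: QueuedIngestionClient,
    database: String,
    table: String,
    operationId: String,
): Int {
    val status = client.getIngestionDetails(database, table, operationId, details = true)
    return status.details.orEmpty()
        .count { it.failureStatus == BlobStatus.FailureStatus.Transient }
}
```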
- * - * @param database The target database name - * @param table The target table name - * @param operationId The operation ID returned from the ingestion request - * @param details Whether to retrieve detailed blob-level information - * @return StatusResponse with operation details - */ - override suspend fun getIngestionDetails( - database: String, - table: String, - operationId: String, - details: Boolean, - ): StatusResponse { - logger.debug("Checking ingestion summary for operation: $operationId") - try { - val response: HttpResponse = - api.getIngestStatus( - database = database, - table = table, - operationId = operationId, - details = details, - ) - - if ( - response.success && - response.status == HttpStatusCode.OK.value - ) { - val ingestStatusResponse = response.body() - logger.debug( - "Successfully retrieved summary for operation: {} and details: {}", - operationId, - ingestStatusResponse, - ) - return ingestStatusResponse - } else { - logger.error(response.toString()) - val ingestStatusFailure: StatusResponse = response.body() - // check if it is a permanent failure from status - val transientFailures = - ingestStatusFailure.details?.filter { - it.failureStatus == - BlobStatus.FailureStatus.Transient - } - val hasTransientErrors = transientFailures.isNullOrEmpty() - - if ( - response.status == HttpStatusCode.NotFound.value || - hasTransientErrors - ) { - val message = - printMessagesFromFailures( - transientFailures, - isTransientFailure = true, - ) - logger.error(message) - throw IngestException( - message = message, - cause = RuntimeException(message), - failureCode = response.status, - failureSubCode = "", - isPermanent = false, - ) - } - // TODO: We need to eventually look at OneApiExceptions - val errorMessage = - printMessagesFromFailures( - ingestStatusFailure.details, - isTransientFailure = false, - ) - logger.error(errorMessage) - throw IngestException(errorMessage, isPermanent = true) - } - } catch (e: Exception) { - logger.error( - "Exception occurred while getting ingestion summary for operation: $operationId", - e, - ) - if (e is IngestException) throw e - throw IngestException( - "Failed to get ingestion summary: ${e.message}", - e, - ) - } - } - - private fun printMessagesFromFailures( - failures: List?, - isTransientFailure: Boolean, - ): String? { - return failures?.joinToString { - ( - sourceId, - status, - startedAt, - lastUpdateTime, - errorCode, - failureStatus, - details, - ), - -> - "Error ingesting blob with $sourceId. ErrorDetails $details, ErrorCode $errorCode " + - ", Status ${status?.value}. Ingestion lastUpdated at $lastUpdateTime & started at $startedAt. " + - "FailureStatus ${failureStatus?.value}. Is transient failure: $isTransientFailure" - } - } - - /** - * Polls the ingestion status until completion or timeout. 
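A usage sketch for this polling helper (database, table, and intervals are illustrative; it must run inside a coroutine):

```kotlin
import kotlin.time.Duration

// Sketch: wait up to 10 minutes, checking every 15 seconds.
suspend fun awaitIngestion(client: QueuedIngestionClient, opId: String): StatusResponse =
    client.pollUntilCompletion(
        database = "MyDatabase", // illustrative
        table = "MyTable",
        operationId = opId,
        pollingInterval = Duration.parse("PT15S"),
        timeout = Duration.parse("PT10M"),
    )
```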
- * - * @param database The target database name - * @param table The target table name - * @param operationId The operation ID to poll - * @param pollingInterval How often to check the status - * @param timeout Maximum time to wait before throwing timeout exception - * @return The final StatusResponse when ingestion is completed - * @throws IngestException if the operation times out or fails - */ - suspend fun pollUntilCompletion( - database: String, - table: String, - operationId: String, - pollingInterval: Duration = Duration.parse("PT30S"), - timeout: Duration = Duration.parse("PT5M"), - ): StatusResponse { - val result = - withTimeoutOrNull(timeout.inWholeMilliseconds) { - var currentStatus: StatusResponse - do { - currentStatus = - getIngestionStatus( - database, - table, - operationId, - forceDetails = true, - ) - logger.debug( - "Starting to poll ingestion status for operation: $operationId, timeout: $timeout", - ) - if ( - IngestionResultUtils.isCompleted( - currentStatus.details, - ) - ) { - logger.info( - "Ingestion operation $operationId completed", - ) - return@withTimeoutOrNull currentStatus - } - - logger.debug( - "Ingestion operation $operationId still in progress, waiting ${pollingInterval.inWholeSeconds}s before next check", - ) - delay(pollingInterval.inWholeMilliseconds) - } while ( - !IngestionResultUtils.isCompleted( - currentStatus.details, - ) - ) - - currentStatus - } - - return result - ?: throw IngestException( - "Ingestion operation $operationId timed out after $timeout. " + - "Consider increasing the timeout duration or check the operation status manually.", - ) - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt deleted file mode 100644 index 1bf40b572..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClient.kt +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2 - -import com.azure.core.credential.TokenCredential -import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse -import com.microsoft.azure.kusto.ingest.v2.models.Format -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse -import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse -import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.FileSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.SourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo -import io.ktor.http.HttpStatusCode -import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable -import kotlinx.serialization.json.Json -import java.net.ConnectException -import java.net.URI -import java.util.UUID - -@Serializable -private data class StreamFromBlobRequestBody( - @SerialName("SourceUri") val sourceUri: String, -) - -class StreamingIngestClient( - val engineUrl: String, - override val tokenCredential: TokenCredential, - override val skipSecurityChecks: Boolean = false, - override val clientDetails: ClientDetails? 
= null, -) : - KustoBaseApiClient( - engineUrl, - tokenCredential, - skipSecurityChecks, - clientDetails, - ), - IngestClient { - - /** Handles multiple source types for streaming ingestion. */ - override suspend fun submitIngestion( - database: String, - table: String, - sources: List, - format: Format, - ingestProperties: IngestRequestProperties?, - ): IngestResponse { - require(sources.isNotEmpty()) { "At least one source is required" } - - // Streaming ingestion processes one source at a time - val source = sources.first() - val operationId = UUID.randomUUID().toString() - - when (source) { - is BlobSourceInfo -> { - logger.info( - "Streaming ingestion from BlobSource: ${source.blobPath}", - ) - submitStreamingIngestion( - database = database, - table = table, - // Not used for blob-based streaming - data = ByteArray(0), - format = format, - ingestProperties = ingestProperties, - blobUrl = source.blobPath, - ) - } - is FileSourceInfo -> { - logger.info( - "Streaming ingestion from FileSource: ${source.name}", - ) - val data = source.data().readBytes() - submitStreamingIngestion( - database = database, - table = table, - data = data, - format = format, - ingestProperties = ingestProperties, - blobUrl = null, - ) - source.close() - } - is StreamSourceInfo -> { - logger.info( - "Streaming ingestion from StreamSource: ${source.name}", - ) - val data = source.data().readBytes() - submitStreamingIngestion( - database = database, - table = table, - data = data, - format = format, - ingestProperties = ingestProperties, - blobUrl = null, - ) - source.close() - } - else -> { - throw IngestException( - message = - "Unsupported source type for streaming ingestion: ${source::class.simpleName}", - isPermanent = true, - ) - } - } - - // Streaming ingestion doesn't return an operation ID from the server - // We generate one locally for consistency with the IngestClient interface - return IngestResponse(ingestionOperationId = operationId) - } - - /** - * Note: Streaming ingestion doesn't support operation tracking. Throws - * UnsupportedOperationException. - */ - override suspend fun getIngestionStatus( - database: String, - table: String, - operationId: String, - forceDetails: Boolean, - ): StatusResponse { - throw UnsupportedOperationException( - "Streaming ingestion does not support operation status tracking. " + - "Operation ID: $operationId cannot be tracked. ", - ) - } - - /** - * Note: Streaming ingestion doesn't support operation tracking. Throws - * UnsupportedOperationException. - */ - override suspend fun getIngestionDetails( - database: String, - table: String, - operationId: String, - details: Boolean, - ): StatusResponse { - throw UnsupportedOperationException( - "Streaming ingestion does not support detailed operation tracking. " + - "Operation ID: $operationId cannot be tracked. ", - ) - } - - /** - * Submits a streaming ingestion request. - * - * @param database The target database name - * @param table The target table name - * @param data The data to ingest (as ByteArray) - * @param format The data format - * @param ingestProperties Optional ingestion properties - * @param blobUrl Optional blob URL for blob-based streaming ingestion (if - * provided, data is ignored) - * @return IngestResponse for tracking the request - */ - suspend fun submitStreamingIngestion( - database: String, - table: String, - data: ByteArray, - format: Format = Format.csv, - ingestProperties: IngestRequestProperties? = null, - blobUrl: String? 
= null,
-    ) {
-        val host = URI(engineUrl).host
-
-        val bodyContent: Any
-        val sourceKind: String?
-        val contentType: String
-
-        if (blobUrl != null) {
-            // Blob-based streaming
-            val requestBody = StreamFromBlobRequestBody(sourceUri = blobUrl)
-            bodyContent = Json.encodeToString(requestBody).toByteArray()
-            sourceKind = "uri"
-            contentType = "application/json"
-            logger.info(
-                "Submitting streaming ingestion from blob for database: {}, table: {}, blob: {}. Host {}",
-                database,
-                table,
-                blobUrl,
-                host,
-            )
-        } else {
-            // Direct streaming using raw data
-            bodyContent = data
-            sourceKind = null
-            contentType = "application/octet-stream"
-            logger.info(
-                "Submitting streaming ingestion request for database: {}, table: {}, data size: {}. Host {}",
-                database,
-                table,
-                data.size,
-                host,
-            )
-        }
-
-        try {
-            val response: HttpResponse =
-                api.postStreamingIngest(
-                    database = database,
-                    table = table,
-                    streamFormat = format,
-                    body = bodyContent,
-                    mappingName =
-                        ingestProperties?.ingestionMappingReference,
-                    sourceKind = sourceKind,
-                    host = host,
-                    acceptEncoding = "gzip",
-                    connection = "Keep-Alive",
-                    contentEncoding = null,
-                    contentType = contentType,
-                )
-            return handleIngestResponse(
-                response = response,
-                database = database,
-                table = table,
-                dmUrl = engineUrl,
-                endpointType = "streaming",
-            )
-        } catch (notAbleToReachHost: ConnectException) {
-            val message =
-                "Failed to reach $engineUrl for streaming ingestion. Please ensure the cluster address is correct and the cluster is reachable."
-            throw IngestException(
-                message = message,
-                cause = notAbleToReachHost,
-                failureCode = HttpStatusCode.NotFound.value,
-                failureSubCode = "",
-                isPermanent = false,
-            )
-        } catch (e: Exception) {
-            logger.error(
-                "Exception occurred during streaming ingestion submission",
-                e,
-            )
-            if (e is IngestException) throw e
-            throw IngestException(
-                message =
-                    "Error submitting streaming ingest request to $engineUrl",
-                cause = e,
-                isPermanent = true,
-            )
-        }
-    }
-}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt
index 6f8007f7a..053fb3956 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt
@@ -3,26 +3,75 @@
 package com.microsoft.azure.kusto.ingest.v2.builders
 
 import com.azure.core.credential.TokenCredential
-import com.microsoft.azure.kusto.ingest.v2.common.ClientDetails
+import com.microsoft.azure.kusto.ingest.v2.KustoBaseApiClient
+import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY
+import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES
+import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache
+import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails
+import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader
+import com.microsoft.azure.kusto.ingest.v2.uploader.ManagedUploader
 
-abstract class BaseIngestClientBuilder<B : BaseIngestClientBuilder<B>> {
+abstract class BaseIngestClientBuilder<T : BaseIngestClientBuilder<T>> {
     protected var tokenCredential: TokenCredential? = null
     protected var skipSecurityChecks: Boolean = false
     protected var clientDetails: ClientDetails? = null
 
-    @Suppress("UNCHECKED_CAST")
-    protected fun self(): B = this as B
+    // Fabric Private Link support
+    protected var s2sTokenProvider: (suspend () -> Pair<String, String>)? = null
+    protected var s2sFabricPrivateLinkAccessContext: String? = null
 
-    fun withAuthentication(credential: TokenCredential): B {
+    // Added properties for ingestion endpoint and authentication
+    protected var ingestionEndpoint: String? = null
+    protected var clusterEndpoint: String? = null
+    protected var authentication: TokenCredential? = null
+
+    protected var maxConcurrency: Int = UPLOAD_CONTAINER_MAX_CONCURRENCY
+    protected var maxDataSize: Long = UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES
+    protected var ignoreFileSize: Boolean = false
+    protected var uploader: IUploader? = null
+    protected var closeUploader: Boolean = false
+    protected var configuration: ConfigurationCache? = null
+
+    protected abstract fun self(): T
+
+    fun withAuthentication(credential: TokenCredential): T {
         this.tokenCredential = credential
+        this.authentication = credential
         return self()
     }
 
-    fun skipSecurityChecks(): B {
+    fun skipSecurityChecks(): T {
         this.skipSecurityChecks = true
         return self()
     }
 
+    /**
+     * Enables requests to target a cluster with Fabric Private Link enabled.
+     *
+     * @param s2sTokenProvider A suspend function that provides the S2S
+     * (Service-to-Service) token, indicating that the caller is authorized as
+     * a valid Fabric Private Link client. Returns a Pair of (token, scheme),
+     * e.g., ("token_value", "Bearer"). Note: the header format will be
+     * "{scheme} {token}" (scheme first).
+     * @param s2sFabricPrivateLinkAccessContext Specifies the scope of the
+     * Fabric Private Link perimeter, such as the entire tenant or a specific
+     * workspace.
+     * @return This builder instance for method chaining
+     */
+    fun withFabricPrivateLink(
+        s2sTokenProvider: suspend () -> Pair<String, String>,
+        s2sFabricPrivateLinkAccessContext: String,
+    ): T {
+        require(s2sFabricPrivateLinkAccessContext.isNotBlank()) {
+            "s2sFabricPrivateLinkAccessContext must not be blank"
+        }
+        this.s2sTokenProvider = s2sTokenProvider
+        this.s2sFabricPrivateLinkAccessContext =
+            s2sFabricPrivateLinkAccessContext
+        return self()
+    }
+
     /**
      * Sets the client details for tracing purposes, using defaults for any
     * unprovided fields.
@@ -37,7 +86,7 @@ abstract class BaseIngestClientBuilder<B : BaseIngestClientBuilder<B>> {
         applicationName: String,
         version: String,
         userName: String? = null,
-    ): B {
+    ): T {
         this.clientDetails =
             ClientDetails(
                 applicationForTracing = applicationName,
@@ -73,7 +122,7 @@ abstract class BaseIngestClientBuilder<B : BaseIngestClientBuilder<B>> {
         appName: String? = null,
         appVersion: String? = null,
         additionalFields: Map<String, String>?
= null, - ): B { + ): T { this.clientDetails = ClientDetails.fromConnectorDetails( name = name, @@ -86,4 +135,72 @@ abstract class BaseIngestClientBuilder> { ) return self() } + + protected fun createApiClient( + dmUrl: String, + tokenCredential: TokenCredential, + clientDetails: ClientDetails, + skipSecurityChecks: Boolean, + ): KustoBaseApiClient { + return KustoBaseApiClient( + dmUrl = dmUrl, + tokenCredential = tokenCredential, + skipSecurityChecks = skipSecurityChecks, + clientDetails = clientDetails, + s2sTokenProvider = s2sTokenProvider, + s2sFabricPrivateLinkAccessContext = + s2sFabricPrivateLinkAccessContext, + ) + } + + protected fun createDefaultUploader( + configuration: ConfigurationCache, + ignoreFileSize: Boolean, + maxConcurrency: Int, + maxDataSize: Long, + ): IUploader { + return ManagedUploader.builder() + .withConfigurationCache(configuration) + .withIgnoreSizeLimit(ignoreFileSize) + .withMaxConcurrency(maxConcurrency) + .withMaxDataSize(maxDataSize) + .apply { tokenCredential?.let { withTokenCredential(it) } } + .build() + } + + protected fun setEndpoint(endpoint: String) { + this.ingestionEndpoint = normalizeAndCheckDmUrl(endpoint) + this.clusterEndpoint = normalizeAndCheckEngineUrl(endpoint) + } + + companion object { + protected fun normalizeAndCheckEngineUrl(clusterUrl: String): String { + val normalizedUrl = + if (clusterUrl.matches(Regex("https://ingest-[^/]+.*"))) { + // If the URL starts with https://ingest-, remove ingest- + clusterUrl.replace( + Regex("https://ingest-([^/]+)"), + "https://$1", + ) + } else { + clusterUrl + } + return normalizedUrl + } + + @JvmStatic + protected fun normalizeAndCheckDmUrl(dmUrl: String): String { + val normalizedUrl = + if (dmUrl.matches(Regex("https://(?!ingest-)[^/]+.*"))) { + // If the URL starts with https:// and does not already have ingest-, add it + dmUrl.replace( + Regex("https://([^/]+)"), + "https://ingest-$1", + ) + } else { + dmUrl + } + return normalizedUrl + } + } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt new file mode 100644 index 000000000..aa50fb8e7 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.builders + +import com.microsoft.azure.kusto.ingest.v2.client.ManagedStreamingIngestClient +import com.microsoft.azure.kusto.ingest.v2.client.policy.DefaultManagedStreamingPolicy.Companion.DEFAULT_MANAGED_STREAMING_POLICY +import com.microsoft.azure.kusto.ingest.v2.client.policy.ManagedStreamingPolicy +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader + +class ManagedStreamingIngestClientBuilder +private constructor(private val dmUrl: String) : + BaseIngestClientBuilder() { + override fun self(): ManagedStreamingIngestClientBuilder = this + + private var managedStreamingPolicy: ManagedStreamingPolicy? 
= null + + companion object { + @JvmStatic + fun create(dmUrl: String): ManagedStreamingIngestClientBuilder { + require(dmUrl.isNotBlank()) { "Data Ingestion URI cannot be blank" } + return ManagedStreamingIngestClientBuilder(dmUrl) + } + } + + fun withUploader( + uploader: IUploader, + closeUploader: Boolean, + ): ManagedStreamingIngestClientBuilder { + this.uploader = uploader + this.closeUploader = closeUploader + return this + } + + fun withManagedStreamingIngestPolicy( + managedStreamingPolicy: ManagedStreamingPolicy, + ): ManagedStreamingIngestClientBuilder { + this.managedStreamingPolicy = managedStreamingPolicy + return this + } + + fun build(): ManagedStreamingIngestClient { + requireNotNull(tokenCredential) { + "Authentication is required. Call withAuthentication() before build()" + } + val effectiveClientDetails = + clientDetails ?: ClientDetails.createDefault() + val effectiveConfiguration = + configuration + ?: DefaultConfigurationCache( + dmUrl = this.dmUrl, + tokenCredential = this.tokenCredential, + skipSecurityChecks = this.skipSecurityChecks, + clientDetails = effectiveClientDetails, + ) + + val effectiveUploader = + uploader + ?: createDefaultUploader( + configuration = effectiveConfiguration, + ignoreFileSize = this.ignoreFileSize, + maxConcurrency = this.maxConcurrency, + maxDataSize = this.maxDataSize, + ) + + val queuedIngestClient = + QueuedIngestClientBuilder.create(this.dmUrl) + .withConfiguration(effectiveConfiguration) + .withClientDetails( + effectiveClientDetails + .getApplicationForTracing(), + effectiveClientDetails + .getClientVersionForTracing(), + effectiveClientDetails.getUserNameForTracing(), + ) + .withAuthentication(this.tokenCredential!!) + .withUploader(effectiveUploader, closeUploader) + .apply { + s2sTokenProvider?.let { provider -> + s2sFabricPrivateLinkAccessContext?.let { context, + -> + withFabricPrivateLink(provider, context) + } + } + if (skipSecurityChecks) { + skipSecurityChecks() + } + } + .build() + + val effectiveManagedStreamingPolicy = + managedStreamingPolicy ?: DEFAULT_MANAGED_STREAMING_POLICY + val streamingIngestClient = + StreamingIngestClientBuilder.create(this.dmUrl) + .withClientDetails( + effectiveClientDetails + .getApplicationForTracing(), + effectiveClientDetails + .getClientVersionForTracing(), + effectiveClientDetails.getUserNameForTracing(), + ) + .withAuthentication(this.tokenCredential!!) + .apply { + s2sTokenProvider?.let { provider -> + s2sFabricPrivateLinkAccessContext?.let { context, + -> + withFabricPrivateLink(provider, context) + } + } + if (skipSecurityChecks) { + skipSecurityChecks() + } + } + .build() + + return ManagedStreamingIngestClient( + streamingIngestClient, + queuedIngestClient, + effectiveManagedStreamingPolicy, + ) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt new file mode 100644 index 000000000..04834ec70 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
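A usage sketch for the managed streaming builder above (the cluster URI and credential are placeholders; when no policy is supplied, build() falls back to DEFAULT_MANAGED_STREAMING_POLICY as shown in the code):

```kotlin
// Sketch, assuming an Azure TokenCredential instance is available.
val client = ManagedStreamingIngestClientBuilder
    .create("https://ingest-mycluster.kusto.windows.net") // illustrative URI
    .withAuthentication(tokenCredential)
    .build() // uses DEFAULT_MANAGED_STREAMING_POLICY when no policy was set
```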
+package com.microsoft.azure.kusto.ingest.v2.builders + +import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader + +class QueuedIngestClientBuilder private constructor(private val dmUrl: String) : + BaseIngestClientBuilder() { + + override fun self(): QueuedIngestClientBuilder = this + + companion object { + @JvmStatic + fun create(dmUrl: String): QueuedIngestClientBuilder { + require(dmUrl.isNotBlank()) { "Data Ingestion URI cannot be blank" } + // Make sure to convert it to ingest-url if user passed engine-url + return QueuedIngestClientBuilder(normalizeAndCheckDmUrl(dmUrl)) + } + } + + fun withMaxConcurrency(concurrency: Int): QueuedIngestClientBuilder { + require(concurrency > 0) { + "Max concurrency must be positive, got: $concurrency" + } + this.maxConcurrency = concurrency + return this + } + + fun withMaxDataSize(bytes: Long): QueuedIngestClientBuilder { + require(bytes > 0) { "Max data size must be positive, got: $bytes" } + this.maxDataSize = bytes + return this + } + + fun withIgnoreFileSize(ignore: Boolean): QueuedIngestClientBuilder { + this.ignoreFileSize = ignore + return this + } + + fun withUploader( + uploader: IUploader, + closeUploader: Boolean, + ): QueuedIngestClientBuilder { + this.uploader = uploader + this.closeUploader = closeUploader + return this + } + + fun withConfiguration( + configuration: ConfigurationCache, + ): QueuedIngestClientBuilder { + this.configuration = configuration + return this + } + + fun build(): QueuedIngestClient { + setEndpoint(dmUrl) + requireNotNull(tokenCredential) { + "Authentication is required. Call withAuthentication() before build()" + } + val effectiveClientDetails = + clientDetails ?: ClientDetails.createDefault() + val effectiveConfiguration = + configuration + ?: DefaultConfigurationCache( + dmUrl = this.dmUrl, + tokenCredential = this.tokenCredential, + skipSecurityChecks = this.skipSecurityChecks, + clientDetails = effectiveClientDetails, + ) + val apiClient = + createApiClient( + this.dmUrl, + this.tokenCredential!!, + effectiveClientDetails, + this.skipSecurityChecks, + ) + + val effectiveUploader = + uploader + ?: createDefaultUploader( + configuration = effectiveConfiguration, + ignoreFileSize = this.ignoreFileSize, + maxConcurrency = this.maxConcurrency, + maxDataSize = this.maxDataSize, + ) + return QueuedIngestClient( + apiClient = apiClient, + // TODO Question if this is redundant. ConfigurationCache is already held by + // uploader + cachedConfiguration = effectiveConfiguration, + uploader = effectiveUploader, + shouldDisposeUploader = closeUploader, + ) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt deleted file mode 100644 index bbe5f6ada..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestionClientBuilder.kt +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
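A usage sketch for the new queued builder above, which replaces the v1 builder removed below (endpoint and credential are placeholders; create() normalizes an engine URL to its ingest- form):

```kotlin
// Sketch of building the new queued ingest client.
val queued = QueuedIngestClientBuilder
    .create("https://mycluster.kusto.windows.net") // normalized to https://ingest-...
    .withAuthentication(tokenCredential)
    .withMaxConcurrency(4)     // parallel blob uploads
    .withIgnoreFileSize(false) // keep the size limit check
    .build()
```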
-package com.microsoft.azure.kusto.ingest.v2.builders - -import com.microsoft.azure.kusto.ingest.v2.QueuedIngestionClient -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES - -class QueuedIngestionClientBuilder -private constructor(private val dmUrl: String) : - BaseIngestClientBuilder() { - - private var maxConcurrency: Int = UPLOAD_CONTAINER_MAX_CONCURRENCY - private var maxDataSize: Long = UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES - private var ignoreFileSize: Boolean = false - - companion object { - @JvmStatic - fun create(dmUrl: String): QueuedIngestionClientBuilder { - require(dmUrl.isNotBlank()) { "Data Ingestion URI cannot be blank" } - return QueuedIngestionClientBuilder(dmUrl) - } - } - - fun withMaxConcurrency(concurrency: Int): QueuedIngestionClientBuilder { - require(concurrency > 0) { - "Max concurrency must be positive, got: $concurrency" - } - this.maxConcurrency = concurrency - return this - } - - fun withMaxDataSize(bytes: Long): QueuedIngestionClientBuilder { - require(bytes > 0) { "Max data size must be positive, got: $bytes" } - this.maxDataSize = bytes - return this - } - - fun withIgnoreFileSize(ignore: Boolean): QueuedIngestionClientBuilder { - this.ignoreFileSize = ignore - return this - } - - fun build(): QueuedIngestionClient { - requireNotNull(tokenCredential) { - "Authentication is required. Call withAuthentication() before build()" - } - - return QueuedIngestionClient( - dmUrl = dmUrl, - tokenCredential = tokenCredential!!, - skipSecurityChecks = skipSecurityChecks, - clientDetails = clientDetails, - maxConcurrency = maxConcurrency, - maxDataSize = maxDataSize, - ignoreFileSize = ignoreFileSize, - ) - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt index 47978a33d..02bfe6468 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilder.kt @@ -2,30 +2,48 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.builders -import com.microsoft.azure.kusto.ingest.v2.StreamingIngestClient +import com.microsoft.azure.kusto.ingest.v2.client.StreamingIngestClient +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails class StreamingIngestClientBuilder -private constructor(private val engineUrl: String) : +private constructor(private val clusterUrl: String) : BaseIngestClientBuilder() { + override fun self(): StreamingIngestClientBuilder = this + companion object { @JvmStatic - fun create(engineUrl: String): StreamingIngestClientBuilder { - require(engineUrl.isNotBlank()) { "Engine URL cannot be blank" } - return StreamingIngestClientBuilder(engineUrl) + fun create(clusterUrl: String): StreamingIngestClientBuilder { + require(clusterUrl.isNotBlank()) { "Cluster URI cannot be blank" } + // Make sure to convert it to cluster-url if user passed ingest-url + return StreamingIngestClientBuilder( + normalizeAndCheckEngineUrl(clusterUrl), + ) } } fun build(): StreamingIngestClient { + setEndpoint(clusterUrl) requireNotNull(tokenCredential) { "Authentication is required. 
Call withAuthentication() before build()"
         }
-
+        validateParameters()
+        val effectiveClientDetails =
+            clientDetails ?: ClientDetails.createDefault()
+        val apiClient =
+            createApiClient(
+                this.clusterUrl,
+                this.tokenCredential!!,
+                effectiveClientDetails,
+                this.skipSecurityChecks,
+            )
         return StreamingIngestClient(
-            engineUrl = engineUrl,
-            tokenCredential = tokenCredential!!,
-            skipSecurityChecks = skipSecurityChecks,
-            clientDetails = clientDetails,
-        )
+            apiClient = apiClient,
+        )
+    }
+
+    private fun validateParameters() {
+        requireNotNull(ingestionEndpoint) { "Ingestion endpoint must be set." }
+        requireNotNull(authentication) { "Authentication must be set." }
     }
 }
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt
new file mode 100644
index 000000000..4cfd30ab3
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt
@@ -0,0 +1,112 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.client
+
+import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse
+import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties
+import com.microsoft.azure.kusto.ingest.v2.models.Status
+import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse
+import com.microsoft.azure.kusto.ingest.v2.source.BlobSource
+import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource
+import java.io.Closeable
+
+/**
+ * Interface for ingesting data into Kusto.
+ *
+ * Ingestion can be done from:
+ * - A local file (see [com.microsoft.azure.kusto.ingest.v2.source.FileSource])
+ * - A stream (see [com.microsoft.azure.kusto.ingest.v2.source.StreamSource])
+ * - A blob (see [BlobSource])
+ *
+ * To track the result, set the [IngestRequestProperties.enableTracking]
+ * property to true. Then you can use the [getOperationSummaryAsync] and
+ * [getOperationDetailsAsync] methods to get the status of the ingestion
+ * operation.
+ */
+interface IngestClient : Closeable {
+
+    /**
+     * Ingests data from the specified source into the specified database and
+     * table.
+     *
+     * @param source The source to ingest.
+     * @param database The name of the database to ingest to.
+     * @param table The name of the table to ingest to.
+     * @param ingestRequestProperties Optional ingestion properties.
+     * @return An [ExtendedIngestResponse] that can be used to track the
+     * status of the ingestion.
+     */
+    suspend fun ingestAsync(
+        source: IngestionSource,
+        database: String,
+        table: String,
+        ingestRequestProperties: IngestRequestProperties? = null,
+    ): ExtendedIngestResponse
+
+    /**
+     * Get the current status of an ingestion operation.
+     *
+     * Unlike [getOperationDetailsAsync], this method returns only the summary
+     * of the operation - statistics on the blobs ingested, and the operation
+     * status.
+     *
+     * To use this method, the [IngestRequestProperties.enableTracking] property
+     * must be set to true when ingesting the data.
+     *
+     * @param operation The ingestion operation to get the status for.
+     * @return A [Status] object that provides a summary of the ingestion
+     * operation.
+     */
+    suspend fun getOperationSummaryAsync(operation: IngestionOperation): Status
+
+    /**
+     * Get the current status of an ingestion operation.
+     *
+     * This method returns detailed information about the operation - statistics
+     * on the blobs ingested, and the operation status, as well as specific
+     * results for each blob.
+     *
+     * To use this method, the [IngestRequestProperties.enableTracking] property
+     * must be set to true when ingesting the data.
+     *
+     * @param operation The ingestion operation to get the status for.
+     * @return A [StatusResponse] object that provides detailed information
+     * about the ingestion operation.
+     */
+    suspend fun getOperationDetailsAsync(
+        operation: IngestionOperation,
+    ): StatusResponse
+}
+
+/** Interface for ingesting from multiple data sources into Kusto. */
+interface MultiIngestClient : IngestClient {
+
+    /**
+     * Ingest data from multiple sources.
+     *
+     * @param sources The sources to ingest.
+     * @param database The name of the database to ingest to.
+     * @param table The name of the table to ingest to.
+     * @param ingestRequestProperties Optional ingestion properties.
+     * @return An [ExtendedIngestResponse] that can be used to track the
+     * status of the ingestion.
+     */
+    suspend fun ingestAsync(
+        sources: List<IngestionSource>,
+        database: String,
+        table: String,
+        ingestRequestProperties: IngestRequestProperties? = null,
+    ): ExtendedIngestResponse
+
+    /**
+     * Returns the maximum number of sources that can be ingested in a single
+     * call to [ingestAsync].
+     *
+     * This limit is imposed to avoid excessively large requests that could lead
+     * to performance degradation or failures.
+     *
+     * @return The maximum number of sources allowed in a single ingestion
+     * request.
+     */
+    suspend fun getMaxSourcesPerMultiIngest(): Int
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt
new file mode 100644
index 000000000..ddd932496
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt
@@ -0,0 +1,21 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.client
+
+import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind
+import java.util.UUID
+
+/** Represents an ingestion operation that can be tracked. */
+data class IngestionOperation(
+    /** Unique identifier for the ingestion operation. */
+    val operationId: UUID,
+
+    /** The database name where data was ingested. */
+    val database: String,
+
+    /** The table name where data was ingested. */
+    val table: String,
+
+    /** The kind of ingestion (e.g., STREAMING, QUEUED). */
+    val ingestKind: IngestKind,
+)
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt
new file mode 100644
index 000000000..08c734879
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt
@@ -0,0 +1,626 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
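To make the tracking contract above concrete, a sketch of re-querying a queued operation (identifiers are illustrative; IngestKind.QUEUED is assumed from the KDoc of IngestionOperation):

```kotlin
import java.util.UUID

// Sketch: summarize a previously submitted queued ingestion.
suspend fun summarize(client: IngestClient, id: UUID): Status =
    client.getOperationSummaryAsync(
        IngestionOperation(
            operationId = id,
            database = "MyDatabase", // illustrative
            table = "MyTable",
            ingestKind = IngestKind.QUEUED, // assumed enum constant per the KDoc
        ),
    )
```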
+package com.microsoft.azure.kusto.ingest.v2.client
+
+import com.microsoft.azure.kusto.ingest.v2.STREAMING_MAX_REQ_BODY_SIZE
+import com.microsoft.azure.kusto.ingest.v2.client.policy.ManagedStreamingErrorCategory
+import com.microsoft.azure.kusto.ingest.v2.client.policy.ManagedStreamingPolicy
+import com.microsoft.azure.kusto.ingest.v2.client.policy.ManagedStreamingRequestFailureDetails
+import com.microsoft.azure.kusto.ingest.v2.client.policy.ManagedStreamingRequestSuccessDetails
+import com.microsoft.azure.kusto.ingest.v2.common.RetryDecision
+import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestClientException
+import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException
+import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse
+import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind
+import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder
+import com.microsoft.azure.kusto.ingest.v2.common.runWithRetry
+import com.microsoft.azure.kusto.ingest.v2.models.Format
+import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties
+import com.microsoft.azure.kusto.ingest.v2.models.Status
+import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse
+import com.microsoft.azure.kusto.ingest.v2.source.BlobSource
+import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource
+import com.microsoft.azure.kusto.ingest.v2.source.LocalSource
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.withContext
+import org.slf4j.LoggerFactory
+import java.io.InputStream
+import java.time.Clock
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * Managed streaming ingestion client that combines streaming and queued
+ * ingestion.
+ *
+ * This client intelligently chooses between streaming and queued ingestion
+ * based on:
+ * - Data size (falls back to queued for large data)
+ * - Server response (falls back to queued on certain errors)
+ * - Policy decisions (configured behavior)
+ *
+ * When streaming ingestion fails with transient errors, the client retries.
+ * When it fails with certain permanent errors (e.g., streaming disabled, data
+ * too large), it automatically falls back to queued ingestion.
+ *
+ * **Important:** This class cannot be instantiated directly. Use
+ * [com.microsoft.azure.kusto.ingest.v2.builders.ManagedStreamingIngestClientBuilder]
+ * to create instances of this client.
+ *
+ * Example usage:
+ * ```
+ * val client = ManagedStreamingIngestClientBuilder.create(clusterUrl)
+ *     .withAuthentication(tokenProvider)
+ *     .build()
+ * ```
+ */
+class ManagedStreamingIngestClient
+internal constructor(
+    private val streamingIngestClient: StreamingIngestClient,
+    private val queuedIngestClient: QueuedIngestClient,
+    private val managedStreamingPolicy: ManagedStreamingPolicy,
+) : IngestClient {
+
+    private val logger =
+        LoggerFactory.getLogger(ManagedStreamingIngestClient::class.java)
+
+    companion object {
+        private val EMPTY_STATUS =
+            Status(
+                succeeded = 0L,
+                failed = 0L,
+                inProgress = 0L,
+                canceled = 0L,
+            )
+
+        private val EMPTY_STATUS_RESPONSE =
+            StatusResponse(
+                status = EMPTY_STATUS,
+                details = emptyList(),
+                startTime = null,
+            )
+    }
+
+    override suspend fun ingestAsync(
+        source: IngestionSource,
+        database: String,
+        table: String,
+        ingestRequestProperties: IngestRequestProperties?,
+    ): ExtendedIngestResponse {
+        require(database.isNotBlank()) { "database cannot be blank" }
+        require(table.isNotBlank()) { "table cannot be blank" }
+
+        val effectiveIngestRequestProperties =
+            ingestRequestProperties
+                ?: IngestRequestPropertiesBuilder(format = Format.csv)
+                    .build()
+
+        return when (source) {
+            is BlobSource ->
+                ingestBlobAsync(
+                    source,
+                    database,
+                    table,
+                    effectiveIngestRequestProperties,
+                )
+            is LocalSource ->
+                ingestLocalAsync(
+                    source,
+                    database,
+                    table,
+                    effectiveIngestRequestProperties,
+                )
+            else ->
+                throw IllegalArgumentException(
+                    "Unsupported source type: ${source::class.simpleName}",
+                )
+        }
+    }
+
+    override suspend fun getOperationSummaryAsync(
+        operation: IngestionOperation,
+    ): Status {
+        // Delegate to queued client for tracking
+        if (operation.ingestKind == IngestKind.STREAMING) {
+            logger.warn(
+                "getOperationSummaryAsync called for a streaming ingestion operation. " +
+                    "Streaming ingestion operations are not tracked. " +
+                    "Returning an empty Status.",
+            )
+            return EMPTY_STATUS
+        }
+        return queuedIngestClient.getOperationSummaryAsync(operation)
+    }
+
+    override suspend fun getOperationDetailsAsync(
+        operation: IngestionOperation,
+    ): StatusResponse {
+        // Delegate to queued client for tracking
+        if (operation.ingestKind == IngestKind.STREAMING) {
+            logger.warn(
+                "getOperationDetailsAsync called for a streaming ingestion operation. " +
+                    "Streaming ingestion operations are not tracked. 
" + + "Returning a dummy StatusResponse.", + ) + return EMPTY_STATUS_RESPONSE + } + return queuedIngestClient.getOperationDetailsAsync(operation) + } + + override fun close() { + try { + streamingIngestClient.close() + } catch (e: Exception) { + logger.warn("Error closing streaming ingest client", e) + } + try { + queuedIngestClient.close() + } catch (e: Exception) { + logger.warn("Error closing queued ingest client", e) + } + } + + private suspend fun ingestBlobAsync( + blobSource: BlobSource, + database: String, + table: String, + ingestRequestProperties: IngestRequestProperties, + ): ExtendedIngestResponse { + if ( + shouldUseQueuedIngestByPolicy( + blobSource, + database, + table, + ingestRequestProperties, + ) + ) { + return invokeQueuedIngestionAsync( + blobSource, + database, + table, + ingestRequestProperties, + ) + } + return invokeStreamingIngestionAsync( + blobSource, + database, + table, + ingestRequestProperties, + ) + } + + private suspend fun ingestLocalAsync( + source: LocalSource, + database: String, + table: String, + props: IngestRequestProperties, + ): ExtendedIngestResponse { + val stream = source.data() + if (!stream.isValidForIngest()) { + throw IngestClientException( + message = + "Stream is not valid for ingest. Ensure the stream is not null, has data, and is seekable.", + isPermanent = true, + ) + } + + val streamSize = + withContext(Dispatchers.IO) { stream.available() }.toLong() + + if ( + shouldUseQueuedIngestBySize(streamSize) || + shouldUseQueuedIngestByPolicy( + source, + database, + table, + props, + ) + ) { + return invokeQueuedIngestionAsync(source, database, table, props) + } + return invokeStreamingIngestionAsync(source, database, table, props) + } + + private fun shouldUseQueuedIngestBySize(size: Long): Boolean { + val sizeThreshold = + STREAMING_MAX_REQ_BODY_SIZE * + managedStreamingPolicy.dataSizeFactor + + if (size > sizeThreshold) { + logger.info( + "Blob size '{}' is too big for streaming ingest. " + + "The DataSizeFactor used is '{}' - ingest using queued ingest.", + size, + managedStreamingPolicy.dataSizeFactor, + ) + return true + } + return false + } + + private suspend fun invokeStreamingIngestionAsync( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ): ExtendedIngestResponse { + var startTime: Long + var currentAttempt = 1u + var lastException: Exception? 
= null + val result = + managedStreamingPolicy.retryPolicy.runWithRetry( + action = { attempt: UInt -> + startTime = + Instant.now(Clock.systemUTC()) + .toEpochMilli() + currentAttempt = attempt + val result = + streamingIngestClient.ingestAsync( + source, + database, + table, + props, + ) + val requestDuration = + Duration.ofMillis( + Instant.now(Clock.systemUTC()) + .toEpochMilli() - startTime, + ) + managedStreamingPolicy.streamingSuccessCallback( + source, + database, + table, + props, + ManagedStreamingRequestSuccessDetails( + requestDuration, + ), + ) + result + }, + onRetry = { retryNumber: UInt, ex: Exception, _: Boolean, + -> + // Reset stream if possible for retry + logger.error( + "Exception while trying streaming ingest $retryNumber, retrying...", + ex, + ) + resetLocalSourceIfPossible(source) + }, + shouldRetry = { + _: UInt, + ex: Exception, + isPermanent: Boolean, + -> + lastException = ex + decideOnException( + source, + database, + table, + props, + isPermanent, + ex, + ) + }, + throwOnExhaustedRetries = false, + ) + + if (result != null) { + return result + } + // Streaming failed, fall back to queued ingestion + logger.warn( + "Streaming ingestion failed, falling back to queued ingestion. Attempt: {}, Exception: {}", + currentAttempt, + lastException?.message, + ) + return invokeQueuedIngestionAsync(source, database, table, props) + } + + private fun resetLocalSourceIfPossible(source: IngestionSource) { + if (source is LocalSource) { + try { + val stream = source.data() + if (stream.markSupported()) { + stream.reset() + } + } catch (e: Exception) { + logger.warn("Failed to reset stream for retry: {}", e.message) + } + } + } + + private fun decideOnException( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + isPermanent: Boolean, + ex: Exception, + ): RetryDecision { + if (!isPermanent) { + reportTransientException(source, database, table, props, ex) + return RetryDecision.Continue + } + + val ingestEx = ex as? IngestException + if (ingestEx == null) { + reportUnknownException(source, database, table, props, ex) + return RetryDecision.Throw + } + + if ( + shouldFallbackToQueuedOnPermanentError( + ingestEx, + source, + database, + table, + props, + ) + ) { + return RetryDecision.Break + } + logger.error( + "Permanent error occurred while trying streaming ingest, didn't switch to queued according to policy: {}", + ex.message, + ex, + ) + return RetryDecision.Throw + } + + private suspend fun invokeQueuedIngestionAsync( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ): ExtendedIngestResponse { + return queuedIngestClient.ingestAsync(source, database, table, props) + } + + private fun shouldUseQueuedIngestByPolicy( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ): Boolean { + if ( + managedStreamingPolicy.shouldDefaultToQueuedIngestion( + source, + database, + table, + props, + ) + ) { + logger.info( + "According to the ManagedStreamingPolicy ingest will fall back to queued ingestion.", + ) + return true + } + + return false + } + + private fun reportTransientException( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ex: Exception, + ) { + val failureDetails = + ManagedStreamingRequestFailureDetails( + exception = ex, + isPermanent = false, + errorCategory = + if ( + (ex as? 
IngestException)?.failureCode == + 429 || + ex.message?.contains( + "KustoRequestThrottledException", + ignoreCase = true, + ) == true + ) { + ManagedStreamingErrorCategory.THROTTLED + } else { + ManagedStreamingErrorCategory.OTHER_ERRORS + }, + ) + logger.warn("Streaming ingestion throttled: {}", ex.message) + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + failureDetails, + ) + } + + private fun reportUnknownException( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ex: Exception, + ) { + logger.error("Unexpected error occurred during streaming ingestion", ex) + + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + ManagedStreamingRequestFailureDetails( + exception = ex, + isPermanent = true, + errorCategory = + ManagedStreamingErrorCategory.UNKNOWN_ERRORS, + ), + ) + } + + private fun shouldFallbackToQueuedOnPermanentError( + ex: IngestException, + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ): Boolean { + val failureDetails = + ManagedStreamingRequestFailureDetails( + exception = ex, + isPermanent = true, + ) + + // Check various error scenarios + when { + // Streaming ingestion policy turned off + isStreamingIngestionOff(ex) -> { + logger.info( + "Streaming ingestion is off, fallback to queued ingestion is {}, error: {}", + if ( + managedStreamingPolicy + .continueWhenStreamingIngestionUnavailable + ) { + "on" + } else { + "off" + }, + ex.message, + ) + + failureDetails.errorCategory = + ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + failureDetails, + ) + + return managedStreamingPolicy + .continueWhenStreamingIngestionUnavailable + } + + // Table configuration prevents streaming + isTableConfigPreventsStreaming(ex) -> { + logger.info( + "Fallback to queued ingest due to a target table config, error: {}", + ex.message, + ) + + failureDetails.errorCategory = + ManagedStreamingErrorCategory + .TABLE_CONFIGURATION_PREVENTS_STREAMING + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + failureDetails, + ) + + return true + } + + // Request properties prevent streaming + isRequestPropertiesPreventsStreaming(ex) -> { + logger.info( + "Fallback to queued ingest due to request properties, error: {}", + ex.message, + ) + + failureDetails.errorCategory = + ManagedStreamingErrorCategory + .REQUEST_PROPERTIES_PREVENT_STREAMING + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + failureDetails, + ) + + return true + } + + else -> { + logger.info( + "Don't fallback to queued ingest given this exception: {}", + ex.message, + ) + + failureDetails.errorCategory = + ManagedStreamingErrorCategory.OTHER_ERRORS + managedStreamingPolicy.streamingErrorCallback( + source, + database, + table, + props, + failureDetails, + ) + + return false + } + } + } + + private fun isStreamingIngestionOff(ex: IngestException): Boolean { + // Check if error indicates streaming is disabled + val message = ex.message.lowercase() + return message.contains("streaming") && + ( + message.contains("disabled") || + message.contains("not enabled") || + message.contains("off") + ) + } + + private fun isTableConfigPreventsStreaming(ex: IngestException): Boolean { + // Check if error indicates table configuration prevents streaming + val message = ex.message.lowercase() + return 
message.contains("update policy") || + message.contains("schema") || + message.contains("incompatible") + } + + private fun isRequestPropertiesPreventsStreaming( + ex: IngestException, + ): Boolean { + // Check if error indicates request is too large or has incompatible properties + val message = ex.message.lowercase() + return message.contains("too large") || + message.contains("exceeds") || + message.contains("maximum allowed size") || + message.contains( + "KustoRequestPayloadTooLargeException".lowercase(), + ) || + ex.failureCode == 413 // Request Entity Too Large + } + + private fun InputStream.isValidForIngest(): Boolean { + return try { + this.available() > 0 + } catch (_: Exception) { + false + } + } + + suspend fun pollUntilCompletion( + database: String, + table: String, + operationId: String, + pollingInterval: kotlin.time.Duration = + kotlin.time.Duration.parse("PT30S"), + timeout: kotlin.time.Duration = kotlin.time.Duration.parse("PT5M"), + ): StatusResponse { + return queuedIngestClient.pollUntilCompletion( + database, + table, + operationId, + pollingInterval, + timeout, + ) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt new file mode 100644 index 000000000..048ae3b90 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt @@ -0,0 +1,602 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.client + +import com.microsoft.azure.kusto.ingest.v2.KustoBaseApiClient +import com.microsoft.azure.kusto.ingest.v2.MAX_BLOBS_PER_BATCH +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestClientException +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestSizeLimitExceededException +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionResultUtils +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.Blob +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequest +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import com.microsoft.azure.kusto.ingest.v2.models.Status +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader +import io.ktor.http.HttpStatusCode +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.delay +import kotlinx.coroutines.withTimeoutOrNull +import org.slf4j.LoggerFactory +import java.net.ConnectException +import java.time.Clock +import 
java.time.OffsetDateTime +import kotlin.time.Duration + +/** + * Queued ingestion client for Azure Data Explorer (Kusto). + * + * This client handles ingestion through the queued ingestion path, which + * provides reliable, asynchronous data ingestion with operation tracking + * capabilities. + * + * **Important:** This class cannot be instantiated directly. Use + * [com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder] to + * create instances of this client. The internal constructor ensures that only + * the builder can create properly configured instances. + * + * Example usage: + * ``` + * val client = QueuedIngestionClientBuilder.create(dmEndpoint) + * .withAuthentication(tokenProvider) + * .withMaxConcurrency(10) + * .build() + * ``` + * + * @see + * com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestionClientBuilder + */ +class QueuedIngestClient +internal constructor( + private val apiClient: KustoBaseApiClient, + private val cachedConfiguration: ConfigurationCache, + private val uploader: IUploader, + private val shouldDisposeUploader: Boolean = false, +) : MultiIngestClient { + private val logger = LoggerFactory.getLogger(QueuedIngestClient::class.java) + + override suspend fun ingestAsync( + sources: List, + database: String, + table: String, + ingestRequestProperties: IngestRequestProperties?, + ): ExtendedIngestResponse { + // Validate sources list is not empty + require(sources.isNotEmpty()) { "sources list cannot be empty" } + val maxBlobsPerBatch = getMaxSourcesPerMultiIngest() + // Check if sources count exceeds the limit + if (sources.size > maxBlobsPerBatch) { + throw IngestSizeLimitExceededException( + size = sources.size.toLong(), + maxNumberOfBlobs = maxBlobsPerBatch, + message = + "Ingestion sources count(${sources.size}) is larger than the limit allowed ($maxBlobsPerBatch)", + isPermanent = true, + ) + } + + // Check that all blobs have the same format + val differentFormatBlob = + sources.map { source -> source.format }.toSet() + if (differentFormatBlob.size > 1) { + logger.error( + "All blobs in the request must have the same format.Received formats [{}]", + differentFormatBlob.joinToString(", "), + ) + throw IngestClientException( + "All blobs in the request must have the same format. 
Received formats: $differentFormatBlob",
+            )
+        }
+
+        // Split sources and upload local sources in parallel
+        val blobSources = uploadLocalSourcesAsync(sources)
+
+        // Check for duplicate blob URLs (ignoring SAS tokens / query parameters)
+        val duplicates =
+            blobSources
+                .groupBy { sanitizeBlobUrl(it.blobPath) }
+                .filter { it.value.size > 1 }
+
+        if (duplicates.isNotEmpty()) {
+            val duplicateInfo =
+                duplicates.entries.joinToString(", ") { (url, blobs) ->
+                    val sourceIds =
+                        blobs.joinToString(", ") { it.sourceId.toString() }
+                    "{Url: $url, Source Ids: [$sourceIds]}"
+                }
+            throw IngestClientException(
+                "Duplicate blob sources detected in the request: [$duplicateInfo]",
+            )
+        }
+
+        val blobs =
+            blobSources.map {
+                Blob(
+                    it.blobPath,
+                    sourceId = it.sourceId.toString(),
+                    rawSize = it.blobExactSize,
+                )
+            }
+        val ingestRequest =
+            IngestRequest(
+                timestamp = OffsetDateTime.now(Clock.systemUTC()),
+                blobs = blobs,
+                properties = ingestRequestProperties,
+            )
+        val response: HttpResponse<IngestResponse> =
+            this.apiClient.api.postQueuedIngest(
+                database = database,
+                table = table,
+                ingestRequest = ingestRequest,
+            )
+        val ingestResponse =
+            handleIngestResponse(
+                response = response,
+                database = database,
+                table = table,
+            )
+        return ExtendedIngestResponse(ingestResponse, IngestKind.QUEUED)
+    }
+
+    override suspend fun getMaxSourcesPerMultiIngest(): Int {
+        // Get the limit from the service configuration, or fall back to the default
+        return try {
+            cachedConfiguration
+                .getConfiguration()
+                .ingestionSettings
+                ?.maxBlobsPerBatch
+                ?.toInt() ?: MAX_BLOBS_PER_BATCH
+        } catch (e: Exception) {
+            logger.warn(
+                "Failed to get max sources from configuration, using default",
+                e,
+            )
+            MAX_BLOBS_PER_BATCH
+        }
+    }
+
+    override suspend fun ingestAsync(
+        source: IngestionSource,
+        database: String,
+        table: String,
+        ingestRequestProperties: IngestRequestProperties?,
+    ): ExtendedIngestResponse {
+        // Fall back to default properties: the format is mandatory, and a
+        // missing format would fail the request
+        val effectiveIngestionProperties =
+            ingestRequestProperties
+                ?: IngestRequestPropertiesBuilder(format = Format.csv)
+                    .build()
+        when (source) {
+            is BlobSource -> {
+                return ingestAsync(
+                    listOf(source),
+                    database,
+                    table,
+                    effectiveIngestionProperties,
+                )
+            }
+            is LocalSource -> {
+                // Upload the local source to blob storage
+                val blobSource = uploader.uploadAsync(source)
+                return ingestAsync(
+                    listOf(blobSource),
+                    database,
+                    table,
+                    effectiveIngestionProperties,
+                )
+            }
+            else -> {
+                throw IngestClientException(
+                    "Unsupported ingestion source type: ${source::class.simpleName}",
+                )
+            }
+        }
+    }
+
+    override suspend fun getOperationSummaryAsync(
+        operation: IngestionOperation,
+    ): Status {
+        val statusResponse =
+            getIngestionDetails(
+                operation.database,
+                operation.table,
+                operation.operationId.toString(),
+                false,
+            )
+        return statusResponse.status
+            ?: Status(
+                inProgress = 0,
+                succeeded = 0,
+                failed = 0,
+                canceled = 0,
+            )
+    }
+
+    override suspend fun getOperationDetailsAsync(
+        operation: IngestionOperation,
+    ): StatusResponse {
+        return getIngestionDetails(
+            database = operation.database,
+            table = operation.table,
+            operationId = operation.operationId.toString(),
+            details = true,
+        )
+    }
+
+    override fun close() {
+        if (shouldDisposeUploader) {
+            uploader.close()
+        }
+    }
+
+    /**
+     * Splits sources into BlobSources and LocalSources, uploads LocalSources in
+     * parallel, and returns a unified list of BlobSources.
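+     * Pass-through blob sources come first in the returned list, followed by
+     * the uploaded local sources, so callers should not rely on positional
+     * correspondence with the input list.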
+     *
+     * @param sources The list of ingestion sources to process
+     * @return A list of BlobSources including both original BlobSources and
+     *   uploaded LocalSources
+     * @throws IngestClientException if an unsupported source type is
+     *   encountered
+     */
+    private suspend fun uploadLocalSourcesAsync(
+        sources: List<IngestionSource>,
+    ): List<BlobSource> {
+        // Split sources into BlobSources and LocalSources
+        val blobSources = mutableListOf<BlobSource>()
+        val localSources = mutableListOf<LocalSource>()
+
+        sources.forEach { source ->
+            when (source) {
+                is BlobSource -> blobSources.add(source)
+                is LocalSource -> localSources.add(source)
+                else ->
+                    throw IngestClientException(
+                        "Unsupported ingestion source type: ${source::class.simpleName}",
+                    )
+            }
+        }
+
+        // Upload LocalSources in parallel and collect the resulting BlobSources
+        if (localSources.isNotEmpty()) {
+            logger.info(
+                "Uploading ${localSources.size} local source(s) to blob storage",
+            )
+            val uploadedBlobs = coroutineScope {
+                localSources
+                    .map { localSource ->
+                        async { uploader.uploadAsync(localSource) }
+                    }
+                    .awaitAll()
+            }
+            blobSources.addAll(uploadedBlobs)
+            logger.info(
+                "Successfully uploaded ${uploadedBlobs.size} local source(s)",
+            )
+        }
+
+        return blobSources
+    }
+
+    /**
+     * Sanitizes a blob URL by removing the SAS token and query parameters to
+     * allow proper duplicate detection.
+     */
+    private fun sanitizeBlobUrl(blobPath: String): String {
+        return blobPath.split("?").first()
+    }
+
+    suspend fun <T : Any> handleIngestResponse(
+        response: HttpResponse<T>,
+        database: String,
+        table: String,
+    ): T {
+        if (response.success) {
+            return response.body()
+        } else {
+            if (response.status == HttpStatusCode.NotFound.value) {
+                val message =
+                    "Endpoint ${this.apiClient.dmUrl} not found. Please ensure that the " +
+                        "target cluster is correct and reachable."
+                logger.error(message)
+                throw IngestException(
+                    message = message,
+                    cause = ConnectException(message),
+                    failureCode = response.status,
+                    failureSubCode = "",
+                    isPermanent = false,
+                )
+            }
+            val nonSuccessResponseBody: T = response.body()
+            val ingestResponseOperationId =
+                if (
+                    nonSuccessResponseBody is IngestResponse &&
+                        nonSuccessResponseBody.ingestionOperationId != null
+                ) {
+                    logger.info(
+                        "Ingestion Operation ID: ${nonSuccessResponseBody.ingestionOperationId}",
+                    )
+                    nonSuccessResponseBody.ingestionOperationId
+                } else {
+                    "N/A"
+                }
+            val errorMessage =
+                "Failed to submit queued ingestion to $database.$table. " +
+                    "Status: ${response.status}, Body: $nonSuccessResponseBody. " +
+                    "OperationId $ingestResponseOperationId"
+            logger.error(
+                "Queued ingestion failed with response: {}",
+                errorMessage,
+            )
+            throw IngestException(
+                message = errorMessage,
+                cause = RuntimeException(errorMessage),
+                isPermanent = true,
+            )
+        }
+    }
+
+    /**
+     * Gets detailed information about an ingestion operation.
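+     * HTTP 404 responses and blob failures the service reports as transient
+     * are rethrown as retryable (isPermanent = false); other service-reported
+     * failures are thrown as permanent.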
+     *
+     * @param database The target database name
+     * @param table The target table name
+     * @param operationId The operation ID returned from the ingestion request
+     * @param details Whether to retrieve detailed blob-level information
+     * @return StatusResponse with operation details
+     */
+    private suspend fun getIngestionDetails(
+        database: String,
+        table: String,
+        operationId: String,
+        details: Boolean,
+    ): StatusResponse {
+        logger.debug("Checking ingestion summary for operation: $operationId")
+        try {
+            val response: HttpResponse<StatusResponse> =
+                this.apiClient.api.getIngestStatus(
+                    database = database,
+                    table = table,
+                    operationId = operationId,
+                    details = details,
+                )
+
+            if (
+                response.success &&
+                    response.status == HttpStatusCode.OK.value
+            ) {
+                val ingestStatusResponse = response.body()
+                logger.debug(
+                    "Successfully retrieved summary for operation: {} and details: {}",
+                    operationId,
+                    ingestStatusResponse,
+                )
+                return ingestStatusResponse
+            } else {
+                logger.error(response.toString())
+                val ingestStatusFailure: StatusResponse = response.body()
+                // Check whether the failure is transient or permanent from the status
+                val transientFailures =
+                    ingestStatusFailure.details?.filter {
+                        it.failureStatus ==
+                            BlobStatus.FailureStatus.Transient
+                    }
+                val hasTransientErrors = !transientFailures.isNullOrEmpty()
+
+                if (
+                    response.status == HttpStatusCode.NotFound.value ||
+                        hasTransientErrors
+                ) {
+                    val message =
+                        printMessagesFromFailures(
+                            transientFailures,
+                            isTransientFailure = true,
+                        )
+                    logger.error(message)
+                    throw IngestException(
+                        message = message,
+                        cause = RuntimeException(message),
+                        failureCode = response.status,
+                        failureSubCode = "",
+                        isPermanent = false,
+                    )
+                }
+                // TODO: We need to eventually look at OneApiExceptions
+                val errorMessage =
+                    printMessagesFromFailures(
+                        ingestStatusFailure.details,
+                        isTransientFailure = false,
+                    )
+                logger.error(errorMessage)
+                throw IngestException(errorMessage, isPermanent = true)
+            }
+        } catch (e: Exception) {
+            logger.error(
+                "Exception occurred while getting ingestion summary for operation: $operationId",
+                e,
+            )
+            if (e is IngestException) throw e
+            throw IngestException(
+                "Failed to get ingestion summary: ${e.message}",
+                e,
+            )
+        }
+    }
+
+    private suspend fun getIngestionStatus(
+        database: String,
+        table: String,
+        operationId: String,
+        forceDetails: Boolean,
+    ): StatusResponse {
+        // If details are explicitly requested, use the details API
+        if (forceDetails) {
+            val statusResponse =
+                getIngestionDetails(database, table, operationId, true)
+            logger.debug(
+                "Forcing detailed status retrieval for operation: {} returning {}",
+                operationId,
+                statusResponse,
+            )
+            return statusResponse
+        }
+        // Start with a summary for efficiency
+        val statusResponse =
+            getIngestionDetails(database, table, operationId, false)
+        // If the operation has failures or is completed, get detailed information
+        return if (
+            statusResponse.status?.failed?.let { it > 0 } == true ||
+                IngestionResultUtils.isCompleted(statusResponse.details)
+        ) {
+            logger.debug(
+                "Operation $operationId has failures or is completed, retrieving details",
+            )
+            getIngestionDetails(database, table, operationId, true)
+        } else {
+            statusResponse
+        }
+    }
+
+    /**
+     * Polls the ingestion status until completion or timeout.
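+     *
+     * A minimal usage sketch (the `client` value and target names are
+     * illustrative, not part of this API):
+     * ```
+     * val status =
+     *     client.pollUntilCompletion(
+     *         database = "MyDatabase",
+     *         table = "MyTable",
+     *         operationId = operationId, // from a prior queued ingestAsync call
+     *     )
+     * ```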
+ * + * @param database The target database name + * @param table The target table name + * @param operationId The operation ID to poll + * @param pollingInterval How often to check the status + * @param timeout Maximum time to wait before throwing timeout exception + * @return The final StatusResponse when ingestion is completed + * @throws IngestException if the operation times out or fails + */ + suspend fun pollUntilCompletion( + database: String, + table: String, + operationId: String, + pollingInterval: Duration = Duration.parse("PT30S"), + timeout: Duration = Duration.parse("PT5M"), + ): StatusResponse { + val result = + withTimeoutOrNull(timeout.inWholeMilliseconds) { + var currentStatus: StatusResponse + do { + currentStatus = + getIngestionStatus( + database, + table, + operationId, + forceDetails = true, + ) + logger.debug( + "Starting to poll ingestion status for operation: $operationId, timeout: $timeout", + ) + if ( + IngestionResultUtils.isCompleted( + currentStatus.details, + ) + ) { + logger.info( + "Ingestion operation $operationId completed", + ) + return@withTimeoutOrNull currentStatus + } + + logger.debug( + "Ingestion operation $operationId still in progress, waiting ${pollingInterval.inWholeSeconds}s before next check", + ) + delay(pollingInterval.inWholeMilliseconds) + } while ( + !IngestionResultUtils.isCompleted( + currentStatus.details, + ) + ) + + currentStatus + } + + if (result != null) { + logger.debug( + "Finished polling ingestion status for operation: {}, result: {}", + operationId, + result, + ) + result.status?.failed?.let { + if (it >= 1) { + val errorMessage = + printMessagesFromFailures( + result.details, + isTransientFailure = false, + ) + logger.error( + "Ingestion operation $operationId failed. $errorMessage", + ) + throw IngestException( + "Ingestion operation $operationId failed. $errorMessage", + isPermanent = true, + ) + } + } + } + + return result + ?: throw IngestException( + "Ingestion operation $operationId timed out after $timeout. " + + "Consider increasing the timeout duration or check the operation status manually.", + ) + } + + private fun printMessagesFromFailures( + failures: List?, + isTransientFailure: Boolean, + ): String? { + return failures?.joinToString { + ( + sourceId, + status, + startedAt, + lastUpdateTime, + errorCode, + failureStatus, + details, + ), + -> + buildString { + append("Error ingesting blob with $sourceId. ") + if (!details.isNullOrBlank()) append("ErrorDetails $details, ") + if (!errorCode.isNullOrBlank()) { + append("ErrorCode $errorCode , ") + } + if (status != null) append("Status ${status.value}. ") + if (lastUpdateTime != null) { + append("Ingestion lastUpdated at $lastUpdateTime ") + } + if (startedAt != null) append("& started at $startedAt. ") + if (failureStatus != null) { + append("FailureStatus ${failureStatus.value}. ") + } + append("Is transient failure: $isTransientFailure") + } + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt new file mode 100644 index 000000000..d62e82c62 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt @@ -0,0 +1,344 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package com.microsoft.azure.kusto.ingest.v2.client + +import com.microsoft.azure.kusto.ingest.v2.KustoBaseApiClient +import com.microsoft.azure.kusto.ingest.v2.STREAMING_MAX_REQ_BODY_SIZE +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionUtils +import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import com.microsoft.azure.kusto.ingest.v2.models.Status +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.FileSource +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode +import io.ktor.http.ContentType +import io.ktor.http.HttpStatusCode +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.json.Json +import org.slf4j.LoggerFactory +import java.net.ConnectException +import java.net.URI +import java.util.UUID + +/** + * Streaming ingestion client for Azure Data Explorer (Kusto). + * + * This client handles ingestion through the streaming ingestion path, which + * provides direct, synchronous data ingestion suitable for low-latency + * scenarios. + * + * **Important:** This class cannot be instantiated directly. Use + * [com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder] + * to create instances of this client. The internal constructor ensures that + * only the builder can create properly configured instances. + * + * **Note:** Streaming ingestion does not support operation tracking. The + * methods [getOperationSummaryAsync] and [getOperationDetailsAsync] will return + * empty responses with warnings logged. 
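+ *
+ * **Size limits:** payloads larger than the streaming request body limit
+ * (scaled by an estimated compression factor for the source format) are
+ * rejected with a permanent [IngestException]; route such payloads through
+ * queued or managed streaming ingestion instead.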
+ * + * Example usage: + * ``` + * val client = StreamingIngestClientBuilder.create(engineEndpoint) + * .withAuthentication(tokenProvider) + * .build() + * ``` + * + * @see + * com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder + */ +class StreamingIngestClient +internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { + private val logger = + LoggerFactory.getLogger(StreamingIngestClient::class.java) + + companion object { + private val EMPTY_STATUS = + Status( + succeeded = 0L, + failed = 0L, + inProgress = 0L, + canceled = 0L, + ) + + private val EMPTY_STATUS_RESPONSE = + StatusResponse( + status = EMPTY_STATUS, + details = emptyList(), + startTime = null, + ) + } + + override suspend fun ingestAsync( + source: IngestionSource, + database: String, + table: String, + ingestRequestProperties: IngestRequestProperties?, + ): ExtendedIngestResponse { + // Streaming ingestion processes one source at a time + val maxSize = getMaxStreamingIngestSize(source = source) + val operationId = UUID.randomUUID().toString() + val effectiveIngestionProperties = + ingestRequestProperties + ?: IngestRequestPropertiesBuilder(format = Format.csv) + .build() + when (source) { + is BlobSource -> { + logger.info( + "Streaming ingestion from BlobSource: ${source.blobPath}", + ) + submitStreamingIngestion( + database = database, + table = table, + // Not used for blob-based streaming + data = ByteArray(0), + ingestProperties = effectiveIngestionProperties, + blobUrl = source.blobPath, + ) + } + is FileSource, + is StreamSource, + -> { + val name = + when (source) { + is FileSource -> source.name + is StreamSource -> source.name + else -> "UnknownSource" + } + logger.info( + "Streaming ingestion from ${source::class.simpleName}: $name", + ) + val data = source.data().readBytes() + val contentSize = data.size + if (contentSize > maxSize) { + val message = + "Request content size $contentSize exceeds the maximum allowed size of $STREAMING_MAX_REQ_BODY_SIZE bytes." + throw IngestException(message = message, isPermanent = true) + } + submitStreamingIngestion( + database = database, + table = table, + data = data, + ingestProperties = effectiveIngestionProperties, + blobUrl = null, + ) + source.close() + } + else -> { + throw IngestException( + message = + "Unsupported source type for streaming ingestion: ${source::class.simpleName}", + isPermanent = true, + ) + } + } + // Streaming ingestion doesn't return an operation ID from the server + // We generate one locally for consistency with the IngestClient interface + return ExtendedIngestResponse( + IngestResponse(ingestionOperationId = operationId), + IngestKind.STREAMING, + ) + } + + /** + * Submits a streaming ingestion request. + * + * @param database The target database name + * @param table The target table name + * @param data The data to ingest (as ByteArray) + * @param format The data format + * @param ingestProperties Optional ingestion properties + * @param blobUrl Optional blob URL for blob-based streaming ingestion (if + * provided, data is ignored) + * @return IngestResponse for tracking the request + */ + suspend fun submitStreamingIngestion( + database: String, + table: String, + data: ByteArray, + ingestProperties: IngestRequestProperties, + blobUrl: String? = null, + ) { + val host = URI(this.apiClient.engineUrl).host + + val bodyContent: Any + val sourceKind: String? 
+ val contentType: String + + if (blobUrl != null) { + // Blob-based streaming + val requestBody = StreamFromBlobRequestBody(sourceUri = blobUrl) + bodyContent = Json.encodeToString(requestBody).toByteArray() + sourceKind = "uri" + contentType = ContentType.Application.Json.toString() + logger.info( + "Submitting streaming ingestion from blob for database: {}, table: {}, blob: {}. Host {}", + database, + table, + blobUrl, + host, + ) + } else { + // Direct streaming using raw data + bodyContent = data + sourceKind = null + contentType = ContentType.Application.OctetStream.toString() + logger.info( + "Submitting streaming ingestion request for database: {}, table: {}, data size: {}. Host {}", + database, + table, + data.size, + host, + ) + } + + try { + val response: HttpResponse = + this.apiClient.api.postStreamingIngest( + database = database, + table = table, + streamFormat = ingestProperties.format, + body = bodyContent, + mappingName = + ingestProperties.ingestionMappingReference, + sourceKind = sourceKind, + host = host, + acceptEncoding = "gzip", + connection = "Keep-Alive", + contentEncoding = null, + contentType = contentType, + ) + return handleIngestResponse( + response = response, + database = database, + table = table, + engineUrl = host, + ) + } catch (notAbleToReachHost: ConnectException) { + val message = + "Failed to reach ${this.apiClient.engineUrl} for streaming ingestion. Please ensure the cluster address is correct and the cluster is reachable." + throw IngestException( + message = message, + cause = notAbleToReachHost, + failureCode = HttpStatusCode.NotFound.value, + failureSubCode = "", + isPermanent = false, + ) + } catch (e: Exception) { + logger.error( + "Exception occurred during streaming ingestion submission", + e, + ) + if (e is IngestException) throw e + throw IngestException( + message = + "Error submitting streaming ingest request to ${this.apiClient.engineUrl}", + cause = e, + failureSubCode = + UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED + .toString(), + isPermanent = true, + ) + } + } + + override suspend fun getOperationSummaryAsync( + operation: IngestionOperation, + ): Status { + logger.warn( + "Streaming ingestion does not support operation status tracking. Operation ID: ${operation.operationId} " + + "cannot be tracked. Returning empty status.", + ) + return EMPTY_STATUS + } + + override suspend fun getOperationDetailsAsync( + operation: IngestionOperation, + ): StatusResponse { + logger.warn( + "Streaming ingestion does not support detailed operation tracking. Operation ID: ${operation.operationId} cannot be tracked. Returning empty status response.", + ) + return EMPTY_STATUS_RESPONSE + } + + suspend fun handleIngestResponse( + response: HttpResponse, + database: String, + table: String, + engineUrl: String, + ): T { + if (response.success) { + val ingestResponseBody = response.body() + return ingestResponseBody + } else { + if (response.status == HttpStatusCode.NotFound.value) { + val message = + "Endpoint $engineUrl not found. Please ensure the cluster is reachable and supports streaming ingestion." + logger.error( + "$engineUrl streaming endpoint not found. 
Please ensure that the target cluster supports " + + "streaming ingestion and that the endpoint URL is correct.", + ) + throw IngestException( + message = message, + cause = ConnectException(message), + failureCode = response.status, + failureSubCode = + UploadErrorCode.NETWORK_ERROR.toString(), + isPermanent = false, + ) + } + val nonSuccessResponseBody: T = response.body() + val ingestResponseOperationId = + if (nonSuccessResponseBody is IngestResponse) { + if ( + (nonSuccessResponseBody as IngestResponse) + .ingestionOperationId != null + ) { + logger.info( + "Ingestion Operation ID: ${(nonSuccessResponseBody as IngestResponse).ingestionOperationId}", + ) + nonSuccessResponseBody.ingestionOperationId + } else { + "N/A" + } + } else { + "N/A" + } + val errorMessage = + "Failed to submit streaming ingestion to $database.$table. " + + "Status: ${response.status}, Body: $nonSuccessResponseBody. " + + "OperationId $ingestResponseOperationId" + logger.error(errorMessage) + throw IngestException( + message = errorMessage, + cause = RuntimeException(errorMessage), + isPermanent = true, + ) + } + } + + private fun getMaxStreamingIngestSize(source: IngestionSource): Long { + val compressionFactor = + IngestionUtils.getRowStoreEstimatedFactor( + source.format, + source.compressionType, + ) + return (STREAMING_MAX_REQ_BODY_SIZE * compressionFactor).toLong() + } + + override fun close() {} +} + +@Serializable +private data class StreamFromBlobRequestBody( + @SerialName("SourceUri") val sourceUri: String, +) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt new file mode 100644 index 000000000..83f452f18 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt @@ -0,0 +1,155 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.client.policy + +import com.microsoft.azure.kusto.ingest.v2.MANAGED_STREAMING_CONTINUE_WHEN_UNAVAILABLE_DEFAULT +import com.microsoft.azure.kusto.ingest.v2.MANAGED_STREAMING_DATA_SIZE_FACTOR_DEFAULT +import com.microsoft.azure.kusto.ingest.v2.MANAGED_STREAMING_RESUME_TIME_MINUTES +import com.microsoft.azure.kusto.ingest.v2.MANAGED_STREAMING_RETRY_DELAYS_SECONDS +import com.microsoft.azure.kusto.ingest.v2.MANAGED_STREAMING_RETRY_JITTER_MS +import com.microsoft.azure.kusto.ingest.v2.MANAGED_STREAMING_THROTTLE_BACKOFF_SECONDS +import com.microsoft.azure.kusto.ingest.v2.common.CustomRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource +import java.time.Clock +import java.time.Duration +import java.time.Instant +import java.util.concurrent.ConcurrentHashMap +import kotlin.random.Random + +/** + * This is the default policy used by the managed streaming ingestion client. + * Whenever there is a permanent streaming error, it defaults to queued + * ingestion for a time period defined by timeUntilResumingStreamingIngest. 
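+ *
+ * A construction sketch (the overridden values are illustrative, not
+ * recommendations):
+ * ```
+ * val policy =
+ *     DefaultManagedStreamingPolicy(
+ *         continueWhenStreamingIngestionUnavailable = true,
+ *         dataSizeFactor = 0.5,
+ *     )
+ * ```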
+ */
+class DefaultManagedStreamingPolicy(
+    override val continueWhenStreamingIngestionUnavailable: Boolean =
+        MANAGED_STREAMING_CONTINUE_WHEN_UNAVAILABLE_DEFAULT,
+    override val dataSizeFactor: Double =
+        MANAGED_STREAMING_DATA_SIZE_FACTOR_DEFAULT,
+    override val retryPolicy: IngestRetryPolicy =
+        createDefaultRetryPolicy(),
+    /**
+     * When streaming is throttled, the client falls back to queued ingestion.
+     * This property controls how long the client keeps using queued ingestion
+     * after a throttled response before attempting streaming ingestion again.
+     */
+    val throttleBackoffPeriod: Duration =
+        Duration.ofSeconds(MANAGED_STREAMING_THROTTLE_BACKOFF_SECONDS),
+    /**
+     * When streaming ingestion is unavailable, the client falls back to
+     * queued ingestion. This property controls how long the client keeps
+     * using queued ingestion before attempting streaming ingestion again.
+     */
+    val timeUntilResumingStreamingIngest: Duration =
+        Duration.ofMinutes(MANAGED_STREAMING_RESUME_TIME_MINUTES),
+) : ManagedStreamingPolicy {
+
+    private val defaultToQueuedUntilTimeByTable =
+        ConcurrentHashMap<String, ManagedStreamingErrorState>()
+
+    /**
+     * Determines whether to default to queued ingestion based on the current
+     * error state for the specified table.
+     */
+    override fun shouldDefaultToQueuedIngestion(
+        source: IngestionSource,
+        database: String,
+        table: String,
+        props: IngestRequestProperties,
+    ): Boolean {
+        val key = "$database-$table"
+
+        val useQueuedUntilTime = defaultToQueuedUntilTimeByTable[key]
+        if (useQueuedUntilTime != null) {
+            val (dateTime, errorCategory) = useQueuedUntilTime
+            if (dateTime.isAfter(Instant.now(Clock.systemUTC()))) {
+                // Streaming is known to be unavailable. Default to queued unless
+                // streaming is off and we're not configured to continue; in that
+                // case return false so the streaming attempt surfaces the error.
+                return !(
+                    errorCategory ==
+                        ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF &&
+                        !continueWhenStreamingIngestionUnavailable
+                )
+            }
+            // Time expired, remove the entry
+            defaultToQueuedUntilTimeByTable.remove(key)
+        }
+        return false
+    }
+
+    override fun streamingErrorCallback(
+        source: IngestionSource,
+        database: String,
+        table: String,
+        props: IngestRequestProperties,
+        failureDetails: ManagedStreamingRequestFailureDetails,
+    ) {
+        val key = "$database-$table"
+
+        when (failureDetails.errorCategory) {
+            ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF,
+            ManagedStreamingErrorCategory
+                .TABLE_CONFIGURATION_PREVENTS_STREAMING,
+            -> {
+                defaultToQueuedUntilTimeByTable[key] =
+                    ManagedStreamingErrorState(
+                        Instant.now(Clock.systemUTC()) +
+                            timeUntilResumingStreamingIngest,
+                        failureDetails.errorCategory,
+                    )
+            }
+
+            ManagedStreamingErrorCategory.THROTTLED -> {
+                defaultToQueuedUntilTimeByTable[key] =
+                    ManagedStreamingErrorState(
+                        Instant.now(Clock.systemUTC()) +
+                            throttleBackoffPeriod,
+                        errorState = failureDetails.errorCategory,
+                    )
+            }
+            else -> {
+                // No action needed for other error categories
+            }
+        }
+    }
+
+    override fun streamingSuccessCallback(
+        source: IngestionSource,
+        database: String,
+        table: String,
+        props: IngestRequestProperties,
+        successDetails: ManagedStreamingRequestSuccessDetails,
+    ) {
+        // Default implementation does nothing
+    }
+
+    companion object {
+        private val random = Random.Default
+
+        /**
+         * Creates a default retry policy with exponential backoff and jitter.
+         * Uses delays from MANAGED_STREAMING_RETRY_DELAYS_SECONDS (1s, 2s, 4s)
+         * plus random jitter (0-MANAGED_STREAMING_RETRY_JITTER_MS ms).
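+         * Jitter spreads retries from concurrent clients over time so that
+         * transient failures are not amplified by synchronized retry waves.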
+ */ + fun createDefaultRetryPolicy(): IngestRetryPolicy { + // Create delays with jitter based on configured values + val delays = + MANAGED_STREAMING_RETRY_DELAYS_SECONDS.map { seconds -> + Duration.ofSeconds(seconds) + .plusMillis( + random.nextLong( + 0, + MANAGED_STREAMING_RETRY_JITTER_MS, + ), + ) + } + .toTypedArray() + return CustomRetryPolicy(delays) + } + + /** Default instance with standard settings. */ + val DEFAULT_MANAGED_STREAMING_POLICY = DefaultManagedStreamingPolicy() + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingErrorState.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingErrorState.kt new file mode 100644 index 000000000..4a49f6980 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingErrorState.kt @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.client.policy + +import java.time.Instant + +data class ManagedStreamingErrorState( + val resetStateAt: Instant, + val errorState: ManagedStreamingErrorCategory, +) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt new file mode 100644 index 000000000..f254cbb4c --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.client.policy + +import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource +import java.time.Duration + +/** Categories of errors that can occur during managed streaming ingestion. */ +enum class ManagedStreamingErrorCategory { + /** + * Indicates that streaming cannot be performed due to the properties of the + * request itself but would likely succeed if queued. These errors are + * request specific and do not imply anything on following requests. + */ + REQUEST_PROPERTIES_PREVENT_STREAMING, + + /** + * Indicates streaming cannot be performed due to a conflicting table + * configuration, but may succeed if queued. These errors are table specific + * and following requests will behave similarly until the conflict is + * resolved on the service side. + */ + TABLE_CONFIGURATION_PREVENTS_STREAMING, + + /** + * Indicates streaming cannot be performed due to some service + * configuration. To resolve these errors, a service side change is required + * to use streaming. + */ + STREAMING_INGESTION_OFF, + + /** + * Indicates streaming ingestion endpoint is throttled and returns HTTP + * TooManyRequests error code (429). + */ + THROTTLED, + + /** Reported for all other types of streaming errors. */ + OTHER_ERRORS, + + /** Reported when an unexpected error type occurred. */ + UNKNOWN_ERRORS, +} + +/** Details about a successful streaming ingestion request. */ +data class ManagedStreamingRequestSuccessDetails(val duration: Duration) + +/** Details about a failed streaming ingestion request. 
*/ +data class ManagedStreamingRequestFailureDetails( + val duration: Duration = Duration.ZERO, + val isPermanent: Boolean, + var errorCategory: ManagedStreamingErrorCategory = + ManagedStreamingErrorCategory.OTHER_ERRORS, + val exception: Exception, +) + +/** + * A policy which controls the way the managed streaming ingest client behaves + * when there are errors. + */ +interface ManagedStreamingPolicy { + /** + * When streaming ingestion is disabled for the table, database or cluster, + * determine if the client will fall back to queued ingestion. When set to + * false managed streaming client will fail ingestions for tables where + * streaming policy is not enabled. Enabling this property means the client + * might use queued ingestion exclusively without the caller knowing. + * Permanent errors in streaming ingestion that are not errors in queued + * ingestion, will fall back to queued ingestion regardless of this setting. + */ + val continueWhenStreamingIngestionUnavailable: Boolean + + /** + * The number of times to attempt streaming data after transient failures, + * before falling back to queued ingestion. + */ + val retryPolicy: IngestRetryPolicy + + /** + * A size factor that enables tuning up and down the upper limit of data + * sent to streaming. Default value is 1.0. + */ + val dataSizeFactor: Double + + /** + * Should this ingestion attempt skip streaming and go directly to queued + * ingestion. + * + * @return false if streaming should be attempted, true if streaming should + * be skipped + */ + fun shouldDefaultToQueuedIngestion( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + ): Boolean + + /** This callback will be called when a streaming error occurs. */ + fun streamingErrorCallback( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + failureDetails: ManagedStreamingRequestFailureDetails, + ) + + /** This callback will be called when streaming succeeds. */ + fun streamingSuccessCallback( + source: IngestionSource, + database: String, + table: String, + props: IngestRequestProperties, + successDetails: ManagedStreamingRequestSuccessDetails, + ) +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt deleted file mode 100644 index c5d575b0a..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ClientDetails.kt +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.common - -import java.util.concurrent.ConcurrentHashMap - -data class ClientDetails( - val applicationForTracing: String?, - val userNameForTracing: String?, - val clientVersionForTracing: String?, -) { - companion object { - const val NONE = "[none]" - - // Cache for default values to avoid recomputing on every call - private val defaultValuesCache = ConcurrentHashMap() - - /** - * Escapes special characters in header field values by wrapping in - * curly braces and replacing problematic characters with underscores. - */ - private fun escapeField(field: String): String { - val escaped = field.replace(Regex("[\\r\\n\\s{}|]+"), "_") - return "{$escaped}" - } - - /** - * Formats the given fields into a string that can be used as a header. 
- * Format: "field1:{value1}|field2:{value2}" - */ - private fun formatHeader(args: Map): String { - return args.entries - .filter { it.key.isNotBlank() && it.value.isNotBlank() } - .joinToString("|") { (key, value) -> - "$key:${escapeField(value)}" - } - } - - /** Gets the process name from system properties with caching. */ - private fun getProcessName(): String { - return defaultValuesCache.computeIfAbsent("processName") { - val command = System.getProperty("sun.java.command") - if (!command.isNullOrBlank()) { - // Strip file name from command line (matches - // UriUtils.stripFileNameFromCommandLine) - command.split(" ").firstOrNull() ?: "JavaProcess" - } else { - "JavaProcess" - } - } - } - - private fun getUserName(): String { - return defaultValuesCache.computeIfAbsent("userName") { - var user = System.getProperty("user.name") - if (user.isNullOrBlank()) { - user = System.getenv("USERNAME") - val domain = System.getenv("USERDOMAIN") - if (!domain.isNullOrBlank() && !user.isNullOrBlank()) { - user = "$domain\\$user" - } - } - if (!user.isNullOrBlank()) user else NONE - } - } - - private fun getRuntime(): String { - return defaultValuesCache.computeIfAbsent("runtime") { - System.getProperty("java.runtime.name") - ?: System.getProperty("java.vm.name") - ?: System.getProperty("java.vendor") - ?: "UnknownRuntime" - } - } - - private fun getJavaVersion(): String { - return defaultValuesCache.computeIfAbsent("javaVersion") { - System.getProperty("java.version") ?: "UnknownVersion" - } - } - - /** - * Gets the default client version string with caching. Format: - * "Kusto.Java.Client:{version}|Runtime.{runtime}:{javaVersion}" - */ - private fun getDefaultVersion(): String { - return defaultValuesCache.computeIfAbsent("defaultVersion") { - val baseMap = - linkedMapOf( - "Kusto.Java.Client" to getPackageVersion(), - "Runtime.${escapeField(getRuntime())}" to - getJavaVersion(), - ) - formatHeader(baseMap) - } - } - - /** Gets the package version from the manifest or returns a default. */ - private fun getPackageVersion(): String { - return try { - ClientDetails::class.java.`package`.implementationVersion - ?: "Unknown" - } catch (e: Exception) { - "Unknown" - } - } - - /** - * Creates a ClientDetails from connector details Example output: - * "Kusto.MyConnector:{1.0.0}|App.{MyApp}:{0.5.3}|CustomField:{CustomValue}" - * - * @param name The name of the connector (will be prefixed with - * "Kusto.") - * @param version The version of the connector - * @param sendUser True if the user should be sent to Kusto, otherwise - * "[none]" will be sent - * @param overrideUser The user to send to Kusto, or null to use the - * current user - * @param appName The app hosting the connector, or null to use the - * current process name - * @param appVersion The version of the app hosting the connector, or - * null to use "[none]" - * @param additionalFields Additional fields to trace as key-value pairs - * @return ClientDetails instance with formatted connector information - */ - fun fromConnectorDetails( - name: String, - version: String, - sendUser: Boolean = false, - overrideUser: String? = null, - appName: String? = null, - appVersion: String? = null, - additionalFields: Map? 
= null, - ): ClientDetails { - val fieldsMap = linkedMapOf() - fieldsMap["Kusto.$name"] = version - - val finalAppName = appName ?: getProcessName() - val finalAppVersion = appVersion ?: NONE - fieldsMap["App.${escapeField(finalAppName)}"] = finalAppVersion - - additionalFields?.let { fieldsMap.putAll(it) } - - val app = formatHeader(fieldsMap) - - val user = - if (sendUser) { - overrideUser ?: getUserName() - } else { - NONE - } - - return ClientDetails(app, user, null) - } - - fun createDefault(): ClientDetails { - return ClientDetails( - applicationForTracing = getProcessName(), - userNameForTracing = getUserName(), - clientVersionForTracing = getDefaultVersion(), - ) - } - } - - @JvmName("getApplicationForTracingOrDefault") - fun getApplicationForTracing(): String { - return applicationForTracing ?: getProcessName() - } - - @JvmName("getUserNameForTracingOrDefault") - fun getUserNameForTracing(): String { - return userNameForTracing ?: getUserName() - } - - @JvmName("getClientVersionForTracingOrDefault") - fun getClientVersionForTracing(): String { - val defaultVersion = getDefaultVersion() - return if (clientVersionForTracing != null) { - "$defaultVersion|$clientVersionForTracing" - } else { - defaultVersion - } - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 07e88e0cb..3dee0f11d 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -6,18 +6,58 @@ import com.azure.core.credential.TokenCredential import com.microsoft.azure.kusto.ingest.v2.CONFIG_CACHE_DEFAULT_REFRESH_INTERVAL_HOURS import com.microsoft.azure.kusto.ingest.v2.CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS import com.microsoft.azure.kusto.ingest.v2.ConfigurationClient +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import java.lang.AutoCloseable import java.time.Duration +/** + * Interface for caching configuration data. + * + * Implementations should handle automatic refresh of stale data based on a + * refresh interval. When used with client builders, the configuration will be + * fetched at least once during client construction to ensure fresh data is + * available. + */ interface ConfigurationCache : AutoCloseable { val refreshInterval: Duration + /** + * Gets the current configuration, refreshing it if necessary based on the + * refresh interval. This method may return cached data if the cache is + * still valid. + */ suspend fun getConfiguration(): ConfigurationResponse override fun close() } +/** + * Default implementation of ConfigurationCache with time-based expiration. + * + * This cache automatically refreshes configuration data when it becomes stale + * based on the configured refresh interval. The implementation is thread-safe + * and handles concurrent requests efficiently. 
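+ *
+ * A construction sketch (the endpoint and credential are placeholders):
+ * ```
+ * val cache =
+ *     DefaultConfigurationCache(
+ *         dmUrl = "https://ingest-mycluster.kusto.windows.net",
+ *         tokenCredential = myTokenCredential,
+ *         clientDetails = ClientDetails.createDefault(),
+ *     )
+ * ```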
+ * + * Refresh behavior: + * - Configuration is refreshed automatically when the refresh interval expires + * - If refresh fails, the existing cached configuration is returned (if + * available) + * - The first call to getConfiguration() will always fetch fresh data + * - Concurrent refresh attempts are synchronized to prevent duplicate fetches + * + * @param refreshInterval Duration after which cached configuration is + * considered stale + * @param dmUrl Data management endpoint URL (required if configurationProvider + * is null) + * @param tokenCredential Authentication credentials (required if + * configurationProvider is null) + * @param skipSecurityChecks Whether to skip security validation (required if + * configurationProvider is null) + * @param clientDetails Client identification details for tracking + * @param configurationProvider Optional custom provider for configuration data. + * If provided, dmUrl/tokenCredential/skipSecurityChecks are not required. + */ class DefaultConfigurationCache( override val refreshInterval: Duration = Duration.ofHours(CONFIG_CACHE_DEFAULT_REFRESH_INTERVAL_HOURS), @@ -25,9 +65,9 @@ class DefaultConfigurationCache( val tokenCredential: TokenCredential? = null, val skipSecurityChecks: Boolean? = CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS, + val clientDetails: ClientDetails, val configurationProvider: (suspend () -> ConfigurationResponse)? = null, ) : ConfigurationCache { - init { if ( configurationProvider == null && @@ -50,6 +90,7 @@ class DefaultConfigurationCache( dmUrl!!, tokenCredential!!, skipSecurityChecks!!, + clientDetails, ) .getConfigurationDetails() } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt index 0ebb84448..2a380e8d0 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/IngestRetryPolicy.kt @@ -37,7 +37,7 @@ class SimpleRetryPolicy( override fun moveNext(retryNumber: UInt): Retry { require(retryNumber > 0u) { "retryNumber must be positive" } - if (retryNumber >= totalRetries.toUInt()) { + if (retryNumber > totalRetries.toUInt()) { return Retry(false, Duration.ZERO) } return Retry(true, intervalDuration) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt index e042b454c..da583d60d 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyExtensions.kt @@ -18,12 +18,12 @@ suspend fun IngestRetryPolicy.runWithRetry( onRetry: ((UInt, Exception, Boolean) -> Unit)? = null, // retry attempt number, exception, isPermanent onError: ((UInt, Exception, Boolean) -> Unit)? = null, - shouldRetry: ((UInt, Exception, Boolean) -> Retry)? = null, + shouldRetry: ((UInt, Exception, Boolean) -> RetryDecision)? = null, throwOnExhaustedRetries: Boolean = true, tracer: ((String) -> Unit)? = null, cancellationChecker: (() -> Boolean)? = null, ): T? 
{ - var attempt: UInt = 1u + var attempt = 1u while (true) { try { return action(attempt) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt index 55f3643c4..803728cb2 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt @@ -2,6 +2,8 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.exceptions +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode + open class IngestException( message: String? = null, cause: Throwable? = null, @@ -9,7 +11,6 @@ open class IngestException( val failureSubCode: String? = null, val isPermanent: Boolean? = null, ) : Exception(message, cause) { - open val alreadyTraced: Boolean = false open val creationMessage: String? = message override val message: String @@ -33,7 +34,14 @@ class IngestRequestException( isPermanent: Boolean? = true, message: String? = null, cause: Throwable? = null, -) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { +) : + IngestException( + message, + cause, + failureCode, + failureSubCode.toString(), + isPermanent, + ) { override val message: String get() = creationMessage @@ -77,7 +85,7 @@ open class IngestClientException( class IngestSizeLimitExceededException( val size: Long, - val maxSize: Long, + val maxNumberOfBlobs: Int, ingestionSourceId: String? = null, ingestionSource: String? = null, error: String? = null, @@ -100,7 +108,7 @@ class IngestSizeLimitExceededException( override val message: String get() = creationMessage - ?: "Size too large to ingest: Source: '${ingestionSource ?: ""}' size in bytes is '$size' which exceeds the maximal size of '$maxSize'" + ?: "Size too large to ingest: Source: '${ingestionSource ?: ""}' size in bytes is '$size' which exceeds the maximal size of '$maxNumberOfBlobs'" } class InvalidIngestionMappingException( @@ -126,7 +134,7 @@ class InvalidIngestionMappingException( override val message: String get() = creationMessage - ?: "Ingestion mapping is invalid: ${super.message ?: ""}" + ?: "Ingestion mapping is invalid: ${super.message}" } class MultipleIngestionMappingPropertiesException( @@ -159,11 +167,18 @@ open class UploadFailedException( val fileName: String? = null, val blobName: String? = null, failureCode: Int? = null, - failureSubCode: String? = null, + failureSubCode: UploadErrorCode, isPermanent: Boolean? = null, message: String? = null, cause: Throwable? = null, -) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { +) : + IngestException( + message, + cause, + failureCode, + failureSubCode.toString(), + isPermanent, + ) { override val message: String get() = creationMessage @@ -174,7 +189,7 @@ class NoAvailableIngestContainersException( fileName: String? = null, blobName: String? = null, failureCode: Int? = 500, - failureSubCode: String? = null, + failureSubCode: UploadErrorCode, isPermanent: Boolean? = false, message: String? = null, cause: Throwable? = null, @@ -196,7 +211,7 @@ class InvalidUploadStreamException( fileName: String? = null, blobName: String? = null, failureCode: Int? = null, - failureSubCode: String? = null, + failureSubCode: UploadErrorCode, isPermanent: Boolean? = true, message: String? = null, cause: Throwable? 
= null,
 @@ -222,7 +237,7 @@ class UploadSizeLimitExceededException(
     fileName: String? = null,
     blobName: String? = null,
     failureCode: Int? = null,
-    failureSubCode: String? = null,
+    failureSubCode: UploadErrorCode,
     isPermanent: Boolean? = true,
     message: String? = null,
     cause: Throwable? = null,
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt
index 972bd9366..9af839556 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt
@@ -2,4 +2,195 @@
 // Licensed under the MIT License.
 package com.microsoft.azure.kusto.ingest.v2.common.models
 
-data class ClientDetails(val name: String, val version: String)
+import java.util.concurrent.ConcurrentHashMap
+
+data class ClientDetails(
+    val applicationForTracing: String?,
+    val userNameForTracing: String?,
+    val clientVersionForTracing: String?,
+) {
+    companion object {
+        const val NONE = "[none]"
+        const val DEFAULT_APP_NAME = "Kusto.Java.Client.V2"
+
+        // Cache for default values to avoid recomputing on every call
+        private val defaultValuesCache = ConcurrentHashMap<String, String>()
+
+        /**
+         * Escapes special characters in header field values by wrapping in
+         * curly braces and replacing problematic characters with underscores.
+         */
+        private fun escapeField(field: String): String {
+            val escaped = field.replace(Regex("[\\r\\n\\s{}|]+"), "_")
+            return "{$escaped}"
+        }
+
+        /**
+         * Formats the given fields into a string that can be used as a header.
+         * Format: "field1:{value1}|field2:{value2}"
+         */
+        private fun formatHeader(args: Map<String, String>): String {
+            return args.entries
+                .filter { it.key.isNotBlank() && it.value.isNotBlank() }
+                .joinToString("|") { (key, value) ->
+                    "$key:${escapeField(value)}"
+                }
+        }
+
+        /** Gets the process name from system properties with caching. */
+        private fun getProcessName(): String {
+            return defaultValuesCache.computeIfAbsent("processName") {
+                val command = System.getProperty("sun.java.command")
+                if (!command.isNullOrBlank()) {
+                    // Strip file name from command line (matches
+                    // UriUtils.stripFileNameFromCommandLine)
+                    try {
+                        val processName = command.trim().split(" ")[0]
+                        java.nio.file.Paths.get(processName).fileName.toString()
+                    } catch (_: Exception) {
+                        "JavaProcess"
+                    }
+                } else {
+                    "JavaProcess"
+                }
+            }
+        }
+
+        private fun getUserName(): String {
+            return defaultValuesCache.computeIfAbsent("userName") {
+                var user = System.getProperty("user.name")
+                if (user.isNullOrBlank()) {
+                    user = System.getenv("USERNAME")
+                    val domain = System.getenv("USERDOMAIN")
+                    if (!domain.isNullOrBlank() && !user.isNullOrBlank()) {
+                        user = "$domain\\$user"
+                    }
+                }
+                if (!user.isNullOrBlank()) user else NONE
+            }
+        }
+
+        private fun getRuntime(): String {
+            return defaultValuesCache.computeIfAbsent("runtime") {
+                System.getProperty("java.runtime.name")
+                    ?: System.getProperty("java.vm.name")
+                    ?: System.getProperty("java.vendor")
+                    ?: "UnknownRuntime"
+            }
+        }
+
+        private fun getJavaVersion(): String {
+            return defaultValuesCache.computeIfAbsent("javaVersion") {
+                System.getProperty("java.version") ?: "UnknownVersion"
+            }
+        }
+
+        /**
+         * Gets the default client version string with caching. Format:
+         * "Kusto.Java.Client.V2:{version}|Runtime.{runtime}:{javaVersion}"
+         */
+        private fun getDefaultVersion(): String {
+            return defaultValuesCache.computeIfAbsent("defaultVersion") {
+                val baseMap =
+                    linkedMapOf(
+                        DEFAULT_APP_NAME to getPackageVersion(),
+                        "Runtime.${escapeField(getRuntime())}" to
+                            getJavaVersion(),
+                    )
+                formatHeader(baseMap)
+            }
+        }
+
+        /** Gets the package version from the manifest or returns a default. */
+        private fun getPackageVersion(): String {
+            return try {
+                val props = java.util.Properties()
+                ClientDetails::class
+                    .java
+                    .getResourceAsStream("/app.properties")
+                    ?.use { stream ->
+                        props.load(stream)
+                        props.getProperty("version")?.trim() ?: ""
+                    } ?: ""
+            } catch (_: Exception) {
+                ""
+            }
+        }
+
+        /**
+         * Creates a ClientDetails from connector details. Example output:
+         * "Kusto.MyConnector:{1.0.0}|App.{MyApp}:{0.5.3}|CustomField:{CustomValue}"
+         *
+         * @param name The name of the connector (will be prefixed with
+         *   "Kusto.")
+         * @param version The version of the connector
+         * @param sendUser True if the user should be sent to Kusto, otherwise
+         *   "[none]" will be sent
+         * @param overrideUser The user to send to Kusto, or null to use the
+         *   current user
+         * @param appName The app hosting the connector, or null to use the
+         *   current process name
+         * @param appVersion The version of the app hosting the connector, or
+         *   null to use "[none]"
+         * @param additionalFields Additional fields to trace as key-value pairs
+         * @return ClientDetails instance with formatted connector information
+         */
+        fun fromConnectorDetails(
+            name: String,
+            version: String,
+            sendUser: Boolean = false,
+            overrideUser: String? = null,
+            appName: String? = null,
+            appVersion: String? = null,
+            additionalFields: Map<String, String>? = null,
+        ): ClientDetails {
+            val fieldsMap = linkedMapOf<String, String>()
+            fieldsMap["Kusto.$name"] = version
+
+            val finalAppName = appName ?: getProcessName()
+            val finalAppVersion = appVersion ?: NONE
+            fieldsMap["App.${escapeField(finalAppName)}"] = finalAppVersion
+
+            additionalFields?.let { fieldsMap.putAll(it) }
+
+            val app = formatHeader(fieldsMap)
+
+            val user =
+                if (sendUser) {
+                    overrideUser ?: getUserName()
+                } else {
+                    NONE
+                }
+
+            return ClientDetails(app, user, null)
+        }
+
+        fun createDefault(): ClientDetails {
+            return ClientDetails(
+                applicationForTracing = getProcessName(),
+                userNameForTracing = getUserName(),
+                clientVersionForTracing = getDefaultVersion(),
+            )
+        }
+    }
+
+    @JvmName("getApplicationForTracingOrDefault")
+    fun getApplicationForTracing(): String {
+        return applicationForTracing ?: getProcessName()
+    }
+
+    @JvmName("getUserNameForTracingOrDefault")
+    fun getUserNameForTracing(): String {
+        return userNameForTracing ?: getUserName()
+    }
+
+    @JvmName("getClientVersionForTracingOrDefault")
+    fun getClientVersionForTracing(): String {
+        val defaultVersion = getDefaultVersion()
+        return if (clientVersionForTracing != null) {
+            "$defaultVersion|$clientVersionForTracing"
+        } else {
+            defaultVersion
+        }
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypes.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypes.kt
new file mode 100644
index 000000000..80ae7b5d5
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypes.kt
@@ -0,0 +1,15 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
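+// Usage sketch for the ClientDetails factory defined above; the connector
+// and app names/versions below are illustrative assumptions, not values
+// shipped with this module:
+//
+//   val details = ClientDetails.fromConnectorDetails(
+//       name = "MyConnector",
+//       version = "1.0.0",
+//       appName = "MyApp",
+//       appVersion = "0.5.3",
+//   )
+//   // details.applicationForTracing ==
+//   //     "Kusto.MyConnector:{1.0.0}|App.{MyApp}:{0.5.3}"
+//   // details.userNameForTracing == "[none]" (sendUser defaults to false)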
+package com.microsoft.azure.kusto.ingest.v2.common.models + +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse + +enum class IngestKind { + STREAMING, + QUEUED, +} + +data class ExtendedIngestResponse( + val ingestResponse: IngestResponse, + val ingestionType: IngestKind, +) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt new file mode 100644 index 000000000..4d942ceb8 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt @@ -0,0 +1,205 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import java.time.OffsetDateTime + +/** + * Builder class for + * [com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties] that + * provides a convenient way to construct instances with dropBy and ingestBy + * tags that are automatically combined into the tags property. + * + * Example usage: + * ```kotlin + * val properties = IngestRequestPropertiesBuilder(format = Format.json) + * .withDropByTags(listOf("tag1", "tag2")) + * .withIngestByTags(listOf("tag3")) + * .withEnableTracking(true) + * .build() + * ``` + */ +class IngestRequestPropertiesBuilder(private val format: Format) { + private var enableTracking: Boolean? = null + private var additionalTags: List? = null + private var dropByTags: List? = null + private var ingestByTags: List? = null + private var ingestIfNotExists: List? = null + private var skipBatching: Boolean? = null + private var deleteAfterDownload: Boolean? = null + private var ingestionMappingReference: String? = null + private var ingestionMapping: String? = null + private var validationPolicy: String? = null + private var ignoreSizeLimit: Boolean? = null + private var ignoreFirstRecord: Boolean? = null + private var ignoreLastRecordIfInvalid: Boolean? = null + private var creationTime: OffsetDateTime? = null + private var zipPattern: String? = null + private var extendSchema: Boolean? = null + private var recreateSchema: Boolean? = null + + fun withEnableTracking(value: Boolean) = apply { + this.enableTracking = value + } + + fun withAdditionalTags(value: List) = apply { + this.additionalTags = value + } + + /** + * Sets the drop-by tags. These will be prefixed with "drop-by:" when + * combined into the tags property. Drop-by tags are used to mark extents + * that should be dropped during merge operations. See + * [Kusto drop-by extent tags documentation](https://docs.microsoft.com/azure/kusto/management/extents-overview#drop-by-extent-tags) + */ + fun withDropByTags(value: List) = apply { this.dropByTags = value } + + /** + * Sets the ingest-by tags. These will be prefixed with "ingest-by:" when + * combined into the tags property. Ingest-by tags are used to prevent + * duplicate ingestion of data with the same tag. 
See + * [Kusto ingest-by extent tags documentation](https://docs.microsoft.com/azure/kusto/management/extents-overview#ingest-by-extent-tags) + */ + fun withIngestByTags(value: List) = apply { + this.ingestByTags = value + } + + fun withIngestIfNotExists(value: List) = apply { + this.ingestIfNotExists = value + } + + fun withSkipBatching(value: Boolean) = apply { this.skipBatching = value } + + fun withDeleteAfterDownload(value: Boolean) = apply { + this.deleteAfterDownload = value + } + + fun withIngestionMappingReference(value: String) = apply { + this.ingestionMappingReference = value + } + + fun withIngestionMapping(value: String) = apply { + this.ingestionMapping = value + } + + fun withValidationPolicy(value: String) = apply { + this.validationPolicy = value + } + + fun withIgnoreSizeLimit(value: Boolean) = apply { + this.ignoreSizeLimit = value + } + + fun withIgnoreFirstRecord(value: Boolean) = apply { + this.ignoreFirstRecord = value + } + + fun withIgnoreLastRecordIfInvalid(value: Boolean) = apply { + this.ignoreLastRecordIfInvalid = value + } + + fun withCreationTime(value: OffsetDateTime) = apply { + this.creationTime = value + } + + fun withZipPattern(value: String) = apply { this.zipPattern = value } + + fun withExtendSchema(value: Boolean) = apply { this.extendSchema = value } + + fun withRecreateSchema(value: Boolean) = apply { + this.recreateSchema = value + } + + /** + * Builds the + * [com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties] with + * combined tags from dropByTags, ingestByTags, and additionalTags. + */ + fun build(): IngestRequestProperties { + // Combine all tags: additional tags + prefixed ingest-by tags + prefixed drop-by tags + val combinedTags = mutableListOf() + + additionalTags?.let { combinedTags.addAll(it) } + + ingestByTags?.forEach { tag -> combinedTags.add("ingest-by:$tag") } + + dropByTags?.forEach { tag -> combinedTags.add("drop-by:$tag") } + + return IngestRequestProperties( + format = format, + enableTracking = enableTracking, + tags = combinedTags.ifEmpty { null }, + ingestIfNotExists = ingestIfNotExists, + skipBatching = skipBatching, + deleteAfterDownload = deleteAfterDownload, + ingestionMappingReference = ingestionMappingReference, + ingestionMapping = ingestionMapping, + validationPolicy = validationPolicy, + ignoreSizeLimit = ignoreSizeLimit, + ignoreFirstRecord = ignoreFirstRecord, + ignoreLastRecordIfInvalid = ignoreLastRecordIfInvalid, + creationTime = creationTime, + zipPattern = zipPattern, + extendSchema = extendSchema, + recreateSchema = recreateSchema, + ) + } +} + +/** + * Extension property to extract drop-by tags from the combined tags list. + * Returns all tags that start with "drop-by:" prefix. + */ +val IngestRequestProperties.dropByTags: List + get() = + tags?.filter { it.startsWith("drop-by:") } + ?.map { it.removePrefix("drop-by:") } ?: emptyList() + +/** + * Extension property to extract ingest-by tags from the combined tags list. + * Returns all tags that start with "ingest-by:" prefix. + */ +val IngestRequestProperties.ingestByTags: List + get() = + tags?.filter { it.startsWith("ingest-by:") } + ?.map { it.removePrefix("ingest-by:") } ?: emptyList() + +/** + * Extension property to extract additional (non-prefixed) tags from the + * combined tags list. Returns all tags that don't start with "drop-by:" or + * "ingest-by:" prefix. 
+ */ +val IngestRequestProperties.additionalTags: List + get() = + tags?.filter { + !it.startsWith("drop-by:") && !it.startsWith("ingest-by:") + } ?: emptyList() + +/** + * Creates a copy of this [IngestRequestProperties] with modified tags. Useful + * for adding or removing drop-by and ingest-by tags without recreating the + * entire object. + * + * @param dropByTags New drop-by tags to replace existing ones (null means keep + * existing) + * @param ingestByTags New ingest-by tags to replace existing ones (null means + * keep existing) + * @param additionalTags New additional tags to replace existing ones (null + * means keep existing) + */ +fun IngestRequestProperties.copyWithTags( + dropByTags: List? = null, + ingestByTags: List? = null, + additionalTags: List? = null, +): IngestRequestProperties { + val newDropByTags = dropByTags ?: this.dropByTags + val newIngestByTags = ingestByTags ?: this.ingestByTags + val newAdditionalTags = additionalTags ?: this.additionalTags + val combinedTags = mutableListOf() + combinedTags.addAll(newAdditionalTags) + newIngestByTags.forEach { tag -> combinedTags.add("ingest-by:$tag") } + newDropByTags.forEach { tag -> combinedTags.add("drop-by:$tag") } + return this.copy(tags = combinedTags.ifEmpty { null }) +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt deleted file mode 100644 index bedb49131..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/KustoTokenCredentials.kt +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.common.models - -import java.time.OffsetDateTime - -/** - * Represents a token credentials holder, capable (at least) of authenticating - * over an HTTPS "Authorization" header. - */ -data class KustoTokenCredentials( - val tokenScheme: String? = null, - val tokenValue: String? = null, - val expiresOn: OffsetDateTime? = null, -) { - /** Returns the secure representation of this instance. */ - fun toSecureString(): String { - return "${this::class.simpleName}:$tokenScheme:*****" - } - - override fun toString(): String = toSecureString() -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtils.kt new file mode 100644 index 000000000..ae1d69f18 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtils.kt @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
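+// Round-trip sketch for IngestRequestPropertiesBuilder and the tag
+// extension properties defined above; the tag values are illustrative:
+//
+//   val props = IngestRequestPropertiesBuilder(format = Format.json)
+//       .withAdditionalTags(listOf("team:ingest"))
+//       .withIngestByTags(listOf("batch-42"))
+//       .withDropByTags(listOf("old"))
+//       .build()
+//   // props.tags == ["team:ingest", "ingest-by:batch-42", "drop-by:old"]
+//   // props.ingestByTags == ["batch-42"], props.dropByTags == ["old"],
+//   // props.additionalTags == ["team:ingest"]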
+package com.microsoft.azure.kusto.ingest.v2.common.utils + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType + +object IngestionUtils { + fun getRowStoreEstimatedFactor( + format: Format?, + compressionType: CompressionType, + ): Double { + val isCompressed = compressionType != CompressionType.NONE + val fmt = format ?: Format.csv + return when { + !isCompressed && fmt == Format.avro -> 0.55 + !isCompressed && fmt == Format.apacheavro -> 0.55 + !isCompressed && fmt == Format.csv -> 0.45 + isCompressed && fmt == Format.csv -> 3.6 + !isCompressed && fmt == Format.json -> 0.33 + isCompressed && fmt == Format.json -> 3.60 + isCompressed && fmt == Format.multijson -> 5.15 + !isCompressed && fmt == Format.txt -> 0.15 + isCompressed && fmt == Format.txt -> 1.8 + isCompressed && fmt == Format.psv -> 1.5 + !isCompressed && fmt == Format.parquet -> 3.35 + else -> 1.0 + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index a645dbf2a..fd0fea81c 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -17,13 +17,13 @@ object PathUtils { private val FORBIDDEN_CHARS = Pattern.compile("[^\\w-]", Pattern.CASE_INSENSITIVE) - fun sanitizeFileName(baseName: String?, sourceId: String?): String { + fun sanitizeFileName(baseName: String?, sourceId: UUID): String { val base = getBasename(baseName) val fileNameSegment = sanitize(base, FILE_NAME_SEGMENT_MAX_LENGTH) val baseNamePart = if (!base.isNullOrEmpty()) "_$fileNameSegment" else "" return sanitize( - sourceId, + sourceId.toString(), TOTAL_TWO_SEGMENT_MAX_LENGTH - fileNameSegment.length, ) + baseNamePart } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt deleted file mode 100644 index d4ced8da8..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/BlobUploadContainer.kt +++ /dev/null @@ -1,392 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
-package com.microsoft.azure.kusto.ingest.v2.container - -import com.azure.core.util.Context -import com.azure.storage.blob.BlobClientBuilder -import com.azure.storage.blob.models.BlockBlobItem -import com.azure.storage.blob.models.ParallelTransferOptions -import com.azure.storage.blob.options.BlobParallelUploadOptions -import com.microsoft.azure.kusto.ingest.v2.BLOB_UPLOAD_TIMEOUT_HOURS -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_BLOCK_SIZE_BYTES -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_RETRIES -import com.microsoft.azure.kusto.ingest.v2.UPLOAD_MAX_SINGLE_SIZE_BYTES -import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo -import kotlinx.coroutines.async -import kotlinx.coroutines.awaitAll -import kotlinx.coroutines.coroutineScope -import org.slf4j.LoggerFactory -import java.io.InputStream -import java.time.Clock -import java.time.Duration -import java.time.Instant -import java.util.concurrent.atomic.AtomicInteger - -enum class UploadMethod { - // Use server preference or Storage as fallback - DEFAULT, - - // Use Storage blob - STORAGE, - - // Use OneLake - LAKE, -} - -data class UploadSource( - val name: String, - val stream: InputStream, - val sizeBytes: Long = -1, -) - -class BlobUploadContainer( - val configurationCache: ConfigurationCache, - private val uploadMethod: UploadMethod = UploadMethod.DEFAULT, - private val maxRetries: Int = UPLOAD_CONTAINER_MAX_RETRIES, - private val maxDataSize: Long = UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES, - private val ignoreSizeLimit: Boolean = false, - private val maxConcurrency: Int = UPLOAD_CONTAINER_MAX_CONCURRENCY, -) : UploadContainerBase { - private val logger = - LoggerFactory.getLogger(BlobUploadContainer::class.java) - private val containerIndex = AtomicInteger(0) - - override suspend fun uploadAsync( - name: String, - stream: InputStream, - ): String { - val errorCode = validateStream(stream, name) - if (errorCode != null) { - logger.error( - "Stream validation failed for {}: {}", - name, - errorCode.description, - ) - throw IngestException(errorCode.description, isPermanent = true) - } - - if (!ignoreSizeLimit && stream.available() > 0) { - val availableSize = stream.available().toLong() - if (availableSize > maxDataSize) { - logger.error( - "Stream size {} exceeds max allowed size {} for: {}", - availableSize, - maxDataSize, - name, - ) - throw IngestException( - "Upload source exceeds maximum allowed size: $availableSize > $maxDataSize", - isPermanent = true, - ) - } - } - - val containers = selectContainers() - require(containers.isNotEmpty()) { - "No containers available for upload" - } - - var lastException: Exception? 
= null - - repeat(maxRetries) { attempt -> - val container = - containers[ - containerIndex.getAndIncrement() % containers.size, - ] - try { - return uploadToContainer(name, stream, container) - } catch (e: Exception) { - logger.warn( - "Upload attempt ${attempt + 1} failed for container ${container.path}", - e, - ) - lastException = e - - if (stream.markSupported()) { - try { - stream.reset() - } catch (resetEx: Exception) { - logger.warn("Failed to reset stream for retry", resetEx) - throw IngestException( - "Upload failed and stream cannot be reset for retry", - cause = e, - isPermanent = true, - ) - } - } - } - } - - throw IngestException( - "Failed to upload after $maxRetries attempts", - cause = lastException, - isPermanent = false, - ) - } - - suspend fun uploadManyAsync(sources: List): UploadResults = - coroutineScope { - logger.info( - "Starting batch upload of {} sources with max concurrency {}", - sources.size, - maxConcurrency, - ) - - // Process sources in chunks to respect maxConcurrency at file level - val results = - sources.chunked(maxConcurrency).flatMap { chunk -> - chunk.map { source -> - async { - val startedAt = - Instant.now( - Clock.systemUTC(), - ) - try { - val blobUrl = - uploadAsync( - source.name, - source.stream, - ) - val completedAt = - Instant.now( - Clock - .systemUTC(), - ) - UploadResult.Success( - sourceName = - source.name, - startedAt = startedAt, - completedAt = - completedAt, - blobUrl = blobUrl, - sizeBytes = - source.sizeBytes, - ) - } catch (e: Exception) { - val completedAt = - Instant.now( - Clock - .systemUTC(), - ) - val errorCode = - when { - e.message?.contains( - "size", - ) == true -> - UploadErrorCode - .SOURCE_SIZE_LIMIT_EXCEEDED - e.message?.contains( - "readable", - ) == true -> - UploadErrorCode - .SOURCE_NOT_READABLE - e.message?.contains( - "empty", - ) == true -> - UploadErrorCode - .SOURCE_IS_EMPTY - e.message?.contains( - "container", - ) == true -> - UploadErrorCode - .NO_CONTAINERS_AVAILABLE - else -> - UploadErrorCode - .UPLOAD_FAILED - } - - UploadResult.Failure( - sourceName = - source.name, - startedAt = startedAt, - completedAt = - completedAt, - errorCode = errorCode, - errorMessage = - e.message - ?: "Upload failed", - exception = e, - isPermanent = - e is - IngestException && - e - .isPermanent == - true, - ) - } - } - } - .awaitAll() - } - - val successes = results.filterIsInstance() - val failures = results.filterIsInstance() - - logger.info( - "Batch upload completed: {} successes, {} failures out of {} total", - successes.size, - failures.size, - sources.size, - ) - - UploadResults(successes, failures) - } - - private fun validateStream( - stream: InputStream, - name: String, - ): UploadErrorCode? 
{ - return try { - if (stream.available() < 0) { - UploadErrorCode.SOURCE_NOT_READABLE - } else if (stream.markSupported() && stream.available() == 0) { - UploadErrorCode.SOURCE_IS_EMPTY - } else { - null - } - } catch (e: Exception) { - logger.warn("Error validating stream for {}", name, e) - UploadErrorCode.SOURCE_NOT_READABLE - } - } - - private fun uploadToContainer( - name: String, - stream: InputStream, - container: ContainerInfo, - ): String { - val (url, sas) = container.path!!.split("?", limit = 2) - - val blobClient = - BlobClientBuilder() - .endpoint(container.path) - .blobName(name) - .buildClient() - - logger.debug( - "Uploading stream to blob url: {} to container {}", - url, - name, - ) - - val parallelTransferOptions = - ParallelTransferOptions() - .setBlockSizeLong(UPLOAD_BLOCK_SIZE_BYTES) - .setMaxConcurrency(maxConcurrency) - .setMaxSingleUploadSizeLong( - UPLOAD_MAX_SINGLE_SIZE_BYTES, - ) - - val blobUploadOptions = - BlobParallelUploadOptions(stream) - .setParallelTransferOptions(parallelTransferOptions) - - val blobUploadResult = - blobClient.uploadWithResponse( - blobUploadOptions, - Duration.ofHours(BLOB_UPLOAD_TIMEOUT_HOURS), - Context.NONE, - ) - - return if ( - blobUploadResult.statusCode in 200..299 && - blobUploadResult.value != null - ) { - val blockBlobItem: BlockBlobItem = blobUploadResult.value - logger.debug( - "Upload succeeded to blob url: {} with eTag: {}", - url, - blockBlobItem.eTag, - ) - "$url/$name?$sas" - } else { - throw IngestException( - "Upload failed with status: ${blobUploadResult.statusCode}", - isPermanent = blobUploadResult.statusCode in 400..<500, - ) - } - } - - private suspend fun selectContainers(): List { - val configResponse = configurationCache.getConfiguration() - val containerSettings = - configResponse.containerSettings - ?: throw IngestException( - "No container settings available", - isPermanent = true, - ) - val hasStorage = !containerSettings.containers.isNullOrEmpty() - val hasLake = !containerSettings.lakeFolders.isNullOrEmpty() - - if (!hasStorage && !hasLake) { - throw IngestException("No containers available", isPermanent = true) - } - - // Determine effective upload method - val effectiveMethod = - when (uploadMethod) { - UploadMethod.DEFAULT -> { - // Use server's preferred upload method if available - val serverPreference = - containerSettings.preferredUploadMethod - when { - serverPreference.equals( - "Storage", - ignoreCase = true, - ) && hasStorage -> { - logger.debug( - "Using server preferred upload method: Storage", - ) - UploadMethod.STORAGE - } - serverPreference.equals( - "Lake", - ignoreCase = true, - ) && hasLake -> { - logger.debug( - "Using server preferred upload method: Lake", - ) - UploadMethod.LAKE - } - // Fallback: prefer Storage if available, otherwise Lake - hasStorage -> { - logger.debug( - "No server preference or unavailable, defaulting to Storage", - ) - UploadMethod.STORAGE - } - else -> { - logger.debug( - "No server preference or unavailable, defaulting to Lake", - ) - UploadMethod.LAKE - } - } - } - UploadMethod.LAKE -> - if (hasLake) { - UploadMethod.LAKE - } else { - UploadMethod.STORAGE - } - UploadMethod.STORAGE -> - if (hasStorage) { - UploadMethod.STORAGE - } else { - UploadMethod.LAKE - } - } - return when { - effectiveMethod == UploadMethod.LAKE && hasLake -> - containerSettings.lakeFolders - effectiveMethod == UploadMethod.STORAGE && hasStorage -> - containerSettings.containers - hasStorage -> containerSettings.containers - else -> containerSettings.lakeFolders!! 
- } - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt deleted file mode 100644 index ee7997003..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/ContainerBase.kt +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.container - -interface ContainerBase { - val uri: String - val name: String -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt deleted file mode 100644 index 34c08d076..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadContainerBase.kt +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.container - -import java.io.InputStream - -interface UploadContainerBase { - suspend fun uploadAsync(name: String, stream: InputStream): String -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt deleted file mode 100644 index fada85ddd..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/AbstractSourceInfo.kt +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.source - -import org.slf4j.Logger -import org.slf4j.LoggerFactory -import java.util.UUID - -abstract class AbstractSourceInfo : SourceInfo { - val logger: Logger - get() = LoggerFactory.getLogger(SourceInfo::class.java) - - override var sourceId: UUID = UUID.randomUUID() -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt new file mode 100644 index 000000000..fb83d7846 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import java.util.UUID + +/** + * Represents a blob-based ingestion source. This source references data that + * already exists in blob storage. + */ +class BlobSource( + val blobPath: String, + format: Format = Format.csv, + compressionType: CompressionType = CompressionType.NONE, + sourceId: UUID = UUID.randomUUID(), + baseName: String? = null, +) : IngestionSource(format, compressionType, baseName, sourceId) { + + /** + * The exact size of the blob in bytes if available. This is only set when + * the blob was created by uploading a local source. Returns null if size is + * not available (e.g., for external blob URLs). + */ + var blobExactSize: Long? = null + internal set + + init { + require(blobPath.isNotBlank()) { "blobPath cannot be blank" } + } + + /** Returns the exact size of the blob in bytes if available. */ + fun size(): Long? 
= blobExactSize + + override fun toString(): String { + return "${super.toString()} blobPath: '${blobPath.split("?").first()}'" + } + + /** Returns the blob path for tracing purposes (without SAS token). */ + internal fun getPathForTracing(): String { + return blobPath.split("?").first() + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt deleted file mode 100644 index 3b374b0eb..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSourceInfo.kt +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.source - -import com.microsoft.azure.kusto.ingest.v2.common.BatchOperationResult -import com.microsoft.azure.kusto.ingest.v2.container.BlobUploadContainer -import com.microsoft.azure.kusto.ingest.v2.container.UploadErrorCode -import com.microsoft.azure.kusto.ingest.v2.container.UploadSource -import org.slf4j.Logger -import org.slf4j.LoggerFactory -import java.util.UUID - -class BlobSourceInfo : AbstractSourceInfo { - var blobPath: String - private set - - // For internal usage - only when we create the blob - var blobExactSize: Long? = null - private set - - var compressionType: CompressionType? = null - - constructor(blobPath: String) { - this.blobPath = blobPath - } - - constructor( - blobPath: String, - compressionType: CompressionType?, - sourceId: UUID, - ) { - this.blobPath = blobPath - this.compressionType = compressionType - this.sourceId = sourceId - } - - override fun validate() { - require(blobPath.isNotBlank()) { "blobPath cannot be blank" } - } - - /** - * Returns the exact size of the blob in bytes if available. This is only - * set when the blob was created by uploading a local source. Returns null - * if size is not available (e.g., for external blob URLs). - */ - fun size(): Long? 
{ - return blobExactSize - } - - companion object { - /** - * Create BlobSourceInfo from LocalSource (FileSourceInfo or - * StreamSourceInfo) using BlobUploadContainer - */ - val logger: Logger - get() = LoggerFactory.getLogger(BlobSourceInfo::class.java) - - private suspend fun fromLocalSource( - localSource: LocalSource, - blobUploadContainer: BlobUploadContainer, - ): BlobSourceInfo { - val (inputStream, size, effectiveCompression) = - localSource.prepareForUpload() - val blobName = localSource.generateBlobName() - val blobPath = - blobUploadContainer.uploadAsync(blobName, inputStream) - logger.info( - "Uploading blob to path {} with blob name {}", - blobPath.split("?").first(), - blobName, - ) - return BlobSourceInfo( - blobPath, - effectiveCompression, - localSource.sourceId, - ) - .apply { blobExactSize = size } - } - - /** - * Create BlobSourceInfo from FileSourceInfo using BlobUploadContainer - */ - suspend fun fromFileSourceInfo( - fileSourceInfo: FileSourceInfo, - blobUploadContainer: BlobUploadContainer, - ): BlobSourceInfo = fromLocalSource(fileSourceInfo, blobUploadContainer) - - /** - * Create BlobSourceInfo from StreamSourceInfo using BlobUploadContainer - */ - suspend fun fromStreamSourceInfo( - streamSourceInfo: StreamSourceInfo, - blobUploadContainer: BlobUploadContainer, - ): BlobSourceInfo = - fromLocalSource(streamSourceInfo, blobUploadContainer) - - // batch convert multiple LocalSource objects to BlobSourceInfo using parallel uploads - suspend fun fromLocalSourcesBatch( - localSources: List, - blobUploadContainer: BlobUploadContainer, - ): BatchConversionResult { - if (localSources.isEmpty()) { - return BatchConversionResult(emptyList(), emptyList()) - } - - logger.info( - "Starting batch conversion of {} local sources", - localSources.size, - ) - - val uploadSources = - localSources.map { source -> - val (inputStream, size, effectiveCompression) = - source.prepareForUpload() - val blobName = source.generateBlobName() - UploadSource( - name = blobName, - stream = inputStream, - sizeBytes = size ?: -1, - ) - } - - val uploadResults = - blobUploadContainer.uploadManyAsync(uploadSources) - - val blobSources = mutableListOf() - val failures = mutableListOf() - - val sourceMap = localSources.associateBy { it.generateBlobName() } - - uploadResults.successes.forEach { success -> - val originalSource = sourceMap[success.sourceName] - if (originalSource != null) { - blobSources.add( - BlobSourceInfo( - blobPath = success.blobUrl, - compressionType = - if ( - originalSource - .compressionType == - CompressionType - .NONE - ) { - CompressionType - .GZIP // Auto-compressed during - // upload - } else { - originalSource - .compressionType - }, - sourceId = originalSource.sourceId, - ) - .apply { blobExactSize = success.sizeBytes }, - ) - } - } - - uploadResults.failures.forEach { failure -> - val originalSource = sourceMap[failure.sourceName] - if (originalSource != null) { - failures.add( - SourceConversionFailure( - source = originalSource, - errorCode = failure.errorCode, - errorMessage = failure.errorMessage, - exception = failure.exception, - isPermanent = failure.isPermanent, - ), - ) - } - } - - logger.info( - "Batch conversion completed: {} successes, {} failures", - blobSources.size, - failures.size, - ) - - return BatchConversionResult(blobSources, failures) - } - } -} - -/** Represents a failure during source conversion to blob. 
*/ -data class SourceConversionFailure( - val source: LocalSource, - val errorCode: UploadErrorCode, - val errorMessage: String, - val exception: Exception?, - val isPermanent: Boolean, -) - -data class BatchConversionResult( - override val successes: List, - override val failures: List, -) : BatchOperationResult diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt new file mode 100644 index 000000000..dc91048d1 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.InvalidUploadStreamException +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode +import java.io.FileNotFoundException +import java.io.InputStream +import java.nio.file.Files +import java.nio.file.NoSuchFileException +import java.nio.file.Path +import java.util.UUID + +/** Represents a file-based ingestion source. */ +class FileSource( + val path: Path, + format: Format, + sourceId: UUID = UUID.randomUUID(), + compressionType: CompressionType? = null, +) : + LocalSource( + format, + leaveOpen = false, + compressionType = + compressionType ?: detectCompressionFromPath(path), + baseName = path.fileName?.toString(), + sourceId = sourceId, + ) { + override fun data(): InputStream { + if (mStream == null) { + try { + if (!Files.exists(path)) { + throw InvalidUploadStreamException( + fileName = path.toString(), + blobName = null, + failureSubCode = UploadErrorCode.SOURCE_NOT_FOUND, + isPermanent = true, + cause = + FileNotFoundException( + "File not found: $path", + ), + ) + } + mStream = Files.newInputStream(path) + } catch (e: NoSuchFileException) { + throw InvalidUploadStreamException( + fileName = path.toString(), + blobName = null, + failureSubCode = UploadErrorCode.SOURCE_NOT_FOUND, + isPermanent = true, + cause = e, + ) + } catch (e: Exception) { + throw InvalidUploadStreamException( + fileName = path.toString(), + blobName = null, + failureSubCode = UploadErrorCode.SOURCE_NOT_READABLE, + isPermanent = false, + cause = e, + ) + } + if (mStream == null) { + throw InvalidUploadStreamException( + fileName = path.toString(), + blobName = null, + failureSubCode = UploadErrorCode.SOURCE_IS_EMPTY, + isPermanent = true, + ) + } + } + return mStream!! + } + + override fun size(): Long? { + return try { + Files.size(path) + } catch (_: Exception) { + null + } + } + + override fun toString(): String { + return "${super.toString()} path: '$path'" + } + + override fun getPathOrNameForTracing(): String { + return path.toString() + } + + companion object { + /** Detects compression type from file path based on file extension. 
*/ + fun detectCompressionFromPath(path: Path): CompressionType { + val fileName = + path.fileName?.toString()?.lowercase() + ?: return CompressionType.NONE + return when { + fileName.endsWith(".gz") || fileName.endsWith(".gzip") -> + CompressionType.GZIP + fileName.endsWith(".zip") -> CompressionType.ZIP + else -> CompressionType.NONE + } + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt new file mode 100644 index 000000000..bb73f9dc4 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.common.utils.PathUtils +import com.microsoft.azure.kusto.ingest.v2.models.Format +import java.io.Closeable +import java.util.UUID + +abstract class IngestionSource( + open val format: Format, + open val compressionType: CompressionType, + baseName: String? = null, + open val sourceId: UUID = UUID.randomUUID(), +) : Closeable { + + var name: String + private set + + init { + name = initName(baseName) + } + + override fun close() { + // No-op by default, override if needed + } + + protected fun initName(baseName: String? = null): String { + val type = + this::class.simpleName?.removeSuffix("Source")?.lowercase() + ?: "source" + return "${type}_${PathUtils.sanitizeFileName(baseName, sourceId)}${format.value}$compressionType" + } + + override fun toString(): String { + return "${this::class.simpleName} - `$name`" + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index ec1c80544..1d1e36ecb 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -3,38 +3,30 @@ package com.microsoft.azure.kusto.ingest.v2.source import com.microsoft.azure.kusto.ingest.v2.models.Format -import java.io.ByteArrayOutputStream import java.io.InputStream -import java.nio.file.Files -import java.nio.file.Path import java.util.UUID -import java.util.zip.GZIPInputStream -import java.util.zip.GZIPOutputStream -import java.util.zip.ZipInputStream +/** Abstract base class for local ingestion sources (file or stream). */ abstract class LocalSource( - val format: Format, + format: Format, val leaveOpen: Boolean, - val compressionType: CompressionType = CompressionType.NONE, + compressionType: CompressionType = CompressionType.NONE, baseName: String? = null, - override var sourceId: UUID = UUID.randomUUID(), -) : AbstractSourceInfo() { - - init { - initName(baseName) - } - - // Lazily initialized input stream for ingestion source - protected lateinit var mStream: InputStream - - lateinit var name: String - private set + sourceId: UUID = UUID.randomUUID(), +) : IngestionSource(format, compressionType, baseName, sourceId) { + protected var mStream: InputStream? = null - fun initName(baseName: String? = null) { - name = "${baseName ?: sourceId.toString()}_$format.$compressionType" - } + /** + * Indicates whether the stream should be compressed during upload. Binary + * formats should not be compressed as they already have internal + * compression. 
+ */ + val shouldCompress: Boolean + get() = + (compressionType == CompressionType.NONE) && + !FormatUtil.isBinaryFormat(format) - // Indicates whether the stream should be left open after ingestion. + /** Returns the data stream for ingestion. */ abstract fun data(): InputStream /** @@ -45,64 +37,15 @@ abstract class LocalSource( */ abstract fun size(): Long? - fun reset() { - data().reset() - } - - open fun close() { + override fun close() { if (!leaveOpen) { - if (this::mStream.isInitialized) { - mStream.close() - } + mStream?.close() } } - /** - * Prepares the source data for blob upload, handling compression if needed. - * Returns a pair of (InputStream, size, effectiveCompressionType) - */ - fun prepareForUpload(): Triple { - // Binary formats (Parquet, AVRO, ORC) already have internal compression and should not be - // compressed again - val shouldCompress = - (compressionType == CompressionType.NONE) && - !FormatUtil.isBinaryFormat(format) - - return if (shouldCompress) { - // Compress using GZIP for non-binary formats - val byteStream = ByteArrayOutputStream() - GZIPOutputStream(byteStream).use { gzipOut -> - data().copyTo(gzipOut) - } - val bytes = byteStream.toByteArray() - Triple( - bytes.inputStream(), - bytes.size.toLong(), - CompressionType.GZIP, - ) - } else { - val stream = data() - val size = - when (this) { - is FileSourceInfo -> Files.size(path) - is StreamSourceInfo -> - try { - stream.available().toLong() - } catch (_: Exception) { - null - } - else -> null - } - Triple(stream, size, compressionType) - } - } - - /** Generates a unique blob name for upload */ + /** Generates a unique blob name for upload. */ fun generateBlobName(): String { // Binary formats should not be compressed, so effective compression stays NONE - val shouldCompress = - (compressionType == CompressionType.NONE) && - !FormatUtil.isBinaryFormat(format) val effectiveCompression = if (shouldCompress) { CompressionType.GZIP @@ -112,81 +55,11 @@ abstract class LocalSource( return "${sourceId}_${format.value}.$effectiveCompression" } - override fun validate() { - // Basic validation - subclasses can override for specific validation - } -} - -class StreamSourceInfo( - stream: InputStream, - format: Format, - sourceCompression: CompressionType, - sourceId: UUID = UUID.randomUUID(), - name: String? = null, - leaveOpen: Boolean = false, -) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) { - - init { - mStream = stream - } - - override fun data(): InputStream { - return mStream - } - - override fun size(): Long? { - return try { - mStream.available().toLong() - } catch (e: Exception) { - logger.warn("Could not determine stream size: ${e.message}") - null - } - } -} - -class FileSourceInfo( - val path: Path, - format: Format, - compressionType: CompressionType = CompressionType.NONE, - name: String? = null, - sourceId: UUID = UUID.randomUUID(), - leaveOpen: Boolean = false, -) : LocalSource(format, leaveOpen, compressionType, name, sourceId) { - - // Expose file path for direct file upload APIs - private val fileStream: InputStream = - when (compressionType) { - CompressionType.GZIP -> - GZIPInputStream(Files.newInputStream(path)) - CompressionType.ZIP -> { - val zipStream = ZipInputStream(Files.newInputStream(path)) - zipStream.nextEntry - zipStream - } - else -> Files.newInputStream(path) - } - - // Move to first entry - init { - mStream = fileStream - } - - override fun data(): InputStream { - return mStream - } - - override fun size(): Long? 
{ - return try { - Files.size(path) - } catch (e: Exception) { - logger.warn("Could not determine file size for $path: ${e.message}") - null - } - } - - override fun close() { - if (!leaveOpen) { - fileStream.close() - } + /** + * Returns the path or name for tracing purposes. Subclasses can override + * this to provide specific information. + */ + internal open fun getPathOrNameForTracing(): String { + return name } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt deleted file mode 100644 index c8179e993..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceInfo.kt +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.source - -import java.util.UUID - -interface SourceInfo { - /** Checks that this SourceInfo is defined appropriately. */ - fun validate() - - val sourceId: UUID -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt new file mode 100644 index 000000000..5803e1969 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import java.io.InputStream +import java.util.UUID + +/** Represents a stream-based ingestion source. */ +class StreamSource( + stream: InputStream, + sourceCompression: CompressionType, + format: Format, + sourceId: UUID = UUID.randomUUID(), + name: String? = null, + leaveOpen: Boolean = false, +) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) { + init { + mStream = stream + initName(name) + } + + override fun data(): InputStream { + return mStream!! + } + + override fun size(): Long? { + return try { + mStream?.available()?.toLong() + } catch (_: Exception) { + null + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt new file mode 100644 index 000000000..21a02afc9 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -0,0 +1,580 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
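+// Sketch of the LocalSource compression semantics defined above; the stream
+// contents are placeholders, and the ".GZIP"/".NONE" suffixes assume
+// Format.csv.value == "csv" and CompressionType's default enum toString():
+//
+//   val csv = StreamSource("a,b\n1,2\n".byteInputStream(),
+//       CompressionType.NONE, Format.csv)
+//   // csv.shouldCompress == true, so the stream is gzipped on upload and
+//   // csv.generateBlobName() == "${csv.sourceId}_csv.GZIP"
+//
+//   // A Parquet stream keeps its internal compression (assuming FormatUtil
+//   // classifies parquet as binary): shouldCompress == false and the
+//   // generated blob name ends in ".NONE".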
+package com.microsoft.azure.kusto.ingest.v2.uploader + +import com.azure.core.credential.TokenCredential +import com.azure.core.util.Context +import com.azure.storage.blob.BlobClientBuilder +import com.azure.storage.blob.models.BlockBlobItem +import com.azure.storage.blob.options.BlobParallelUploadOptions +import com.azure.storage.common.ParallelTransferOptions +import com.azure.storage.file.datalake.DataLakeFileClient +import com.azure.storage.file.datalake.DataLakeServiceClientBuilder +import com.azure.storage.file.datalake.options.FileParallelUploadOptions +import com.microsoft.azure.kusto.ingest.v2.BLOB_UPLOAD_TIMEOUT_HOURS +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_BLOCK_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_MAX_SINGLE_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope +import kotlinx.coroutines.delay +import kotlinx.coroutines.withContext +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import java.io.InputStream +import java.time.Clock +import java.time.Duration +import java.time.Instant + +/** Represents an abstract base class for uploaders to storage containers. 
 */
+abstract class ContainerUploaderBase(
+    private val retryPolicy: IngestRetryPolicy,
+    private val maxConcurrency: Int,
+    private val maxDataSize: Long,
+    private val configurationCache: ConfigurationCache,
+    private val uploadMethod: UploadMethod,
+    private val tokenCredential: TokenCredential?,
+) : IUploader {
+
+    protected val logger: Logger =
+        LoggerFactory.getLogger(ContainerUploaderBase::class.java)
+
+    private val effectiveMaxConcurrency: Int =
+        minOf(maxConcurrency, Runtime.getRuntime().availableProcessors())
+
+    override var ignoreSizeLimit: Boolean = false
+
+    override fun close() {
+        // Default implementation - can be overridden
+    }
+
+    override suspend fun uploadAsync(local: LocalSource): BlobSource {
+        // Get the stream and validate it
+        val stream = local.data()
+        val name = local.generateBlobName()
+
+        val errorCode = checkStreamForErrors(stream)
+        if (errorCode != null) {
+            logger.error(
+                "Stream validation failed for {}: {}",
+                name,
+                errorCode.description,
+            )
+            throw IngestException(errorCode.description, isPermanent = true)
+        }
+
+        // Check size limit if not ignored
+        val availableSize =
+            withContext(Dispatchers.IO) { stream.available() }.toLong()
+        if (!ignoreSizeLimit && availableSize > 0) {
+            if (availableSize > maxDataSize) {
+                logger.error(
+                    "Stream size {} exceeds max allowed size {} for: {}",
+                    availableSize,
+                    maxDataSize,
+                    name,
+                )
+                throw IngestException(
+                    "Upload source exceeds maximum allowed size: $availableSize > $maxDataSize",
+                    isPermanent = true,
+                )
+            }
+        }
+
+        // Get containers from configuration
+        val containers = selectContainers(configurationCache, uploadMethod)
+
+        if (containers.isEmpty()) {
+            logger.error("No containers available for upload")
+            throw IngestException(
+                "No upload containers available",
+                isPermanent = true,
+            )
+        }
+
+        // Upload with retry policy and container cycling
+        return uploadWithRetries(
+            local = local,
+            name = name,
+            stream = stream,
+            containers = containers,
+        )
+    }
+
+    /**
+     * Uploads a stream with retry logic and container cycling. Randomly selects
+     * a starting container and cycles through containers on each retry. For
+     * example, with 2 containers and 3 retries: 1->2->1 or 2->1->2
+     */
+    private suspend fun uploadWithRetries(
+        local: LocalSource,
+        name: String,
+        stream: InputStream,
+        containers: List<ExtendedContainerInfo>,
+    ): BlobSource {
+        // Select random starting container index
+        var containerIndex = (0 until containers.size).random()
+
+        logger.debug(
+            "Starting upload with {} containers, random start index: {}",
+            containers.size,
+            containerIndex,
+        )
+
+        var retryNumber = 0u
+        var lastException: Exception?
+
+        while (true) {
+            try {
+                val container = containers[containerIndex]
+
+                logger.debug(
+                    "Upload attempt {} to container index {} ({}): {}",
+                    retryNumber + 1u,
+                    containerIndex,
+                    container.containerInfo.path?.split("?")?.first()
+                        ?: "unknown",
+                    name,
+                )
+
+                // Perform the actual blob upload
+                val blobUrl =
+                    uploadToContainer(
+                        name = name,
+                        stream = stream,
+                        container = container,
+                        maxConcurrency = effectiveMaxConcurrency,
+                    )
+
+                logger.info(
+                    "Successfully uploaded {} to container index {} on attempt {}",
+                    name,
+                    containerIndex,
+                    retryNumber + 1u,
+                )
+
+                // Return BlobSource with the uploaded blob path
+                return BlobSource(
+                        blobPath = blobUrl,
+                        format = local.format,
+                        compressionType = local.compressionType,
+                        sourceId = local.sourceId,
+                    )
+                    .apply { blobExactSize = local.size() }
+            } catch (e: Exception) {
+                lastException = e
+
+                logger.warn(
+                    "Upload attempt {} failed to container index {}: {}",
+                    retryNumber + 1u,
+                    containerIndex,
+                    e.message,
+                )
+
+                // Don't retry on permanent errors
+                if (e is IngestException && e.isPermanent == true) {
+                    logger.error(
+                        "Permanent error on attempt {}: {}",
+                        retryNumber + 1u,
+                        e.message,
+                    )
+                    throw e
+                }
+
+                // Check if we should retry
+                retryNumber++
+                val retryDecision = retryPolicy.moveNext(retryNumber)
+
+                if (!retryDecision.shouldRetry) {
+                    logger.error(
+                        "Retry policy exhausted after {} attempts",
+                        retryNumber,
+                    )
+                    throw IngestException(
+                        "Upload failed after $retryNumber attempts to ${containers.size} container(s)",
+                        isPermanent = false,
+                        cause = lastException,
+                    )
+                }
+
+                // Cycle to next container
+                containerIndex = (containerIndex + 1) % containers.size
+
+                logger.info(
+                    "Retry attempt {} - cycling to container index {}, waiting {} ms",
+                    retryNumber,
+                    containerIndex,
+                    retryDecision.interval.toMillis(),
+                )
+                // Wait before retrying
+                if (retryDecision.interval.toMillis() > 0) {
+                    delay(retryDecision.interval.toMillis())
+                }
+            }
+        }
+    }
+
+    override suspend fun uploadManyAsync(
+        localSources: List<LocalSource>,
+    ): UploadResults = coroutineScope {
+        logger.info(
+            "Starting batch upload of {} sources with max concurrency {}",
+            localSources.size,
+            maxConcurrency,
+        )
+        // Process sources in chunks to respect maxConcurrency at file level
+        val results =
+            localSources.chunked(maxConcurrency).flatMap { chunk ->
+                chunk.map { source ->
+                        async {
+                            val startedAt = Instant.now(Clock.systemUTC())
+                            try {
+                                val blobSource = uploadAsync(source)
+                                val completedAt = Instant.now(Clock.systemUTC())
+                                UploadResult.Success(
+                                    sourceName = source.name,
+                                    startedAt = startedAt,
+                                    completedAt = completedAt,
+                                    blobUrl = blobSource.blobPath,
+                                    sizeBytes = source.size() ?: -1,
+                                )
+                            } catch (e: Exception) {
+                                val completedAt = Instant.now(Clock.systemUTC())
+                                val errorCode =
+                                    when {
+                                        e.message?.contains("size") == true ->
+                                            UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED
+                                        e.message?.contains("readable") == true ->
+                                            UploadErrorCode.SOURCE_NOT_READABLE
+                                        e.message?.contains("empty") == true ->
+                                            UploadErrorCode.SOURCE_IS_EMPTY
+                                        e.message?.contains("container") == true ->
+                                            UploadErrorCode.NO_CONTAINERS_AVAILABLE
+                                        else -> UploadErrorCode.UPLOAD_FAILED
+                                    }
+
+                                UploadResult.Failure(
+                                    sourceName = source.name,
+                                    startedAt = startedAt,
+                                    completedAt = completedAt,
+                                    errorCode = errorCode,
+                                    errorMessage = e.message ?: "Upload failed",
+                                    exception = e,
+                                    isPermanent =
+                                        e is IngestException &&
+                                            e.isPermanent == true,
+                                )
+                            }
+                        }
+                    }
+                    .awaitAll()
+            }
+
+        val successes =
+            results.filterIsInstance<UploadResult.Success>()
+        val failures = results.filterIsInstance<UploadResult.Failure>()
+
+        logger.info(
+            "Batch upload completed: {} successes, {} failures out of {} total",
+            successes.size,
+            failures.size,
+            localSources.size,
+        )
+
+        UploadResults(successes, failures)
+    }
+
+    /** Validates the stream for ingestion. */
+    private fun checkStreamForErrors(stream: InputStream?): UploadErrorCode? {
+        if (stream == null) {
+            return UploadErrorCode.SOURCE_IS_NULL
+        }
+        val length = estimateStreamLength(stream)
+        if (length < 0) {
+            return UploadErrorCode.SOURCE_NOT_READABLE
+        }
+        if (length == 0L) {
+            return UploadErrorCode.SOURCE_IS_EMPTY
+        }
+        if (length > maxDataSize && !ignoreSizeLimit) {
+            return UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED
+        }
+        return null
+    }
+
+    private fun estimateStreamLength(stream: InputStream): Long {
+        return try {
+            stream.available().toLong()
+        } catch (_: Exception) {
+            -1L
+        }
+    }
+
+    protected fun uploadToContainer(
+        name: String,
+        stream: InputStream,
+        container: ExtendedContainerInfo,
+        maxConcurrency: Int,
+    ): String {
+        val containerPath = container.containerInfo.path!!
+        // Parse URL and SAS token (if present)
+        // Storage containers have SAS tokens: "https://...?sp=..."
+        // Lake containers don't have SAS tokens:
+        // "https://msit-onelake.dfs.fabric.microsoft.com/..."
+        val pathParts = containerPath.split("?", limit = 2)
+        val url = pathParts[0]
+        val sas = if (pathParts.size > 1) pathParts[1] else null
+
+        return if (container.uploadMethod == UploadMethod.STORAGE) {
+            // Use Blob API for STORAGE upload method
+            uploadUsingBlobApi(
+                name,
+                stream,
+                containerPath,
+                url,
+                sas,
+                maxConcurrency,
+            )
+        } else {
+            // Use Data Lake API for LAKE upload method
+            uploadUsingDataLakeApi(name, stream, url, sas, maxConcurrency)
+        }
+    }
+
+    private fun uploadUsingBlobApi(
+        name: String,
+        stream: InputStream,
+        containerPath: String,
+        url: String,
+        sas: String?,
+        maxConcurrency: Int,
+    ): String {
+        logger.info(
+            "Upload {} using STORAGE upload method for container url {}",
+            name,
+            url,
+        )
+
+        val blobClient =
+            BlobClientBuilder()
+                .endpoint(containerPath)
+                .blobName(name)
+                .buildClient()
+
+        val parallelTransferOptions =
+            com.azure.storage.blob.models
+                .ParallelTransferOptions()
+                .setBlockSizeLong(UPLOAD_BLOCK_SIZE_BYTES)
+                .setMaxConcurrency(maxConcurrency)
+                .setMaxSingleUploadSizeLong(UPLOAD_MAX_SINGLE_SIZE_BYTES)
+
+        val blobUploadOptions =
+            BlobParallelUploadOptions(stream)
+                .setParallelTransferOptions(parallelTransferOptions)
+
+        val blobUploadResult =
+            blobClient.uploadWithResponse(
+                blobUploadOptions,
+                Duration.ofHours(BLOB_UPLOAD_TIMEOUT_HOURS),
+                Context.NONE,
+            )
+
+        return if (
+            blobUploadResult.statusCode in 200..299 &&
+                blobUploadResult.value != null
+        ) {
+            val blockBlobItem: BlockBlobItem = blobUploadResult.value
+            logger.debug(
+                "Upload succeeded to blob url: {} with eTag: {}",
+                url,
+                blockBlobItem.eTag,
+            )
+            // Return the blob URL with SAS token if available
+            if (sas != null) {
+                "$url/$name?$sas"
+            } else {
+                "$url/$name"
+            }
+        } else {
+            throw IngestException(
+                "Upload failed with status: ${blobUploadResult.statusCode}",
+                isPermanent = blobUploadResult.statusCode in 400..<500,
+            )
+        }
+    }
+
+    private fun uploadUsingDataLakeApi(
+        name: String,
+        stream: InputStream,
+        url: String,
+        sas: String?,
+        maxConcurrency: Int,
+    ): String {
+        logger.info(
+            "Upload {} using LAKE upload method (Data Lake API) for container url {}",
+            name,
+            url,
+        )
+
+        // Parse the URL to extract file system and path
+        // OneLake URL
format: + // https://msit-onelake.dfs.fabric.microsoft.com/{workspace-id}/{lakehouse-id}/Files/Ingestions/ + // In OneLake/Fabric, the workspace-id is treated as the "container" (file system in ADLS + // Gen2 terms) + // and {lakehouse-id}/Files/... is the path within that container + val uri = java.net.URI(url) + val pathSegments = uri.path.trimStart('/').split('/') + + val serviceEndpoint = "${uri.scheme}://${uri.host}" + // First segment is the workspace-id (container/filesystem) + val fileSystemName = + if (pathSegments.isNotEmpty()) pathSegments[0] else "" + // Remaining segments form the directory path: {lakehouse-id}/Files/Ingestions/... + val directoryPath = + if (pathSegments.size > 1) { + pathSegments + .subList(1, pathSegments.size) + .filter { it.isNotEmpty() } + .joinToString("/") + } else { + "" + } + + // Build the Data Lake file client + val fileClient: DataLakeFileClient = + if (tokenCredential != null) { + logger.debug( + "Using TokenCredential for Data Lake authentication", + ) + val serviceClient = + DataLakeServiceClientBuilder() + .endpoint(serviceEndpoint) + .credential(tokenCredential) + .buildClient() + + val fileSystemClient = + serviceClient.getFileSystemClient(fileSystemName) + if (directoryPath.isNotEmpty()) { + fileSystemClient + .getDirectoryClient(directoryPath) + .getFileClient(name) + } else { + fileSystemClient.getFileClient(name) + } + } else if (sas != null) { + logger.debug("Using SAS token for Data Lake authentication") + val serviceClient = + DataLakeServiceClientBuilder() + .endpoint("$serviceEndpoint?$sas") + .buildClient() + + val fileSystemClient = + serviceClient.getFileSystemClient(fileSystemName) + if (directoryPath.isNotEmpty()) { + fileSystemClient + .getDirectoryClient(directoryPath) + .getFileClient(name) + } else { + fileSystemClient.getFileClient(name) + } + } else { + logger.debug("Using anonymous access for Data Lake") + val serviceClient = + DataLakeServiceClientBuilder() + .endpoint(serviceEndpoint) + .buildClient() + + val fileSystemClient = + serviceClient.getFileSystemClient(fileSystemName) + if (directoryPath.isNotEmpty()) { + fileSystemClient + .getDirectoryClient(directoryPath) + .getFileClient(name) + } else { + fileSystemClient.getFileClient(name) + } + } + + val parallelTransferOptions = + ParallelTransferOptions() + .setBlockSizeLong(UPLOAD_BLOCK_SIZE_BYTES) + .setMaxConcurrency(maxConcurrency) + .setMaxSingleUploadSizeLong( + UPLOAD_MAX_SINGLE_SIZE_BYTES, + ) + + val uploadResponse = + fileClient.uploadWithResponse( + FileParallelUploadOptions(stream) + .setParallelTransferOptions( + parallelTransferOptions, + ), + Duration.ofHours(BLOB_UPLOAD_TIMEOUT_HOURS), + Context.NONE, + ) + + return if (uploadResponse.statusCode in 200..299) { + logger.debug( + "Upload succeeded to Data Lake file: {} with eTag: {}", + name, + uploadResponse.value?.eTag, + ) + // Return the file URL with SAS token if available + if (sas != null) { + "$url/$name?$sas" + } else { + "$url/$name" + } + } else { + throw IngestException( + "Data Lake upload failed with status: ${uploadResponse.statusCode}", + isPermanent = uploadResponse.statusCode in 400..<500, + ) + } + } + + /** + * Selects the appropriate containers for upload based on the provided + * configuration cache and upload method. + * + * @param configurationCache The configuration cache to use for selecting + * containers. + * @param uploadMethod The upload method to consider when selecting + * containers. + * @return A list of selected container information. 
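+     *
+     * A minimal sketch of one possible override (illustration only; the
+     * production implementation in [ManagedUploader] also honors the
+     * server's preferred upload method):
+     * ```
+     * override suspend fun selectContainers(
+     *     configurationCache: ConfigurationCache,
+     *     uploadMethod: UploadMethod,
+     * ): List<ExtendedContainerInfo> {
+     *     val settings =
+     *         configurationCache.getConfiguration().containerSettings
+     *     return settings?.containers
+     *         ?.map { ExtendedContainerInfo(it, UploadMethod.STORAGE) }
+     *         .orEmpty()
+     * }
+     * ```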
+     */
+    abstract suspend fun selectContainers(
+        configurationCache: ConfigurationCache,
+        uploadMethod: UploadMethod,
+    ): List<ExtendedContainerInfo>
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ExtendedContainerInfo.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ExtendedContainerInfo.kt
new file mode 100644
index 000000000..5fdf2bcd8
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ExtendedContainerInfo.kt
@@ -0,0 +1,10 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.uploader
+
+import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo
+
+data class ExtendedContainerInfo(
+    val containerInfo: ContainerInfo,
+    val uploadMethod: UploadMethod,
+)
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/IUploader.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/IUploader.kt
new file mode 100644
index 000000000..04b1348ca
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/IUploader.kt
@@ -0,0 +1,36 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.uploader
+
+import com.microsoft.azure.kusto.ingest.v2.source.BlobSource
+import com.microsoft.azure.kusto.ingest.v2.source.LocalSource
+import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults
+import java.io.Closeable
+
+/** Interface for uploading data sources to blob storage. */
+interface IUploader : Closeable {
+    /**
+     * Indicates whether to ignore the max data size allowed during the upload
+     * operation. Default is false.
+     */
+    var ignoreSizeLimit: Boolean
+
+    /**
+     * Uploads the specified local source.
+     *
+     * @param local The local source to upload.
+     * @return The uploaded blob source.
+     */
+    suspend fun uploadAsync(local: LocalSource): BlobSource
+
+    /**
+     * Uploads the specified local sources.
+     *
+     * @param localSources List of the local sources to upload.
+     * @return The uploaded results - successes (as
+     *   [com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult.Success])
+     *   and failures (as
+     *   [com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult.Failure]).
+     */
+    suspend fun uploadManyAsync(localSources: List<LocalSource>): UploadResults
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt
new file mode 100644
index 000000000..a2084e200
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt
@@ -0,0 +1,137 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.uploader
+
+import com.azure.core.credential.TokenCredential
+import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache
+import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy
+import com.microsoft.azure.kusto.ingest.v2.common.SimpleRetryPolicy
+import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException
+
+class ManagedUploader
+internal constructor(
+    override var ignoreSizeLimit: Boolean,
+    maxConcurrency: Int,
+    maxDataSize: Long,
+    configurationCache: ConfigurationCache,
+    uploadMethod: UploadMethod = UploadMethod.DEFAULT,
+    ingestRetryPolicy: IngestRetryPolicy = SimpleRetryPolicy(),
+    tokenCredential: TokenCredential? = null,
+) :
+    ContainerUploaderBase(
+        maxConcurrency = maxConcurrency,
+        maxDataSize = maxDataSize,
+        configurationCache = configurationCache,
+        uploadMethod = uploadMethod,
+        retryPolicy = ingestRetryPolicy,
+        tokenCredential = tokenCredential,
+    ) {
+
+    companion object {
+        /**
+         * Creates a new builder for constructing ManagedUploader instances.
+         *
+         * @return a new ManagedUploaderBuilder instance
+         */
+        @JvmStatic
+        fun builder(): ManagedUploaderBuilder {
+            return ManagedUploaderBuilder.create()
+        }
+    }
+
+    override suspend fun selectContainers(
+        configurationCache: ConfigurationCache,
+        uploadMethod: UploadMethod,
+    ): List<ExtendedContainerInfo> {
+        // Called on every selection: getConfiguration() is invoked again so
+        // that the cache can return fresh data or reuse cached data, as
+        // appropriate.
+        val containerSettings =
+            configurationCache.getConfiguration().containerSettings
+                ?: throw IngestException(
+                    "No container settings available",
+                    isPermanent = true,
+                )
+        val hasStorage = !containerSettings.containers.isNullOrEmpty()
+        val hasLake = !containerSettings.lakeFolders.isNullOrEmpty()
+
+        if (!hasStorage && !hasLake) {
+            throw IngestException("No containers available", isPermanent = true)
+        }
+
+        // Determine effective upload method
+        val effectiveMethod =
+            when (uploadMethod) {
+                UploadMethod.DEFAULT -> {
+                    // Use server's preferred upload method if available
+                    val serverPreference =
+                        containerSettings.preferredUploadMethod
+                    when {
+                        serverPreference.equals("Storage", ignoreCase = true) &&
+                            hasStorage -> {
+                            logger.debug(
+                                "Using server preferred upload method: Storage",
+                            )
+                            UploadMethod.STORAGE
+                        }
+                        serverPreference.equals("Lake", ignoreCase = true) &&
+                            hasLake -> {
+                            logger.debug(
+                                "Using server preferred upload method: Lake",
+                            )
+                            UploadMethod.LAKE
+                        }
+                        // Fallback: prefer Storage if available, otherwise Lake
+                        hasStorage -> {
+                            logger.debug(
+                                "No server preference or unavailable, defaulting to Storage",
+                            )
+                            UploadMethod.STORAGE
+                        }
+                        else -> {
+                            logger.debug(
+                                "No server preference or unavailable, defaulting to Lake",
+                            )
+                            UploadMethod.LAKE
+                        }
+                    }
+                }
+                UploadMethod.LAKE ->
+                    if (hasLake) UploadMethod.LAKE else UploadMethod.STORAGE
+                UploadMethod.STORAGE ->
+                    if (hasStorage) UploadMethod.STORAGE else UploadMethod.LAKE
+            }
+        return when {
+            effectiveMethod == UploadMethod.LAKE && hasLake ->
+                containerSettings.lakeFolders.map {
+                    ExtendedContainerInfo(it, UploadMethod.LAKE)
+                }
+            effectiveMethod == UploadMethod.STORAGE && hasStorage ->
+                containerSettings.containers.map {
+                    ExtendedContainerInfo(it, UploadMethod.STORAGE)
+                }
+            hasStorage ->
+                containerSettings.containers.map {
+                    ExtendedContainerInfo(it, UploadMethod.STORAGE)
+                }
+            else ->
+                containerSettings.lakeFolders!!.map {
+                    ExtendedContainerInfo(it,
UploadMethod.LAKE) + } + } + } + + override fun close() {} +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt new file mode 100644 index 000000000..085f37e8e --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY +import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy +import com.microsoft.azure.kusto.ingest.v2.common.SimpleRetryPolicy + +/** Builder for creating ManagedUploader instances with a fluent API. */ +class ManagedUploaderBuilder private constructor() { + private var ignoreSizeLimit: Boolean = false + private var maxConcurrency: Int = UPLOAD_CONTAINER_MAX_CONCURRENCY + private var maxDataSize: Long? = null + private var configurationCache: ConfigurationCache? = null + private var uploadMethod: UploadMethod = UploadMethod.DEFAULT + private var ingestRetryPolicy: IngestRetryPolicy = SimpleRetryPolicy() + private var tokenCredential: TokenCredential? = null + + companion object { + /** Creates a new ManagedUploaderBuilder instance. */ + @JvmStatic + fun create(): ManagedUploaderBuilder { + return ManagedUploaderBuilder() + } + } + + /** + * Sets whether to ignore the size limit for uploads. + * + * @param ignore true to ignore size limits, false to enforce them + * @return this builder instance for method chaining + */ + fun withIgnoreSizeLimit(ignore: Boolean): ManagedUploaderBuilder { + this.ignoreSizeLimit = ignore + return this + } + + /** + * Sets the maximum concurrency for parallel uploads. + * + * @param concurrency the maximum number of concurrent uploads + * @return this builder instance for method chaining + * @throws IllegalArgumentException if concurrency is not positive + */ + fun withMaxConcurrency(concurrency: Int): ManagedUploaderBuilder { + require(concurrency > 0) { + "Max concurrency must be positive, got: $concurrency" + } + this.maxConcurrency = concurrency + return this + } + + /** + * Sets the maximum data size for uploads in bytes. + * + * @param bytes the maximum data size in bytes + * @return this builder instance for method chaining + * @throws IllegalArgumentException if bytes is not positive + */ + fun withMaxDataSize(bytes: Long): ManagedUploaderBuilder { + require(bytes > 0) { "Max data size must be positive, got: $bytes" } + this.maxDataSize = bytes + return this + } + + /** + * Sets the configuration cache to use. + * + * @param cache the configuration cache instance + * @return this builder instance for method chaining + */ + fun withConfigurationCache( + cache: ConfigurationCache, + ): ManagedUploaderBuilder { + this.configurationCache = cache + return this + } + + /** + * Sets the upload method to use (Storage, Lake, or Default). 
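+     * For example, `withUploadMethod(UploadMethod.LAKE)` routes uploads to
+     * OneLake folders when the configuration advertises them; see
+     * [ManagedUploader.selectContainers] for the fallback rules.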
+ * + * @param method the upload method + * @return this builder instance for method chaining + */ + fun withUploadMethod(method: UploadMethod): ManagedUploaderBuilder { + this.uploadMethod = method + return this + } + + /** + * Sets the retry policy for ingestion operations. + * + * @param policy the retry policy to use + * @return this builder instance for method chaining + */ + fun withRetryPolicy(policy: IngestRetryPolicy): ManagedUploaderBuilder { + this.ingestRetryPolicy = policy + return this + } + + /** + * Sets the token credential for authentication. + * + * @param credential the token credential + * @return this builder instance for method chaining + */ + fun withTokenCredential( + credential: TokenCredential, + ): ManagedUploaderBuilder { + this.tokenCredential = credential + return this + } + + /** + * Builds and returns a ManagedUploader instance with the configured + * settings. + * + * @return a new ManagedUploader instance + * @throws IllegalStateException if required configuration is missing + */ + fun build(): ManagedUploader { + requireNotNull(configurationCache) { + "Configuration cache is required. Call withConfigurationCache() before build()" + } + + return ManagedUploader( + ignoreSizeLimit = ignoreSizeLimit, + maxConcurrency = maxConcurrency, + maxDataSize = + maxDataSize ?: UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES, + configurationCache = configurationCache!!, + uploadMethod = uploadMethod, + ingestRetryPolicy = ingestRetryPolicy, + tokenCredential = tokenCredential, + ) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploadMethod.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploadMethod.kt new file mode 100644 index 000000000..3cac2ef5b --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploadMethod.kt @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader + +/** Specifies the upload method to use for blob uploads. */ +enum class UploadMethod { + /** Use server preference or Storage as fallback. */ + DEFAULT, + + /** Use Azure Storage blob. */ + STORAGE, + + /** Use OneLake. */ + LAKE, +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadErrorCode.kt similarity index 90% rename from ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt rename to ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadErrorCode.kt index 02bbb91ff..658555c69 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadErrorCode.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadErrorCode.kt @@ -1,10 +1,11 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
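+// A sketch of consuming these codes after a batch upload, assuming
+// UploadResults exposes the successes and failures it is constructed with
+// in ContainerUploaderBase.uploadManyAsync:
+//
+//     val results = uploader.uploadManyAsync(sources)
+//     results.failures.forEach { failure ->
+//         logger.warn(
+//             "{} failed: {} ({})",
+//             failure.sourceName,
+//             failure.errorCode.code,
+//             failure.errorCode.description,
+//         )
+//     }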
-package com.microsoft.azure.kusto.ingest.v2.container +package com.microsoft.azure.kusto.ingest.v2.uploader.models enum class UploadErrorCode(val code: String, val description: String) { // Stream validation errors SOURCE_IS_NULL("UploadError_SourceIsNull", "Upload source is null"), + SOURCE_NOT_FOUND("UploadError_SourceNotFound", "Upload source not found"), SOURCE_NOT_READABLE( "UploadError_SourceNotReadable", "Upload source is not readable", diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadResult.kt similarity index 94% rename from ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt rename to ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadResult.kt index bb2d18ca5..186406da6 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/container/UploadResult.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadResult.kt @@ -1,6 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.container +package com.microsoft.azure.kusto.ingest.v2.uploader.models import com.microsoft.azure.kusto.ingest.v2.common.BatchOperationResult import java.time.Instant diff --git a/ingest-v2/src/main/resources/app.properties b/ingest-v2/src/main/resources/app.properties new file mode 100644 index 000000000..e5683df88 --- /dev/null +++ b/ingest-v2/src/main/resources/app.properties @@ -0,0 +1 @@ +version=${project.version} \ No newline at end of file diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt index b572cdfa5..9006e7a13 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClientTest.kt @@ -4,38 +4,42 @@ package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import kotlinx.coroutines.runBlocking -import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.parallel.Execution import org.junit.jupiter.api.parallel.ExecutionMode import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource +import java.net.ConnectException import java.util.stream.Stream import kotlin.test.assertNotNull -@TestInstance(TestInstance.Lifecycle.PER_CLASS) @Execution(ExecutionMode.CONCURRENT) class ConfigurationClientTest : IngestV2TestBase(ConfigurationClientTest::class.java) { - private fun endpointAndExceptionClause(): Stream { - return Stream.of( - Arguments.of( - "Success Scenario", - System.getenv("DM_CONNECTION_STRING"), - false, - false, - ), - // Note on the arg below when this is rolled out to all clusters, this test will - // start failing - Arguments.of( - "Cluster without ingest-v2", - "https://help.kusto.windows.net", - true, - false, - ), - ) + + companion object { + @JvmStatic + private fun endpointAndExceptionClause(): Stream { + return Stream.of( + Arguments.of( + 
"Success Scenario", + System.getenv("DM_CONNECTION_STRING"), + false, + false, + ), + // Note on the arg below when this is rolled out to all clusters, this test will + // start failing + Arguments.of( + "Cluster without ingest-v2", + "https://help.kusto.windows.net", + true, + false, + ), + ) + } } @ParameterizedTest(name = "{0}") @@ -48,7 +52,13 @@ class ConfigurationClientTest : ): Unit = runBlocking { logger.info("Running configuration test {}", testName) // val cluster = System.getenv("DM_CONNECTION_STRING") - val actualWrapper = ConfigurationClient(cluster, tokenProvider, true) + val actualWrapper = + ConfigurationClient( + cluster, + tokenProvider, + true, + ClientDetails.createDefault(), + ) if (isException) { // assert the call to DefaultConfigurationCache throws val exception = @@ -58,12 +68,14 @@ class ConfigurationClientTest : actualWrapper .getConfigurationDetails() }, + clientDetails = + ClientDetails.createDefault(), ) .getConfiguration() } assertNotNull(exception, "Exception should not be null") if (isUnreachableHost) { - assert(exception.cause is java.net.ConnectException) + assert(exception.cause is ConnectException) assert(exception.isPermanent == false) } else { // if the host is reachable, we expect a 404 @@ -73,6 +85,7 @@ class ConfigurationClientTest : } else { val defaultCachedConfig = DefaultConfigurationCache( + clientDetails = ClientDetails.createDefault(), configurationProvider = { actualWrapper.getConfigurationDetails() }, diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt index e7fd12b21..9a46de1a7 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt @@ -8,14 +8,17 @@ import com.microsoft.azure.kusto.data.Client import com.microsoft.azure.kusto.data.ClientFactory import com.microsoft.azure.kusto.data.auth.ConnectionStringBuilder import com.microsoft.azure.kusto.ingest.v2.models.Format -import org.junit.jupiter.api.AfterAll -import org.junit.jupiter.api.BeforeAll -import org.junit.jupiter.api.TestInstance +import org.awaitility.Awaitility +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.BeforeEach import org.slf4j.Logger import org.slf4j.LoggerFactory -import java.util.UUID +import java.time.Duration +import java.time.temporal.ChronoUnit +import java.util.* +import kotlin.test.assertNotNull +import kotlin.test.assertTrue -@TestInstance(TestInstance.Lifecycle.PER_CLASS) abstract class IngestV2TestBase(testClass: Class<*>) { protected val logger: Logger = LoggerFactory.getLogger(testClass) protected val tokenProvider: TokenCredential = @@ -26,11 +29,12 @@ abstract class IngestV2TestBase(testClass: Class<*>) { ?: throw IllegalArgumentException( "DM_CONNECTION_STRING environment variable is not set", ) + protected val oneLakeFolder: String? 
= System.getenv("ONE_LAKE_FOLDER") protected val targetTestFormat = Format.json protected val engineEndpoint: String = dmEndpoint.replace("https://ingest-", "https://") - protected open val targetTable: String = - "Sensor_${UUID.randomUUID().toString().replace("-", "").take(8)}" + protected val targetTable: String = + "V2_Java_Tests_Sensor_${UUID.randomUUID().toString().replace("-", "").take(8)}" protected val columnNamesToTypes: Map = mapOf( "timestamp" to "datetime", @@ -38,12 +42,13 @@ abstract class IngestV2TestBase(testClass: Class<*>) { "messageId" to "guid", "temperature" to "real", "humidity" to "real", + "format" to "string", "SourceLocation" to "string", "Type" to "string", ) protected lateinit var adminClusterClient: Client - @BeforeAll + @BeforeEach fun createTables() { val createTableScript = """ @@ -75,16 +80,66 @@ abstract class IngestV2TestBase(testClass: Class<*>) { ) adminClusterClient.executeMgmt(database, createTableScript) adminClusterClient.executeMgmt(database, mappingReference) + clearDatabaseSchemaCache() + } + + protected fun alterTableToEnableStreaming() { + adminClusterClient.executeMgmt( + database, + ".alter table $targetTable policy streamingingestion enable", + ) + } + + protected fun clearDatabaseSchemaCache() { adminClusterClient.executeMgmt( database, ".clear database cache streamingingestion schema", ) } - @AfterAll + @AfterEach fun dropTables() { val dropTableScript = ".drop table $targetTable ifexists" - logger.error("Dropping table $targetTable") + logger.info("Dropping table $targetTable") adminClusterClient.executeMgmt(database, dropTableScript) } + + protected fun awaitAndQuery( + query: String, + queryColumnName: String = "count", + expectedResultsCount: Long, + isManagementQuery: Boolean = false, + ) { + Awaitility.await() + .atMost(Duration.of(2, ChronoUnit.MINUTES)) + .pollInterval(Duration.of(5, ChronoUnit.SECONDS)) + .ignoreExceptions() + .untilAsserted { + val results = + if (isManagementQuery) { + adminClusterClient + .executeMgmt(database, query) + .primaryResults + } else { + adminClusterClient + .executeQuery(database, query) + .primaryResults + } + results.next() + val actualResultCount = results.getLong(queryColumnName) + logger.trace( + "For query {} , Current result count: {}, waiting for {}", + query, + actualResultCount, + expectedResultsCount, + ) + actualResultCount >= expectedResultsCount + assertNotNull(results, "Query results should not be null") + assertNotNull(actualResultCount, "Count should not be null") + assertTrue( + actualResultCount >= expectedResultsCount, + "expected $expectedResultsCount counts should match $actualResultCount", + ) + } + } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt index 2d609e6e0..b128e036e 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt @@ -2,28 +2,28 @@ // Licensed under the MIT License. 
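+// The assertions below poll through the awaitAndQuery helper added to
+// IngestV2TestBase above; a typical call, as used by these tests:
+//
+//     awaitAndQuery(
+//         query = "$targetTable | summarize count=count()",
+//         expectedResultsCount = 5,
+//     )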
package com.microsoft.azure.kusto.ingest.v2 +import com.microsoft.azure.kusto.ingest.v2.builders.ManagedStreamingIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.client.policy.DefaultManagedStreamingPolicy +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource import com.microsoft.azure.kusto.ingest.v2.source.CompressionType -import com.microsoft.azure.kusto.ingest.v2.source.FileSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Assumptions.assumeTrue -import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.parallel.Execution import org.junit.jupiter.api.parallel.ExecutionMode import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.CsvSource -import org.junit.jupiter.params.provider.MethodSource import java.io.ByteArrayInputStream import java.net.ConnectException -import java.nio.file.Files -import java.util.UUID -import java.util.stream.Stream +import java.util.* +import kotlin.test.DefaultAsserter.assertNotNull +import kotlin.test.Test +import kotlin.test.assertEquals import kotlin.test.assertNotNull import kotlin.time.Duration @@ -36,78 +36,75 @@ import kotlin.time.Duration * 3. Handles various error scenarios * 4. 
Respects the managed streaming policy settings */ -@TestInstance(TestInstance.Lifecycle.PER_CLASS) @Execution(ExecutionMode.CONCURRENT) class ManagedStreamingIngestClientTest : IngestV2TestBase(ManagedStreamingIngestClientTest::class.java) { - private val publicBlobUrl = - "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" - private val targetUuid = UUID.randomUUID().toString() private val randomRow: String = """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$targetUuid","messageId": "7f316225-839a-4593-92b5-1812949279b3","temperature": 31.0301639051317,"humidity": 62.0791099602725}""" .trimIndent() + private val managedClient = + ManagedStreamingIngestClientBuilder.create(dmUrl = dmEndpoint) + .withAuthentication(tokenProvider) + .withManagedStreamingIngestPolicy( + DefaultManagedStreamingPolicy(), + ) + .build() + /** Test managed streaming ingestion with small blob data */ @ParameterizedTest( name = "[ManagedStreaming-SmallData] {index} => TestName={0}", ) @CsvSource( - "ManagedStreaming-SmallBlob,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json", - "ManagedStreaming-SmallMultilineBlob,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", + "ManagedStreaming-SmallBlob,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,json", + "ManagedStreaming-SmallMultilineBlob,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json,multijson", ) fun `test managed streaming ingestion with small blob data`( testName: String, blobUrl: String, + targetFormat: String, ): Unit = runBlocking { logger.info("Starting test: $testName") - val managedClient = - ManagedStreamingIngestClient( - clusterUrl = engineEndpoint, - tokenCredential = tokenProvider, - managedStreamingPolicy = - DefaultManagedStreamingPolicy(), - skipSecurityChecks = true, - ) - - val testSources = listOf(BlobSourceInfo(blobUrl)) - val properties = - IngestRequestProperties( - format = targetTestFormat, - enableTracking = true, - ) - + val testSources = BlobSource(blobUrl) + val format = + when (targetFormat.lowercase()) { + "json" -> Format.json + "multijson" -> Format.multijson + else -> + throw IllegalArgumentException( + "Unsupported format: $targetFormat", + ) + } + val ingestRequestProperties = + IngestRequestPropertiesBuilder(format) + .withEnableTracking(true) + .build() try { // Ingest data - should attempt streaming first val ingestionResponse = - managedClient.submitManagedIngestion( + managedClient.ingestAsync( database = database, table = targetTable, - sources = testSources, - format = targetTestFormat, - ingestProperties = properties, + source = testSources, + ingestRequestProperties = ingestRequestProperties, ) - logger.info( "E2E: Submitted managed streaming ingestion with operation ID: {}", - ingestionResponse.ingestionOperationId, + ingestionResponse.ingestResponse.ingestionOperationId, ) assertNotNull( ingestionResponse, "IngestionOperation should not be null", ) assertNotNull( - ingestionResponse.ingestionOperationId, + ingestionResponse.ingestResponse.ingestionOperationId, "Operation ID should not be null", ) // If it fell back to queued ingestion, poll for status - if ( - !ingestionResponse.ingestionOperationId.startsWith( - "managed-", - ) - ) { + if (ingestionResponse.ingestionType == IngestKind.QUEUED) { logger.info( "Ingestion fell back to queued mode. 
Polling for completion...", ) @@ -116,7 +113,8 @@ class ManagedStreamingIngestClientTest : database = database, table = targetTable, operationId = - ingestionResponse.ingestionOperationId, + ingestionResponse.ingestResponse + .ingestionOperationId!!, pollingInterval = Duration.parse("PT5S"), timeout = Duration.parse("PT5M"), ) @@ -136,26 +134,11 @@ class ManagedStreamingIngestClientTest : } else { // Streaming ingestion - verify data was ingested logger.info("Ingestion used streaming mode. Verifying data...") - kotlinx.coroutines.delay(3000) - - val results = - adminClusterClient - .executeQuery( - database, - "$targetTable | summarize count=count()", - ) - .primaryResults - assertNotNull(results, "Query results should not be null") - results.next() - val count: Long = results.getLong("count") - assertNotNull(count, "Count should not be null") - assert(count > 0) { - "Expected records in table after streaming ingestion, but got $count" - } - logger.info( - "Streaming ingestion verified - {} records in table", - count, + // kotlinx.coroutines.delay(3000) + awaitAndQuery( + query = "$targetTable | summarize count=count()", + expectedResultsCount = 5, ) } } catch (e: ConnectException) { @@ -175,219 +158,13 @@ class ManagedStreamingIngestClientTest : } } - /** Test managed streaming with small streaming data */ - @ParameterizedTest( - name = "[ManagedStreaming-DirectData] {index} => TestName={0}", - ) - @MethodSource("directDataTestParameters") - fun `test managed streaming with small stream data`( - testName: String, - data: String, - deviceId: String, - ) = runBlocking { - logger.info( - "Starting managed streaming with small stream data: $testName", - ) - - val managedClient = - ManagedStreamingIngestClient( - clusterUrl = engineEndpoint, - tokenCredential = tokenProvider, - managedStreamingPolicy = - DefaultManagedStreamingPolicy(), - skipSecurityChecks = true, - ) - - val source = - StreamSourceInfo( - stream = ByteArrayInputStream(data.toByteArray()), - format = targetTestFormat, - sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "test-stream", - ) - - val properties = - IngestRequestProperties( - format = targetTestFormat, - enableTracking = true, - ) - - try { - val ingestionResponse = - managedClient.submitManagedIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = targetTestFormat, - ingestProperties = properties, - ) - - kotlinx.coroutines.delay(5000) - - val results = - adminClusterClient - .executeQuery( - database, - "$targetTable | where deviceId == '$deviceId' | summarize count=count() by deviceId", - ) - .primaryResults - assertNotNull( - ingestionResponse, - "IngestionOperation should not be null", - ) - assertNotNull(results, "Query results should not be null") - results.next() - val count: Long = results.getLong("count") - assertNotNull(count, "Count should not be null") - assert(count == 1L) { - "Expected 1 record for $deviceId, but got $count" - } - logger.debug("{} verified successfully", testName) - } catch (e: ConnectException) { - assumeTrue( - false, - "Skipping test: Unable to connect to test cluster: ${e.message}", - ) - } - } - - private fun directDataTestParameters(): Stream { - val directDataId = UUID.randomUUID().toString() - val directData = - """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$directDataId","messageId": "test-message-1","temperature": 25.5,"humidity": 60.0}""" - return Stream.of( - Arguments.of("DirectData-SingleRow", directData, directDataId), - ) - } - - 
/** Test managed streaming with multiple sources (file and stream) */ - @ParameterizedTest( - name = - "[ManagedStreaming-LocalSource] {index} => SourceType={0}, TestName={1}", - ) - @CsvSource( - "file,ManagedStreaming-FileSource,SampleFileSource.json", - "stream,ManagedStreaming-StreamSource,SampleStreamSource.json", - ) - fun `test managed streaming with multiple sources`( - sourceType: String, - testName: String, - fileName: String, - ) = runBlocking { - logger.info("Starting multiple sources test: $testName") - - // Download test data - val deviceDataUrl = - "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" - val deviceData = java.net.URL(deviceDataUrl).readText() - val targetFormat = Format.multijson - - val source: AbstractSourceInfo = - when (sourceType) { - "file" -> { - val tempFile = Files.createTempFile(fileName, null) - Files.write(tempFile, deviceData.toByteArray()) - FileSourceInfo( - path = tempFile, - format = targetFormat, - compressionType = CompressionType.NONE, - name = fileName, - sourceId = UUID.randomUUID(), - ) - .also { - Runtime.getRuntime() - .addShutdownHook( - Thread { - Files.deleteIfExists( - tempFile, - ) - }, - ) - } - } - "stream" -> - StreamSourceInfo( - stream = - ByteArrayInputStream( - deviceData.toByteArray(), - ), - format = targetFormat, - sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = fileName, - ) - else -> error("Unknown sourceType: $sourceType") - } - - val managedClient = - ManagedStreamingIngestClient( - clusterUrl = engineEndpoint, - tokenCredential = tokenProvider, - managedStreamingPolicy = - DefaultManagedStreamingPolicy(), - skipSecurityChecks = true, - ) - - val properties = - IngestRequestProperties( - format = targetFormat, - enableTracking = true, - ) - - val ingestionResponse = - managedClient.submitManagedIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = targetFormat, - ingestProperties = properties, - ) - - assertNotNull( - ingestionResponse, - "IngestionOperation should not be null", - ) - assertNotNull( - ingestionResponse.ingestionOperationId, - "Operation ID should not be null", - ) - - // If it used queued ingestion, poll for status - if (!ingestionResponse.ingestionOperationId.startsWith("managed-")) { - val finalStatus = - managedClient.pollUntilCompletion( - database = database, - table = targetTable, - operationId = - ingestionResponse.ingestionOperationId, - pollingInterval = Duration.parse("PT5S"), - timeout = Duration.parse("PT5M"), - ) - - logger.info( - "{} ingestion completed with final status: {}", - testName, - finalStatus.status, - ) - - assert( - finalStatus.details?.any { - it.status == BlobStatus.Status.Succeeded - } == true, - ) { - "Expected at least one successful ingestion for $testName" - } - } - } - /** Test managed streaming with custom policy */ @ParameterizedTest( name = "[ManagedStreaming-CustomPolicy] {index} => TestName={0}", ) @CsvSource( "CustomPolicy-ContinueWhenUnavailable,true,1.0", - "CustomPolicy-ReducedSizeLimit,false,0.5", + "CustomPolicy-ReducedSizeLimit,false,0.25", ) fun `test managed streaming with custom policy`( testName: String, @@ -395,25 +172,25 @@ class ManagedStreamingIngestClientTest : dataSizeFactor: Double, ) = runBlocking { logger.info("Starting custom policy test: $testName") - + alterTableToEnableStreaming() + clearDatabaseSchemaCache() val customPolicy = DefaultManagedStreamingPolicy( continueWhenStreamingIngestionUnavailable = continueWhenUnavailable, 
dataSizeFactor = dataSizeFactor, ) - - val managedClient = - ManagedStreamingIngestClient( - clusterUrl = engineEndpoint, - tokenCredential = tokenProvider, - managedStreamingPolicy = customPolicy, - skipSecurityChecks = true, - ) - + val customManagedClient = + ManagedStreamingIngestClientBuilder.create(dmUrl = dmEndpoint) + .withAuthentication(tokenProvider) + .withManagedStreamingIngestPolicy( + DefaultManagedStreamingPolicy(), + ) + .withManagedStreamingIngestPolicy(customPolicy) + .build() val testData = randomRow val source = - StreamSourceInfo( + StreamSource( stream = ByteArrayInputStream(testData.toByteArray()), format = targetTestFormat, sourceCompression = CompressionType.NONE, @@ -429,39 +206,46 @@ class ManagedStreamingIngestClientTest : try { val ingestionResponse = - managedClient.submitManagedIngestion( + customManagedClient.ingestAsync( database = database, table = targetTable, - sources = listOf(source), - format = targetTestFormat, - ingestProperties = properties, + source = source, + ingestRequestProperties = properties, ) - assertNotNull( ingestionResponse, "Ingestion response should not be null", ) - // Verify data was ingested (either via streaming or queued) - kotlinx.coroutines.delay(5000) - - val results = - adminClusterClient - .executeQuery( - database, - "$targetTable | where deviceId == '$targetUuid' | summarize count=count()", + // If it used queued ingestion, poll for status + if (ingestionResponse.ingestionType == IngestKind.QUEUED) { + val finalStatus = + customManagedClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = + ingestionResponse.ingestResponse + .ingestionOperationId!!, + pollingInterval = Duration.parse("PT5S"), + timeout = Duration.parse("PT5M"), ) - .primaryResults - - assertNotNull(results, "Query results should not be null") - if (results.next()) { - val count: Long = results.getLong("count") - logger.info("{} ingested {} records", testName, count) - // We verify data was ingested regardless of method - assert(count > 0) { - "Expected data to be ingested with custom policy" + logger.info( + "{} ingestion fell back to QUEUED mode and completed with final status: {}", + testName, + finalStatus.status, + ) + assertNotNull(finalStatus.status?.succeeded) + finalStatus.status.succeeded.let { + assert(it > 0) { + "Expected at least one successful ingestion for $testName" + } } } + awaitAndQuery( + query = + "$targetTable | where deviceId == '$targetUuid' | summarize count=count()", + expectedResultsCount = 1, + ) } catch (e: ConnectException) { assumeTrue( false, @@ -469,4 +253,54 @@ class ManagedStreamingIngestClientTest : ) } } + + @Test + fun fallbackToQueuedIngestionTest() = runBlocking { + val customPolicy = + DefaultManagedStreamingPolicy( + continueWhenStreamingIngestionUnavailable = true, + dataSizeFactor = 0.25, + ) + val customManagedClient = + ManagedStreamingIngestClientBuilder.create(dmUrl = dmEndpoint) + .withAuthentication(tokenProvider) + .withManagedStreamingIngestPolicy( + DefaultManagedStreamingPolicy(), + ) + .withManagedStreamingIngestPolicy(customPolicy) + .build() + adminClusterClient.executeMgmt( + database, + ".alter table $targetTable policy streamingingestion disable", + ) + clearDatabaseSchemaCache() + val testSource = + BlobSource( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", + ) + val ingestRequestProperties = + IngestRequestPropertiesBuilder(Format.multijson) + .withEnableTracking(true) + .build() + val ingestionResponse = + 
customManagedClient.ingestAsync( + database = database, + table = targetTable, + source = testSource, + ingestRequestProperties = ingestRequestProperties, + ) + assertNotNull( + ingestionResponse, + "Ingestion response should not be null", + ) + assertEquals( + IngestKind.QUEUED, + ingestionResponse.ingestionType, + "Ingestion should have fallen back to QUEUED", + ) + awaitAndQuery( + query = "$targetTable | summarize count=count()", + expectedResultsCount = 5, + ) + } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt new file mode 100644 index 000000000..c5576fb97 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt @@ -0,0 +1,952 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2 + +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestClientException +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.ColumnMapping +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.InlineIngestionMapping +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.TransformationMethod +import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo +import com.microsoft.azure.kusto.ingest.v2.models.ContainerSettings +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import com.microsoft.azure.kusto.ingest.v2.source.FileSource +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource +import kotlinx.coroutines.runBlocking +import kotlinx.serialization.json.Json +import kotlinx.serialization.modules.SerializersModule +import org.junit.jupiter.api.Assumptions.assumeTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.api.fail +import org.junit.jupiter.api.parallel.Execution +import org.junit.jupiter.api.parallel.ExecutionMode +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import java.io.ByteArrayInputStream +import java.net.ConnectException +import java.nio.file.Files +import java.nio.file.StandardCopyOption +import java.time.Clock +import java.time.Instant +import java.time.OffsetDateTime +import java.time.temporal.ChronoUnit +import java.util.UUID +import kotlin.test.assertNotNull +import kotlin.time.Duration + +@Execution(ExecutionMode.CONCURRENT) +class QueuedIngestClientTest : + IngestV2TestBase(QueuedIngestClientTest::class.java) { + + private val pollInterval = Duration.parse("PT2S") + 
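+    // kotlin.time.Duration.parse accepts ISO-8601 strings, so "PT2S" polls
+    // every two seconds and "PT2M" caps each status poll at two minutes.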
private val pollTimeout = Duration.parse("PT2M") + + private fun createTestClient( + maxConcurrency: Int? = null, + maxDataSize: Long? = null, + ignoreFileSize: Boolean = false, + ): QueuedIngestClient { + val builder = + QueuedIngestClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + + maxConcurrency?.let { builder.withMaxConcurrency(it) } + maxDataSize?.let { builder.withMaxDataSize(it) } + if (ignoreFileSize) { + builder.withIgnoreFileSize(true) + } + + return builder.build() + } + + private fun assertValidIngestionResponse( + response: ExtendedIngestResponse, + testName: String, + ): String { + assertNotNull(response, "$testName: IngestResponse should not be null") + assertNotNull( + response.ingestResponse.ingestionOperationId, + "$testName: Operation ID should not be null", + ) + return response.ingestResponse.ingestionOperationId + } + + @Test + fun `test builder variations`() { + // builder with optional parameters + val client1 = createTestClient(maxConcurrency = 10) + assertNotNull( + client1, + "Client with optional parameters should not be null", + ) + + // builder with connector client details (uses custom configuration) + val client2 = + QueuedIngestClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withConnectorClientDetails( + name = "TestConnector", + version = "2.0", + appName = "MyApp", + appVersion = "1.5", + additionalFields = + mapOf( + "JobId" to "job-123", + "RunId" to "run-456", + ), + ) + .skipSecurityChecks() + .build() + assertNotNull( + client2, + "Client with connector details should not be null", + ) + + // builder with connector client details and user (uses custom configuration) + val client3 = + QueuedIngestClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withConnectorClientDetails( + name = "TestConnector", + version = "2.0", + sendUser = true, + overrideUser = "test-user@example.com", + ) + .skipSecurityChecks() + .build() + assertNotNull( + client3, + "Client with connector details and user should not be null", + ) + } + + @ParameterizedTest(name = "[QueuedIngestion] {index} => TestName ={0}") + @CsvSource( + // Single JSON blob, no mapping + "QueuedIngestion-NoMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,false", + // Single JSON blob, with mapping reference + "QueuedIngestion-WithMappingReference,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,true,false", + // Single JSON blob, with inline mapping + "QueuedIngestion-WithInlineMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,true", + ) + fun `test queued ingestion with blob variations`( + testName: String, + blobUrl: String, + useMappingReference: Boolean, + useInlineIngestionMapping: Boolean, + ): Unit = runBlocking { + logger.info("Starting test: $testName") + val ingestClient = createTestClient() + val testSources = listOf(BlobSource(blobUrl)) + + val properties = + if (useMappingReference) { + IngestRequestPropertiesBuilder(format = targetTestFormat) + .withIngestionMappingReference( + "${targetTable}_mapping", + ) + .withEnableTracking(true) + .build() + } else if (useInlineIngestionMapping) { + val ingestionColumnMappings = + columnNamesToTypes.keys.map { col -> + when (col) { + "SourceLocation" -> + ColumnMapping( + columnName = col, + columnType = + "string", + ) + .apply { + setTransform( + TransformationMethod + .SourceLocation, + ) + } + "Type" -> + ColumnMapping( + columnName = col, + 
columnType = + "string", + ) + .apply { + setConstantValue( + "IngestionMapping", + ) + } + else -> + ColumnMapping( + columnName = col, + columnType = + columnNamesToTypes[ + col, + ]!!, + ) + .apply { setPath("$.$col") } + } + } + val inlineIngestionMappingInline = + InlineIngestionMapping( + columnMappings = ingestionColumnMappings, + ingestionMappingType = + InlineIngestionMapping + .IngestionMappingType + .JSON, + ) + val ingestionMappingString = + jsonPrinter.encodeToString( + inlineIngestionMappingInline.columnMappings, + ) + IngestRequestPropertiesBuilder(format = targetTestFormat) + .withIngestionMapping(ingestionMappingString) + .withEnableTracking(true) + .build() + } else { + IngestRequestPropertiesBuilder(format = targetTestFormat) + .withEnableTracking(true) + .build() + } + + try { + val ingestionResponse = + ingestClient.ingestAsync( + sources = testSources, + database = database, + table = targetTable, + ingestRequestProperties = properties, + ) + + val operationId = + assertValidIngestionResponse(ingestionResponse, testName) + val finalStatus = + ingestClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = operationId, + pollingInterval = pollInterval, + timeout = pollTimeout, + ) + + logger.info( + "{}: Polling completed with status:{}", + testName, + finalStatus.status, + ) + + if (finalStatus.details?.isNotEmpty() == true) { + val succeededCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Succeeded + } + val failedCount = + finalStatus.details.count { + it.status == BlobStatus.Status.Failed + } + logger.info( + "$testName: Succeeded: $succeededCount, Failed: $failedCount", + ) + + assert(succeededCount > 0 || failedCount > 0) { + "Expected at least some blobs to be processed" + } + assert(failedCount == 0) { + "Expected 0 failed ingestions, but got $failedCount" + } + + if (failedCount > 0) { + finalStatus.details + .filter { it.status == BlobStatus.Status.Failed } + .forEach { + logger.error( + "Failed blob: ${it.sourceId}, message: ${it.details}", + ) + } + } + + val filterType = + when { + useMappingReference -> "MappingRef" + useInlineIngestionMapping -> "IngestionMapping" + else -> "None" + } + if (useMappingReference || useInlineIngestionMapping) { + awaitAndQuery( + query = + "$targetTable | where Type == '$filterType' | summarize count=count() by SourceLocation", + expectedResultsCount = 5L, + ) + } + } + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } + } + + private fun createTestStreamSource( + sizeInBytes: Int, + name: String, + ): StreamSource { + val jsonLine = + """{"testField":"value","size":$sizeInBytes,"name":"$name"}""" + + "\n" + val jsonLineBytes = jsonLine.toByteArray() + val numLines = (sizeInBytes / jsonLineBytes.size).coerceAtLeast(1) + val data = ByteArray(numLines * jsonLineBytes.size) + + for (i in 0 until numLines) { + System.arraycopy( + jsonLineBytes, + 0, + data, + i * jsonLineBytes.size, + jsonLineBytes.size, + ) + } + + return StreamSource( + stream = ByteArrayInputStream(data), + format = Format.multijson, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = name, + ) + } + + // definitely can be parallelized and optimized + @Test + fun `E2E - file size variations and batch uploads`() = runBlocking { + logger.info("E2E: Testing combined file sizes (small, large, 
batch)") + + val queuedIngestClient = createTestClient() + try { + // Small file (1KB) + logger.info("Testing small file upload (1KB)") + val smallSource = + createTestStreamSource(1024, "combined_small.json") + val smallResponse = + queuedIngestClient.ingestAsync( + source = smallSource, + database = database, + table = targetTable, + ingestRequestProperties = + IngestRequestPropertiesBuilder( + format = Format.multijson, + ) + .withEnableTracking(true) + .build(), + ) + assertNotNull(smallResponse.ingestResponse.ingestionOperationId) + val smallStatus = + queuedIngestClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = + smallResponse.ingestResponse + .ingestionOperationId, + pollingInterval = pollInterval, + timeout = pollTimeout, + ) + val smallSucceeded = + smallStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + assert(smallSucceeded > 0) { + "Expected successful small file ingestion" + } + logger.info( + "Small file upload completed: $smallSucceeded succeeded", + ) + + // Large file (10MB) + logger.info("Testing large file upload (10MB)") + val largeSource = + createTestStreamSource( + 10 * 1024 * 1024, + "combined_large.json", + ) + val largeResponse = + queuedIngestClient.ingestAsync( + database = database, + table = targetTable, + source = largeSource, + ingestRequestProperties = + IngestRequestPropertiesBuilder( + format = Format.multijson, + ) + .withEnableTracking(true) + .build(), + ) + assertNotNull(largeResponse.ingestResponse.ingestionOperationId) + val largeStatus = + queuedIngestClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = + largeResponse.ingestResponse + .ingestionOperationId, + pollingInterval = pollInterval, + timeout = pollTimeout, + ) + val largeSucceeded = + largeStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + assert(largeSucceeded > 0) { + "Expected successful large file ingestion" + } + logger.info( + "Large file upload completed: $largeSucceeded succeeded", + ) + + // Batch upload (5 files) + logger.info("Testing batch upload (5 files)") + val batchSources = + (1..5).map { i -> + createTestStreamSource( + 1024 * i, + "combined_batch_$i.json", + ) + } + val batchResponse = + queuedIngestClient.ingestAsync( + database = database, + table = targetTable, + sources = batchSources, + ingestRequestProperties = + IngestRequestPropertiesBuilder( + format = Format.multijson, + ) + .withEnableTracking(true) + .build(), + ) + assertNotNull(batchResponse.ingestResponse.ingestionOperationId) + val batchStatus = + queuedIngestClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = + batchResponse.ingestResponse + .ingestionOperationId, + pollingInterval = pollInterval, + timeout = pollTimeout, + ) + val batchSucceeded = + batchStatus.details?.count { + it.status == BlobStatus.Status.Succeeded + } ?: 0 + assert(batchSucceeded == batchSources.size) { + "Expected all batch files to succeed" + } + logger.info( + "Batch upload completed: $batchSucceeded/${batchSources.size} succeeded", + ) + } catch (e: ConnectException) { + assumeTrue(false, "Skipping test: ${e.message}") + } catch (e: Exception) { + if (e.cause is ConnectException) { + assumeTrue(false, "Skipping test: ${e.cause?.message}") + } else { + throw e + } + } + } + + @Test + fun `E2E - parallel processing with maxConcurrency`() = runBlocking { + logger.info("E2E: Testing parallel processing with maxConcurrency=5") + + val queuedIngestClient = 
createTestClient(maxConcurrency = 5)

    val sources =
        (1..10).map { i ->
            createTestStreamSource(512 * 1024, "parallel_$i.json")
        }

    try {
        val startTime = System.currentTimeMillis()
        val response =
            queuedIngestClient.ingestAsync(
                database = database,
                table = targetTable,
                sources = sources,
                ingestRequestProperties =
                    IngestRequestPropertiesBuilder(format = Format.multijson)
                        .withEnableTracking(true)
                        .build(),
            )
        val uploadDuration = System.currentTimeMillis() - startTime

        val operationId =
            assertValidIngestionResponse(
                response,
                "E2E - parallel processing",
            )
        logger.info(
            "Parallel upload submitted in ${uploadDuration}ms with operation ID: $operationId",
        )

        val finalStatus =
            queuedIngestClient.pollUntilCompletion(
                database = database,
                table = targetTable,
                operationId = operationId,
                pollingInterval = pollInterval,
                timeout = pollTimeout,
            )

        val succeededCount =
            finalStatus.details?.count {
                it.status == BlobStatus.Status.Succeeded
            } ?: 0
        logger.info(
            "Parallel upload: $succeededCount/${sources.size} succeeded (avg ${uploadDuration / sources.size}ms per file)",
        )
        assert(succeededCount == sources.size) {
            "Expected parallel uploads to succeed"
        }
    } catch (e: ConnectException) {
        assumeTrue(false, "Skipping test: ${e.message}")
    } catch (e: Exception) {
        if (e.cause is ConnectException) {
            assumeTrue(false, "Skipping test: ${e.cause?.message}")
        } else {
            throw e
        }
    }
}

private val jsonPrinter = Json {
    serializersModule = SerializersModule {
        contextual(OffsetDateTime::class, OffsetDateTimeSerializer)
    }
}

@ParameterizedTest(
    name =
        "[CompressionFormat] {index} => Format={0}, File={1}, Compression={2}",
)
@CsvSource(
    // Format, Resource file path, Compression type, Expected record count
    "multijson,compression/sample.multijson,NONE,1",
    "multijson,compression/sample.multijson.gz,GZIP,1",
    "multijson,compression/sample.multijson.zip,ZIP,1",
    "json,compression/sample.json,NONE,3",
    "parquet,compression/sample.parquet,NONE,1",
    "avro,compression/sample.avro,NONE,1",
)
fun `E2E - compression format tests`(
    formatName: String,
    resourcePath: String,
    compressionTypeName: String,
    expectedRecordCount: Int,
): Unit = runBlocking {
    logger.info(
        "E2E: Testing format=$formatName, compression=$compressionTypeName, file=$resourcePath",
    )

    val queuedIngestClient = createTestClient()
    try {
        val resourceFile =
            this::class.java.classLoader.getResource(resourcePath)
        if (resourceFile == null) {
            logger.warn(
                "Skipping test: Resource file not found: $resourcePath",
            )
            assumeTrue(false, "Resource file not found: $resourcePath")
            return@runBlocking
        }

        val format = Format.valueOf(formatName)
        val compressionType = CompressionType.valueOf(compressionTypeName)
        val fileExtension = resourcePath.substringAfterLast('.')

        val tempFile =
            Files.createTempFile("test_$formatName", ".$fileExtension")
        Files.copy(
            resourceFile.openStream(),
            tempFile,
            StandardCopyOption.REPLACE_EXISTING,
        )

        val source =
            FileSource(
                path = tempFile,
                format = format,
                compressionType = compressionType,
                sourceId = UUID.randomUUID(),
            )

        try {
            // Backdate the creation time so the extent timestamp assertion
            // below verifies something other than "now".
            val createdTimeTag =
                OffsetDateTime.now(Clock.systemUTC())
                    .minusHours((1..5L).random())
            val extentTags =
                listOf("ingest-by:i-tag") + listOf("drop-by:d-tag")
            val response =
                queuedIngestClient.ingestAsync(
                    sources = listOf(source),
                    database = database,
                    table = targetTable,
                    ingestRequestProperties
=
                        IngestRequestPropertiesBuilder(format = format)
                            .withEnableTracking(true)
                            .withIngestByTags(listOf("i-tag"))
                            .withDropByTags(listOf("d-tag"))
                            .withCreationTime(createdTimeTag)
                            .build(),
                )

            val operationId =
                assertValidIngestionResponse(
                    response,
                    "$formatName format test",
                )
            logger.info(
                "$formatName format test: submitted with operation ID $operationId",
            )

            val status =
                queuedIngestClient.pollUntilCompletion(
                    database = database,
                    table = targetTable,
                    operationId = operationId,
                    pollingInterval = pollInterval,
                    timeout = pollTimeout,
                )
            val succeededCount =
                status.details?.count {
                    it.status == BlobStatus.Status.Succeeded
                } ?: 0
            assert(succeededCount > 0) {
                "Expected successful ingestion for $formatName and operation-id $operationId. Got response: " +
                    jsonPrinter.encodeToString(status)
            }
            logger.info(
                "$formatName format test: passed ($succeededCount succeeded)",
            )
            awaitAndQuery(
                query =
                    "$targetTable | where format == '$format' | summarize count=count() by format",
                expectedResultsCount = expectedRecordCount.toLong(),
            )

            val extentDetailsResults =
                adminClusterClient
                    .executeMgmt(
                        database,
                        ".show table $targetTable extents | project MinCreatedOn,Tags",
                    )
                    .primaryResults
            assertNotNull(
                extentDetailsResults,
                "Query results should not be null",
            )
            extentDetailsResults.next()
            val actualTags: String = extentDetailsResults.getString("Tags")
            /* TODO: This is being checked on the ingestion service side now. Uncomment when confirmed. */
            val actualCreatedOnTime: Instant =
                Instant.parse(
                    extentDetailsResults.getString("MinCreatedOn"),
                )
            assertNotNull(
                actualCreatedOnTime,
                "Extent timestamp should not be null",
            )
            assertNotNull(actualTags, "Extent tags should not be null")

            val actualCreatedOnInstant =
                actualCreatedOnTime.truncatedTo(ChronoUnit.SECONDS)
            val expectedCreatedOnInstant =
                createdTimeTag
                    .toInstant()
                    .truncatedTo(ChronoUnit.SECONDS)
            assert(actualCreatedOnInstant == expectedCreatedOnInstant) {
                "Extent creation time $actualCreatedOnInstant does not match expected $expectedCreatedOnInstant (rounded to seconds)"
            }
            extentTags.forEach { tag ->
                assert(actualTags.contains(tag)) {
                    "Extent tags $actualTags do not contain expected tag $tag"
                }
            }
        } catch (e: Exception) {
            e.printStackTrace()
            fail("Ingestion failed for $formatName: ${e.message}")
        } finally {
            Files.deleteIfExists(tempFile)
        }
    } catch (e: ConnectException) {
        assumeTrue(false, "Skipping test: ${e.message}")
    } catch (e: Exception) {
        if (e.cause is ConnectException) {
            assumeTrue(false, "Skipping test: ${e.cause?.message}")
        } else {
            throw e
        }
    }
}

@Test
fun `E2E - format mismatch and mixed format batch`(): Unit = runBlocking {
    logger.info("E2E: Testing format mismatch detection with mixed formats")

    val client = createTestClient()

    val jsonContent =
        """{"name":"test","value":123,"timestamp":"2024-01-01"}"""
    val csvContent =
        """name,value,timestamp
test,123,2024-01-01
test2,456,2024-01-02"""

    val sources =
        listOf(
            StreamSource(
                stream = ByteArrayInputStream(jsonContent.toByteArray()),
                format = Format.json,
                sourceCompression = CompressionType.NONE,
                sourceId = UUID.randomUUID(),
                name = "format_json.json",
            ),
            StreamSource(
                stream = ByteArrayInputStream(csvContent.toByteArray()),
                format = Format.csv,
                sourceCompression = CompressionType.NONE,
                sourceId = UUID.randomUUID(),
                name =
"format_csv.csv", + ), + StreamSource( + stream = + ByteArrayInputStream( + jsonContent.toByteArray(), + ), + format = Format.json, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = "format_json2.json", + ), + ) + + logger.info( + "Uploading ${sources.size} sources with mixed formats (JSON and CSV)", + ) + val exception = + assertThrows { + client.ingestAsync( + database = database, + table = targetTable, + sources = sources, + ingestRequestProperties = + IngestRequestPropertiesBuilder( + format = Format.json, + ) + .withEnableTracking(true) + .build(), + ) + } + assertNotNull( + exception, + "Mixed formats are not permitted for ingestion", + ) + } + + @ParameterizedTest( + name = "[LocalSource] {index} => SourceType={0}, TestName={1}", + ) + @CsvSource( + "file,QueuedIngestion-FileSource,SampleFileSource.json", + "stream,QueuedIngestion-StreamSource,SampleStreamSource.json", + ) + fun `test queued ingestion with local sources`( + sourceType: String, + testName: String, + fileName: String, + ) = runBlocking { + logger.info("Starting LocalSource test: $testName") + val deviceDataUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" + val deviceData = java.net.URL(deviceDataUrl).readText() + val targetFormat = Format.multijson + val source: IngestionSource = + when (sourceType) { + "file" -> { + val tempFile = Files.createTempFile(fileName, null) + Files.write(tempFile, deviceData.toByteArray()) + FileSource( + path = tempFile, + format = targetFormat, + compressionType = CompressionType.NONE, + sourceId = UUID.randomUUID(), + ) + .also { + Runtime.getRuntime() + .addShutdownHook( + Thread { + Files.deleteIfExists( + tempFile, + ) + }, + ) + } + } + "stream" -> + StreamSource( + stream = + ByteArrayInputStream( + deviceData.toByteArray(), + ), + format = targetFormat, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + name = fileName, + ) + else -> error("Unknown sourceType: $sourceType") + } + + val queuedIngestClient = createTestClient() + val properties = + IngestRequestPropertiesBuilder(format = targetFormat) + .withEnableTracking(true) + .build() + + val ingestionResponse = + queuedIngestClient.ingestAsync( + database = database, + table = targetTable, + sources = listOf(source), + ingestRequestProperties = properties, + ) + + val operationId = + assertValidIngestionResponse(ingestionResponse, testName) + logger.info("$testName: Submitted with operation ID: $operationId") + + val finalStatus = + queuedIngestClient.pollUntilCompletion( + database = database, + table = targetTable, + operationId = operationId, + pollingInterval = pollInterval, + timeout = pollTimeout, + ) + logger.info("$testName: Completed with status: ${finalStatus.status}") + assert( + finalStatus.details?.any { + it.status == BlobStatus.Status.Succeeded + } == true, + ) { + "Expected at least one successful ingestion for $testName" + } + } + + @Test + fun `E2E - OneLake uploader test`(): Unit = runBlocking { + if (oneLakeFolder != null) { + logger.info("E2E: Testing OneLake uploader") + + // Create a ConfigurationResponse with OneLake lakeFolders configuration + val oneLakeConfigResponse = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = null, + lakeFolders = + listOf( + ContainerInfo( + path = + oneLakeFolder, + ), + ), + refreshInterval = null, + preferredUploadMethod = "Rest", + ), + ingestionSettings = null, + ) + + // Create a configuration cache that returns our OneLake configuration 
+ val oneLakeConfiguration = + DefaultConfigurationCache( + dmUrl = dmEndpoint, + tokenCredential = tokenProvider, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + configurationProvider = { oneLakeConfigResponse }, + ) + + val builder = + QueuedIngestClientBuilder.create(dmEndpoint) + .withAuthentication(tokenProvider) + .withConfiguration(oneLakeConfiguration) + .skipSecurityChecks() + + val oneLakeIngestClient = builder.build() + + val source = createTestStreamSource(1024 * 10, "onelake_test.json") + + try { + val response = + oneLakeIngestClient.ingestAsync( + database = database, + table = targetTable, + source = source, + ingestRequestProperties = + IngestRequestPropertiesBuilder( + format = + Format.multijson, + ) + .withEnableTracking(true) + .build(), + ) + + val operationId = + assertValidIngestionResponse( + response, + "OneLake uploader test", + ) + logger.info( + "OneLake uploader test: submitted with operation ID $operationId", + ) + } catch (e: Exception) { + e.printStackTrace() + fail("Ingestion failed for OneLake uploader: ${e.message}") + } + } else { + logger.warn( + "Skipping OneLake uploader test: ONE_LAKE_FOLDER not set", + ) + assumeTrue(false, "ONE_LAKE_FOLDER environment variable is not set") + } + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt deleted file mode 100644 index 4981e05fb..000000000 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestionClientTest.kt +++ /dev/null @@ -1,680 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2 - -import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestionClientBuilder -import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.ColumnMapping -import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.InlineIngestionMapping -import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.TransformationMethod -import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus -import com.microsoft.azure.kusto.ingest.v2.models.Format -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.source.AbstractSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.CompressionType -import com.microsoft.azure.kusto.ingest.v2.source.FileSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo -import kotlinx.coroutines.runBlocking -import kotlinx.serialization.json.Json -import org.junit.jupiter.api.Assumptions.assumeTrue -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.TestInstance -import org.junit.jupiter.api.parallel.Execution -import org.junit.jupiter.api.parallel.ExecutionMode -import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.CsvSource -import java.io.ByteArrayInputStream -import java.net.ConnectException -import java.nio.file.Files -import java.util.UUID -import kotlin.test.assertNotNull -import kotlin.time.Duration - -@TestInstance(TestInstance.Lifecycle.PER_CLASS) -@Execution(ExecutionMode.CONCURRENT) -class QueuedIngestionClientTest : - IngestV2TestBase(QueuedIngestionClientTest::class.java) { - - private val POLLING_INTERVAL = 
Duration.parse("PT2S") - private val POLLING_TIMEOUT = Duration.parse("PT2M") - - @Test - fun `test builder variations`() { - // builder with optional parameters - val client1 = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .withClientDetails("TestClient", "1.0") - .withMaxConcurrency(10) - .build() - assertNotNull(client1, "Client with optional parameters should not be null") - - // builder with connector client details - val client2 = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .withConnectorClientDetails( - name = "TestConnector", - version = "2.0", - appName = "MyApp", - appVersion = "1.5", - additionalFields = - mapOf( - "JobId" to "job-123", - "RunId" to "run-456", - ), - ) - .build() - assertNotNull(client2, "Client with connector details should not be null") - - // builder with connector client details and user - val client3 = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .withConnectorClientDetails( - name = "TestConnector", - version = "2.0", - sendUser = true, - overrideUser = "test-user@example.com", - ) - .build() - assertNotNull(client3, "Client with connector details and user should not be null") - } - - @ParameterizedTest(name = "[QueuedIngestion] {index} => TestName ={0}") - @CsvSource( - // Single JSON blob, no mapping - "QueuedIngestion-NoMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,false,0", - // Single JSON blob, with mapping reference - "QueuedIngestion-WithMappingReference,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,true,false,0", - // Single JSON blob, with inline mapping - "QueuedIngestion-WithInlineMapping,https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json,false,true,0", - ) - fun `test queued ingestion with blob variations`( - testName: String, - blobUrl: String, - useMappingReference: Boolean, - useInlineIngestionMapping: Boolean, - numberOfFailures: Int, - ): Unit = runBlocking { - logger.info("Starting test: $testName") - val queuedIngestionClient: IngestClient = - QueuedIngestionClient( - dmUrl = dmEndpoint, - tokenCredential = tokenProvider, - skipSecurityChecks = true, - ) - val testSources = listOf(BlobSourceInfo(blobUrl)) - - val properties = - if (useMappingReference) { - IngestRequestProperties( - format = targetTestFormat, - ingestionMappingReference = "${targetTable}_mapping", - enableTracking = true, - ) - } else if (useInlineIngestionMapping) { - val ingestionColumnMappings = - columnNamesToTypes.keys.map { col -> - when (col) { - "SourceLocation" -> - ColumnMapping(columnName = col, columnType = "string") - .apply { setTransform(TransformationMethod.SourceLocation) } - "Type" -> - ColumnMapping(columnName = col, columnType = "string") - .apply { setConstantValue("IngestionMapping") } - else -> - ColumnMapping(columnName = col, columnType = columnNamesToTypes[col]!!) 
- .apply { setPath("$.$col") } - } - } - val inlineIngestionMappingInline = - InlineIngestionMapping( - columnMappings = ingestionColumnMappings, - ingestionMappingType = InlineIngestionMapping.IngestionMappingType.JSON, - ) - val ingestionMappingString = - Json.encodeToString(inlineIngestionMappingInline.columnMappings) - IngestRequestProperties( - format = targetTestFormat, - ingestionMapping = ingestionMappingString, - enableTracking = true, - ) - } else { - IngestRequestProperties( - format = targetTestFormat, - enableTracking = true, - ) - } - - try { - val ingestionResponse = - queuedIngestionClient.submitIngestion( - database = database, - table = targetTable, - sources = testSources, - format = targetTestFormat, - ingestProperties = properties, - ) - - logger.info("$testName: Submitted with operation ID: ${ingestionResponse.ingestionOperationId}") - assertNotNull(ingestionResponse, "IngestionOperation should not be null") - assertNotNull(ingestionResponse.ingestionOperationId, "Operation ID should not be null") - - val finalStatus = - (queuedIngestionClient as QueuedIngestionClient) - .pollUntilCompletion( - database = database, - table = targetTable, - operationId = ingestionResponse.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - - logger.info("$testName: Completed with status: ${finalStatus.status}") - - if (finalStatus.details?.isNotEmpty() == true) { - val succeededCount = finalStatus.details.count { it.status == BlobStatus.Status.Succeeded } - val failedCount = finalStatus.details.count { it.status == BlobStatus.Status.Failed } - logger.info("$testName: Succeeded: $succeededCount, Failed: $failedCount") - - assert(succeededCount > 0 || failedCount > 0) { - "Expected at least some blobs to be processed" - } - assert(failedCount == numberOfFailures) { - "Expected $numberOfFailures failed ingestions, but got $failedCount" - } - - if (failedCount > 0) { - finalStatus.details - .filter { it.status == BlobStatus.Status.Failed } - .forEach { logger.error("Failed blob: ${it.sourceId}, message: ${it.details}") } - } - - val filterType = when { - useMappingReference -> "MappingRef" - useInlineIngestionMapping -> "IngestionMapping" - else -> "None" - } - if (useMappingReference || useInlineIngestionMapping) { - val results = - adminClusterClient - .executeQuery(database, "$targetTable | where Type == '$filterType' | summarize count=count() by SourceLocation") - .primaryResults - assertNotNull(results, "Query results should not be null") - results.next() - val count: Long = results.getLong("count") - assertNotNull(count, "Count should not be null") - assert(count > 0) { "Expected some records in the table after ingestion" } - } - } - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - private fun createTestStreamSource(sizeInBytes: Int, name: String): StreamSourceInfo { - val jsonLine = """{"testField":"value","size":$sizeInBytes,"name":"$name"}""" + "\n" - val jsonLineBytes = jsonLine.toByteArray() - val numLines = (sizeInBytes / jsonLineBytes.size).coerceAtLeast(1) - val data = ByteArray(numLines * jsonLineBytes.size) - - for (i in 0 until numLines) { - System.arraycopy(jsonLineBytes, 0, data, i * jsonLineBytes.size, jsonLineBytes.size) - } - - return StreamSourceInfo( - stream = ByteArrayInputStream(data), - format = Format.multijson, - 
sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = name, - ) - } - - @Test - fun `E2E - file size variations and batch uploads`() = runBlocking { - logger.info("E2E: Testing combined file sizes (small, large, batch)") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - try { - // Small file (1KB) - logger.info("Testing small file upload (1KB)") - val smallSource = createTestStreamSource(1024, "combined_small.json") - val smallResponse = - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(smallSource), - format = Format.multijson, - ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), - ) - assertNotNull(smallResponse.ingestionOperationId) - val smallStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = smallResponse.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - val smallSucceeded = smallStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - assert(smallSucceeded > 0) { "Expected successful small file ingestion" } - logger.info("Small file upload completed: $smallSucceeded succeeded") - - // Large file (10MB) - logger.info("Testing large file upload (10MB)") - val largeSource = createTestStreamSource(10 * 1024 * 1024, "combined_large.json") - val largeResponse = - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(largeSource), - format = Format.multijson, - ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), - ) - assertNotNull(largeResponse.ingestionOperationId) - val largeStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = largeResponse.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - val largeSucceeded = largeStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - assert(largeSucceeded > 0) { "Expected successful large file ingestion" } - logger.info("Large file upload completed: $largeSucceeded succeeded") - - // Batch upload (5 files) - logger.info("Testing batch upload (5 files)") - val batchSources = (1..5).map { i -> createTestStreamSource(1024 * i, "combined_batch_$i.json") } - val batchResponse = - client.submitIngestion( - database = database, - table = targetTable, - sources = batchSources, - format = Format.multijson, - ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), - ) - assertNotNull(batchResponse.ingestionOperationId) - val batchStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = batchResponse.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - val batchSucceeded = batchStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - assert(batchSucceeded == batchSources.size) { "Expected all batch files to succeed" } - logger.info("Batch upload completed: $batchSucceeded/${batchSources.size} succeeded") - - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @Test - fun `E2E - parallel processing with maxConcurrency`() = runBlocking { - 
logger.info("E2E: Testing parallel processing with maxConcurrency=5") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .withMaxConcurrency(5) - .skipSecurityChecks() - .build() - - val sources = (1..10).map { i -> createTestStreamSource(512 * 1024, "parallel_$i.json") } - - try { - val startTime = System.currentTimeMillis() - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = Format.multijson, - ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), - ) - val uploadDuration = System.currentTimeMillis() - startTime - - assertNotNull(response.ingestionOperationId) - logger.info("Parallel upload submitted in ${uploadDuration}ms") - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - - val succeededCount = finalStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - logger.info("Parallel upload: $succeededCount/${sources.size} succeeded (avg ${uploadDuration / sources.size}ms per file)") - assert(succeededCount == sources.size) { "Expected parallel uploads to succeed" } - - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @ParameterizedTest(name = "[SizeValidation] {index} => Scenario={0}") - @CsvSource( - "within-limit,10,5,false,true", // 5MB file, 10MB limit, no ignore, expect success - "exceeds-limit,1,2,false,false", // 2MB file, 1MB limit, no ignore, expect rejection - "ignore-flag,1,2,true,true" // 2MB file, 1MB limit, with ignore, expect success - ) - fun `E2E - size validation scenarios`( - scenario: String, - maxSizeMB: Long, - fileSizeMB: Int, - ignoreSize: Boolean, - expectSuccess: Boolean - ) = runBlocking { - logger.info("E2E: Testing size validation scenario: $scenario") - - val clientBuilder = QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .withMaxDataSize(maxSizeMB * 1024 * 1024) - .skipSecurityChecks() - - val client = if (ignoreSize) { - clientBuilder.withIgnoreFileSize(true).build() - } else { - clientBuilder.build() - } - - val source = createTestStreamSource(fileSizeMB * 1024 * 1024, "size_${scenario}.json") - - try { - if (expectSuccess) { - val response = client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = Format.multijson, - ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), - ) - - assertNotNull(response.ingestionOperationId) - logger.info("E2E: $scenario - Submitted successfully: ${response.ingestionOperationId}") - - val finalStatus = client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - - val succeededCount = finalStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - assert(succeededCount > 0) { "Expected successful upload for scenario: $scenario" } - logger.info("E2E: $scenario - Completed successfully") - } else { - try { - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(source), - format = 
Format.multijson, - ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true), - ) - throw AssertionError("Expected size validation to reject the file for scenario: $scenario") - } catch (e: IngestException) { - logger.info("E2E: $scenario - Size validation correctly rejected: ${e.message}") - } - logger.info("E2E: $scenario - Correctly rejected file exceeding limit") - } - } catch (e: AssertionError) { - if (!expectSuccess) { - logger.info("E2E: $scenario - Size limit enforced as expected") - } else { - throw e - } - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else if (!expectSuccess && e.message?.contains("size", ignoreCase = true) == true) { - logger.info("E2E: $scenario - Size validation correctly rejected: ${e.message}") - } else { - throw e - } - } - } - - @Test - fun `E2E - compression format tests`() = runBlocking { - logger.info("E2E: Testing compression formats (JSON, GZIP, Parquet, AVRO)") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - try { - // JSON file (uncompressed, gets compressed during upload) - logger.info("Testing JSON file compression during upload") - val jsonData = """{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000002","temperature":25.5,"humidity":60.0}""" - val jsonFile = Files.createTempFile("test_json", ".json") - Files.write(jsonFile, jsonData.toByteArray()) - val jsonSource = FileSourceInfo(path = jsonFile, format = Format.multijson, compressionType = CompressionType.NONE, name = "test_json.json", sourceId = UUID.randomUUID()) - val jsonResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(jsonSource), format = Format.multijson, ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true)) - assertNotNull(jsonResponse.ingestionOperationId) - val jsonStatus = client.pollUntilCompletion(database = database, table = targetTable, operationId = jsonResponse.ingestionOperationId, pollingInterval = POLLING_INTERVAL, timeout = POLLING_TIMEOUT) - val jsonSucceeded = jsonStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - assert(jsonSucceeded > 0) { "Expected successful JSON ingestion" } - logger.info("JSON file compression test: passed") - Files.deleteIfExists(jsonFile) - - // GZIP pre-compressed file - logger.info("Testing GZIP pre-compressed file") - val gzipFile = Files.createTempFile("test_gzip", ".json.gz") - java.util.zip.GZIPOutputStream(Files.newOutputStream(gzipFile)).use { it.write(jsonData.toByteArray()) } - val gzipSource = FileSourceInfo(path = gzipFile, format = Format.multijson, compressionType = CompressionType.GZIP, name = "pre_compressed.json.gz", sourceId = UUID.randomUUID()) - val gzipResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(gzipSource), format = Format.multijson, ingestProperties = IngestRequestProperties(format = Format.multijson, enableTracking = true)) - assertNotNull(gzipResponse.ingestionOperationId) - val gzipStatus = client.pollUntilCompletion(database = database, table = targetTable, operationId = gzipResponse.ingestionOperationId, pollingInterval = POLLING_INTERVAL, timeout = POLLING_TIMEOUT) - val gzipSucceeded = 
gzipStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - assert(gzipSucceeded > 0) { "Expected successful GZIP ingestion" } - logger.info("GZIP pre-compressed test: passed") - Files.deleteIfExists(gzipFile) - - // Parquet and AVRO (skip if resources not found) - val parquetFile = this::class.java.classLoader.getResource("compression/sample.parquet") - if (parquetFile != null) { - logger.info("Testing Parquet format") - val tempParquet = Files.createTempFile("test_parquet", ".parquet") - Files.copy(parquetFile.openStream(), tempParquet, java.nio.file.StandardCopyOption.REPLACE_EXISTING) - val parquetSource = FileSourceInfo(path = tempParquet, format = Format.parquet, compressionType = CompressionType.NONE, name = "test.parquet", sourceId = UUID.randomUUID()) - try { - val parquetResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(parquetSource), format = Format.parquet, ingestProperties = IngestRequestProperties(format = Format.parquet, enableTracking = true)) - assertNotNull(parquetResponse.ingestionOperationId) - logger.info("Parquet format test: submitted (schema may not match)") - } catch (e: Exception) { - logger.warn("Parquet test skipped (may be due to schema mismatch): ${e.message}") - } - Files.deleteIfExists(tempParquet) - } - - val avroFile = this::class.java.classLoader.getResource("compression/sample.avro") - if (avroFile != null) { - logger.info("Testing AVRO format") - val tempAvro = Files.createTempFile("test_avro", ".avro") - Files.copy(avroFile.openStream(), tempAvro, java.nio.file.StandardCopyOption.REPLACE_EXISTING) - val avroSource = FileSourceInfo(path = tempAvro, format = Format.avro, compressionType = CompressionType.NONE, name = "test.avro", sourceId = UUID.randomUUID()) - try { - val avroResponse = client.submitIngestion(database = database, table = targetTable, sources = listOf(avroSource), format = Format.avro, ingestProperties = IngestRequestProperties(format = Format.avro, enableTracking = true)) - assertNotNull(avroResponse.ingestionOperationId) - logger.info("AVRO format test: submitted (schema may not match)") - } catch (e: Exception) { - logger.warn("AVRO test skipped (may be due to schema mismatch): ${e.message}") - } - Files.deleteIfExists(tempAvro) - } - - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @Test - fun `E2E - format mismatch and mixed format batch`() = runBlocking { - logger.info("E2E: Testing format mismatch detection with mixed formats") - - val client = - QueuedIngestionClientBuilder.create(dmEndpoint) - .withAuthentication(tokenProvider) - .skipSecurityChecks() - .build() - - val jsonContent = """{"name":"test","value":123,"timestamp":"2024-01-01"}""" - val csvContent = """name,value,timestamp -test,123,2024-01-01 -test2,456,2024-01-02""" - - val sources = - listOf( - StreamSourceInfo(stream = ByteArrayInputStream(jsonContent.toByteArray()), format = Format.json, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = "format_json.json"), - StreamSourceInfo(stream = ByteArrayInputStream(csvContent.toByteArray()), format = Format.csv, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = "format_csv.csv"), - StreamSourceInfo(stream = ByteArrayInputStream(jsonContent.toByteArray()), format = Format.json, sourceCompression = 
CompressionType.NONE, sourceId = UUID.randomUUID(), name = "format_json2.json"), - ) - - try { - logger.info("Uploading ${sources.size} sources with mixed formats (JSON and CSV)") - val response = - client.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = Format.json, - ingestProperties = IngestRequestProperties(format = Format.json, enableTracking = true), - ) - - assertNotNull(response.ingestionOperationId) - logger.info("Mixed format batch submitted: ${response.ingestionOperationId}") - - val finalStatus = - client.pollUntilCompletion( - database = database, - table = targetTable, - operationId = response.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - - val succeededCount = finalStatus.details?.count { it.status == BlobStatus.Status.Succeeded } ?: 0 - val failedCount = finalStatus.details?.count { it.status == BlobStatus.Status.Failed } ?: 0 - - logger.info("Format mismatch results - Success: $succeededCount, Failed: $failedCount") - - if (failedCount > 0) { - finalStatus.details - ?.filter { it.status == BlobStatus.Status.Failed } - ?.forEach { logger.error("Failed: ${it.sourceId}, errorCode: ${it.errorCode}, details: ${it.details}") } - } - - assert(failedCount >= 1) { - "Expected at least one failure due to format mismatch, but got: succeeded=$succeededCount, failed=$failedCount" - } - logger.info("Format mismatch correctly detected by server") - - } catch (e: ConnectException) { - assumeTrue(false, "Skipping test: ${e.message}") - } catch (e: Exception) { - if (e.cause is ConnectException) { - assumeTrue(false, "Skipping test: ${e.cause?.message}") - } else { - throw e - } - } - } - - @ParameterizedTest(name = "[LocalSource] {index} => SourceType={0}, TestName={1}") - @CsvSource( - "file,QueuedIngestion-FileSource,SampleFileSource.json", - "stream,QueuedIngestion-StreamSource,SampleStreamSource.json", - ) - fun `test queued ingestion with local sources`( - sourceType: String, - testName: String, - fileName: String, - ) = runBlocking { - logger.info("Starting LocalSource test: $testName") - val deviceDataUrl = "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json" - val deviceData = java.net.URL(deviceDataUrl).readText() - val targetFormat = Format.multijson - val source: AbstractSourceInfo = - when (sourceType) { - "file" -> { - val tempFile = Files.createTempFile(fileName, null) - Files.write(tempFile, deviceData.toByteArray()) - FileSourceInfo(path = tempFile, format = targetFormat, compressionType = CompressionType.NONE, name = fileName, sourceId = UUID.randomUUID()) - .also { - Runtime.getRuntime().addShutdownHook(Thread { Files.deleteIfExists(tempFile) }) - } - } - "stream" -> - StreamSourceInfo(stream = ByteArrayInputStream(deviceData.toByteArray()), format = targetFormat, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), name = fileName) - else -> error("Unknown sourceType: $sourceType") - } - - val queuedIngestionClient: IngestClient = - QueuedIngestionClient(dmUrl = dmEndpoint, tokenCredential = tokenProvider, skipSecurityChecks = true) - val properties = IngestRequestProperties(format = targetFormat, enableTracking = true) - - val ingestionResponse = queuedIngestionClient.submitIngestion(database = database, table = targetTable, sources = listOf(source), format = targetFormat, ingestProperties = properties) - logger.info("$testName: Submitted with operation ID: ${ingestionResponse.ingestionOperationId}") - 
assertNotNull(ingestionResponse, "IngestionOperation should not be null") - assertNotNull(ingestionResponse.ingestionOperationId, "Operation ID should not be null") - - val finalStatus = - (queuedIngestionClient as QueuedIngestionClient) - .pollUntilCompletion( - database = database, - table = targetTable, - operationId = ingestionResponse.ingestionOperationId, - pollingInterval = POLLING_INTERVAL, - timeout = POLLING_TIMEOUT, - ) - logger.info("$testName: Completed with status: ${finalStatus.status}") - assert(finalStatus.details?.any { it.status == BlobStatus.Status.Succeeded } == true) { - "Expected at least one successful ingestion for $testName" - } - } -} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index cafcb5611..76e01bf6a 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -2,36 +2,28 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2 +import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.client.IngestClient import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -import com.microsoft.azure.kusto.ingest.v2.source.BlobSourceInfo -import com.microsoft.azure.kusto.ingest.v2.source.CompressionType -import com.microsoft.azure.kusto.ingest.v2.source.StreamSourceInfo +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.assertThrows -import org.junit.jupiter.api.parallel.Execution -import org.junit.jupiter.api.parallel.ExecutionMode import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource -import java.io.ByteArrayInputStream -import java.net.ConnectException import java.util.UUID import java.util.stream.Stream import kotlin.test.assertNotNull @TestInstance(TestInstance.Lifecycle.PER_CLASS) -@Execution(ExecutionMode.CONCURRENT) class StreamingIngestClientTest : IngestV2TestBase(StreamingIngestClientTest::class.java) { private val publicBlobUrl = "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" private val targetUuid = UUID.randomUUID().toString() - private val randomRow: String = - """{"timestamp": "2023-05-02 15:23:50.0000000","deviceId": "$targetUuid","messageId": "7f316225-839a-4593-92b5-1812949279b3","temperature": 31.0301639051317,"humidity": 62.0791099602725}""" - .trimIndent() private fun testParameters(): Stream { return Stream.of( @@ -77,12 +69,9 @@ class StreamingIngestClientTest : blobUrl: String?, ) = runBlocking { logger.info("Running streaming ingest builder test {}", testName) - // Create client using builder val client: IngestClient = - com.microsoft.azure.kusto.ingest.v2.builders - .StreamingIngestClientBuilder - .create(cluster) + StreamingIngestClientBuilder.create(cluster) .withAuthentication(tokenProvider) .skipSecurityChecks() .withClientDetails("BuilderStreamingE2ETest", "1.0") @@ -92,18 +81,18 @@ class StreamingIngestClientTest : if (isException) { if (blobUrl != null) { logger.info( - "Testing error handling for invalid blob URL with 
builder: {}", + "Testing error handling for invalid blob URL with builder: {} for unreachable host: {}", blobUrl, + isUnreachableHost, ) val exception = assertThrows { - val sources = listOf(BlobSourceInfo(blobUrl)) - client.submitIngestion( + val ingestionSource = BlobSource(blobUrl) + client.ingestAsync( database = database, table = targetTable, - sources = sources, - format = targetTestFormat, - ingestProperties = ingestProps, + source = ingestionSource, + ingestRequestProperties = ingestProps, ) } assertNotNull( @@ -120,206 +109,22 @@ class StreamingIngestClientTest : } } else { if (blobUrl != null) { - logger.info( - "Blob-based streaming ingestion with builder: {}", - blobUrl, - ) - - val sources = listOf(BlobSourceInfo(blobUrl)) - client.submitIngestion( + val ingestionSource = BlobSource(blobUrl) + client.ingestAsync( database = database, table = targetTable, - sources = sources, - format = targetTestFormat, - ingestProperties = ingestProps, + source = ingestionSource, + ingestRequestProperties = ingestProps, ) logger.info( "Blob-based streaming ingestion submitted successfully (builder)", ) - - kotlinx.coroutines.delay(3000) - val results = - adminClusterClient - .executeQuery( - database, - "$targetTable | summarize count=count()", - ) - .primaryResults - - assertNotNull(results, "Query results should not be null") - results.next() - val count: Long = results.getLong("count") - assertNotNull(count, "Count should not be null") - assert(count > 0) { - "Expected records in table after builder streaming ingestion" - } - - logger.info( - "Builder streaming ingestion verified - {} records", - count, + awaitAndQuery( + query = "$targetTable | summarize count=count()", + expectedResultsCount = 5, ) } } } - - @ParameterizedTest(name = "{0}") - @MethodSource("testParameters") - fun `run streaming ingest test with various clusters`( - testName: String, - cluster: String, - isException: Boolean, - isUnreachableHost: Boolean, - blobUrl: String?, - ) = runBlocking { - logger.info("Running streaming ingest test {}", testName) - val client: IngestClient = - StreamingIngestClient(cluster, tokenProvider, true) - val ingestProps = IngestRequestProperties(format = targetTestFormat) - if (isException) { - if (blobUrl != null) { - logger.info( - "Testing error handling for invalid blob URL: {} (using interface method)", - blobUrl, - ) - val exception = - assertThrows { - val sources = listOf(BlobSourceInfo(blobUrl)) - client.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = targetTestFormat, - ingestProperties = ingestProps, - ) - } - assertNotNull( - exception, - "Exception should not be null for invalid blob URL", - ) - logger.info( - "Expected exception caught for invalid blob URL: {}", - exception.message, - ) - logger.info( - "Failure code: {}, isPermanent: {}", - exception.failureCode, - exception.isPermanent, - ) - assert(exception.failureCode != 0) { - "Expected non-zero failure code for invalid blob URL" - } - } else { - logger.info( - "Testing error handling for direct streaming ingestion", - ) - val table = "testtable" - val data = "col1,col2\nval1,val2".toByteArray() - val exception = - assertThrows { - val streamSource = - StreamSourceInfo( - stream = ByteArrayInputStream(data), - format = targetTestFormat, - sourceCompression = - CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "error-test-stream", - ) - client.submitIngestion( - database = database, - table = table, - sources = listOf(streamSource), - format = targetTestFormat, 
- ingestProperties = ingestProps, - ) - } - assertNotNull(exception, "Exception should not be null") - if (isUnreachableHost) { - assert(exception.cause is java.net.ConnectException) - assert(exception.isPermanent == false) - } else { - assert(exception.failureCode == 404) - assert(exception.isPermanent == false) - } - } - } else { - if (blobUrl != null) { - logger.info( - "Blob-based streaming ingestion with URL: {}", - blobUrl, - ) - val sources = listOf(BlobSourceInfo(blobUrl)) - client.submitIngestion( - database = database, - table = targetTable, - sources = sources, - format = targetTestFormat, - ingestProperties = ingestProps, - ) - - logger.info( - "Blob-based streaming ingestion submitted successfully", - ) - kotlinx.coroutines.delay(3000) - val results = - adminClusterClient - .executeQuery( - database, - "$targetTable | summarize count=count()", - ) - .primaryResults - assertNotNull(results, "Query results should not be null") - results.next() - val count: Long = results.getLong("count") - assertNotNull(count, "Count should not be null") - assert(count > 0) { - "Expected records in table after blob-based streaming ingestion, but got $count" - } - - logger.info( - "Blob-based streaming ingestion verified - {} records in table", - count, - ) - } else { - logger.info("Direct streaming ingestion - success case") - val streamSource = - StreamSourceInfo( - stream = - ByteArrayInputStream( - randomRow.toByteArray(), - ), - format = targetTestFormat, - sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "direct-stream-$targetUuid", - ) - - client.submitIngestion( - database = database, - table = targetTable, - sources = listOf(streamSource), - format = targetTestFormat, - ingestProperties = ingestProps, - ) - - val results = - adminClusterClient - .executeQuery( - database, - "$targetTable | where deviceId == '$targetUuid' | summarize count=count() by deviceId", - ) - .primaryResults - assertNotNull(results, "Query results should not be null") - results.next() - val count: Long = results.getLong("count") - assertNotNull(count, "Count should not be null") - assert(count == 1L) { - "Expected 1 record for $targetUuid, but got $count" - } - } - } - - logger.info("Blob streaming test '{}' completed successfully", testName) - } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt index f7e6a0259..224c8e9f6 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/DefaultConfigurationCacheTest.kt @@ -2,9 +2,13 @@ // Licensed under the MIT License. 
package com.microsoft.azure.kusto.ingest.v2.common +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo import com.microsoft.azure.kusto.ingest.v2.models.ContainerSettings +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.runBlocking import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertNotNull @@ -56,6 +60,7 @@ class DefaultConfigurationCacheTest { val refreshInterval = Duration.ofMillis(500) // 0.5 seconds val cache = DefaultConfigurationCache( + clientDetails = ClientDetails.createDefault(), refreshInterval = refreshInterval, configurationProvider = ::mockConfigurationProvider, @@ -93,6 +98,170 @@ class DefaultConfigurationCacheTest { ) assertEquals(2, callCount) // callCount should have increased + cache.close() + } + + @Test + fun `configuration is automatically refreshed during client lifetime`(): Unit = runBlocking { + // Simulate a short refresh interval for testing + val refreshInterval = Duration.ofMillis(500) + var fetchCount = 0 + + val configurationProvider: suspend () -> ConfigurationResponse = { + fetchCount++ + ConfigurationResponse( + containerSettings = + ContainerSettings( + preferredUploadMethod = + "METHOD_$fetchCount", + containers = + listOf( + ContainerInfo( + path = + "https://container$fetchCount.blob.core.windows.net/data", + ), + ), + ), + ) + } + + val cache = + DefaultConfigurationCache( + clientDetails = ClientDetails.createDefault(), + refreshInterval = refreshInterval, + configurationProvider = configurationProvider, + ) + + // First operation - should fetch fresh config + val config1 = cache.getConfiguration() + assertNotNull(config1) + assertEquals( + "METHOD_1", + config1.containerSettings?.preferredUploadMethod, + ) + assertEquals(1, fetchCount, "First call should fetch from provider") + + // Second operation immediately - should use cached value + val config2 = cache.getConfiguration() + assertEquals( + "METHOD_1", + config2.containerSettings?.preferredUploadMethod, + ) + assertEquals(1, fetchCount, "Should still use cached value") + + // Wait for cache to expire + Thread.sleep(600) + + // Third operation after expiry - should automatically refresh + val config3 = cache.getConfiguration() + assertNotNull(config3) + assertEquals( + "METHOD_2", + config3.containerSettings?.preferredUploadMethod, + ) + assertEquals(2, fetchCount, "Cache expired, should fetch fresh data") + + // Fourth operation immediately - should use newly cached value + val config4 = cache.getConfiguration() + assertEquals( + "METHOD_2", + config4.containerSettings?.preferredUploadMethod, + ) + assertEquals(2, fetchCount, "Should use newly cached value") + + // Wait for cache to expire again + Thread.sleep(600) + + // Fifth operation - should refresh again + val config5 = cache.getConfiguration() + assertNotNull(config5) + assertEquals( + "METHOD_3", + config5.containerSettings?.preferredUploadMethod, + ) + assertEquals( + 3, + fetchCount, + "Cache expired again, should fetch fresh data", + ) + + cache.close() + } + + @Test + fun `configuration refresh handles concurrent requests safely`(): Unit = + runBlocking { + val refreshInterval = Duration.ofMillis(100) + var fetchCount = 0 + val lock = Any() + + val configurationProvider: suspend () -> ConfigurationResponse = + { + val currentFetch = + 
synchronized(lock) { + fetchCount++ + fetchCount + } + // Simulate network delay + kotlinx.coroutines.delay(50) + ConfigurationResponse( + containerSettings = + ContainerSettings( + preferredUploadMethod = + "CONCURRENT_$currentFetch", + containers = + listOf( + ContainerInfo( + path = + "https://concurrent.blob.core.windows.net/data", + ), + ), + ), + ) + } + + val cache = + DefaultConfigurationCache( + clientDetails = ClientDetails.createDefault(), + refreshInterval = refreshInterval, + configurationProvider = configurationProvider, + ) + + // First call to populate cache + val initialConfig = cache.getConfiguration() + assertNotNull(initialConfig) + assertEquals(1, synchronized(lock) { fetchCount }) + + // Wait for expiry + Thread.sleep(150) + + // Make multiple concurrent requests after cache expires + val results = coroutineScope { + List(10) { async { cache.getConfiguration() } }.awaitAll() + } + + // All results should be consistent (same data) + // Due to the double-check locking pattern, only one of the concurrent + // fetches will actually update the cache + val uniqueResults = + results.map { + it.containerSettings?.preferredUploadMethod + } + .toSet() + assertEquals( + 1, + uniqueResults.size, + "All concurrent requests should return the same cached value", + ) + + // Multiple provider calls may happen due to concurrent access before + // synchronization, + // but this is acceptable as only one result gets cached + val finalFetchCount = synchronized(lock) { fetchCount } + assert(finalFetchCount >= 2) { + "At least one refresh should have occurred" + } + cache.close() } } diff --git a/ingest-v2/src/test/resources/compression/sample.avro b/ingest-v2/src/test/resources/compression/sample.avro index 866b12c0e2348fbbd8d2a6dbd8d42a88647f582f..a47d3abacbb4ac2ffeb25f379422cdc1a23d3e12 100644 GIT binary patch literal 441 zcmeZI%3@>@ODrqO*DFrWNX<>`VyRXtsVqoUvQjEaP0lY$QPNS$OUwoFgJAr$%+#Ee zVkN8SYM4|>W^QV6Nn&mRP&HI{aY<2TUb<4P4qQ=6YFTD-swc=qoN9o^6ep$=P*jqd zTaa3mSW;S)iZClBzceW)71_Rw(%j6H%#uoknw)%87p3JF|9{^S4hNS=i literal 414 zcmeZI%3@>@Nh~YM*GtY%NloTUNlnX1EJ+mu3l%44q~<0zu~aLSR2HNvSt%$Lr6%VW zr6}nrDCH&Qf<;17i%WvwqG_3_IVr_JmC*=2#U(|VdFjY{N-}eSx)O5>lxlSp@F`45 zEz3+!^-Lk84rp9)VmgMxl>E}9oKzH>Q*#SaixNvpi&9a|#8jG5nwy!DSyHJ~8(Yg@ z8pm68xN_A*=iisE-`S-k%(jg2vVoC-(M3}OmPG>0%-RNBp9~t9Vs%(VB!m{9=wM`# z(vdx2(mcPJZBuERN<_L(kCJe*!G@-Y=MTEG<_dCNn+a6*=8dGlh5`>}M$>wBhK_tE GbXx!)UyT(2 diff --git a/ingest-v2/src/test/resources/compression/sample.json b/ingest-v2/src/test/resources/compression/sample.json index 05acc4567..dff2a7d99 100644 --- a/ingest-v2/src/test/resources/compression/sample.json +++ b/ingest-v2/src/test/resources/compression/sample.json @@ -1,3 +1,3 @@ -{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000001","temperature":25.5,"humidity":60.0} -{"timestamp":"2024-01-01T01:00:00Z","deviceId":"00000000-0000-0000-0000-000000000002","messageId":"00000000-0000-0000-0000-000000000002","temperature":26.3,"humidity":62.5} -{"timestamp":"2024-01-01T02:00:00Z","deviceId":"00000000-0000-0000-0000-000000000003","messageId":"00000000-0000-0000-0000-000000000003","temperature":24.8,"humidity":58.2} +{"timestamp":"2024-01-01T00:00:00Z","deviceId":"00000000-0000-0000-0000-000000000001","messageId":"00000000-0000-0000-0000-000000000001","temperature":25.5,"humidity":60.0,"format":"json"} 
+{"timestamp":"2024-01-01T01:00:00Z","deviceId":"00000000-0000-0000-0000-000000000002","messageId":"00000000-0000-0000-0000-000000000002","temperature":26.3,"humidity":62.5,"format":"json"} +{"timestamp":"2024-01-01T02:00:00Z","deviceId":"00000000-0000-0000-0000-000000000003","messageId":"00000000-0000-0000-0000-000000000003","temperature":24.8,"humidity":58.2,"format":"json"} diff --git a/ingest-v2/src/test/resources/compression/sample.json.gz b/ingest-v2/src/test/resources/compression/sample.json.gz deleted file mode 100644 index dc1a3461fb34da2fa539cb71e67fc44e829bf64a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 171 zcmV;c095}UiwFok2qtL&19M?*aBO8RYIARH0G-ak3WG2V2H?Aok+X(0wIY3k-FMop z%&_%P5Um3d-+g6x*l-)v@RL*i@GTI1kTJT@g9Rq8D^e+EBKyh(O6cU_uX5_r1#MG( zS=*@8NH0fs%c?{Py$mi+gMej&riA+xeCOi~wjrdoSbk#FQ7q?Ud53LKe_$ESKP(@` Z>U^x;VQXyPv8F{{izg7}cV`Cz0050yP0auR diff --git a/ingest-v2/src/test/resources/compression/sample.multijson b/ingest-v2/src/test/resources/compression/sample.multijson new file mode 100644 index 000000000..ab5a233f6 --- /dev/null +++ b/ingest-v2/src/test/resources/compression/sample.multijson @@ -0,0 +1,8 @@ +{ + "timestamp": "2024-01-01T00:00:00Z", + "deviceId": "00000000-0000-0000-0000-000000000001", + "messageId": "00000000-0000-0000-0000-000000000001", + "temperature": 25.5, + "humidity": 60, + "format": "multijson" +} diff --git a/ingest-v2/src/test/resources/compression/sample.multijson.gz b/ingest-v2/src/test/resources/compression/sample.multijson.gz new file mode 100644 index 0000000000000000000000000000000000000000..7489eb6ec542b746cfd96ce386751af105329dc4 GIT binary patch literal 155 zcmV;M0A&9kiwFqJ<1%Rg19M?*aBO8RZFOvPX=-zCZUC$1QczGT$;?eHE=kNSP_j}` zGBPkS(KRpxq7VZED=>;u(g7(;NiEAvPW4OysWN~9T`U9?7(#RbjV?}1Cs|ubYHmSl zQDRAHQ7TZQk*S_3SU96JH!~%(q!K7*W&jpR%P-1JECCswTbfgnnN^&hr^Hpu1ptM_ J0?XL|003?8HiiHI literal 0 HcmV?d00001 diff --git a/ingest-v2/src/test/resources/compression/sample.multijson.zip b/ingest-v2/src/test/resources/compression/sample.multijson.zip new file mode 100644 index 0000000000000000000000000000000000000000..3413d618f0a89737238a74d9b53b3b65f0d34fe9 GIT binary patch literal 286 zcmWIWW@Zs#U|`^2I612|y7dUt=?Wn4CJ+k%adBd9K~Ab(ZfQ#^2-s)#T0RdVk z&-m)Pu?vbIsR({cz+TLD(B?>^KXjt?(O%;_9g4^0UOtrl;zkXQn z0My1=G)1kOp}5j4PNR{lESu^T&^2moiGK8nlCUA7YV1g%M|CkN#J$c7;+dc3z*kt# z1K#o=L{|_YLICwcGHg~VTeTO}{Dx(lG)!oCkVHdH(YC6NX_p5{N&^hk1`F11F>lQm znw24NteWdh+AaXN;aGOrs+i7B#UfN_f*U(_u~2m0)v^4qwko#ixJXPr32=q727;$?(E4i$E&=tqiv>JwqwYPt40 z4lY27C4t882))gYyXbe{1s;teHWAtD)IWDz7m+XBK;-ugNu(wP=`UGS9M8 z_1h8s)G;ca2_EO1FT$UAo^W2{)9d`PP{GX6DlCg2b3QMYutClstqIk-K$sA#lo;ZM z7@vxv4m~eI&7oYw5r(-Lx#BYuqr0P{aS<<(+s)gtjRGv6reV`tbXxXYQJ%gd@IVKY z@IgLia>X&TnQ|fd29>j%XI=hj$-|YAbkf}nkx&LPAyNSzFykZfF{?si#mOgD-mR@> zVtIR+ZzDe}T|HIU5?}Q4HZ1edszTU~g+mMxT?XNCXFvf%%hddvzt22W&41-tY* zw_K_x1;vMb7<8t_Aw?_#%5~CS{5doV2QK^;-PDnfVP<2c{CH;L98d^$+N|rykmC)l*JYRrOHyRQ1?5vtX|c&WDt$>dMT{ypQ?4`Moy} zyp8!7GtRKg@&@B)&~=1Dj}X#*H>O|QKwo_rfBZhY*M3sI4!*d-Am#%mv;cO~@bS-t z0s{@q`aVH4KSFx#8bWAtZVX1hZ|u?>0?0po%$)(|(exas*!O&B(HB^qKro3tpGPQ8 zhQxW?6*ND`_Mb)aJ~#ALiO`!^#r|)xu}pkGTL>YaFZV39_TvxmY^=KPA8xOmXDk#L zSoCLLLozii9mNLAEO-zneB;HniOchATWin1geSZ?0uHi?5Lko)0~=ZxL(&m&VKa$B z{+3$REt#2wO1U`}y5bm?-g0EiHb1{UbBFHEOo1Yx>4%1@=e3(?V+O=jcm-@*^1eqp z38q&a-Lmzj?6jLYVu&PAZCi$BI9(LXI z*_G&bv;f0Xjmg}0_3jH;495L#eezdDL%(hi;MOJ4Y zK38K?Jg7UO-K*{8B}`i#Cqgy7THXFBWd8b-Pc)#owp zVDN)C-Q*rD$r|JBN(N@P@|M|_xbm_XFCHqnvZK^DjV&Xsh&!GAyw#VPDO<64Q|HRP ze66uB#Mo9oQ>ZENqnIKjU0I@KC|1^ycJeErV`U*`stLZTh}hW3U_-J@Q{ni%SJ|js 
z7GlZv!~~yIxd#vj_;FcQ5_ww@9?*T1df8EP=AM=_JAJH!ipZN9XFAgT3M<5uJ#i;b z;&Z^SO8hG%CN3FMxTDlYhMYNBCHuMim*cWfGGLzppJW{!R>V@g_!cqBP|vc!+gk46qTDus3tQq$ zR(HAi*J5+;Q_GBT|rn#%-yBjs!-pmxBwy|-Vf5}wqMO%q4(>*qg<=#eZ)7XRF0DT4HT)GN( zx~h_XK>EcNX$l#Yj0zdn`~3o@{5JF_(oRa7w6o-h{-jlb7g=&u=mqA1Eb>XR#On$; z-G)gXYp?Ssd*^r)?u@Bgz>!!mF?Z^3pZGr*jZdT;={LoyMD8gpaq&LsswFbODxKzu z&~1XW|IsDsix@-y8}tAw1wHgt3RNQH!1*|gRZJQzX`ZA})2Gy+MsyP|A4HlXJ*(V< zd+q?K;J;6Jhc|s-tGjnmXx5gSPujY(sM_`-{X&`Te__nJUnSv3@O$6;hd!A?XdC{W F{Rc`5`m_K5 From 8cabbc5a465fc07c5bdbe0a7270ea85f37b5e72a Mon Sep 17 00:00:00 2001 From: Ramachandran A G <106139410+ag-ramachandran@users.noreply.github.com> Date: Mon, 29 Dec 2025 16:01:41 +0530 Subject: [PATCH 31/50] * Refactor to add GZIP Compression (#449) * * Refactor to add GZIP Compression * Add a simple streaming sample * Fix issue with compression on Streaming data ingest * Fix copilot review comments * Fix imports * Add assertions --------- Co-authored-by: Tanmaya Panda --- ingest-v2/pom.xml | 87 +++- .../azure/kusto/ingest/v2/IngestV2.kt | 3 + .../ManagedStreamingIngestClientBuilder.kt | 4 +- .../kusto/ingest/v2/client/IngestClient.kt | 21 +- .../ingest/v2/client/IngestionOperation.kt | 3 +- .../v2/client/ManagedStreamingIngestClient.kt | 91 +++- .../ingest/v2/client/QueuedIngestClient.kt | 183 +++++-- .../ingest/v2/client/StreamingIngestClient.kt | 290 +++++++++-- .../ingest/v2/common/ConfigurationCache.kt | 54 +- .../ingest/v2/common/models/ClientDetails.kt | 2 +- .../models/IngestRequestPropertiesBuilder.kt | 157 +++--- .../IngestRequestPropertiesExtensions.kt | 110 ++++ .../models/StreamingIngestionErrorResponse.kt | 39 ++ .../v2/uploader/ContainerUploaderBase.kt | 164 +++++- .../compression/CompressionException.kt | 12 + .../compression/CompressionStrategy.kt | 28 + .../compression/GzipCompressionStrategy.kt | 147 ++++++ .../compression/NoCompressionStrategy.kt | 37 ++ ingest-v2/src/main/resources/app.properties | 2 +- ingest-v2/src/main/resources/openapi.yaml | 2 +- .../kusto/ingest/v2/IngestV2JavaTestBase.java | 194 +++++++ .../ManagedStreamingIngestClientJavaTest.java | 238 +++++++++ .../ingest/v2/QueuedIngestClientJavaTest.java | 191 +++++++ .../v2/StreamingIngestClientJavaTest.java | 152 ++++++ .../azure/kusto/ingest/v2/IngestV2TestBase.kt | 14 +- .../v2/ManagedStreamingIngestClientTest.kt | 23 +- .../kusto/ingest/v2/QueuedIngestClientTest.kt | 66 +-- .../ingest/v2/StreamingIngestClientTest.kt | 161 +++++- pom.xml | 5 +- quickstart/README.md | 9 + quickstart/kusto_sample_config.json | 3 +- quickstart/pom.xml | 16 +- .../azure/kusto/quickstart/SampleApp.java | 439 +++++++++++++++- samples/pom.xml | 8 +- .../ingestv2/ManagedStreamingIngestV2.java | 481 ++++++++++++++++++ .../main/java/ingestv2/QueuedIngestV2.java | 400 +++++++++++++++ .../main/java/ingestv2/StreamingIngestV2.java | 200 ++++++++ samples/src/main/resources/create-table.kql | 88 ++++ 38 files changed, 3773 insertions(+), 351 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/StreamingIngestionErrorResponse.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionException.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategy.kt create mode 100644 
ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/GzipCompressionStrategy.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/NoCompressionStrategy.kt create mode 100644 ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java create mode 100644 ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java create mode 100644 ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java create mode 100644 ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java create mode 100644 samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java create mode 100644 samples/src/main/java/ingestv2/QueuedIngestV2.java create mode 100644 samples/src/main/java/ingestv2/StreamingIngestV2.java create mode 100644 samples/src/main/resources/create-table.kql diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index f1f313cfd..6030e11d1 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -2,9 +2,13 @@ 4.0.0 - ingest-v2 - ingest-v2 - ingest-v2 + kusto-ingest-v2 + ${ingest-v2.revision} + kusto-ingest-v2 + + New ingest module that simplifies ingestion and prepares for next set of iterations to support Private Link for + EventHouse on Fabric + 4.3.0 official @@ -66,6 +70,11 @@ ${kotlin.version} test + + org.jetbrains.kotlinx + kotlinx-coroutines-jdk8 + ${kotlinx.coroutines.debug.version} + org.jetbrains.kotlinx kotlinx-coroutines-debug @@ -107,10 +116,60 @@ ${project.basedir}/src/main/resources + true + + + org.codehaus.mojo + build-helper-maven-plugin + 3.6.1 + + + add-openapi-generated-sources + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/openapi/src/main/kotlin + + + + + add-java-test-sources + generate-test-sources + + add-test-source + + + + ${project.basedir}/src/test/java + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + default-testCompile + test-compile + + testCompile + + + false + + + + kotlin-maven-plugin org.jetbrains.kotlin @@ -207,25 +266,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - 3.6.1 - - - add-openapi-generated-sources - generate-sources - - add-source - - - - ${project.build.directory}/generated-sources/openapi/src/main/kotlin - - - - - com.diffplug.spotless spotless-maven-plugin @@ -299,9 +339,10 @@ **/*Test.kt **/*Test.java + **/*JavaTest.java - \ No newline at end of file + diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt index 654630195..ff96b196e 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2.kt @@ -72,3 +72,6 @@ val MANAGED_STREAMING_RETRY_DELAYS_SECONDS: Array = arrayOf(1, 2, 4) // Maximum jitter to add to retry delays in milliseconds const val MANAGED_STREAMING_RETRY_JITTER_MS: Long = 1000 + +const val STREAM_COMPRESSION_BUFFER_SIZE_BYTES: Int = 64 * 1024 +const val STREAM_PIPE_BUFFER_SIZE_BYTES: Int = 1024 * 1024 diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt index aa50fb8e7..cf8b882a6 100644 --- 
a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt @@ -20,7 +20,9 @@ private constructor(private val dmUrl: String) : @JvmStatic fun create(dmUrl: String): ManagedStreamingIngestClientBuilder { require(dmUrl.isNotBlank()) { "Data Ingestion URI cannot be blank" } - return ManagedStreamingIngestClientBuilder(dmUrl) + return ManagedStreamingIngestClientBuilder( + normalizeAndCheckDmUrl(dmUrl), + ) } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt index 4cfd30ab3..1cf788a33 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt @@ -26,21 +26,17 @@ import java.io.Closeable interface IngestClient : Closeable { /** - * Ingests data from the specified source into the specified database and - * table. + * Ingests data from the specified source. * * @param source The source to ingest. - * @param database The name of the database to ingest to. - * @param table The name of the table to ingest to. - * @param ingestRequestProperties Optional ingestion properties. + * @param ingestRequestProperties Ingestion properties containing database, + * table, format, and other settings. * @return An [IngestionOperation] object that can be used to track the * status of the ingestion. */ suspend fun ingestAsync( source: IngestionSource, - database: String, - table: String, - ingestRequestProperties: IngestRequestProperties? = null, + ingestRequestProperties: IngestRequestProperties, ): ExtendedIngestResponse /** @@ -85,17 +81,14 @@ interface MultiIngestClient : IngestClient { * Ingest data from multiple sources. * * @param sources The sources to ingest. - * @param database The name of the database to ingest to. - * @param table The name of the table to ingest to. - * @param ingestRequestProperties Optional ingestion properties. + * @param ingestRequestProperties Ingestion properties containing database, + * table, format, and other settings. * @return An [IngestionOperation] object that can be used to track the * status of the ingestion. */ suspend fun ingestAsync( sources: List, - database: String, - table: String, - ingestRequestProperties: IngestRequestProperties? = null, + ingestRequestProperties: IngestRequestProperties, ): ExtendedIngestResponse /** diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt index ddd932496..23155ed5f 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperation.kt @@ -3,12 +3,11 @@ package com.microsoft.azure.kusto.ingest.v2.client import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind -import java.util.UUID /** Represents an ingestion operation that can be tracked. */ data class IngestionOperation( /** Unique identifier for the ingestion operation. */ - val operationId: UUID, + val operationId: String, /** The database name where data was ingested. 
*/ val database: String, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt index 08c734879..e738ba071 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt @@ -12,22 +12,25 @@ import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestClientExcepti import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind -import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.common.models.database +import com.microsoft.azure.kusto.ingest.v2.common.models.table import com.microsoft.azure.kusto.ingest.v2.common.runWithRetry -import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import com.microsoft.azure.kusto.ingest.v2.models.Status import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse import com.microsoft.azure.kusto.ingest.v2.source.BlobSource import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.future.future import kotlinx.coroutines.withContext import org.slf4j.LoggerFactory import java.io.InputStream import java.time.Clock import java.time.Duration import java.time.Instant +import java.util.concurrent.CompletableFuture /** * Managed streaming ingestion client that combines streaming and queued @@ -83,34 +86,31 @@ internal constructor( override suspend fun ingestAsync( source: IngestionSource, - database: String, - table: String, - ingestRequestProperties: IngestRequestProperties?, + ingestRequestProperties: IngestRequestProperties, ): ExtendedIngestResponse { + // Extract database and table from properties + val database = ingestRequestProperties.database + val table = ingestRequestProperties.table + requireNotNull(database.trim().isNotEmpty()) { "database cannot be blank" } requireNotNull(table.trim().isNotEmpty()) { "table cannot be blank" } - val effectiveIngestRequestProperties = - ingestRequestProperties - ?: IngestRequestPropertiesBuilder(format = Format.csv) - .build() - return when (source) { is BlobSource -> ingestBlobAsync( source, database, table, - effectiveIngestRequestProperties, + ingestRequestProperties, ) is LocalSource -> ingestLocalAsync( source, database, table, - effectiveIngestRequestProperties, + ingestRequestProperties, ) else -> throw IllegalArgumentException( @@ -162,6 +162,56 @@ internal constructor( } } + /** + * Ingests data from the specified source with the given properties. This is + * the Java-friendly version that returns a CompletableFuture. + * + * @param source The source to ingest. + * @param ingestRequestProperties Ingestion properties containing database, + * table, format, and other settings. + * @return A [CompletableFuture] that completes with an + * [ExtendedIngestResponse]. 
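+     *
+     * A minimal Kotlin-side sketch of this wrapper (the client, source, and
+     * property names are assumed to be in scope; from Java the method
+     * surfaces as `ingestAsync` via `@JvmName`):
+     * ```kotlin
+     * val props = IngestRequestPropertiesBuilder.create("MyDatabase", "MyTable").build()
+     * // Blocks for the result; prefer thenApply/thenAccept composition in services.
+     * val response = managedClient.ingestAsyncJava(source, props).get()
+     * ```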
+ */ + @JvmName("ingestAsync") + fun ingestAsyncJava( + source: IngestionSource, + ingestRequestProperties: IngestRequestProperties, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + ingestAsync(source, ingestRequestProperties) + } + + /** + * Gets the operation summary for the specified ingestion operation. This is + * the Java-friendly version that returns a CompletableFuture. + * + * @param operation The ingestion operation to get the status for. + * @return A [CompletableFuture] that completes with a [Status] object. + */ + @JvmName("getOperationSummaryAsync") + fun getOperationSummaryAsyncJava( + operation: IngestionOperation, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + getOperationSummaryAsync(operation) + } + + /** + * Gets the detailed operation status for the specified ingestion operation. + * This is the Java-friendly version that returns a CompletableFuture. + * + * @param operation The ingestion operation to get the details for. + * @return A [CompletableFuture] that completes with a [StatusResponse] + * object. + */ + @JvmName("getOperationDetailsAsync") + fun getOperationDetailsAsyncJava( + operation: IngestionOperation, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + getOperationDetailsAsync(operation) + } + private suspend fun ingestBlobAsync( blobSource: BlobSource, database: String, @@ -178,8 +228,6 @@ internal constructor( ) { return invokeQueuedIngestionAsync( blobSource, - database, - table, ingestRequestProperties, ) } @@ -218,7 +266,7 @@ internal constructor( props, ) ) { - return invokeQueuedIngestionAsync(source, database, table, props) + return invokeQueuedIngestionAsync(source, props) } return invokeStreamingIngestionAsync(source, database, table, props) } @@ -259,8 +307,6 @@ internal constructor( val result = streamingIngestClient.ingestAsync( source, - database, - table, props, ) val requestDuration = @@ -315,7 +361,7 @@ internal constructor( currentAttempt, lastException?.message, ) - return invokeQueuedIngestionAsync(source, database, table, props) + return invokeQueuedIngestionAsync(source, props) } private fun resetLocalSourceIfPossible(source: IngestionSource) { @@ -371,11 +417,9 @@ internal constructor( private suspend fun invokeQueuedIngestionAsync( source: IngestionSource, - database: String, - table: String, props: IngestRequestProperties, ): ExtendedIngestResponse { - return queuedIngestClient.ingestAsync(source, database, table, props) + return queuedIngestClient.ingestAsync(source, props) } private fun shouldUseQueuedIngestByPolicy( @@ -611,9 +655,8 @@ internal constructor( database: String, table: String, operationId: String, - pollingInterval: kotlin.time.Duration = - kotlin.time.Duration.parse("PT30S"), - timeout: kotlin.time.Duration = kotlin.time.Duration.parse("PT5M"), + pollingInterval: Duration = Duration.parse("PT30S"), + timeout: Duration = Duration.parse("PT5M"), ): StatusResponse { return queuedIngestClient.pollUntilCompletion( database, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt index 048ae3b90..c289145e7 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt @@ -10,12 +10,13 @@ import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import 
com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestSizeLimitExceededException import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind -import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.common.models.database +import com.microsoft.azure.kusto.ingest.v2.common.models.table +import com.microsoft.azure.kusto.ingest.v2.common.models.withFormatFromSource import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionResultUtils import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.Blob import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus -import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequest import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse @@ -26,16 +27,20 @@ import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource import com.microsoft.azure.kusto.ingest.v2.source.LocalSource import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader import io.ktor.http.HttpStatusCode +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.async import kotlinx.coroutines.awaitAll import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.delay +import kotlinx.coroutines.future.future import kotlinx.coroutines.withTimeoutOrNull import org.slf4j.LoggerFactory import java.net.ConnectException import java.time.Clock +import java.time.Duration import java.time.OffsetDateTime -import kotlin.time.Duration +import java.util.concurrent.CompletableFuture /** * Queued ingestion client for Azure Data Explorer (Kusto). @@ -69,12 +74,111 @@ internal constructor( ) : MultiIngestClient { private val logger = LoggerFactory.getLogger(QueuedIngestClient::class.java) + /** + * Ingests data from multiple sources with the given properties. This is the + * suspend function for Kotlin callers. + */ override suspend fun ingestAsync( sources: List, - database: String, - table: String, - ingestRequestProperties: IngestRequestProperties?, + ingestRequestProperties: IngestRequestProperties, + ): ExtendedIngestResponse = + ingestAsyncInternal(sources, ingestRequestProperties) + + /** + * Ingests data from a single source with the given properties. This is the + * suspend function for Kotlin callers. + */ + override suspend fun ingestAsync( + source: IngestionSource, + ingestRequestProperties: IngestRequestProperties, + ): ExtendedIngestResponse = + ingestAsyncSingleInternal(source, ingestRequestProperties) + + /** + * Ingests data from multiple sources with the given properties. This is the + * Java-friendly version that returns a CompletableFuture. + */ + @JvmName("ingestAsync") + fun ingestAsyncJava( + sources: List, + ingestRequestProperties: IngestRequestProperties, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + ingestAsyncInternal(sources, ingestRequestProperties) + } + + /** + * Ingests data from a single source with the given properties. This is the + * Java-friendly version that returns a CompletableFuture. 
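+     *
+     * A sketch of non-blocking consumption from code that has no coroutine
+     * scope (`queuedClient`, `source`, and `props` are assumed to exist):
+     * ```kotlin
+     * queuedClient.ingestAsyncJava(source, props)
+     *     .thenAccept { response -> println("Submitted: $response") }
+     *     .exceptionally { e -> println("Ingestion failed: ${e.message}"); null }
+     * ```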
+ */ + @JvmName("ingestAsync") + fun ingestAsyncJava( + source: IngestionSource, + ingestRequestProperties: IngestRequestProperties, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + ingestAsyncSingleInternal(source, ingestRequestProperties) + } + + /** + * Gets the operation summary for the specified ingestion operation. This is + * the Java-friendly version that returns a CompletableFuture. + */ + @JvmName("getOperationSummaryAsync") + fun getOperationSummaryAsyncJava( + operation: IngestionOperation, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + getOperationSummaryAsync(operation) + } + + /** + * Gets the detailed operation status for the specified ingestion operation. + * This is the Java-friendly version that returns a CompletableFuture. + */ + @JvmName("getOperationDetailsAsync") + fun getOperationDetailsAsyncJava( + operation: IngestionOperation, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + getOperationDetailsAsync(operation) + } + + /** + * Polls the ingestion status until completion or timeout. This is the + * Java-friendly version that returns a CompletableFuture. + * + * @param operation The ingestion operation to poll + * @param timeout Maximum time to wait before throwing timeout exception (in + * milliseconds) + * @return CompletableFuture that completes with the final StatusResponse + * when ingestion is completed + */ + @JvmName("pollForCompletion") + fun pollForCompletion( + operation: IngestionOperation, + pollingInterval: Duration, + timeout: Duration, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + pollUntilCompletion( + database = operation.database, + table = operation.table, + operationId = operation.operationId, + pollingInterval = pollingInterval, + timeout = timeout, + ) + } + + /** Internal implementation of ingestAsync for multiple sources. */ + private suspend fun ingestAsyncInternal( + sources: List, + ingestRequestProperties: IngestRequestProperties, ): ExtendedIngestResponse { + // Extract database and table from properties + val database = ingestRequestProperties.database + val table = ingestRequestProperties.table + // Validate sources list is not empty require(sources.isNotEmpty()) { "sources list cannot be empty" } val maxBlobsPerBatch = getMaxSourcesPerMultiIngest() @@ -133,11 +237,16 @@ internal constructor( rawSize = it.blobExactSize, ) } + + // Extract format from the first source (all sources have same format as validated above) + val effectiveProperties = + ingestRequestProperties.withFormatFromSource(sources.first()) + val ingestRequest = IngestRequest( timestamp = OffsetDateTime.now(Clock.systemUTC()), blobs = blobs, - properties = ingestRequestProperties, + properties = effectiveProperties, ) val response: HttpResponse = this.apiClient.api.postQueuedIngest( @@ -171,36 +280,22 @@ internal constructor( } } - override suspend fun ingestAsync( + /** Internal implementation of ingestAsync for a single source. 
*/ + private suspend fun ingestAsyncSingleInternal( source: IngestionSource, - database: String, - table: String, - ingestRequestProperties: IngestRequestProperties?, + ingestRequestProperties: IngestRequestProperties, ): ExtendedIngestResponse { - // Add this as a fallback because the format is mandatory and if that is not present it may - // cause a failure - val effectiveIngestionProperties = - ingestRequestProperties - ?: IngestRequestPropertiesBuilder(format = Format.csv) - .build() when (source) { is BlobSource -> { - return ingestAsync( - listOf(source), - database, - table, - effectiveIngestionProperties, - ) + // Pass the source to multi-source method which will extract format + return ingestAsync(listOf(source), ingestRequestProperties) } is LocalSource -> { - // Upload the local source to blob storage + // Upload the local source to blob storage, then ingest + // Note: We pass the original LocalSource to preserve format information val blobSource = uploader.uploadAsync(source) - return ingestAsync( - listOf(blobSource), - database, - table, - effectiveIngestionProperties, - ) + // Use the original source's format + return ingestAsync(listOf(blobSource), ingestRequestProperties) } else -> { throw IngestClientException( @@ -217,7 +312,7 @@ internal constructor( getIngestionDetails( operation.database, operation.table, - operation.operationId.toString(), + operation.operationId, false, ) return statusResponse.status @@ -235,7 +330,7 @@ internal constructor( return getIngestionDetails( database = operation.database, table = operation.table, - operationId = operation.operationId.toString(), + operationId = operation.operationId, details = true, ) } @@ -498,7 +593,7 @@ internal constructor( timeout: Duration = Duration.parse("PT5M"), ): StatusResponse { val result = - withTimeoutOrNull(timeout.inWholeMilliseconds) { + withTimeoutOrNull(timeout.toMillis()) { var currentStatus: StatusResponse do { currentStatus = @@ -509,7 +604,9 @@ internal constructor( forceDetails = true, ) logger.debug( - "Starting to poll ingestion status for operation: $operationId, timeout: $timeout", + "Starting to poll ingestion status for operation: {}, timeout: {}", + operationId, + timeout, ) if ( IngestionResultUtils.isCompleted( @@ -523,9 +620,9 @@ internal constructor( } logger.debug( - "Ingestion operation $operationId still in progress, waiting ${pollingInterval.inWholeSeconds}s before next check", + "Ingestion operation $operationId still in progress, waiting ${pollingInterval.toMillis()}s before next check", ) - delay(pollingInterval.inWholeMilliseconds) + delay(pollingInterval.toMillis()) } while ( !IngestionResultUtils.isCompleted( currentStatus.details, @@ -583,19 +680,19 @@ internal constructor( -> buildString { append("Error ingesting blob with $sourceId. ") - if (!details.isNullOrBlank()) append("ErrorDetails $details, ") + if (!details.isNullOrBlank()) append("ErrorDetails: $details. ") if (!errorCode.isNullOrBlank()) { - append("ErrorCode $errorCode , ") + append("ErrorCode: $errorCode. ") } - if (status != null) append("Status ${status.value}. ") + if (status != null) append("Status: ${status.value}. ") if (lastUpdateTime != null) { - append("Ingestion lastUpdated at $lastUpdateTime ") + append("Ingestion lastUpdated at $lastUpdateTime. ") } - if (startedAt != null) append("& started at $startedAt. ") + if (startedAt != null) append("Started at $startedAt. ") if (failureStatus != null) { - append("FailureStatus ${failureStatus.value}. ") + append("FailureStatus: ${failureStatus.value}. 
") } - append("Is transient failure: $isTransientFailure") + append("IsTransientFailure: $isTransientFailure") } } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt index d62e82c62..a6fdc9f77 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt @@ -5,9 +5,13 @@ package com.microsoft.azure.kusto.ingest.v2.client import com.microsoft.azure.kusto.ingest.v2.KustoBaseApiClient import com.microsoft.azure.kusto.ingest.v2.STREAMING_MAX_REQ_BODY_SIZE import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestRequestException +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestServiceException import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind -import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.common.models.database +import com.microsoft.azure.kusto.ingest.v2.common.models.table +import com.microsoft.azure.kusto.ingest.v2.common.models.withFormatFromSource import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionUtils import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.Format @@ -16,19 +20,29 @@ import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse import com.microsoft.azure.kusto.ingest.v2.models.Status import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType import com.microsoft.azure.kusto.ingest.v2.source.FileSource import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource import com.microsoft.azure.kusto.ingest.v2.source.StreamSource import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode +import io.ktor.client.statement.bodyAsText import io.ktor.http.ContentType import io.ktor.http.HttpStatusCode +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.future.future import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.json.Json +import kotlinx.serialization.json.boolean +import kotlinx.serialization.json.int +import kotlinx.serialization.json.jsonObject +import kotlinx.serialization.json.jsonPrimitive import org.slf4j.LoggerFactory import java.net.ConnectException import java.net.URI import java.util.UUID +import java.util.concurrent.CompletableFuture /** * Streaming ingestion client for Azure Data Explorer (Kusto). @@ -78,19 +92,102 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { ) } + /** + * Ingests data from the specified source with the given properties. This is + * the suspend function for Kotlin callers. + * + * @param source The source to ingest (FileSource, StreamSource, or + * BlobSource). + * @param ingestRequestProperties Ingestion properties containing database, + * table, format, and other settings. + * @return An ExtendedIngestResponse containing the operation ID and + * ingestion kind. 
+ */ override suspend fun ingestAsync( source: IngestionSource, - database: String, - table: String, - ingestRequestProperties: IngestRequestProperties?, + ingestRequestProperties: IngestRequestProperties, + ): ExtendedIngestResponse = + ingestAsyncInternal(source, ingestRequestProperties) + + /** + * Ingests data from the specified source with the given properties. This is + * the Java-friendly version that returns a CompletableFuture. + * + * @param source The source to ingest (FileSource, StreamSource, or + * BlobSource). + * @param ingestRequestProperties Ingestion properties containing database, + * table, format, and other settings. + * @return A CompletableFuture that completes with an + * ExtendedIngestResponse. + */ + @JvmName("ingestAsync") + fun ingestAsyncJava( + source: IngestionSource, + ingestRequestProperties: IngestRequestProperties, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + ingestAsyncInternal(source, ingestRequestProperties) + } + + /** + * Gets the operation summary for the specified ingestion operation. This is + * the Java-friendly version that returns a CompletableFuture. + * + * Note: Streaming ingestion does not support operation tracking, so this + * always returns an empty status. + * + * @param operation The ingestion operation to get the status for. + * @return A CompletableFuture that completes with a Status object. + */ + @JvmName("getOperationSummaryAsync") + fun getOperationSummaryAsyncJava( + operation: IngestionOperation, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + getOperationSummaryAsync(operation) + } + + /** + * Gets the detailed operation status for the specified ingestion operation. + * This is the Java-friendly version that returns a CompletableFuture. + * + * Note: Streaming ingestion does not support operation tracking, so this + * always returns an empty status response. + * + * @param operation The ingestion operation to get the details for. + * @return A CompletableFuture that completes with a StatusResponse object. + */ + @JvmName("getOperationDetailsAsync") + fun getOperationDetailsAsyncJava( + operation: IngestionOperation, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + getOperationDetailsAsync(operation) + } + + /** + * Internal implementation of ingestAsync that both the suspend and Java + * versions call. 
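+     *
+     * The effective size ceiling scales the fixed request-body limit by a
+     * format/compression estimate, roughly:
+     * ```kotlin
+     * val maxSize = (STREAMING_MAX_REQ_BODY_SIZE *
+     *     IngestionUtils.getRowStoreEstimatedFactor(format, compressionType)).toLong()
+     * ```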
+ */ + private suspend fun ingestAsyncInternal( + source: IngestionSource, + ingestRequestProperties: IngestRequestProperties, ): ExtendedIngestResponse { + // Inject format from source into properties + val effectiveProperties = + ingestRequestProperties.withFormatFromSource(source) + + // Extract database and table from properties + val database = effectiveProperties.database + val table = effectiveProperties.table + // Streaming ingestion processes one source at a time - val maxSize = getMaxStreamingIngestSize(source = source) + val maxSize = + getMaxStreamingIngestSize( + compressionType = source.compressionType, + format = effectiveProperties.format, + ) val operationId = UUID.randomUUID().toString() - val effectiveIngestionProperties = - ingestRequestProperties - ?: IngestRequestPropertiesBuilder(format = Format.csv) - .build() when (source) { is BlobSource -> { logger.info( @@ -101,8 +198,9 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { table = table, // Not used for blob-based streaming data = ByteArray(0), - ingestProperties = effectiveIngestionProperties, + ingestProperties = effectiveProperties, blobUrl = source.blobPath, + compressionType = source.compressionType, ) } is FileSource, @@ -128,8 +226,9 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { database = database, table = table, data = data, - ingestProperties = effectiveIngestionProperties, + ingestProperties = effectiveProperties, blobUrl = null, + compressionType = source.compressionType, ) source.close() } @@ -167,6 +266,7 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { data: ByteArray, ingestProperties: IngestRequestProperties, blobUrl: String? = null, + compressionType: CompressionType, ) { val host = URI(this.apiClient.engineUrl).host @@ -202,7 +302,7 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { } try { - val response: HttpResponse = + val response = this.apiClient.api.postStreamingIngest( database = database, table = table, @@ -212,9 +312,17 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { ingestProperties.ingestionMappingReference, sourceKind = sourceKind, host = host, - acceptEncoding = "gzip", + acceptEncoding = null, connection = "Keep-Alive", - contentEncoding = null, + contentEncoding = + if ( + compressionType == + CompressionType.GZIP + ) { + "gzip" + } else { + null + }, contentType = contentType, ) return handleIngestResponse( @@ -296,41 +404,124 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { isPermanent = false, ) } - val nonSuccessResponseBody: T = response.body() - val ingestResponseOperationId = - if (nonSuccessResponseBody is IngestResponse) { - if ( - (nonSuccessResponseBody as IngestResponse) - .ingestionOperationId != null - ) { - logger.info( - "Ingestion Operation ID: ${(nonSuccessResponseBody as IngestResponse).ingestionOperationId}", - ) - nonSuccessResponseBody.ingestionOperationId - } else { - "N/A" - } + + // Try to parse the error response as Kusto OneApiError format + val errorDetails = parseKustoErrorResponse(response) + + val errorMessage = + if (errorDetails != null) { + // Use the detailed message from the Kusto error response + val description = + errorDetails.description ?: errorDetails.message + "Failed to submit streaming ingestion to $database.$table. 
" + + "Error: $description (Code: ${errorDetails.code}, Type: ${errorDetails.type})" } else { - "N/A" + // Fallback to generic error message + "Failed to submit streaming ingestion to $database.$table. Status: ${response.status}" } - val errorMessage = - "Failed to submit streaming ingestion to $database.$table. " + - "Status: ${response.status}, Body: $nonSuccessResponseBody. " + - "OperationId $ingestResponseOperationId" + logger.error(errorMessage) - throw IngestException( - message = errorMessage, - cause = RuntimeException(errorMessage), - isPermanent = true, + + // Determine if the error is permanent based on the parsed response + val isPermanent = errorDetails?.permanent ?: true + val failureCode = errorDetails?.failureCode ?: response.status + + // Use appropriate exception type based on the error + if (isPermanent) { + throw IngestRequestException( + errorCode = errorDetails?.code, + errorReason = errorDetails?.type, + errorMessage = + errorDetails?.description + ?: errorDetails?.message, + databaseName = database, + failureCode = failureCode, + isPermanent = true, + message = errorMessage, + ) + } else { + throw IngestServiceException( + errorCode = errorDetails?.code, + errorReason = errorDetails?.type, + errorMessage = + errorDetails?.description + ?: errorDetails?.message, + failureCode = failureCode, + isPermanent = false, + message = errorMessage, + ) + } + } + } + + /** + * Parses the Kusto error response to extract error details. The error + * response follows the OneApiError format: + * ```json + * { + * "error": { + * "code": "BadRequest", + * "message": "Request is invalid and cannot be executed.", + * "@type": "Kusto.DataNode.Exceptions.StreamingIngestionRequestException", + * "@message": "Bad streaming ingestion request...", + * "@failureCode": 400, + * "@permanent": true + * } + * } + * ``` + */ + private suspend fun parseKustoErrorResponse( + response: HttpResponse, + ): KustoErrorDetails? 
{ + return try { + val bodyText = response.response.bodyAsText() + if (bodyText.isBlank()) { + logger.debug("Empty error response body") + return null + } + + logger.debug("Parsing error response: {}", bodyText) + + val json = Json { ignoreUnknownKeys = true } + val rootObject = json.parseToJsonElement(bodyText).jsonObject + + // The error is wrapped in an "error" object + val errorObject = rootObject["error"]?.jsonObject + if (errorObject == null) { + logger.debug("No 'error' field found in response") + return null + } + + val code = errorObject["code"]?.jsonPrimitive?.content + val message = errorObject["message"]?.jsonPrimitive?.content + val type = errorObject["@type"]?.jsonPrimitive?.content + val description = errorObject["@message"]?.jsonPrimitive?.content + val failureCode = errorObject["@failureCode"]?.jsonPrimitive?.int + val permanent = + errorObject["@permanent"]?.jsonPrimitive?.boolean ?: true + + KustoErrorDetails( + code = code, + message = message, + type = type, + description = description, + failureCode = failureCode, + permanent = permanent, ) + } catch (e: Exception) { + logger.warn("Failed to parse Kusto error response: ${e.message}", e) + null } } - private fun getMaxStreamingIngestSize(source: IngestionSource): Long { + private fun getMaxStreamingIngestSize( + compressionType: CompressionType, + format: Format, + ): Long { val compressionFactor = IngestionUtils.getRowStoreEstimatedFactor( - source.format, - source.compressionType, + format, + compressionType, ) return (STREAMING_MAX_REQ_BODY_SIZE * compressionFactor).toLong() } @@ -342,3 +533,22 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { private data class StreamFromBlobRequestBody( @SerialName("SourceUri") val sourceUri: String, ) + +/** + * Data class to hold parsed Kusto error details from OneApiError format. + * Matches the structure of error responses from Kusto streaming ingestion. + */ +private data class KustoErrorDetails( + /** The error code (e.g., "BadRequest") */ + val code: String?, + /** The high-level error message */ + val message: String?, + /** The exception type (from @type field) */ + val type: String?, + /** The detailed error description (from @message field) */ + val description: String?, + /** The HTTP failure code (from @failureCode field) */ + val failureCode: Int?, + /** Whether the error is permanent (from @permanent field) */ + val permanent: Boolean, +) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 3dee0f11d..b716a7bb1 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -10,6 +10,7 @@ import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import java.lang.AutoCloseable import java.time.Duration +import java.util.concurrent.atomic.AtomicReference /** * Interface for caching configuration data. @@ -95,32 +96,55 @@ class DefaultConfigurationCache( .getConfigurationDetails() } - @Volatile private var cachedConfiguration: ConfigurationResponse? = null - private var lastRefreshTime: Long = 0 + /** + * Holds both the configuration and its refresh timestamp atomically. This + * prevents race conditions between checking expiration and updating. 
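+     *
+     * A sketch of the lock-free refresh used below: every stale reader may
+     * fetch, but `updateAndGet` lets only one snapshot win the write
+     * (`fresh`, `now`, and `ttlMs` are illustrative names):
+     * ```kotlin
+     * cache.updateAndGet { cur ->
+     *     if (cur == null || now - cur.timestamp >= ttlMs) CachedData(fresh, now) else cur
+     * }
+     * ```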
+ */ + private data class CachedData( + val configuration: ConfigurationResponse, + val timestamp: Long, + ) + + private val cache = AtomicReference(null) override suspend fun getConfiguration(): ConfigurationResponse { val currentTime = System.currentTimeMillis() + val cached = cache.get() + + // Check if we need to refresh val needsRefresh = - cachedConfiguration == null || - (currentTime - lastRefreshTime) >= + cached == null || + (currentTime - cached.timestamp) >= refreshInterval.toMillis() + if (needsRefresh) { + // Attempt to refresh - only one thread will succeed val newConfig = runCatching { provider() } - .getOrElse { cachedConfiguration ?: throw it } - synchronized(this) { - // Double-check in case another thread refreshed while we were waiting - val stillNeedsRefresh = - cachedConfiguration == null || - (currentTime - lastRefreshTime) >= - refreshInterval.toMillis() - if (stillNeedsRefresh) { - cachedConfiguration = newConfig - lastRefreshTime = currentTime + .getOrElse { + // If fetch fails, return cached if available, otherwise rethrow + cached?.configuration ?: throw it + } + + // Atomically update if still needed (prevents thundering herd) + cache.updateAndGet { current -> + val currentTimestamp = current?.timestamp ?: 0 + // Only update if current is null or still stale + if ( + current == null || + (currentTime - currentTimestamp) >= + refreshInterval.toMillis() + ) { + CachedData(newConfig, currentTime) + } else { + // Another thread already refreshed + current } } } - return cachedConfiguration!! + + // Return the cached value (guaranteed non-null after refresh) + return cache.get()!!.configuration } override fun close() { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt index 9af839556..3a2705bc5 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetails.kt @@ -169,7 +169,7 @@ data class ClientDetails( return ClientDetails( applicationForTracing = getProcessName(), userNameForTracing = getUserName(), - clientVersionForTracing = getDefaultVersion(), + clientVersionForTracing = null, ) } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt index 4d942ceb8..605b583d0 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt @@ -2,7 +2,6 @@ // Licensed under the MIT License. 
package com.microsoft.azure.kusto.ingest.v2.common.models -import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import java.time.OffsetDateTime @@ -14,14 +13,18 @@ import java.time.OffsetDateTime * * Example usage: * ```kotlin - * val properties = IngestRequestPropertiesBuilder(format = Format.json) + * val properties = IngestRequestPropertiesBuilder.create(database = "db", table = "table") + * .withFormat(Format.json) * .withDropByTags(listOf("tag1", "tag2")) * .withIngestByTags(listOf("tag3")) * .withEnableTracking(true) * .build() * ``` */ -class IngestRequestPropertiesBuilder(private val format: Format) { +class IngestRequestPropertiesBuilder +private constructor(private val database: String, private val table: String) { + private var format: com.microsoft.azure.kusto.ingest.v2.models.Format? = + null private var enableTracking: Boolean? = null private var additionalTags: List? = null private var dropByTags: List? = null @@ -40,6 +43,41 @@ class IngestRequestPropertiesBuilder(private val format: Format) { private var extendSchema: Boolean? = null private var recreateSchema: Boolean? = null + companion object { + internal const val DATABASE_KEY = "_database" + internal const val TABLE_KEY = "_table" + + /** + * Creates a new builder for IngestRequestProperties. + * + * @param database The target database name + * @param table The target table name + * @return A new IngestRequestPropertiesBuilder instance + */ + @JvmStatic + fun create( + database: String, + table: String, + ): IngestRequestPropertiesBuilder { + return IngestRequestPropertiesBuilder(database, table) + } + } + + /** + * Sets the data format for ingestion. + * + * @param value The data format (e.g., Format.json, Format.csv) + * @deprecated Format is automatically extracted from the IngestionSource. + * This method is no longer needed. + */ + @Deprecated( + "Format is automatically extracted from the IngestionSource. This method is no longer needed.", + ) + fun withFormat(value: com.microsoft.azure.kusto.ingest.v2.models.Format) = + apply { + this.format = value + } + fun withEnableTracking(value: Boolean) = apply { this.enableTracking = value } @@ -116,6 +154,16 @@ class IngestRequestPropertiesBuilder(private val format: Format) { * Builds the * [com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties] with * combined tags from dropByTags, ingestByTags, and additionalTags. + * + * The built properties will have database and table information stored in + * the underlying map for retrieval by client implementations. + * + * Note: The format field will be automatically extracted from the + * IngestionSource by the client implementation during ingestion using + * withFormatFromSource(). A placeholder value (Format.csv) is used during + * build and will be overridden with the actual source format. 
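+     *
+     * A usage sketch (database, table, and tag values are illustrative):
+     * ```kotlin
+     * val props = IngestRequestPropertiesBuilder.create("MyDb", "MyTable")
+     *     .withDropByTags(listOf("batch-2024")) // stored as "drop-by:batch-2024"
+     *     .withEnableTracking(true)
+     *     .build()
+     * // The target names are recoverable via the extension properties:
+     * // props.database == "MyDb" and props.table == "MyTable"
+     * ```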
+ * + * @return The built IngestRequestProperties */ fun build(): IngestRequestProperties { // Combine all tags: additional tags + prefixed ingest-by tags + prefixed drop-by tags @@ -127,79 +175,34 @@ class IngestRequestPropertiesBuilder(private val format: Format) { dropByTags?.forEach { tag -> combinedTags.add("drop-by:$tag") } - return IngestRequestProperties( - format = format, - enableTracking = enableTracking, - tags = combinedTags.ifEmpty { null }, - ingestIfNotExists = ingestIfNotExists, - skipBatching = skipBatching, - deleteAfterDownload = deleteAfterDownload, - ingestionMappingReference = ingestionMappingReference, - ingestionMapping = ingestionMapping, - validationPolicy = validationPolicy, - ignoreSizeLimit = ignoreSizeLimit, - ignoreFirstRecord = ignoreFirstRecord, - ignoreLastRecordIfInvalid = ignoreLastRecordIfInvalid, - creationTime = creationTime, - zipPattern = zipPattern, - extendSchema = extendSchema, - recreateSchema = recreateSchema, - ) + // Use format if explicitly set, otherwise use placeholder (will be overridden from source) + val effectiveFormat = + format ?: com.microsoft.azure.kusto.ingest.v2.models.Format.csv + + val properties = + IngestRequestProperties( + format = effectiveFormat, + enableTracking = enableTracking, + tags = combinedTags.ifEmpty { null }, + ingestIfNotExists = ingestIfNotExists, + skipBatching = skipBatching, + deleteAfterDownload = deleteAfterDownload, + ingestionMappingReference = ingestionMappingReference, + ingestionMapping = ingestionMapping, + validationPolicy = validationPolicy, + ignoreSizeLimit = ignoreSizeLimit, + ignoreFirstRecord = ignoreFirstRecord, + ignoreLastRecordIfInvalid = ignoreLastRecordIfInvalid, + creationTime = creationTime, + zipPattern = zipPattern, + extendSchema = extendSchema, + recreateSchema = recreateSchema, + ) + + // Store database and table in the HashMap for retrieval + properties.put(DATABASE_KEY, database) + properties.put(TABLE_KEY, table) + + return properties } } - -/** - * Extension property to extract drop-by tags from the combined tags list. - * Returns all tags that start with "drop-by:" prefix. - */ -val IngestRequestProperties.dropByTags: List - get() = - tags?.filter { it.startsWith("drop-by:") } - ?.map { it.removePrefix("drop-by:") } ?: emptyList() - -/** - * Extension property to extract ingest-by tags from the combined tags list. - * Returns all tags that start with "ingest-by:" prefix. - */ -val IngestRequestProperties.ingestByTags: List - get() = - tags?.filter { it.startsWith("ingest-by:") } - ?.map { it.removePrefix("ingest-by:") } ?: emptyList() - -/** - * Extension property to extract additional (non-prefixed) tags from the - * combined tags list. Returns all tags that don't start with "drop-by:" or - * "ingest-by:" prefix. - */ -val IngestRequestProperties.additionalTags: List - get() = - tags?.filter { - !it.startsWith("drop-by:") && !it.startsWith("ingest-by:") - } ?: emptyList() - -/** - * Creates a copy of this [IngestRequestProperties] with modified tags. Useful - * for adding or removing drop-by and ingest-by tags without recreating the - * entire object. - * - * @param dropByTags New drop-by tags to replace existing ones (null means keep - * existing) - * @param ingestByTags New ingest-by tags to replace existing ones (null means - * keep existing) - * @param additionalTags New additional tags to replace existing ones (null - * means keep existing) - */ -fun IngestRequestProperties.copyWithTags( - dropByTags: List? = null, - ingestByTags: List? 
= null, - additionalTags: List? = null, -): IngestRequestProperties { - val newDropByTags = dropByTags ?: this.dropByTags - val newIngestByTags = ingestByTags ?: this.ingestByTags - val newAdditionalTags = additionalTags ?: this.additionalTags - val combinedTags = mutableListOf() - combinedTags.addAll(newAdditionalTags) - newIngestByTags.forEach { tag -> combinedTags.add("ingest-by:$tag") } - newDropByTags.forEach { tag -> combinedTags.add("drop-by:$tag") } - return this.copy(tags = combinedTags.ifEmpty { null }) -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt new file mode 100644 index 000000000..c6b4878a2 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models + +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties + +/** + * Extension properties and functions for + * [com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties]. + * + * These extensions provide convenient access to database, table, and tag + * information stored in the IngestRequestProperties. + */ + +/** + * Extension property to extract the database name from IngestRequestProperties. + */ +val IngestRequestProperties.database: String + get() = + this.get(IngestRequestPropertiesBuilder.DATABASE_KEY) as? String + ?: throw IllegalStateException( + "Database not set in IngestRequestProperties", + ) + +/** + * Extension property to extract the table name from IngestRequestProperties. + */ +val IngestRequestProperties.table: String + get() = + this.get(IngestRequestPropertiesBuilder.TABLE_KEY) as? String + ?: throw IllegalStateException( + "Table not set in IngestRequestProperties", + ) + +/** + * Extension property to extract drop-by tags from the combined tags list. + * Returns all tags that start with "drop-by:" prefix. + */ +val IngestRequestProperties.dropByTags: List + get() = + tags?.filter { it.startsWith("drop-by:") } + ?.map { it.removePrefix("drop-by:") } ?: emptyList() + +/** + * Extension property to extract ingest-by tags from the combined tags list. + * Returns all tags that start with "ingest-by:" prefix. + */ +val IngestRequestProperties.ingestByTags: List + get() = + tags?.filter { it.startsWith("ingest-by:") } + ?.map { it.removePrefix("ingest-by:") } ?: emptyList() + +/** + * Extension property to extract additional (non-prefixed) tags from the + * combined tags list. Returns all tags that don't start with "drop-by:" or + * "ingest-by:" prefix. + */ +val IngestRequestProperties.additionalTags: List + get() = + tags?.filter { + !it.startsWith("drop-by:") && !it.startsWith("ingest-by:") + } ?: emptyList() + +/** + * Creates a copy of this [IngestRequestProperties] with modified tags. Useful + * for adding or removing drop-by and ingest-by tags without recreating the + * entire object. + * + * @param dropByTags New drop-by tags to replace existing ones (null means keep + * existing) + * @param ingestByTags New ingest-by tags to replace existing ones (null means + * keep existing) + * @param additionalTags New additional tags to replace existing ones (null + * means keep existing) + */ +fun IngestRequestProperties.copyWithTags( + dropByTags: List? 
= null, + ingestByTags: List? = null, + additionalTags: List? = null, +): IngestRequestProperties { + val newDropByTags = dropByTags ?: this.dropByTags + val newIngestByTags = ingestByTags ?: this.ingestByTags + val newAdditionalTags = additionalTags ?: this.additionalTags + val combinedTags = mutableListOf() + combinedTags.addAll(newAdditionalTags) + newIngestByTags.forEach { tag -> combinedTags.add("ingest-by:$tag") } + newDropByTags.forEach { tag -> combinedTags.add("drop-by:$tag") } + return this.copy(tags = combinedTags.ifEmpty { null }) +} + +/** + * Creates a copy of this [IngestRequestProperties] with the format field set + * from the provided + * [com.microsoft.azure.kusto.ingest.v2.source.IngestionSource]. This is used + * internally by ingest clients to inject the source's format. + * + * Note: This function preserves the database and table entries stored in the + * underlying HashMap. + * + * @param source The ingestion source from which to extract the format + * @return A new IngestRequestProperties with the format from the source + */ +fun IngestRequestProperties.withFormatFromSource( + source: com.microsoft.azure.kusto.ingest.v2.source.IngestionSource, +): IngestRequestProperties { + val newProperties = this.copy(format = source.format) + // Copy over HashMap entries (database and table) + this.forEach { (key, value) -> newProperties[key] = value } + return newProperties +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/StreamingIngestionErrorResponse.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/StreamingIngestionErrorResponse.kt new file mode 100644 index 000000000..7052a6151 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/StreamingIngestionErrorResponse.kt @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** + * Error response model for streaming ingestion failures. This represents the + * error structure returned by Kusto when a streaming ingestion request fails. + */ +@Serializable +data class StreamingIngestionErrorResponse( + @SerialName("error") val error: StreamingIngestionError, +) + +@Serializable +data class StreamingIngestionError( + @SerialName("code") val code: String? = null, + @SerialName("message") val message: String? = null, + @SerialName("@type") val type: String? = null, + @SerialName("@message") val detailedMessage: String? = null, + @SerialName("@failureCode") val failureCode: Int? = null, + @SerialName("@context") val context: ErrorContext? = null, + @SerialName("@permanent") val permanent: Boolean? = null, +) + +@Serializable +data class ErrorContext( + @SerialName("timestamp") val timestamp: String? = null, + @SerialName("serviceAlias") val serviceAlias: String? = null, + @SerialName("clientRequestId") val clientRequestId: String? = null, + @SerialName("activityId") val activityId: String? = null, + @SerialName("subActivityId") val subActivityId: String? = null, + @SerialName("activityType") val activityType: String? = null, + @SerialName("parentActivityId") val parentActivityId: String? = null, + @SerialName("activityStack") val activityStack: String? = null, + @SerialName("serviceFarm") val serviceFarm: String? 
= null, +) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt index 21a02afc9..06763a43b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -12,13 +12,17 @@ import com.azure.storage.file.datalake.DataLakeFileClient import com.azure.storage.file.datalake.DataLakeServiceClientBuilder import com.azure.storage.file.datalake.options.FileParallelUploadOptions import com.microsoft.azure.kusto.ingest.v2.BLOB_UPLOAD_TIMEOUT_HOURS +import com.microsoft.azure.kusto.ingest.v2.STREAM_COMPRESSION_BUFFER_SIZE_BYTES +import com.microsoft.azure.kusto.ingest.v2.STREAM_PIPE_BUFFER_SIZE_BYTES import com.microsoft.azure.kusto.ingest.v2.UPLOAD_BLOCK_SIZE_BYTES import com.microsoft.azure.kusto.ingest.v2.UPLOAD_MAX_SINGLE_SIZE_BYTES import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import com.microsoft.azure.kusto.ingest.v2.uploader.compression.CompressionException import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults @@ -30,10 +34,14 @@ import kotlinx.coroutines.delay import kotlinx.coroutines.withContext import org.slf4j.Logger import org.slf4j.LoggerFactory +import java.io.IOException import java.io.InputStream +import java.io.PipedInputStream +import java.io.PipedOutputStream import java.time.Clock import java.time.Duration import java.time.Instant +import java.util.zip.GZIPOutputStream /** Represents an abstract base class for uploaders to storage containers. 
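+ *
+ * As of this change, uploadAsync transparently gzip-compresses local sources
+ * whose shouldCompress flag is set, streaming through a pipe instead of
+ * buffering the whole payload, and stamps the resulting BlobSource with GZIP
+ * as the effective compression type.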
*/ abstract class ContainerUploaderBase( @@ -59,10 +67,10 @@ abstract class ContainerUploaderBase( override suspend fun uploadAsync(local: LocalSource): BlobSource { // Get the stream and validate it - val stream = local.data() + val originalStream = local.data() val name = local.generateBlobName() - val errorCode = checkStreamForErrors(stream) + val errorCode = checkStreamForErrors(originalStream) if (errorCode != null) { logger.error( "Stream validation failed for {}: {}", @@ -72,9 +80,10 @@ abstract class ContainerUploaderBase( throw IngestException(errorCode.description, isPermanent = true) } - // Check size limit if not ignored + // Check size limit if not ignored (check original size before compression) val availableSize = - withContext(Dispatchers.IO) { stream.available() }.toLong() + withContext(Dispatchers.IO) { originalStream.available() } + .toLong() if (!ignoreSizeLimit && availableSize > 0) { if (availableSize > maxDataSize) { logger.error( @@ -101,15 +110,146 @@ abstract class ContainerUploaderBase( ) } + // Compress stream if needed (for non-binary, non-compressed formats) + val (uploadStream, effectiveCompressionType, compressionJob) = + if (local.shouldCompress) { + logger.debug( + "Auto-compressing stream for {} (format: {}, original compression: {})", + name, + local.format, + local.compressionType, + ) + val compressResult = compressStreamWithPipe(originalStream) + logger.debug( + "Compression started for {} using streaming approach (original={} bytes)", + name, + availableSize, + ) + Triple( + compressResult.stream, + CompressionType.GZIP, + compressResult.compressionJob, + ) + } else { + Triple(originalStream, local.compressionType, null) + } + // Upload with retry policy and container cycling - return uploadWithRetries( - local = local, - name = name, - stream = stream, - containers = containers, - ) + return try { + uploadWithRetries( + local = local, + name = name, + stream = uploadStream, + containers = containers, + effectiveCompressionType = effectiveCompressionType, + ) + .also { + // Ensure compression job completes successfully + compressionJob?.await() + logger.debug( + "Compression job completed successfully for {}", + name, + ) + } + } catch (e: Exception) { + // Cancel compression job if upload fails + compressionJob?.cancel() + throw e + } } + /** + * Compresses the input stream using GZIP compression with streaming + * approach. Uses piped streams to avoid loading entire file into memory. + * + * This creates a background coroutine that reads from the input stream, + * compresses the data, and writes to a pipe. The returned InputStream reads + * from the other end of the pipe, allowing the uploader to stream + * compressed bytes directly into storage. 
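+     *
+     * Minimal usage sketch (illustrative; `uploadBlob` stands in for the
+     * actual upload call):
+     * ```
+     * val result = compressStreamWithPipe(local.data())
+     * uploadBlob(result.stream)      // consumes gzip bytes as they are piped
+     * result.compressionJob.await()  // rethrows CompressionException on failure
+     * ```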
+ */ + private suspend fun compressStreamWithPipe( + inputStream: InputStream, + ): CompressedStreamResult = + withContext(Dispatchers.IO) { + try { + // Create piped streams with 1MB buffer to handle backpressure + val pipeSize = STREAM_PIPE_BUFFER_SIZE_BYTES + val pipedInputStream = PipedInputStream(pipeSize) + val pipedOutputStream = PipedOutputStream(pipedInputStream) + + logger.debug( + "Starting streaming GZIP compression with pipe buffer size: {} bytes", + pipeSize, + ) + + // Start compression in background coroutine + val compressionJob = + async(Dispatchers.IO) { + try { + GZIPOutputStream( + pipedOutputStream, + STREAM_COMPRESSION_BUFFER_SIZE_BYTES, + ) + .use { gzipStream -> + inputStream.use { input -> + input.copyTo( + gzipStream, + bufferSize = + STREAM_COMPRESSION_BUFFER_SIZE_BYTES, + ) + } + } + } catch (e: IOException) { + logger.error( + "Streaming GZIP compression failed: {}", + e.message, + ) + // Close output pipe to signal error to reader + try { + pipedOutputStream.close() + } catch (_: Exception) { + // Ignore close errors during cleanup + } + throw CompressionException( + "Failed to compress stream using streaming GZIP", + e, + ) + } catch (e: OutOfMemoryError) { + logger.error( + "Streaming GZIP compression failed due to memory constraints: {}", + e.message, + ) + try { + pipedOutputStream.close() + } catch (_: Exception) { + // Ignore close errors during cleanup + } + throw CompressionException( + "Insufficient memory for streaming compression", + e, + ) + } + } + + CompressedStreamResult(pipedInputStream, compressionJob) + } catch (e: IOException) { + logger.error( + "Failed to setup compression pipes: {}", + e.message, + ) + throw CompressionException( + "Failed to initialize streaming compression", + e, + ) + } + } + + /** Helper class to hold compressed stream and its completion job */ + private data class CompressedStreamResult( + val stream: InputStream, + val compressionJob: kotlinx.coroutines.Deferred, + ) + /** * Uploads a stream with retry logic and container cycling. Randomly selects * a starting container and cycles through containers on each retry. For @@ -120,6 +260,7 @@ abstract class ContainerUploaderBase( name: String, stream: InputStream, containers: List, + effectiveCompressionType: CompressionType = local.compressionType, ): BlobSource { // Select random starting container index var containerIndex = (0 until containers.size).random() @@ -163,10 +304,11 @@ abstract class ContainerUploaderBase( ) // Return BlobSource with the uploaded blob path + // Use effective compression type (GZIP if auto-compressed) return BlobSource( blobPath = blobUrl, format = local.format, - compressionType = local.compressionType, + compressionType = effectiveCompressionType, sourceId = local.sourceId, ) .apply { blobExactSize = local.size() } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionException.kt new file mode 100644 index 000000000..b4957422d --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionException.kt @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader.compression + +/** + * Exception thrown when compression operations fail. 
+ * + * @param message The detail message + * @param cause The underlying cause of the compression failure + */ +class CompressionException(message: String, cause: Throwable? = null) : + RuntimeException(message, cause) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategy.kt new file mode 100644 index 000000000..7b7967965 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategy.kt @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader.compression + +import java.io.InputStream + +/** + * Strategy interface for stream compression. Follows the Strategy Design + * Pattern to allow different compression algorithms. + */ +interface CompressionStrategy { + /** + * Compresses the input stream and returns a compressed stream. + * + * @param input The input stream to compress + * @param estimatedSize Optional estimated size of the input for buffer + * optimization + * @return A compressed input stream ready for upload + * @throws CompressionException if compression fails + */ + suspend fun compress( + input: InputStream, + estimatedSize: Long = 0, + ): InputStream + + /** Returns the compression type identifier for this strategy. */ + val compressionType: String +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/GzipCompressionStrategy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/GzipCompressionStrategy.kt new file mode 100644 index 000000000..ea7ffd06d --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/GzipCompressionStrategy.kt @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader.compression + +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.withContext +import org.slf4j.Logger +import org.slf4j.LoggerFactory +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream +import java.io.IOException +import java.io.InputStream +import java.util.zip.GZIPOutputStream + +/** + * GZIP compression strategy implementation. + * + * Compresses input streams using GZIP compression, which is the standard + * compression format supported by Azure Data Explorer for ingestion. + * + * **Memory Considerations:** + * - For small to medium data (< 100MB), uses in-memory compression + * - Pre-allocates buffer based on estimated size for efficiency + * - Uses Dispatchers.IO for blocking compression operations + * + * **Thread Safety:** This class is thread-safe and stateless. + */ +class GzipCompressionStrategy : CompressionStrategy { + + private val logger: Logger = + LoggerFactory.getLogger(GzipCompressionStrategy::class.java) + + override val compressionType: String = "gzip" + + companion object { + /** + * Default buffer size when estimated size is unknown. 64KB is a good + * balance between memory usage and performance. + */ + private const val DEFAULT_BUFFER_SIZE = 64 * 1024 + + /** + * Maximum initial buffer size to prevent excessive memory allocation. + * Even for large files, we cap at 10MB initial allocation. 
+ */ + private const val MAX_INITIAL_BUFFER_SIZE = 10 * 1024 * 1024 + } + + /** + * Compresses the input stream using GZIP compression. + * + * @param input The input stream to compress + * @param estimatedSize Estimated size of the input for buffer optimization. + * If 0 or unknown, uses a default buffer size. + * @return A ByteArrayInputStream containing the compressed data + * @throws CompressionException if compression fails + */ + override suspend fun compress( + input: InputStream, + estimatedSize: Long, + ): InputStream = + withContext(Dispatchers.IO) { + try { + val initialBufferSize = + calculateInitialBufferSize(estimatedSize) + logger.debug( + "Starting GZIP compression with initial buffer size: {} bytes (estimated input: {} bytes)", + initialBufferSize, + estimatedSize, + ) + + val startTime = System.currentTimeMillis() + val byteArrayOutputStream = + ByteArrayOutputStream(initialBufferSize) + + GZIPOutputStream(byteArrayOutputStream).use { gzipStream -> + input.copyTo(gzipStream) + } + + val compressedBytes = byteArrayOutputStream.toByteArray() + val compressionTime = System.currentTimeMillis() - startTime + + val compressionRatio = + if (estimatedSize > 0) { + ( + ( + 1.0 - + compressedBytes.size + .toDouble() / + estimatedSize + ) * 100 + ) + .coerceIn(0.0, 100.0) + } else { + 0.0 + } + + logger.debug( + "GZIP compression complete: {} bytes -> {} bytes ({}% reduction) in {} ms", + estimatedSize, + compressedBytes.size, + compressionRatio, + compressionTime, + ) + + ByteArrayInputStream(compressedBytes) + } catch (e: IOException) { + logger.error("GZIP compression failed: {}", e.message) + throw CompressionException( + "Failed to compress stream using GZIP", + e, + ) + } catch (e: OutOfMemoryError) { + logger.error( + "GZIP compression failed due to memory constraints: {}", + e.message, + ) + throw CompressionException( + "Insufficient memory to compress stream of estimated size: $estimatedSize bytes", + e, + ) + } + } + + /** + * Calculates optimal initial buffer size based on estimated input size. + * + * For GZIP compression, typical compression ratios are: + * - JSON/CSV: 70-90% reduction + * - Text data: 60-80% reduction + * + * We estimate compressed size as ~30% of original to avoid excessive buffer + * resizing while not over-allocating. + */ + private fun calculateInitialBufferSize(estimatedSize: Long): Int { + return when { + estimatedSize <= 0 -> DEFAULT_BUFFER_SIZE + estimatedSize < DEFAULT_BUFFER_SIZE -> DEFAULT_BUFFER_SIZE + else -> { + // Estimate compressed size as 30% of original, capped at MAX_INITIAL_BUFFER_SIZE + val estimatedCompressedSize = (estimatedSize * 0.3).toLong() + minOf(estimatedCompressedSize, MAX_INITIAL_BUFFER_SIZE.toLong()) + .toInt() + } + } + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/NoCompressionStrategy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/NoCompressionStrategy.kt new file mode 100644 index 000000000..a918126a3 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/NoCompressionStrategy.kt @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader.compression + +import java.io.InputStream + +/** + * No-op compression strategy that passes through data unchanged. 
+ *
+ * Used for:
+ * - Binary formats (Parquet, Avro, ORC) that have built-in compression
+ * - Already compressed data (GZIP, ZIP)
+ * - Cases where compression is explicitly disabled
+ *
+ * **Thread Safety:** This class is thread-safe and stateless.
+ */
+class NoCompressionStrategy : CompressionStrategy {
+
+    override val compressionType: String = "none"
+
+    companion object {
+        /** Singleton instance for reuse (class is stateless). */
+        val INSTANCE: NoCompressionStrategy by lazy { NoCompressionStrategy() }
+    }
+
+    /**
+     * Returns the input stream unchanged.
+     *
+     * @param input The input stream (returned as-is)
+     * @param estimatedSize Ignored for no-op compression
+     * @return The same input stream unchanged
+     */
+    override suspend fun compress(
+        input: InputStream,
+        estimatedSize: Long,
+    ): InputStream = input
+}
diff --git a/ingest-v2/src/main/resources/app.properties b/ingest-v2/src/main/resources/app.properties
index e5683df88..b298ea1a5 100644
--- a/ingest-v2/src/main/resources/app.properties
+++ b/ingest-v2/src/main/resources/app.properties
@@ -1 +1 @@
-version=${project.version}
\ No newline at end of file
+version=${ingest-v2.revision}
diff --git a/ingest-v2/src/main/resources/openapi.yaml b/ingest-v2/src/main/resources/openapi.yaml
index d3b73853a..d94464760 100644
--- a/ingest-v2/src/main/resources/openapi.yaml
+++ b/ingest-v2/src/main/resources/openapi.yaml
@@ -134,7 +134,7 @@ paths:
         required: true
         schema:
           type: string
-        example: "gzip"
+        example: "kusto.kusto.windows.net"
       - name: Accept-Encoding
         in: header
         required: false
diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java
new file mode 100644
index 000000000..348c02d24
--- /dev/null
+++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java
@@ -0,0 +1,194 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package com.microsoft.azure.kusto.ingest.v2;
+
+import com.azure.core.credential.TokenCredential;
+import com.azure.identity.AzureCliCredentialBuilder;
+import com.microsoft.azure.kusto.data.Client;
+import com.microsoft.azure.kusto.data.ClientFactory;
+import com.microsoft.azure.kusto.data.KustoResultSetTable;
+import com.microsoft.azure.kusto.data.auth.ConnectionStringBuilder;
+import org.awaitility.Awaitility;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.UUID;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Base class for Java regression tests for ingest-v2.
+ * Provides common setup and utilities
+ */
+public abstract class IngestV2JavaTestBase {
+
+    protected final Logger logger;
+    protected final TokenCredential tokenProvider;
+    protected final String database;
+    protected final String dmEndpoint;
+    protected final String engineEndpoint;
+    protected final String targetTable;
+    protected final Map<String, String> columnNamesToTypes;
+    protected Client adminClusterClient;
+
+    public IngestV2JavaTestBase(Class<?> testClass) {
+        this.logger = LoggerFactory.getLogger(testClass);
+        this.tokenProvider = new AzureCliCredentialBuilder().build();
+
+        // Get configuration from environment variables
+        this.database = System.getenv("TEST_DATABASE") != null
+                ? System.getenv("TEST_DATABASE")
+                : "e2e";
+
+        this.dmEndpoint = System.getenv("DM_CONNECTION_STRING");
+        if (this.dmEndpoint == null) {
+            throw new IllegalArgumentException("DM_CONNECTION_STRING environment variable is not set");
+        }
+
+        this.engineEndpoint = dmEndpoint.replace("https://ingest-", "https://");
+
+        // Generate unique table name for this test run
+        this.targetTable = "V2_Java_Tests_Sensor_"
+                + UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+        // Define table schema
+        this.columnNamesToTypes = new LinkedHashMap<>();
+        columnNamesToTypes.put("timestamp", "datetime");
+        columnNamesToTypes.put("deviceId", "guid");
+        columnNamesToTypes.put("messageId", "guid");
+        columnNamesToTypes.put("temperature", "real");
+        columnNamesToTypes.put("humidity", "real");
+        columnNamesToTypes.put("format", "string");
+        columnNamesToTypes.put("SourceLocation", "string");
+        columnNamesToTypes.put("Type", "string");
+    }
+
+    @BeforeEach
+    public void createTables() throws Exception {
+        // Build create table script
+        StringBuilder columnsBuilder = new StringBuilder();
+        boolean first = true;
+        for (Map.Entry<String, String> entry : columnNamesToTypes.entrySet()) {
+            if (!first) {
+                columnsBuilder.append(",");
+            }
+            columnsBuilder.append("['").append(entry.getKey()).append("']:").append(entry.getValue());
+            first = false;
+        }
+
+        String createTableScript = String.format(
+                ".create-merge table %s (%s)",
+                targetTable,
+                columnsBuilder.toString()
+        );
+
+        // Build mapping reference script
+        StringBuilder mappingBuilder = new StringBuilder();
+        mappingBuilder.append(".create-or-alter table ").append(targetTable)
+                .append(" ingestion json mapping '").append(targetTable).append("_mapping' ```[\n");
+
+        first = true;
+        for (String col : columnNamesToTypes.keySet()) {
+            if (!first) {
+                mappingBuilder.append(",\n");
+            }
+
+            if (col.equals("SourceLocation")) {
+                mappingBuilder.append(" {\"column\":\"").append(col)
+                        .append("\", \"Properties\":{\"Transform\":\"SourceLocation\"}}");
+            } else if (col.equals("Type")) {
+                mappingBuilder.append(" {\"column\":\"").append(col)
+                        .append("\", \"Properties\":{\"ConstValue\":\"MappingRef\"}}");
+            } else {
+                mappingBuilder.append(" {\"column\":\"").append(col)
+                        .append("\", \"Properties\":{\"Path\":\"$.").append(col).append("\"}}");
+            }
+            first = false;
+        }
+        mappingBuilder.append("\n]```");
+
+        // Create admin client
+        adminClusterClient = ClientFactory.createClient(
+                ConnectionStringBuilder.createWithAzureCli(engineEndpoint)
+        );
+
+        // Execute table creation and mapping
+        adminClusterClient.executeMgmt(database, createTableScript);
+        adminClusterClient.executeMgmt(database, mappingBuilder.toString());
+        clearDatabaseSchemaCache();
+
+        logger.info("Created table: {}", targetTable);
+    }
+
+    protected void alterTableToEnableStreaming() throws Exception {
+        adminClusterClient.executeMgmt(
+                database,
+                String.format(".alter table %s policy streamingingestion enable", targetTable)
+        );
+        logger.info("Enabled streaming ingestion for table: {}", targetTable);
+    }
+
+    protected void clearDatabaseSchemaCache() throws Exception {
+        adminClusterClient.executeMgmt(
+                database,
+                ".clear database cache streamingingestion schema"
+        );
+    }
+
+    @AfterEach
+    public void dropTables() throws Exception {
+        String dropTableScript = String.format(".drop table %s ifexists", targetTable);
+        logger.info("Dropping table {}", targetTable);
+        adminClusterClient.executeMgmt(database, dropTableScript);
+    }
+
+    /**
+     * Wait for data to appear in the table and verify the expected count.
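+     * Polls via Awaitility for up to 3 minutes (every 5 seconds) and asserts
+     * that the given count column reaches at least the expected value, e.g.
+     * {@code awaitAndQuery(targetTable + " | summarize count=count()", 1)}.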
+ */ + protected void awaitAndQuery( + String query, + String queryColumnName, + long expectedResultsCount, + boolean isManagementQuery + ) { + Awaitility.await() + .atMost(Duration.of(3, ChronoUnit.MINUTES)) + .pollInterval(Duration.of(5, ChronoUnit.SECONDS)) + .ignoreExceptions() + .untilAsserted(() -> { + KustoResultSetTable results = isManagementQuery + ? adminClusterClient.executeMgmt(database, query).getPrimaryResults() + : adminClusterClient.executeQuery(database, query).getPrimaryResults(); + + assertTrue(results.next(), "Query should return results"); + long actualResultCount = results.getLong(queryColumnName); + + logger.trace( + "For query {}, Current result count: {}, waiting for {}", + query, + actualResultCount, + expectedResultsCount + ); + + assertNotNull(results, "Query results should not be null"); + assertTrue( + actualResultCount >= expectedResultsCount, + String.format("Expected %d counts, got %d", expectedResultsCount, actualResultCount) + ); + }); + } + + /** + * Overloaded version with default column name and management query flag. + */ + protected void awaitAndQuery(String query, long expectedResultsCount) { + awaitAndQuery(query, "count", expectedResultsCount, false); + } +} diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java new file mode 100644 index 000000000..08d215df9 --- /dev/null +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java @@ -0,0 +1,238 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package com.microsoft.azure.kusto.ingest.v2; + +import com.microsoft.azure.kusto.ingest.v2.builders.ManagedStreamingIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.ManagedStreamingIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.FileSource; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Java regression test for ManagedStreamingIngestClient. + * Tests basic managed streaming ingestion functionality from Java to ensure + * compatibility with Kotlin-based implementation. + */ +@Execution(ExecutionMode.CONCURRENT) +public class ManagedStreamingIngestClientJavaTest extends IngestV2JavaTestBase { + public ManagedStreamingIngestClientJavaTest() { + super(ManagedStreamingIngestClientJavaTest.class); + } + /** + * Test basic managed streaming ingestion from Java with small data. + * Small data should use streaming ingestion for low latency. 
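+     * (The exact size cutoff is internal to the managed client, so the
+     * assertion below accepts either STREAMING or QUEUED.)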
+ * Verifies that: + * - Client can be created using builder pattern from Java + * - Small data triggers streaming ingestion + * - Data appears in the table after ingestion + */ + @Test + public void testManagedStreamingIngestSmallData() throws Exception { + logger.info("Running Java managed streaming ingest (small data) regression test"); + + // Enable streaming ingestion on the table + alterTableToEnableStreaming(); + + // Create managed streaming client + + try (ManagedStreamingIngestClient client = ManagedStreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .build()) { + // Prepare small JSON data (should use streaming) + String jsonData = "{\"timestamp\":\"2024-01-01T00:00:00Z\",\"deviceId\":\"00000000-0000-0000-0000-000000000001\",\"messageId\":\"00000000-0000-0000-0000-000000000002\",\"temperature\":25.5,\"humidity\":60.0,\"format\":\"json\"}"; + InputStream dataStream = new ByteArrayInputStream(jsonData.getBytes(StandardCharsets.UTF_8)); + + StreamSource source = new StreamSource( + dataStream, + CompressionType.NONE, + Format.json, + UUID.randomUUID(), + "java-managed-streaming-small", + false + ); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, targetTable) + .withIngestionMappingReference(targetTable + "_mapping") + .withEnableTracking(true) + .build(); + + // Ingest data (should use streaming for small data) + logger.info("Ingesting small data via managed streaming..."); + ExtendedIngestResponse response = client.ingestAsync(source, properties).get(); + + assertNotNull(response, "Response should not be null"); + assertNotNull(response.getIngestResponse().getIngestionOperationId(), + "Operation ID should not be null"); + + // Verify it used streaming ingestion + IngestKind ingestionType = response.getIngestionType(); + logger.info("Ingest completed using {} method. Operation ID: {}", + ingestionType, response.getIngestResponse().getIngestionOperationId()); + + // Small data typically uses streaming, but fallback to queued is acceptable + assertTrue( + ingestionType == IngestKind.STREAMING || ingestionType == IngestKind.QUEUED, + "Ingestion type should be either STREAMING or QUEUED" + ); + + // Verify data appeared in table + String query = String.format("%s | summarize count=count()", targetTable); + awaitAndQuery(query, 1); + + logger.info("Java managed streaming ingest (small data) regression test PASSED"); + + } + } + + /** + * Test managed streaming with larger data that should trigger queued fallback. 
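+     * The test writes 100 newline-delimited JSON records; whether that
+     * crosses the streaming threshold is client-internal, so both outcomes
+     * are accepted.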
+ * Verifies that: + * - Larger data automatically falls back to queued ingestion + * - Fallback mechanism works correctly from Java + */ + @Test + public void testManagedStreamingIngestWithFallback() throws Exception { + logger.info("Running Java managed streaming ingest with fallback test"); + + alterTableToEnableStreaming(); + + try (ManagedStreamingIngestClient client = ManagedStreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .build()) { + // Generate larger data (multiple records to increase size) + StringBuilder largeDataBuilder = new StringBuilder(); + for (int i = 0; i < 100; i++) { + largeDataBuilder.append(String.format( + "{\"timestamp\":\"2024-01-01T%02d:00:00Z\",\"deviceId\":\"00000000-0000-0000-0000-00000000%04d\",\"messageId\":\"%s\",\"temperature\":%.1f,\"humidity\":%.1f,\"format\":\"json\"}\n", + i % 24, i, UUID.randomUUID(), 20.0 + (i % 20), 50.0 + (i % 30) + )); + } + String largeData = largeDataBuilder.toString(); + + InputStream dataStream = new ByteArrayInputStream(largeData.getBytes(StandardCharsets.UTF_8)); + dataStream.mark(largeData.length()); // Mark for potential retry + + StreamSource source = new StreamSource( + dataStream, + CompressionType.NONE, + Format.multijson, + UUID.randomUUID(), + "java-managed-streaming-fallback", + false + ); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, targetTable) + .withIngestionMappingReference(targetTable + "_mapping") + .withEnableTracking(true) + .build(); + + logger.info("Ingesting larger data via managed streaming (may trigger fallback)..."); + ExtendedIngestResponse response = client.ingestAsync(source, properties).get(); + + assertNotNull(response, "Response should not be null"); + + IngestKind ingestionType = response.getIngestionType(); + logger.info("Ingestion completed using {} method. Operation ID: {}", + ingestionType, response.getIngestResponse().getIngestionOperationId()); + + // Both streaming and queued are valid outcomes + assertTrue( + ingestionType == IngestKind.STREAMING || ingestionType == IngestKind.QUEUED, + "Ingestion type should be either STREAMING or QUEUED" + ); + + if (ingestionType == IngestKind.QUEUED) { + logger.info("Fallback to QUEUED ingestion triggered (expected for larger data)"); + } else { + logger.info("Data ingested via STREAMING (compression may have kept size small)"); + } + + String query = String.format("%s | summarize count=count()", targetTable); + awaitAndQuery(query, 100); + + logger.info("Java managed streaming fallback test PASSED"); + + } + } + + /** + * Test managed streaming with file source from Java. + * Verifies that file-based ingestion works correctly with managed streaming. 
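+     * The sample file is resolved from either the module directory or the
+     * repository root, and the test is skipped with a warning if absent.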
+ */ + @Test + public void testManagedStreamingIngestFromFileSource() throws Exception { + logger.info("Running Java managed streaming ingest from file source test"); + + alterTableToEnableStreaming(); + + try (ManagedStreamingIngestClient client = ManagedStreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .build()) { + // Use test resource file if available - check both module dir and root dir paths + String resourcePath = "src/test/resources/compression/sample.multijson"; + Path filePath = Paths.get(resourcePath); + + // If not found in module directory, try from root directory + if (!Files.exists(filePath)) { + resourcePath = "ingest-v2/src/test/resources/compression/sample.multijson"; + filePath = Paths.get(resourcePath); + } + + if (!Files.exists(filePath)) { + logger.warn("Test file not found at either location, skipping file source test"); + return; + } + + FileSource fileSource = + new FileSource( + filePath, + Format.multijson, + UUID.randomUUID(), + CompressionType.NONE + ); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, targetTable) + .withEnableTracking(true) + .build(); + + logger.info("Ingesting file via managed streaming..."); + ExtendedIngestResponse response = client.ingestAsync(fileSource, properties).get(); + + assertNotNull(response, "Response should not be null"); + assertNotNull(response.getIngestResponse().getIngestionOperationId(), "Operation ID should not be null"); + + IngestKind ingestionType = response.getIngestionType(); + logger.info("File ingestion completed using {} method. Operation ID: {}", + ingestionType, response.getIngestResponse().getIngestionOperationId()); + + String query = String.format("%s | summarize count=count()", targetTable); + awaitAndQuery(query, 1); + + logger.info("Java managed streaming file ingest test PASSED"); + + } + } +} diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java new file mode 100644 index 000000000..83b79b1e3 --- /dev/null +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java @@ -0,0 +1,191 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package com.microsoft.azure.kusto.ingest.v2; + +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.IngestionOperation; +import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.Objects; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * These are tests to ensure that Kotlin specific operators and constructs do not break Java compatibility. + * Java regression test for QueuedIngestClient. + * Tests basic queued ingestion functionality from Java to ensure + * compatibility with Kotlin-based implementation. + */ +@Execution(ExecutionMode.CONCURRENT) +public class QueuedIngestClientJavaTest extends IngestV2JavaTestBase { + + public QueuedIngestClientJavaTest() { + super(QueuedIngestClientJavaTest.class); + } + + /** + * Test basic queued ingestion from Java. + * Verifies that: + * - Client can be created using builder pattern from Java + * - Simple data can be queued for ingestion + * - Operation can be tracked + * - Data appears in the table after processing + */ + @Test + public void testBasicQueuedIngest() throws Exception { + logger.info("Running Java queued ingest regression test"); + + // Create queued client + + try (QueuedIngestClient client = QueuedIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .withMaxConcurrency(10) + .build()) { + // Prepare simple JSON data + String jsonData = "{\"timestamp\":\"2024-01-01T00:00:00Z\",\"deviceId\":\"00000000-0000-0000-0000-000000000001\",\"messageId\":\"00000000-0000-0000-0000-000000000002\",\"temperature\":25.5,\"humidity\":60.0,\"format\":\"json\"}"; + InputStream dataStream = new ByteArrayInputStream(jsonData.getBytes(StandardCharsets.UTF_8)); + + StreamSource source = new StreamSource( + dataStream, + CompressionType.NONE, + Format.json, + UUID.randomUUID(), + "java-queued-test", + false + ); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, targetTable) + .withIngestionMappingReference(targetTable + "_mapping") + .withEnableTracking(true) + .build(); + + // Queue data for ingestion + logger.info("Queueing data for ingestion..."); + ExtendedIngestResponse response = client.ingestAsync(source, properties).get(); + + assertNotNull(response, "Response should not be null"); + assertNotNull(response.getIngestResponse().getIngestionOperationId(), + "Operation ID should not be null"); + + logger.info("Data queued. 
Operation ID: {}", + response.getIngestResponse().getIngestionOperationId()); + + // Track the operation + IngestionOperation operation = new IngestionOperation( + response.getIngestResponse().getIngestionOperationId(), + database, + targetTable, + response.getIngestionType() + ); + + // Get initial status + StatusResponse initialStatus = client.getOperationDetailsAsync(operation).get(); + assertNotNull(initialStatus, "Initial status should not be null"); + logger.info("Initial status retrieved"); + + // Poll for completion + logger.info("Polling for completion..."); + StatusResponse finalStatus = client.pollForCompletion( + operation, + Duration.ofSeconds(30), + Duration.ofMinutes(2) + ).get(); + + assertNotNull(finalStatus, "Final status should not be null"); + assertNotNull(finalStatus.getStatus(), "Final status summary should not be null"); + assertEquals(0, finalStatus.getStatus().getFailed(), "Ingestion should not record failures"); + assertTrue(finalStatus.getStatus().getSucceeded()!=null && finalStatus.getStatus().getSucceeded() >= 1, "At least one ingestion should succeed"); + logger.info("Polling completed"); + + // Verify data appeared in table + String query = String.format("%s | summarize count=count()", targetTable); + awaitAndQuery(query, 1); + + logger.info("Java queued ingest regression test PASSED"); + + } + } + + /** + * Test queued ingestion with file source from Java. + * Verifies that file-based ingestion works correctly. + */ + @Test + public void testQueuedIngestFromFileSource() throws Exception { + logger.info("Running Java queued ingest from file source test"); + + try (QueuedIngestClient client = QueuedIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .withMaxConcurrency(10) + .build()) { + // Use test resource file if available + String resourcePath = "src/test/resources/compression/sample.multijson"; + java.nio.file.Path filePath = java.nio.file.Paths.get(resourcePath); + + if (!java.nio.file.Files.exists(filePath)) { + logger.warn("Test file not found: {}, skipping file source test", resourcePath); + return; + } + + com.microsoft.azure.kusto.ingest.v2.source.FileSource fileSource = + new com.microsoft.azure.kusto.ingest.v2.source.FileSource( + filePath, + Format.multijson, + UUID.randomUUID(), + CompressionType.NONE + ); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, targetTable) + .withEnableTracking(true) + .build(); + + logger.info("Queueing file for ingestion..."); + ExtendedIngestResponse response = client.ingestAsync(fileSource, properties).get(); + + assertNotNull(response, "Response should not be null"); + logger.info("File queued. 
Operation ID: {}", + response.getIngestResponse().getIngestionOperationId()); + + // Track operation + IngestionOperation operation = new IngestionOperation( + Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), + database, + targetTable, + response.getIngestionType() + ); + + // Poll for completion + StatusResponse fileFinalStatus = client.pollForCompletion( + operation, + Duration.ofSeconds(30), + Duration.ofMinutes(2) + ).get(); + + assertNotNull(fileFinalStatus, "File ingestion final status should not be null"); + assertNotNull(fileFinalStatus.getStatus(), "File ingestion summary should not be null"); + assertEquals(0, fileFinalStatus.getStatus().getFailed(), "File ingestion should not record failures"); + assertTrue(fileFinalStatus.getStatus().getSucceeded()!=null && fileFinalStatus.getStatus().getSucceeded() >= 1, "File ingestion should report successes"); + + logger.info("Java queued file ingest test PASSED"); + + } + } + } diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java new file mode 100644 index 000000000..39782ca0b --- /dev/null +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package com.microsoft.azure.kusto.ingest.v2; + +import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.StreamingIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.UUID; +import java.util.concurrent.ExecutionException; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +/** + * Java regression test for StreamingIngestClient. + * Tests basic streaming ingestion functionality from Java to ensure + * compatibility with Kotlin-based implementation. + */ +@Execution(ExecutionMode.CONCURRENT) +public class StreamingIngestClientJavaTest extends IngestV2JavaTestBase { + + public StreamingIngestClientJavaTest() { + super(StreamingIngestClientJavaTest.class); + } + + /** + * Test basic streaming ingestion from Java. 
+     * Verifies that:
+     * - Client can be created using builder pattern from Java
+     * - Simple JSON data can be ingested via streaming
+     * - Data appears in the table after ingestion
+     */
+    @Test
+    public void testBasicStreamingIngest() throws Exception {
+        logger.info("Running Java streaming ingest regression test");
+
+        // Enable streaming ingestion on the table
+        alterTableToEnableStreaming();
+
+        // Create streaming client
+
+        try (StreamingIngestClient client = StreamingIngestClientBuilder.create(engineEndpoint)
+                .withAuthentication(tokenProvider)
+                .skipSecurityChecks()
+                .withClientDetails("JavaStreamingRegressionTest", "1.0", null)
+                .build()) {
+            // Prepare simple JSON data
+            String jsonData = "{\"timestamp\":\"2024-01-01T00:00:00Z\",\"deviceId\":\"00000000-0000-0000-0000-000000000001\",\"messageId\":\"00000000-0000-0000-0000-000000000002\",\"temperature\":25.5,\"humidity\":60.0,\"format\":\"json\"}";
+            InputStream dataStream = new ByteArrayInputStream(jsonData.getBytes(StandardCharsets.UTF_8));
+
+            StreamSource source = new StreamSource(
+                    dataStream,
+                    CompressionType.NONE,
+                    Format.json,
+                    UUID.randomUUID(),
+                    "java-streaming-test",
+                    false
+            );
+
+            IngestRequestProperties properties = IngestRequestPropertiesBuilder
+                    .create(database, targetTable)
+                    .withIngestionMappingReference(targetTable + "_mapping")
+                    .build();
+
+            // Ingest data
+            logger.info("Ingesting data via streaming...");
+            ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+
+            assertNotNull(response, "Response should not be null");
+            assertNotNull(response.getIngestResponse().getIngestionOperationId(),
+                    "Operation ID should not be null");
+
+            logger.info("Streaming ingestion completed. Operation ID: {}",
+                    response.getIngestResponse().getIngestionOperationId());
+
+            // Verify data appeared in table
+            String query = String.format("%s | summarize count=count()", targetTable);
+            awaitAndQuery(query, 1);
+
+            logger.info("Java streaming ingest regression test PASSED");
+
+        }
+    }
+
+    /**
+     * Test streaming ingestion with compressed data from Java.
+     * Verifies that compression handling works correctly from Java.
+     */
+    @Test
+    public void testStreamingIngestWithCompression() throws Exception {
+        logger.info("Running Java streaming ingest with compression test");
+
+        alterTableToEnableStreaming();
+
+        try (StreamingIngestClient client = StreamingIngestClientBuilder.create(engineEndpoint)
+                .withAuthentication(tokenProvider)
+                .skipSecurityChecks()
+                .withClientDetails("JavaStreamingCompressionTest", "1.0", null)
+                .build()) {
+            // Use test resource file
+            String resourcePath = "src/test/resources/compression/sample.multijson.gz";
+            java.nio.file.Path filePath = java.nio.file.Paths.get(resourcePath);
+
+            if (!java.nio.file.Files.exists(filePath)) {
+                logger.warn("Test file not found: {}, skipping compression test", resourcePath);
+                return;
+            }
+
+            InputStream fileStream = java.nio.file.Files.newInputStream(filePath);
+
+            StreamSource source = new StreamSource(
+                    fileStream,
+                    CompressionType.GZIP,
+                    Format.multijson,
+                    UUID.randomUUID(),
+                    "java-compressed-stream-test",
+                    false
+            );
+
+            IngestRequestProperties properties = IngestRequestPropertiesBuilder
+                    .create(database, targetTable)
+                    .build();
+
+            logger.info("Ingesting compressed data...");
+            ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+
+            assertNotNull(response, "Response should not be null");
+            logger.info("Compressed streaming ingestion completed.
Operation ID: {}", + response.getIngestResponse().getIngestionOperationId()); + + fileStream.close(); + + logger.info("Java streaming compressed ingest test PASSED"); + + } catch (ExecutionException e) { + logger.error("Ingestion failed", e); + throw e; + } + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt index 9a46de1a7..bc1c129c7 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt @@ -81,6 +81,18 @@ abstract class IngestV2TestBase(testClass: Class<*>) { adminClusterClient.executeMgmt(database, createTableScript) adminClusterClient.executeMgmt(database, mappingReference) clearDatabaseSchemaCache() + + // Allow subclasses to perform additional setup + additionalSetup() + } + + /** + * Hook method for subclasses to perform additional setup after table + * creation. By default, does nothing. Streaming test classes can override + * to enable streaming policy. + */ + protected open fun additionalSetup() { + // Default: no additional setup } protected fun alterTableToEnableStreaming() { @@ -111,7 +123,7 @@ abstract class IngestV2TestBase(testClass: Class<*>) { isManagementQuery: Boolean = false, ) { Awaitility.await() - .atMost(Duration.of(2, ChronoUnit.MINUTES)) + .atMost(Duration.of(3, ChronoUnit.MINUTES)) .pollInterval(Duration.of(5, ChronoUnit.SECONDS)) .ignoreExceptions() .untilAsserted { diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt index b128e036e..1285239b3 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt @@ -8,7 +8,6 @@ import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus import com.microsoft.azure.kusto.ingest.v2.models.Format -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import com.microsoft.azure.kusto.ingest.v2.source.BlobSource import com.microsoft.azure.kusto.ingest.v2.source.CompressionType import com.microsoft.azure.kusto.ingest.v2.source.StreamSource @@ -20,12 +19,12 @@ import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource import java.io.ByteArrayInputStream import java.net.ConnectException +import java.time.Duration import java.util.* import kotlin.test.DefaultAsserter.assertNotNull import kotlin.test.Test import kotlin.test.assertEquals import kotlin.test.assertNotNull -import kotlin.time.Duration /** * End-to-end tests for ManagedStreamingIngestClient. 
@@ -67,7 +66,6 @@ class ManagedStreamingIngestClientTest : targetFormat: String, ): Unit = runBlocking { logger.info("Starting test: $testName") - val testSources = BlobSource(blobUrl) val format = when (targetFormat.lowercase()) { "json" -> Format.json @@ -77,16 +75,15 @@ class ManagedStreamingIngestClientTest : "Unsupported format: $targetFormat", ) } + val testSources = BlobSource(blobUrl, format = format) val ingestRequestProperties = - IngestRequestPropertiesBuilder(format) + IngestRequestPropertiesBuilder.create(database, targetTable) .withEnableTracking(true) .build() try { // Ingest data - should attempt streaming first val ingestionResponse = managedClient.ingestAsync( - database = database, - table = targetTable, source = testSources, ingestRequestProperties = ingestRequestProperties, ) @@ -199,16 +196,13 @@ class ManagedStreamingIngestClientTest : ) val properties = - IngestRequestProperties( - format = targetTestFormat, - enableTracking = true, - ) + IngestRequestPropertiesBuilder.create(database, targetTable) + .withEnableTracking(true) + .build() try { val ingestionResponse = customManagedClient.ingestAsync( - database = database, - table = targetTable, source = source, ingestRequestProperties = properties, ) @@ -277,15 +271,14 @@ class ManagedStreamingIngestClientTest : val testSource = BlobSource( "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", + format = Format.multijson, ) val ingestRequestProperties = - IngestRequestPropertiesBuilder(Format.multijson) + IngestRequestPropertiesBuilder.create(database, targetTable) .withEnableTracking(true) .build() val ingestionResponse = customManagedClient.ingestAsync( - database = database, - table = targetTable, source = testSource, ingestRequestProperties = ingestRequestProperties, ) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt index c5576fb97..956e56e93 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt @@ -39,12 +39,12 @@ import java.net.ConnectException import java.nio.file.Files import java.nio.file.StandardCopyOption import java.time.Clock +import java.time.Duration import java.time.Instant import java.time.OffsetDateTime import java.time.temporal.ChronoUnit import java.util.UUID import kotlin.test.assertNotNull -import kotlin.time.Duration @Execution(ExecutionMode.CONCURRENT) class QueuedIngestClientTest : @@ -150,11 +150,11 @@ class QueuedIngestClientTest : ): Unit = runBlocking { logger.info("Starting test: $testName") val ingestClient = createTestClient() - val testSources = listOf(BlobSource(blobUrl)) + val testSources = listOf(BlobSource(blobUrl, format = Format.json)) val properties = if (useMappingReference) { - IngestRequestPropertiesBuilder(format = targetTestFormat) + IngestRequestPropertiesBuilder.create(database, targetTable) .withIngestionMappingReference( "${targetTable}_mapping", ) @@ -210,12 +210,12 @@ class QueuedIngestClientTest : jsonPrinter.encodeToString( inlineIngestionMappingInline.columnMappings, ) - IngestRequestPropertiesBuilder(format = targetTestFormat) + IngestRequestPropertiesBuilder.create(database, targetTable) .withIngestionMapping(ingestionMappingString) .withEnableTracking(true) .build() } else { - IngestRequestPropertiesBuilder(format = targetTestFormat) + 
IngestRequestPropertiesBuilder.create(database, targetTable) .withEnableTracking(true) .build() } @@ -224,8 +224,6 @@ class QueuedIngestClientTest : val ingestionResponse = ingestClient.ingestAsync( sources = testSources, - database = database, - table = targetTable, ingestRequestProperties = properties, ) @@ -345,11 +343,10 @@ class QueuedIngestClientTest : val smallResponse = queuedIngestClient.ingestAsync( source = smallSource, - database = database, - table = targetTable, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = Format.multijson, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .build(), @@ -385,12 +382,11 @@ class QueuedIngestClientTest : ) val largeResponse = queuedIngestClient.ingestAsync( - database = database, - table = targetTable, source = largeSource, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = Format.multijson, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .build(), @@ -428,12 +424,11 @@ class QueuedIngestClientTest : } val batchResponse = queuedIngestClient.ingestAsync( - database = database, - table = targetTable, sources = batchSources, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = Format.multijson, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .build(), @@ -485,12 +480,11 @@ class QueuedIngestClientTest : val startTime = System.currentTimeMillis() val response = queuedIngestClient.ingestAsync( - database = database, - table = targetTable, sources = sources, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = Format.multijson, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .build(), @@ -606,11 +600,10 @@ class QueuedIngestClientTest : val response = queuedIngestClient.ingestAsync( sources = listOf(source), - database = database, - table = targetTable, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = format, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .withIngestByTags( @@ -765,12 +758,11 @@ test2,456,2024-01-02""" val exception = assertThrows { client.ingestAsync( - database = database, - table = targetTable, sources = sources, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = Format.json, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .build(), @@ -837,14 +829,12 @@ test2,456,2024-01-02""" val queuedIngestClient = createTestClient() val properties = - IngestRequestPropertiesBuilder(format = targetFormat) + IngestRequestPropertiesBuilder.create(database, targetTable) .withEnableTracking(true) .build() val ingestionResponse = queuedIngestClient.ingestAsync( - database = database, - table = targetTable, sources = listOf(source), ingestRequestProperties = properties, ) @@ -918,13 +908,11 @@ test2,456,2024-01-02""" try { val response = oneLakeIngestClient.ingestAsync( - database = database, - table = targetTable, source = source, ingestRequestProperties = - IngestRequestPropertiesBuilder( - format = - Format.multijson, + IngestRequestPropertiesBuilder.create( + database, + targetTable, ) .withEnableTracking(true) .build(), diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index 76e01bf6a..26f0fe250 
100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -5,14 +5,22 @@ package com.microsoft.azure.kusto.ingest.v2 import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder import com.microsoft.azure.kusto.ingest.v2.client.IngestClient import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder +import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import com.microsoft.azure.kusto.ingest.v2.source.FileSource +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Test import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.assertThrows import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.CsvSource import org.junit.jupiter.params.provider.MethodSource +import java.io.ByteArrayInputStream +import java.nio.file.Paths import java.util.UUID import java.util.stream.Stream import kotlin.test.assertNotNull @@ -21,9 +29,14 @@ import kotlin.test.assertNotNull class StreamingIngestClientTest : IngestV2TestBase(StreamingIngestClientTest::class.java) { + override fun additionalSetup() { + // Enable streaming ingestion policy for all streaming tests + alterTableToEnableStreaming() + clearDatabaseSchemaCache() + } + private val publicBlobUrl = "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" - private val targetUuid = UUID.randomUUID().toString() private fun testParameters(): Stream { return Stream.of( @@ -69,6 +82,7 @@ class StreamingIngestClientTest : blobUrl: String?, ) = runBlocking { logger.info("Running streaming ingest builder test {}", testName) + // Create client using builder val client: IngestClient = StreamingIngestClientBuilder.create(cluster) @@ -77,7 +91,9 @@ class StreamingIngestClientTest : .withClientDetails("BuilderStreamingE2ETest", "1.0") .build() - val ingestProps = IngestRequestProperties(format = targetTestFormat) + val ingestProps = + IngestRequestPropertiesBuilder.create(database, targetTable) + .build() if (isException) { if (blobUrl != null) { logger.info( @@ -87,10 +103,9 @@ class StreamingIngestClientTest : ) val exception = assertThrows { - val ingestionSource = BlobSource(blobUrl) + val ingestionSource = + BlobSource(blobUrl, format = Format.json) client.ingestAsync( - database = database, - table = targetTable, source = ingestionSource, ingestRequestProperties = ingestProps, ) @@ -109,10 +124,8 @@ class StreamingIngestClientTest : } } else { if (blobUrl != null) { - val ingestionSource = BlobSource(blobUrl) + val ingestionSource = BlobSource(blobUrl, format = Format.json) client.ingestAsync( - database = database, - table = targetTable, source = ingestionSource, ingestRequestProperties = ingestProps, ) @@ -127,4 +140,134 @@ class StreamingIngestClientTest : } } } + + /** + * Test that error response parsing correctly extracts Kusto error details + * from OneApiError JSON format. 
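+     * An abridged example of the shape being parsed (field names follow
+     * StreamingIngestionErrorResponse; values are illustrative):
+     * ```
+     * {"error": {"code": "BadRequest_SyntaxError", "message": "...",
+     *            "@type": "...", "@message": "...", "@permanent": true}}
+     * ```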
+ * + * This validates that when streaming ingestion fails, the error message + * contains meaningful details from the Kusto error response (code, + * type, @message) rather than just a generic HTTP status code. + */ + @Test + fun `error response parsing extracts Kusto OneApiError details`() = + runBlocking { + logger.info("Testing error parsing for invalid data format") + + val client: IngestClient = + StreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .withClientDetails("ErrorParsingE2ETest", "1.0") + .build() + + val properties = + IngestRequestPropertiesBuilder.create( + database, + targetTable, + ) + .build() + + // Send invalid text data claiming to be JSON - this triggers a data format error + val invalidData = "this is not valid json { broken" + val streamSource = + StreamSource( + stream = + ByteArrayInputStream( + invalidData.toByteArray(), + ), + format = Format.json, + sourceId = UUID.randomUUID(), + sourceCompression = CompressionType.NONE, + ) + + val exception = + assertThrows { + client.ingestAsync( + source = streamSource, + ingestRequestProperties = properties, + ) + } + + // Validate exception was captured + assertNotNull( + exception, + "Exception should not be null for invalid data format", + ) + + // Log exception details for debugging + logger.info(" Exception type: ${exception::class.simpleName}") + logger.info(" Message: ${exception.message}") + logger.info(" isPermanent: ${exception.isPermanent}") + logger.info(" failureCode: ${exception.failureCode}") + + // Validate error parsing extracted meaningful details from Kusto OneApiError + assert(exception.message.isNotEmpty()) { + "Exception message should contain error details from Kusto OneApiError" + } + + // Data format errors should be marked as permanent (cannot be retried) + assert(exception.isPermanent == true) { + "Data format errors should be marked as permanent" + } + } + + @ParameterizedTest(name = "Ingest file: {0} with compression: {1}") + @CsvSource("sample.multijson,NONE", "sample.multijson.gz,GZIP") + fun `ingest from file in compressed and uncompressed formats`( + fileName: String, + compressionType: String, + ) = runBlocking { + logger.info( + "Running streaming ingest from file test: $fileName with compression: $compressionType", + ) + + // Create client using builder + val client: IngestClient = + StreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(tokenProvider) + .skipSecurityChecks() + .withClientDetails("FileStreamingE2ETest", "1.0") + .build() + + val resourcesDirectory = "src/test/resources/compression/" + + val compression = + when (compressionType) { + "NONE" -> CompressionType.NONE + "GZIP" -> CompressionType.GZIP + "ZIP" -> CompressionType.ZIP + else -> + throw IllegalArgumentException( + "Unknown compression type: $compressionType", + ) + } + + val fileSource = + FileSource( + path = Paths.get(resourcesDirectory + fileName), + format = Format.multijson, + sourceId = UUID.randomUUID(), + compressionType = compression, + ) + + val properties = + IngestRequestPropertiesBuilder.create(database, targetTable) + .withEnableTracking(true) + .build() + + val response = + client.ingestAsync( + source = fileSource, + ingestRequestProperties = properties, + ) + + assertNotNull( + response, + "File ingestion response should not be null for $fileName", + ) + logger.info( + "File ingestion completed for $fileName ($compressionType). 
Operation ID: ${response.ingestResponse.ingestionOperationId}", + ) + } } diff --git a/pom.xml b/pom.xml index 307da5599..f104a62ef 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ 8.0.0 + 0.0.1-beta UTF-8 11 1.2.37 @@ -64,11 +65,11 @@ 0.8.11 - ingest data + ingest + ingest-v2 samples quickstart - ingest-v2 diff --git a/quickstart/README.md b/quickstart/README.md index 8c3e9f039..94b03ea4c 100644 --- a/quickstart/README.md +++ b/quickstart/README.md @@ -17,6 +17,14 @@ You can use it as a baseline to write your own first kusto client application, a 1. Download the app files from this GitHub repo. 1. Modify the `kusto_sample_config.json` file, changing `KustoUri`, `IngestUri` and `DatabaseName` appropriately for your cluster. +2. The config file can be overridden using +```bash +export KUSTO_SAMPLE_CONFIG_PATH=/path/to/kusto_sample_config.json +``` + +```cmd +set KUSTO_SAMPLE_CONFIG_PATH=C:\Path\To\kusto_sample_config.json +``` ### Retrieving the app from OneClick @@ -32,6 +40,7 @@ You can use it as a baseline to write your own first kusto client application, a 1. Open a command line window and navigate to the folder where you extracted the app. 1. Run `mvn clean install` to compile the source code into a binary. 1. Run the binary using `java -jar target\kusto-quickstart-[version]-jar-with-dependencies.jar`. +1. To run the ingest-v2 path, set `useIngestV2Sample` to true in `kusto_sample_config.json` & run `java -jar target/kusto-quickstart-[version]-jar-with-dependencies.jar`. #### Troubleshooting diff --git a/quickstart/kusto_sample_config.json b/quickstart/kusto_sample_config.json index adeaf30e9..08a4c0648 100644 --- a/quickstart/kusto_sample_config.json +++ b/quickstart/kusto_sample_config.json @@ -3,7 +3,7 @@ "ingestUri": "https://ingest-help.kusto.windows.net", "databaseName": "MyDatabase", "tableName": "SampleTable", - "useExistingTable": true, + "useExistingTable": false, "alterTable": true, "queryData": true, "ingestData": true, @@ -12,6 +12,7 @@ "ignoreFirstRecord": false, "waitForIngestSeconds": 20, "batchingPolicy": "{ 'MaximumBatchingTimeSpan': '00:00:10', 'MaximumNumberOfItems': 500, 'MaximumRawDataSizeMB': 1024 }", + "useIngestV2Sample": true, "tableSchema": "(rownumber:int, rowguid:string, xdouble:real, xfloat:real, xbool:bool, xint16:int, xint32:int, xint64:long, xuint8:long, xuint16:long, xuint32:long, xuint64:long, xdate:datetime, xsmalltext:string, xtext:string, xnumberAsText:string, xtime:timespan, xtextWithNulls:string, xdynamicWithNulls:dynamic)", "data": [ { diff --git a/quickstart/pom.xml b/quickstart/pom.xml index 5c9e5913e..0baa73b4d 100644 --- a/quickstart/pom.xml +++ b/quickstart/pom.xml @@ -33,13 +33,15 @@ - 7.0.5 + 8.0.0 11 3.2.0 3.8.1 3.3.0 1.8.0-beta4 20201115 + 0.0.1-beta + 1.18.1 @@ -107,7 +109,7 @@ io.opentelemetry opentelemetry-bom - 1.31.0 + 1.57.0 pom import @@ -125,6 +127,11 @@ kusto-ingest ${revision} + + com.microsoft.azure.kusto + kusto-ingest-v2 + ${ingest-v2.revision} + org.slf4j slf4j-api @@ -162,6 +169,11 @@ opentelemetry-exporters-logging 0.9.1 + + com.azure + azure-identity + ${azure-identity.version} + diff --git a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java index 057edda16..1c73dd446 100644 --- a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java +++ b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java @@ -1,8 +1,11 @@ package com.microsoft.azure.kusto.quickstart; import 
com.azure.core.tracing.opentelemetry.OpenTelemetryTracer; +import com.azure.identity.AzureCliCredentialBuilder; +import com.azure.identity.ChainedTokenCredential; +import com.azure.identity.ChainedTokenCredentialBuilder; +import com.azure.identity.ClientSecretCredentialBuilder; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.microsoft.azure.kusto.data.Client; import com.microsoft.azure.kusto.data.ClientFactory; @@ -13,6 +16,16 @@ import com.microsoft.azure.kusto.ingest.IngestClientFactory; import com.microsoft.azure.kusto.ingest.IngestionMapping; import com.microsoft.azure.kusto.ingest.IngestionProperties; +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.IngestionOperation; +import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.*; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.FileSource; +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.exporters.logging.LoggingSpanExporter; import io.opentelemetry.sdk.OpenTelemetrySdk; @@ -22,11 +35,20 @@ import io.opentelemetry.semconv.ResourceAttributes; import org.jetbrains.annotations.NotNull; -import java.io.File; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.net.URISyntaxException; -import java.util.List; -import java.util.Scanner; -import java.util.UUID; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +import static com.fasterxml.jackson.databind.MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS; /** * SourceType - represents the type of files used for ingestion @@ -49,7 +71,6 @@ public static SourceType valueOfLabel(String label) { return null; } } - /** * AuthenticationModeOptions - represents the different options to authenticate to the system */ @@ -161,19 +182,13 @@ class ConfigJson { private boolean alterTable; private boolean queryData; private boolean ingestData; - /// Recommended default: UserPrompt - /// Some auth modes require additional environment variables to be set in order to work (see usage in generate_connection_string function). - /// Managed Identity Authentication only works when running as an Azure service (webapp, function, etc.) private AuthenticationModeOptions authenticationMode; - /// Recommended default: True - /// Toggle to False to execute this script "unattended" private boolean waitForUser; - /// Ignores the first record in a "X-separated value" type file private boolean ignoreFirstRecord; - /// Sleep time to allow for queued ingestion to complete. 
     private int waitForIngestSeconds;
-    /// Optional - Customized ingestion batching policy
     private String batchingPolicy;
+    private boolean useIngestV2Sample;
+    private IngestV2QuickstartConfig ingestV2Config;
 
     public boolean isUseExistingTable() {
         return useExistingTable;
@@ -239,6 +254,17 @@ public String getBatchingPolicy() {
         return batchingPolicy;
     }
 
+    public boolean isUseIngestV2Sample() {
+        return useIngestV2Sample;
+    }
+
+    public IngestV2QuickstartConfig getIngestV2Config() {
+        if (ingestV2Config == null) {
+            ingestV2Config = new IngestV2QuickstartConfig();
+        }
+        return ingestV2Config;
+    }
+
     @Override
     public String toString() {
         return "ConfigJson{" +
@@ -258,10 +284,108 @@ public String toString() {
                 ", \nignoreFirstRecord=" + ignoreFirstRecord +
                 ", \nwaitForIngestSeconds=" + waitForIngestSeconds +
                 ", \nbatchingPolicy='" + batchingPolicy + '\'' +
+                ", \nuseIngestV2Sample=" + useIngestV2Sample +
+                ", \ningestV2Config=" + ingestV2Config +
                 "}\n";
     }
 }
 
+@JsonIgnoreProperties(ignoreUnknown = true)
+class IngestV2QuickstartConfig {
+    private String clusterPath;
+    private final boolean trackingEnabled = true;
+    private final int maxConcurrency = 10;
+    private final int pollingIntervalSeconds = 30;
+    private final int pollingTimeoutMinutes = 2;
+    private final int overallTimeoutMinutes = 5;
+
+    private AuthenticationModeOptions authModeOverride;
+    private String appId;
+    private String appKey;
+    private String tenantId;
+    private String dataMappingName;
+
+    void applyDefaultsFromRoot(ConfigJson root) {
+        if (StringUtils.isBlank(clusterPath)) {
+            clusterPath = root.getKustoUri();
+        }
+        if (authModeOverride == null) {
+            authModeOverride = root.getAuthenticationMode();
+        }
+        if (authModeOverride == AuthenticationModeOptions.APP_KEY) {
+            if (StringUtils.isBlank(appId)) {
+                appId = System.getenv("APP_ID");
+            }
+            if (StringUtils.isBlank(appKey)) {
+                appKey = System.getenv("APP_KEY");
+            }
+            if (StringUtils.isBlank(tenantId)) {
+                tenantId = System.getenv("APP_TENANT");
+            }
+        }
+    }
+
+    public String getClusterPath() {
+        return clusterPath;
+    }
+
+    public AuthenticationModeOptions getAuthModeOverride() {
+        return authModeOverride;
+    }
+
+    public String getAppId() {
+        return appId;
+    }
+
+    public String getAppKey() {
+        return appKey;
+    }
+
+    public String getTenantId() {
+        return tenantId;
+    }
+
+    public String getDataMappingName() {
+        return dataMappingName;
+    }
+
+    public boolean isTrackingEnabled() {
+        return trackingEnabled;
+    }
+
+    public int getMaxConcurrency() {
+        return maxConcurrency;
+    }
+
+    public int getPollingIntervalSeconds() {
+        return pollingIntervalSeconds;
+    }
+
+    public int getPollingTimeoutMinutes() {
+        return pollingTimeoutMinutes;
+    }
+
+    public int getOverallTimeoutMinutes() {
+        return overallTimeoutMinutes;
+    }
+
+    @Override
+    public String toString() {
+        return "IngestV2QuickstartConfig{" +
+                "clusterPath='" + clusterPath + '\'' +
+                ", authMode='" + authModeOverride + '\'' +
+                ", appId='" + appId + '\'' +
+                ", tenantId='" + tenantId + '\'' +
+                ", dataMappingName='" + dataMappingName + '\'' +
+                ", trackingEnabled=" + trackingEnabled +
+                ", maxConcurrency=" + maxConcurrency +
+                ", pollingIntervalSeconds=" + pollingIntervalSeconds +
+                ", pollingTimeoutMinutes=" + pollingTimeoutMinutes +
+                ", overallTimeoutMinutes=" + overallTimeoutMinutes +
+                '}';
+    }
+}
+
 /**
  * The quick start application is a self-contained and runnable example script that demonstrates authenticating, connecting to, administering, ingesting
  * data into and querying Azure Data Explorer using the azure-kusto Java SDK.
You can use it as a baseline to write your own first kusto client application, @@ -270,9 +394,7 @@ public String toString() { * adapting the code to your needs. */ public class SampleApp { - // TODO (config): - // If this quickstart app was downloaded from OneClick, kusto_sample_config.json should be pre-populated with your cluster's details. - // If this quickstart app was downloaded from GitHub, edit kusto_sample_config.json and modify the cluster URL and database fields appropriately. + private static final String CONFIG_ENV_OVERRIDE = "KUSTO_SAMPLE_CONFIG_PATH"; private static final String configFileName = "quickstart/kusto_sample_config.json"; private static int step = 1; private static boolean waitForUser; @@ -288,11 +410,19 @@ public static void main(String[] args) { private static void runSampleApp() { System.out.println("Kusto sample app is starting..."); ConfigJson config = loadConfigs(); + IngestV2QuickstartConfig ingestV2Config = config.getIngestV2Config(); + ingestV2Config.applyDefaultsFromRoot(config); waitForUser = config.isWaitForUser(); if (config.getAuthenticationMode() == AuthenticationModeOptions.USER_PROMPT) { waitForUserToProceed("You will be prompted *twice* for credentials during this script. Please return to the console after authenticating."); } + + if (config.isUseIngestV2Sample()) { + runIngestV2Sample(config); + return; + } + try { IngestClient ingestClient = IngestClientFactory.createClient(Utils.Authentication.generateConnectionString(config.getIngestUri(), config.getAuthenticationMode())); @@ -344,16 +474,15 @@ private static void enableDistributedTracing() { */ @NotNull private static ConfigJson loadConfigs() { - File configFile = new File(".\\" + SampleApp.configFileName); + Path configPath = locateConfigFile(); try { ObjectMapper mapper = com.microsoft.azure.kusto.data.Utils.getObjectMapper(); - mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS, true); - return mapper.readValue(configFile, ConfigJson.class); - + mapper.configure(ACCEPT_CASE_INSENSITIVE_ENUMS, true); + return mapper.readValue(configPath.toFile(), ConfigJson.class); } catch (Exception e) { - Utils.errorHandler(String.format("Couldn't read config file from file '%s'", SampleApp.configFileName), e); + Utils.errorHandler(String.format("Couldn't read config file from file '%s'", configPath), e); } - return new ConfigJson(); // Note: will never reach here. + return new ConfigJson(); } /** @@ -405,10 +534,6 @@ private static void preIngestionQuerying(ConfigJson config, Client kustoClient) // For more information about customizing the ingestion batching policy, see: // https://docs.microsoft.com/azure/data-explorer/kusto/management/batchingpolicy // TODO: Change if needed. 
Disabled to prevent an existing batching policy from being unintentionally changed - if (false && config.getBatchingPolicy() != null) { - waitForUserToProceed(String.format("Alter the batching policy for table '%s.%s'", config.getDatabaseName(), config.getTableName())); - alterBatchingPolicy(kustoClient, config.getDatabaseName(), config.getTableName(), config.getBatchingPolicy()); - } } /** @@ -591,4 +716,264 @@ private static void postIngestionQuerying(Client kustoClient, String databaseNam waitForUserToProceed(String.format("Get sample (2 records) of %sdata:", optionalPostIngestionPrompt)); queryFirstTwoRows(kustoClient, databaseName, tableName); } + + private static void runIngestV2Sample(ConfigJson config) { + IngestV2QuickstartConfig ingestV2Config = config.getIngestV2Config(); + String clusterPath = ingestV2Config.getClusterPath(); + if (StringUtils.isBlank(clusterPath)) { + Utils.errorHandler("'kustoUri' must be provided to use the ingest-v2 sample."); + } + + System.out.println("Running ingest-v2 quickstart sample..."); + ChainedTokenCredential credential = buildIngestV2Credential(ingestV2Config); + + try (QueuedIngestClient queuedIngestClient = QueuedIngestClientBuilder.create(clusterPath) + .withAuthentication(credential) + .withMaxConcurrency(ingestV2Config.getMaxConcurrency()) + .build()) { + List> operations = new ArrayList<>(); + operations.addAll(ingestV2FromStreams(config, ingestV2Config, queuedIngestClient)); + operations.addAll(ingestV2FromFiles(config, ingestV2Config, queuedIngestClient)); + operations.add(ingestV2BatchIngestion(config, ingestV2Config, queuedIngestClient)); + + CompletableFuture combined = CompletableFuture.allOf(operations.toArray(new CompletableFuture[0])); + combined.get(ingestV2Config.getOverallTimeoutMinutes(), TimeUnit.MINUTES); + System.out.println("All ingest-v2 operations completed successfully!"); + } catch (Exception e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + Utils.errorHandler("Error running ingest-v2 quickstart sample", e); + } + } + + private static ChainedTokenCredential buildIngestV2Credential(IngestV2QuickstartConfig config) { + AuthenticationModeOptions mode = config.getAuthModeOverride(); + if (mode == null) { + mode = AuthenticationModeOptions.USER_PROMPT; + } + ChainedTokenCredentialBuilder builder = new ChainedTokenCredentialBuilder(); + if (mode == AuthenticationModeOptions.APP_KEY) { + if (StringUtils.isBlank(config.getAppId()) || StringUtils.isBlank(config.getAppKey()) || StringUtils.isBlank(config.getTenantId())) { + Utils.errorHandler("AppKey authentication requires 'APP_ID', 'APP_KEY', and 'APP_TENANT' environment variables or ingestV2 overrides."); + } + builder.addFirst(new ClientSecretCredentialBuilder() + .clientId(config.getAppId()) + .clientSecret(config.getAppKey()) + .tenantId(config.getTenantId()) + .build()); + } else { + builder.addFirst(new AzureCliCredentialBuilder().build()); + } + return builder.build(); + } + + private static List> ingestV2FromStreams(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + QueuedIngestClient queuedIngestClient) throws IOException { + System.out.println("\n=== Queued ingestion from streams (ingest-v2) ==="); + List> futures = new ArrayList<>(); + + IngestRequestProperties csvProps = buildIngestV2RequestProperties(config, ingestV2Config, null); + String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvStream = new 
ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + StreamSource csvSource = new StreamSource(csvStream, CompressionType.NONE, Format.csv, UUID.randomUUID(), "csv-stream", false); + futures.add(queuedIngestClient.ingestAsync(csvSource, csvProps) + .thenCompose(response -> { + closeQuietly(csvStream); + System.out.println("CSV stream ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "CSV Stream"); + })); + + InputStream jsonStream = Files.newInputStream(resolveQuickstartPath("dataset.json")); + StreamSource jsonSource = new StreamSource(jsonStream, CompressionType.NONE, Format.json, UUID.randomUUID(), "json-stream", false); + IngestRequestProperties jsonProps = buildIngestV2RequestProperties(config, ingestV2Config, ingestV2Config.getDataMappingName()); + futures.add(queuedIngestClient.ingestAsync(jsonSource, jsonProps) + .thenCompose(response -> { + closeQuietly(jsonStream); + System.out.println("JSON stream ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "JSON Stream"); + })); + + return futures; + } + + private static List> ingestV2FromFiles(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + QueuedIngestClient queuedIngestClient) { + System.out.println("\n=== Queued ingestion from files (ingest-v2) ==="); + List> futures = new ArrayList<>(); + + IngestRequestProperties csvProps = buildIngestV2RequestProperties(config, ingestV2Config, null); + FileSource csvFileSource = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + futures.add(queuedIngestClient.ingestAsync(csvFileSource, csvProps) + .thenCompose(response -> { + System.out.println("CSV file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "CSV File"); + })); + + FileSource jsonFileSource = new FileSource(resolveQuickstartPath("dataset.json"), Format.json, UUID.randomUUID(), CompressionType.NONE); + IngestRequestProperties jsonProps = buildIngestV2RequestProperties(config, ingestV2Config, ingestV2Config.getDataMappingName()); + futures.add(queuedIngestClient.ingestAsync(jsonFileSource, jsonProps) + .thenCompose(response -> { + System.out.println("JSON file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "JSON File"); + })); + + return futures; + } + + private static CompletableFuture ingestV2BatchIngestion(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + QueuedIngestClient queuedIngestClient) { + System.out.println("\n=== Queued ingestion from multiple sources (ingest-v2 batch) ==="); + FileSource source1 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + FileSource source2 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + List sources = Arrays.asList(source1, source2); + + IngestRequestProperties props = buildIngestV2RequestProperties(config, ingestV2Config, null); + return queuedIngestClient.ingestAsync(sources, props) + .thenCompose(response -> { + System.out.println("Batch ingestion queued. 
Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + System.out.println("Number of sources in batch: " + sources.size()); + return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "Batch Ingestion"); + }); + } + + private static IngestRequestProperties buildIngestV2RequestProperties(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, String mappingName) { + IngestRequestPropertiesBuilder builder = IngestRequestPropertiesBuilder + .create(config.getDatabaseName(), config.getTableName()) + .withEnableTracking(ingestV2Config.isTrackingEnabled()); + if (StringUtils.isNotBlank(mappingName)) { + builder.withIngestionMappingReference(mappingName); + } + return builder.build(); + } + + private static CompletableFuture trackIngestV2Operation(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + QueuedIngestClient queuedIngestClient, ExtendedIngestResponse response, String operationName) { + IngestionOperation operation = new IngestionOperation( + Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), + config.getDatabaseName(), + config.getTableName(), + response.getIngestionType()); + + Duration pollInterval = Duration.ofSeconds(ingestV2Config.getPollingIntervalSeconds()); + Duration pollTimeout = Duration.ofMinutes(ingestV2Config.getPollingTimeoutMinutes()); + + System.out.println("\n--- Tracking " + operationName + " ---"); + return queuedIngestClient.getOperationDetailsAsync(operation) + .thenCompose(initialDetails -> { + System.out.println("[" + operationName + "] Initial Operation Details:"); + printIngestV2StatusResponse(initialDetails); + System.out.println("[" + operationName + "] Polling for completion..."); + return queuedIngestClient.pollForCompletion(operation, pollInterval, pollTimeout); + }) + .thenCompose(fin -> queuedIngestClient.getOperationDetailsAsync(operation)) + .thenAccept(finalDetails -> { + System.out.println("[" + operationName + "] Final Operation Details:"); + printIngestV2StatusResponse(finalDetails); + System.out.println("[" + operationName + "] Operation tracking completed.\n"); + }) + .exceptionally(error -> { + System.err.println("[" + operationName + "] Error tracking operation: " + error.getMessage()); + error.printStackTrace(); + return null; + }); + } + + private static void printIngestV2StatusResponse(StatusResponse statusResponse) { + if (statusResponse == null) { + System.out.println(" Status: null"); + return; + } + Status status = statusResponse.getStatus(); + if (status != null) { + System.out.println(" Summary:"); + System.out.println(" In Progress: " + status.getInProgress()); + System.out.println(" Succeeded: " + status.getSucceeded()); + System.out.println(" Failed: " + status.getFailed()); + System.out.println(" Canceled: " + status.getCanceled()); + } + List details = statusResponse.getDetails(); + if (details != null && !details.isEmpty()) { + System.out.println(" Blob Details:"); + for (int i = 0; i < details.size(); i++) { + BlobStatus blobStatus = details.get(i); + System.out.println(" Blob " + (i + 1) + ":"); + System.out.println(" Source ID: " + blobStatus.getSourceId()); + System.out.println(" Status: " + blobStatus.getStatus()); + if (blobStatus.getDetails() != null) { + System.out.println(" Details: " + blobStatus.getDetails()); + } + if (blobStatus.getErrorCode() != null) { + System.out.println(" Error Code: " + blobStatus.getErrorCode()); + } + if (blobStatus.getFailureStatus() != null) { + System.out.println(" Failure Status: " + 
blobStatus.getFailureStatus()); + } + } + } + } + + private static Path resolveQuickstartPath(String fileName) { + Path preferred = Paths.get("quickstart", fileName); + if (Files.exists(preferred)) { + return preferred; + } + return Paths.get(fileName); + } + + private static void closeQuietly(InputStream closeable) { + if (closeable == null) { + return; + } + try { + closeable.close(); + } catch (IOException e) { + System.err.println("Failed to close resource: " + e.getMessage()); + } + } + + private static Path locateConfigFile() { + List candidates = new ArrayList<>(); + String override = System.getenv(CONFIG_ENV_OVERRIDE); + if (StringUtils.isNotBlank(override)) { + candidates.add(Paths.get(override)); + } + Path relative = Paths.get(configFileName); + candidates.add(relative); + candidates.add(Paths.get(System.getProperty("user.dir", ".")).resolve(relative)); + try { + Path jarLocation = Paths.get(SampleApp.class.getProtectionDomain().getCodeSource().getLocation().toURI()); + Path jarDir = jarLocation.getParent(); + if (jarDir != null) { + candidates.add(jarDir.resolve(relative)); + Path parent = jarDir.getParent(); + if (parent != null) { + candidates.add(parent.resolve(relative)); + } + } + } catch (URISyntaxException ignored) { + // Fall through to default locations + } + + return candidates.stream() + .map(Path::normalize) + .map(SampleApp::expandWithWorkingDirectory) + .filter(Files::exists) + .findFirst() + .orElseGet(() -> { + Utils.errorHandler(String.format( + "Couldn't find config file '%s'. Provide it next to the jar, pass an absolute path, or set %s.", + configFileName, + CONFIG_ENV_OVERRIDE)); + return relative; + }); + } + + private static Path expandWithWorkingDirectory(Path path) { + if (Files.exists(path)) { + return path; + } + Path justFileName = Paths.get(path.getFileName().toString()); + return Files.exists(justFileName) ? justFileName : path; + } } diff --git a/samples/pom.xml b/samples/pom.xml index ba994b702..e23265e7d 100644 --- a/samples/pom.xml +++ b/samples/pom.xml @@ -36,6 +36,11 @@ kusto-ingest ${project.parent.version} + + com.microsoft.azure.kusto + kusto-ingest-v2 + ${ingest-v2.revision} + com.fasterxml.jackson.core jackson-databind @@ -47,5 +52,4 @@ ${fasterxml.jackson.core.version} - - \ No newline at end of file + diff --git a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java new file mode 100644 index 000000000..6d243713f --- /dev/null +++ b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java @@ -0,0 +1,481 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
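+
+// How to run (an assumed invocation: the -D property names match the
+// System.getProperty calls in main below, but exec-maven-plugin wiring for
+// this sample is not part of this patch):
+//
+//   mvn compile exec:java -pl samples \
+//       -Dexec.mainClass=ingestv2.ManagedStreamingIngestV2 \
+//       -DclusterPath=https://<cluster>.kusto.windows.net \
+//       -DdbName=<database> -DtableName=<table> -DdataMappingName=<mapping>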
+ +package ingestv2; + +import com.azure.identity.AzureCliCredentialBuilder; +import com.azure.identity.ChainedTokenCredential; +import com.azure.identity.ChainedTokenCredentialBuilder; +import com.azure.identity.ClientSecretCredentialBuilder; +import com.microsoft.azure.kusto.data.StringUtils; +import com.microsoft.azure.kusto.ingest.v2.builders.ManagedStreamingIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.IngestionOperation; +import com.microsoft.azure.kusto.ingest.v2.client.ManagedStreamingIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; +import com.microsoft.azure.kusto.ingest.v2.models.Status; +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.FileSource; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; + +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; + +/** + * Sample demonstrating managed streaming ingestion using the new ingest-v2 API. + * This is the modern API that uses Kotlin-based clients with coroutines, + * providing better async support and a cleaner API design. + * + * Managed streaming ingestion intelligently chooses between streaming and queued ingestion: + * - Small data (typically under 4MB) is ingested via streaming for low latency + * - Large data automatically falls back to queued ingestion for reliability + * - Server errors (like streaming disabled) trigger automatic fallback to queued + * - Transient errors are retried according to the configured retry policy + * + * This approach provides the best of both worlds: low latency for small data + * and high reliability for all data sizes. 
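+ *
+ * Minimal usage sketch (the endpoint, database and table names here are
+ * placeholders, not values used by this sample):
+ * <pre>{@code
+ * ManagedStreamingIngestClient client =
+ *         ManagedStreamingIngestClientBuilder.create("https://mycluster.kusto.windows.net")
+ *                 .withAuthentication(credential)
+ *                 .build();
+ * IngestRequestProperties props =
+ *         IngestRequestPropertiesBuilder.create("MyDatabase", "MyTable")
+ *                 .withEnableTracking(true)
+ *                 .build();
+ * ExtendedIngestResponse response = client.ingestAsync(source, props).get();
+ * }</pre>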
+ */ +public class ManagedStreamingIngestV2 { + + private static String database; + private static String table; + private static String mapping; + private static ManagedStreamingIngestClient managedStreamingIngestClient; + + public static void main(String[] args) { + try { + // Get configuration from system properties + String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String appId = System.getProperty("app-id"); + String appKey = System.getProperty("appKey"); + String tenant = System.getProperty("tenant"); + + database = System.getProperty("dbName"); + table = System.getProperty("tableName"); + mapping = System.getProperty("dataMappingName"); + + ChainedTokenCredential credential; + + // Create Azure AD credential + if (StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { + credential = new ChainedTokenCredentialBuilder() + .addFirst(new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build()) + .build(); + } else { + credential = new ChainedTokenCredentialBuilder() + .addFirst(new AzureCliCredentialBuilder().build()) + .build(); + } + + if (engineEndpoint == null || engineEndpoint.isEmpty()) { + throw new IllegalArgumentException("Cluster endpoint (clusterPath) must be provided as a system property."); + } + + // Create managed streaming ingest client using the new v2 API + // The client will automatically handle streaming vs queued ingestion decisions + managedStreamingIngestClient = ManagedStreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .build(); + + System.out.println("Managed Streaming Ingest Client created successfully"); + System.out.println("This client automatically chooses between streaming and queued ingestion"); + System.out.println("based on data size and server responses.\n"); + + // Run ingestion examples + ingestFromStream(); + ingestFromFile(); + demonstrateFallbackTracking(); + + System.out.println("\nAll managed streaming ingestion operations completed"); + + } catch (Exception e) { + System.err.println("Error during ingestion: " + e.getMessage()); + e.printStackTrace(); + } finally { + if (managedStreamingIngestClient != null) { + managedStreamingIngestClient.close(); + } + } + } + + /** + * Demonstrates ingestion from various stream sources. + * Small data will typically use streaming ingestion for low latency. 
+     *
+     * Sources include:
+     * - In-memory string data as CSV (small, will use streaming)
+     * - Compressed file stream (CSV)
+     * - JSON file stream with mapping
+     */
+    static void ingestFromStream() throws Exception {
+        System.out.println("\n=== Managed Streaming Ingestion from Streams ===");
+
+        // Example 1: Ingest from in-memory CSV string (small data - will use streaming)
+        String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null";
+        // Use getBytes rather than Charset.encode(...).array(): the encoder's backing
+        // array can be larger than the encoded content, which would append stray
+        // NUL bytes to the ingested stream.
+        InputStream csvInputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
+
+        StreamSource csvStreamSource = new StreamSource(
+                csvInputStream, CompressionType.NONE, Format.csv,
+                UUID.randomUUID(), "csv-managed-stream", false);
+
+        IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder
+                .create(database, table)
+                .withEnableTracking(true)
+                .build();
+
+        System.out.println("Ingesting small CSV data from string...");
+        ExtendedIngestResponse csvResponse = managedStreamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get();
+        printIngestionResult("CSV String", csvResponse);
+
+        // Example 2: Ingest from compressed CSV file
+        String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/";
+        FileInputStream compressedCsvStream = new FileInputStream(resourcesDirectory + "dataset.csv.gz");
+
+        StreamSource compressedStreamSource = new StreamSource(
+                compressedCsvStream,
+                CompressionType.GZIP,
+                Format.csv,
+                UUID.randomUUID(),
+                "compressed-csv-managed-stream",
+                false
+        );
+
+        System.out.println("Ingesting compressed CSV file...");
+        ExtendedIngestResponse compressedResponse = managedStreamingIngestClient.ingestAsync(compressedStreamSource, csvProperties).get();
+        printIngestionResult("Compressed CSV", compressedResponse);
+        compressedCsvStream.close();
+
+        // Example 3: Ingest JSON with mapping
+        FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json");
+
+        StreamSource jsonStreamSource = new StreamSource(
+                jsonStream,
+                CompressionType.NONE,
+                Format.json,
+                UUID.randomUUID(),
+                "json-managed-stream",
+                false
+        );
+
+        IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder
+                .create(database, table)
+                .withIngestionMappingReference(mapping)
+                .withEnableTracking(true)
+                .build();
+
+        System.out.println("Ingesting JSON file with mapping...");
+        ExtendedIngestResponse jsonResponse = managedStreamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get();
+        printIngestionResult("JSON with Mapping", jsonResponse);
+        jsonStream.close();
+    }
+
+    /**
+     * Demonstrates ingestion from file sources.
+     * The client will automatically decide between streaming and queued
+     * based on file size and other factors.
+ * + * Sources include: + * - CSV file + * - Compressed JSON file with mapping + */ + static void ingestFromFile() throws Exception { + System.out.println("\n=== Managed Streaming Ingestion from Files ==="); + + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + + // Example 1: Ingest CSV file + FileSource csvFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE + ); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting CSV file..."); + ExtendedIngestResponse csvResponse = managedStreamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); + printIngestionResult("CSV File", csvResponse); + + // Example 2: Ingest compressed JSON file with mapping + FileSource jsonFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP + ); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting compressed JSON file with mapping..."); + ExtendedIngestResponse jsonResponse = managedStreamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); + printIngestionResult("Compressed JSON File", jsonResponse); + } + + /** + * Demonstrates the automatic fallback to queued ingestion when data size exceeds + * the streaming limit. + * + * This method creates a large in-memory dataset (~20MB uncompressed) to force the + * client to fall back to queued ingestion. Note that data is automatically compressed + * before ingestion, so we use a larger size (20MB) to ensure the compressed data + * still exceeds the streaming threshold (~4MB compressed). + * + * This demonstrates: + * - Automatic size-based decision making + * - Fallback logging from the client ("Blob size is too big for streaming ingest") + * - Operation tracking for queued ingestion + * + * Note: Streaming ingestion operations are not tracked - they complete immediately + * with success or throw an exception on failure. + */ + static void demonstrateFallbackTracking() throws Exception { + System.out.println("\n=== Demonstrating Size-Based Fallback to Queued Ingestion ==="); + System.out.println("The ManagedStreamingIngestClient automatically falls back to queued ingestion"); + System.out.println("when data size exceeds the streaming limit (~4MB compressed)."); + System.out.println("Since data is automatically compressed, we use a larger dataset (~20MB)"); + System.out.println("to ensure the compressed size still exceeds the threshold.\n"); + + // Generate a large CSV dataset (~20MB uncompressed) that will exceed the streaming limit + // even after compression. This will force the client to fall back to queued ingestion. 
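+        // Sizing rationale (an assumption, not a measured ratio): the randomized CSV
+        // produced below typically gzips to roughly 25-50% of its raw size, so a
+        // 20MB payload should stay comfortably above the ~4MB compressed threshold.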
+ int targetSizeBytes = 20 * 1024 * 1024; // 20MB + String largeData = generateLargeCsvData(targetSizeBytes); + byte[] dataBytes = largeData.getBytes(StandardCharsets.UTF_8); + + System.out.println("Generated large CSV dataset:"); + System.out.println(" - Uncompressed data size: " + formatBytes(dataBytes.length)); + System.out.println(" - Streaming limit: ~4MB (after compression)"); + System.out.println(" - Expected behavior: FALL BACK TO QUEUED INGESTION"); + System.out.println(" - Look for log message: 'Blob size is too big for streaming ingest'"); + System.out.println(); + + InputStream largeInputStream = new ByteArrayInputStream(dataBytes); + + // Mark the stream for potential retry (seekable stream) + largeInputStream.mark(dataBytes.length); + + StreamSource largeStreamSource = new StreamSource( + largeInputStream, + CompressionType.NONE, // Will be auto-compressed by the client + Format.csv, + UUID.randomUUID(), + "large-data-fallback-demo", + false + ); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting large dataset (" + formatBytes(dataBytes.length) + " uncompressed)..."); + System.out.println("(Watch for fallback log messages from ManagedStreamingIngestClient)"); + System.out.println(); + + ExtendedIngestResponse response = managedStreamingIngestClient.ingestAsync(largeStreamSource, properties).get(); + printIngestionResult("Large Data Ingestion", response); + + // The large data should trigger queued fallback + if (response.getIngestionType() == IngestKind.QUEUED) { + System.out.println("SUCCESS: Large data correctly triggered QUEUED fallback!"); + System.out.println("This demonstrates the automatic size-based routing.\n"); + + IngestionOperation operation = new IngestionOperation( + response.getIngestResponse().getIngestionOperationId(), + database, + table, + response.getIngestionType() + ); + + // Get initial operation details + CompletableFuture detailsFuture = managedStreamingIngestClient.getOperationDetailsAsync(operation); + StatusResponse details = detailsFuture.get(); + printStatusResponse("Initial Status", details); + + // Poll for completion using getOperationDetailsAsync + System.out.println("\nPolling for completion (checking every 30 seconds, timeout 2 minutes)..."); + StatusResponse finalStatus = pollForCompletionManually(operation, Duration.ofSeconds(30), Duration.ofMinutes(2)); + printStatusResponse("Final Status", finalStatus); + + } else { + System.out.println("NOTE: Data was ingested via STREAMING method."); + System.out.println("This might happen if compression was very effective. Try increasing"); + System.out.println("the data size or using less compressible data patterns."); + } + } + + /** + * Generates a large CSV dataset of approximately the target size. + * The data follows the format expected by the sample table schema. + * Uses varied data to reduce compression effectiveness. 
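+     *
+     * Back-of-envelope (an estimate, not asserted by the code): each generated
+     * row is roughly 250-300 bytes, so a 20MB target produces on the order of
+     * 70,000-80,000 rows.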
+ */ + private static String generateLargeCsvData(int targetSizeBytes) { + StringBuilder sb = new StringBuilder(); + int rowCount = 0; + + // Generate varied data to make it less compressible + java.util.Random random = new java.util.Random(42); // Fixed seed for reproducibility + + // Sample CSV row matching the expected schema + // Format: int,guid,int,int,int,int,int,int,int,int,int,int,datetime,string,string,int,timespan,null,null + while (sb.length() < targetSizeBytes) { + // Use random values to reduce compression effectiveness + sb.append(rowCount).append(",") + .append(UUID.randomUUID()).append(",") // Random GUID + .append(random.nextInt(10000)).append(",") + .append(random.nextInt(100000)).append(",") + .append(random.nextLong()).append(",") + .append(random.nextDouble() * 1000000).append(",") + .append(random.nextInt()).append(",") + .append(random.nextInt(1000)).append(",") + .append(random.nextInt(5000)).append(",") + .append(random.nextInt(10000)).append(",") + .append(random.nextInt(100)).append(",") + .append(random.nextInt(50)).append(",") + .append("2024-").append(String.format("%02d", (rowCount % 12) + 1)) + .append("-").append(String.format("%02d", (rowCount % 28) + 1)) + .append("T").append(String.format("%02d", rowCount % 24)) + .append(":").append(String.format("%02d", rowCount % 60)) + .append(":").append(String.format("%02d", rowCount % 60)) + .append(".").append(String.format("%07d", random.nextInt(10000000))) + .append("Z").append(",") + .append("Row_").append(rowCount).append("_").append(random.nextInt(100000)).append(",") + .append("\"Description with random data: ").append(random.nextLong()) + .append(" and more: ").append(UUID.randomUUID()).append("\"").append(",") + .append(random.nextInt(100000)).append(",") + .append(String.format("%02d:%02d:%02d", + random.nextInt(24), random.nextInt(60), random.nextInt(60))).append(",") + .append(",") + .append("null") + .append("\n"); + rowCount++; + } + + System.out.println(" - Generated " + rowCount + " rows of varied data"); + return sb.toString(); + } + + /** + * Formats bytes into a human-readable string (e.g., "10.00 MB"). + */ + private static String formatBytes(long bytes) { + if (bytes < 1024) return bytes + " B"; + if (bytes < 1024 * 1024) return String.format("%.2f KB", bytes / 1024.0); + if (bytes < 1024 * 1024 * 1024) return String.format("%.2f MB", bytes / (1024.0 * 1024.0)); + return String.format("%.2f GB", bytes / (1024.0 * 1024.0 * 1024.0)); + } + + /** + * Manually polls for completion by repeatedly calling getOperationDetailsAsync. + * This demonstrates how to implement polling when the ManagedStreamingIngestClient + * is used and queued fallback occurs. + */ + private static StatusResponse pollForCompletionManually( + IngestionOperation operation, + Duration pollingInterval, + Duration timeout) throws Exception { + + long startTime = System.currentTimeMillis(); + long timeoutMillis = timeout.toMillis(); + long intervalMillis = pollingInterval.toMillis(); + + while (System.currentTimeMillis() - startTime < timeoutMillis) { + StatusResponse status = managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); + + // Check if completed (no more in-progress items) + Status summary = status.getStatus(); + if (summary != null && summary.getInProgress() == 0) { + System.out.println("Operation completed."); + return status; + } + + System.out.println("Still in progress... (In Progress: " + + (summary != null ? 
summary.getInProgress() : "unknown") + ")"); + + // Wait before next poll + Thread.sleep(intervalMillis); + } + + // Timeout reached, return latest status + System.out.println("Polling timeout reached. Returning latest status."); + return managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); + } + + /** + * Prints the ingestion result including which method (streaming or queued) was used. + */ + private static void printIngestionResult(String operationName, ExtendedIngestResponse response) { + String ingestionMethod = response.getIngestionType() == IngestKind.STREAMING ? "STREAMING" : "QUEUED"; + System.out.println("[" + operationName + "] Ingestion completed using " + ingestionMethod + " method."); + System.out.println(" Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + if (response.getIngestionType() == IngestKind.STREAMING) { + System.out.println(" (Low latency - data available immediately)"); + } else { + System.out.println(" (High reliability - data will be available after batch processing)"); + } + System.out.println(); + } + + /** + * Prints detailed status information from a StatusResponse. + */ + private static void printStatusResponse(String label, StatusResponse statusResponse) { + if (statusResponse == null) { + System.out.println(label + ": null"); + return; + } + + System.out.println(label + ":"); + Status status = statusResponse.getStatus(); + if (status != null) { + System.out.println(" Summary:"); + System.out.println(" In Progress: " + status.getInProgress()); + System.out.println(" Succeeded: " + status.getSucceeded()); + System.out.println(" Failed: " + status.getFailed()); + System.out.println(" Canceled: " + status.getCanceled()); + } + + List details = statusResponse.getDetails(); + if (details != null && !details.isEmpty()) { + System.out.println(" Blob Details:"); + for (int i = 0; i < details.size(); i++) { + BlobStatus blobStatus = details.get(i); + System.out.println(" Blob " + (i + 1) + ":"); + System.out.println(" Source ID: " + blobStatus.getSourceId()); + System.out.println(" Status: " + blobStatus.getStatus()); + if (blobStatus.getDetails() != null) { + System.out.println(" Details: " + blobStatus.getDetails()); + } + if (blobStatus.getErrorCode() != null) { + System.out.println(" Error Code: " + blobStatus.getErrorCode()); + } + if (blobStatus.getFailureStatus() != null) { + System.out.println(" Failure Status: " + blobStatus.getFailureStatus()); + } + } + } + } +} diff --git a/samples/src/main/java/ingestv2/QueuedIngestV2.java b/samples/src/main/java/ingestv2/QueuedIngestV2.java new file mode 100644 index 000000000..9e31a0291 --- /dev/null +++ b/samples/src/main/java/ingestv2/QueuedIngestV2.java @@ -0,0 +1,400 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
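+
+// Authentication prerequisite (inferred from the credential chain built in main
+// below): either sign in with `az login` so AzureCliCredential can be used, or
+// pass -Dapp-id, -DappKey and -Dtenant to select the ClientSecretCredential path.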
+ +package ingestv2; + +import com.azure.identity.AzureCliCredentialBuilder; +import com.azure.identity.ChainedTokenCredential; +import com.azure.identity.ChainedTokenCredentialBuilder; +import com.azure.identity.ClientSecretCredentialBuilder; +import com.microsoft.azure.kusto.data.StringUtils; +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.IngestionOperation; +import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; +import com.microsoft.azure.kusto.ingest.v2.models.Status; +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.FileSource; +import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +/** + * Sample demonstrating queued ingestion using the new ingest-v2 API. + * This is the modern API that uses Kotlin-based clients with coroutines, + * providing better async support and a cleaner API design. + * Queued ingestion is asynchronous and provides reliable, high-throughput + * data ingestion with operation tracking capabilities. 
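+ *
+ * The tracking pattern used throughout this sample, in sketch form (the
+ * concrete variables are defined later in this file):
+ * <pre>{@code
+ * ExtendedIngestResponse r = queuedIngestClient.ingestAsync(source, props).get();
+ * IngestionOperation op = new IngestionOperation(
+ *         r.getIngestResponse().getIngestionOperationId(), database, table, r.getIngestionType());
+ * queuedIngestClient.pollForCompletion(op, Duration.ofSeconds(30), Duration.ofMinutes(2)).get();
+ * StatusResponse finalDetails = queuedIngestClient.getOperationDetailsAsync(op).get();
+ * }</pre>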
+ */ +public class QueuedIngestV2 { + + private static String database; + private static String table; + private static String mapping; + private static QueuedIngestClient queuedIngestClient; + + public static void main(String[] args) { + try { + // Get configuration from system properties + String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String appId = System.getProperty("app-id"); + String appKey = System.getProperty("appKey"); + String tenant = System.getProperty("tenant"); + + database = System.getProperty("dbName"); + table = System.getProperty("tableName"); + mapping = System.getProperty("dataMappingName"); + + ChainedTokenCredential credential; + + // Create Azure AD credential + if(StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { + credential = new ChainedTokenCredentialBuilder() + .addFirst(new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build()) + .build(); + } else { + credential = new ChainedTokenCredentialBuilder() + .addFirst(new AzureCliCredentialBuilder().build()) + .build(); + } + + if(engineEndpoint == null || engineEndpoint.isEmpty()) { + throw new IllegalArgumentException("Cluster endpoint (clusterPath) must be provided as a system property."); + } + + // Create queued ingest client using the new v2 API + queuedIngestClient = QueuedIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .withMaxConcurrency(10) // Set maximum concurrent uploads + .build(); + + System.out.println("Queued Ingest Client created successfully"); + + // Collect all futures for non-blocking execution + List> allFutures = new ArrayList<>(); + + // Run ingestion examples + allFutures.addAll(ingestFromStream()); + allFutures.addAll(ingestFromFile()); + allFutures.add(ingestMultipleSources()); + + // Wait for all operations to complete + CompletableFuture allOf = CompletableFuture.allOf( + allFutures.toArray(new CompletableFuture[0]) + ); + + System.out.println("\nWaiting for all ingestion operations to complete..."); + allOf.get(5, TimeUnit.MINUTES); + + System.out.println("\nAll ingestion operations completed successfully!"); + + } catch (Exception e) { + System.err.println("Error during ingestion: " + e.getMessage()); + e.printStackTrace(); + } finally { + if (queuedIngestClient != null) { + queuedIngestClient.close(); + } + } + } + + /** + * Demonstrates ingestion from various stream sources including: + * - In-memory string data as CSV + * - Compressed file stream (CSV) + * - JSON file stream with mapping + */ + static List> ingestFromStream() throws Exception { + System.out.println("\n=== Queued Ingestion from Streams ==="); + + List> futures = new ArrayList<>(); + + // Example 1: Ingest from in-memory CSV string + String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = new StreamSource( + csvInputStream, CompressionType.NONE, Format.csv, + UUID.randomUUID(), "csv-queued-stream", false); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Queueing CSV data from string..."); + CompletableFuture csvFuture = queuedIngestClient.ingestAsync(csvStreamSource, csvProperties) + .thenCompose(response 
-> { + System.out.println("CSV ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestionOperation(response, "CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); + futures.add(csvFuture); + + // Example 2: Ingest from compressed CSV file + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + InputStream compressedCsvStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.csv.gz")); + + StreamSource compressedStreamSource = new StreamSource( + compressedCsvStream, + CompressionType.GZIP, + Format.csv, + UUID.randomUUID(), + "compressed-csv-queued-stream", + false + ); + + System.out.println("Queueing compressed CSV file..."); + CompletableFuture compressedFuture = queuedIngestClient.ingestAsync(compressedStreamSource, csvProperties) + .thenCompose(response -> { + System.out.println("Compressed CSV ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestionOperation(response, "Compressed CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); + futures.add(compressedFuture); + + // Example 3: Ingest JSON with mapping + InputStream jsonStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); + + StreamSource jsonStreamSource = new StreamSource( + jsonStream, + CompressionType.NONE, + Format.json, + UUID.randomUUID(), + "json-queued-stream", + false + ); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); + + System.out.println("Queueing JSON file with mapping..."); + CompletableFuture jsonFuture = queuedIngestClient.ingestAsync(jsonStreamSource, jsonProperties) + .thenCompose(response -> { + System.out.println("JSON ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestionOperation(response, "JSON Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); + futures.add(jsonFuture); + + return futures; + } + + /** + * Demonstrates ingestion from file sources including: + * - CSV file + * - Compressed JSON file with mapping + */ + static List> ingestFromFile() { + System.out.println("\n=== Queued Ingestion from Files ==="); + + List> futures = new ArrayList<>(); + + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + + // Example 1: Ingest CSV file + FileSource csvFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE + ); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Queueing CSV file..."); + CompletableFuture csvFuture = queuedIngestClient.ingestAsync(csvFileSource, csvProperties) + .thenCompose(response -> { + System.out.println("CSV file ingestion queued. 
Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestionOperation(response, "CSV File"); + }); + futures.add(csvFuture); + + // Example 2: Ingest compressed JSON file with mapping + FileSource jsonFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP + ); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); + + System.out.println("Queueing compressed JSON file with mapping..."); + CompletableFuture jsonFuture = queuedIngestClient.ingestAsync(jsonFileSource, jsonProperties) + .thenCompose(response -> { + System.out.println("Compressed JSON file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + return trackIngestionOperation(response, "Compressed JSON File"); + }); + futures.add(jsonFuture); + + return futures; + } + + /** + * Demonstrates batch ingestion from multiple sources in a single operation. + * This is more efficient than ingesting sources one by one when you have multiple files. + */ + static CompletableFuture ingestMultipleSources() { + System.out.println("\n=== Queued Ingestion from Multiple Sources (Batch) ==="); + + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + + // Create multiple file sources + FileSource source1 = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE + ); + + FileSource source2 = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv.gz"), + Format.csv, + UUID.randomUUID(), + CompressionType.GZIP + ); + + List sources = Arrays.asList(source1, source2); + + IngestRequestProperties properties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Queueing multiple sources in batch..."); + return queuedIngestClient.ingestAsync(sources, properties) + .thenCompose(response -> { + System.out.println("Batch ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + System.out.println("Number of sources in batch: " + sources.size()); + return trackIngestionOperation(response, "Batch Ingestion"); + }); + } + + /** + * Tracks an ingestion operation by: + * 1. Getting operation details immediately after queueing + * 2. Polling for completion + * 3. Getting final operation details + * 4. 
+    /**
+     * Tracks an ingestion operation by:
+     * 1. Getting operation details immediately after queueing
+     * 2. Polling for completion
+     * 3. Getting final operation details
+     * 4. Printing status information
+     */
+    private static CompletableFuture<Void> trackIngestionOperation(ExtendedIngestResponse response, String operationName) {
+        IngestionOperation operation = new IngestionOperation(
+                Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()),
+                database,
+                table,
+                response.getIngestionType()
+        );
+
+        System.out.println("\n--- Tracking " + operationName + " ---");
+
+        // Get initial operation details
+        return queuedIngestClient.getOperationDetailsAsync(operation)
+                .thenCompose(initialDetails -> {
+                    System.out.println("[" + operationName + "] Initial Operation Details:");
+                    printStatusResponse(initialDetails);
+
+                    // Poll for completion
+                    System.out.println("[" + operationName + "] Polling for completion...");
+                    return queuedIngestClient.pollForCompletion(operation, Duration.ofSeconds(30), Duration.ofMinutes(2)); // 2 minutes timeout
+                })
+                .thenCompose(finalStatus -> {
+                    System.out.println("[" + operationName + "] Polling completed.");
+                    // Get final operation details
+                    return queuedIngestClient.getOperationDetailsAsync(operation);
+                })
+                .thenAccept(finalDetails -> {
+                    System.out.println("[" + operationName + "] Final Operation Details:");
+                    printStatusResponse(finalDetails);
+                    System.out.println("[" + operationName + "] Operation tracking completed.\n");
+                })
+                .exceptionally(error -> {
+                    System.err.println("[" + operationName + "] Error tracking operation: " + error.getMessage());
+                    error.printStackTrace();
+                    return null;
+                });
+    }
+
+    /**
+     * Prints detailed status information from a StatusResponse
+     */
+    private static void printStatusResponse(StatusResponse statusResponse) {
+        if (statusResponse == null) {
+            System.out.println("  Status: null");
+            return;
+        }
+
+        Status status = statusResponse.getStatus();
+        if (status != null) {
+            System.out.println("  Summary:");
+            System.out.println("    In Progress: " + status.getInProgress());
+            System.out.println("    Succeeded: " + status.getSucceeded());
+            System.out.println("    Failed: " + status.getFailed());
+            System.out.println("    Canceled: " + status.getCanceled());
+        }
+
+        List<BlobStatus> details = statusResponse.getDetails();
+        if (details != null && !details.isEmpty()) {
+            System.out.println("  Blob Details:");
+            for (int i = 0; i < details.size(); i++) {
+                BlobStatus blobStatus = details.get(i);
+                System.out.println("    Blob " + (i + 1) + ":");
+                System.out.println("      Source ID: " + blobStatus.getSourceId());
+                System.out.println("      Status: " + blobStatus.getStatus());
+                if (blobStatus.getDetails() != null) {
+                    System.out.println("      Details: " + blobStatus.getDetails());
+                }
+                if (blobStatus.getErrorCode() != null) {
+                    System.out.println("      Error Code: " + blobStatus.getErrorCode());
+                }
+                if (blobStatus.getFailureStatus() != null) {
+                    System.out.println("      Failure Status: " + blobStatus.getFailureStatus());
+                }
+            }
+        }
+    }
+
+    private static byte[] readResourceBytes(String baseDirectory, String fileName) throws IOException {
+        return Files.readAllBytes(Paths.get(baseDirectory, fileName));
+    }
+
+    private static void closeQuietly(InputStream stream) {
+        if (stream == null) {
+            return;
+        }
+        try {
+            stream.close();
+        } catch (Exception e) {
+            System.err.println("Failed to close stream: " + e.getMessage());
+        }
+    }
+}
diff --git a/samples/src/main/java/ingestv2/StreamingIngestV2.java b/samples/src/main/java/ingestv2/StreamingIngestV2.java
new file mode 100644
index 000000000..ddc774a30
--- /dev/null
+++ b/samples/src/main/java/ingestv2/StreamingIngestV2.java
@@ -0,0 +1,200 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package ingestv2;
+
+import com.azure.identity.AzureCliCredentialBuilder;
+import com.azure.identity.ChainedTokenCredential;
+import com.azure.identity.ChainedTokenCredentialBuilder;
+import com.azure.identity.ClientSecretCredentialBuilder;
+import com.microsoft.azure.kusto.data.StringUtils;
+import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder;
+import com.microsoft.azure.kusto.ingest.v2.client.StreamingIngestClient;
+import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse;
+import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder;
+import com.microsoft.azure.kusto.ingest.v2.models.Format;
+import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties;
+import com.microsoft.azure.kusto.ingest.v2.source.CompressionType;
+import com.microsoft.azure.kusto.ingest.v2.source.FileSource;
+import com.microsoft.azure.kusto.ingest.v2.source.StreamSource;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Paths;
+import java.util.UUID;
+
+/**
+ * Sample demonstrating streaming ingestion using the new ingest-v2 API.
+ * This is the modern API that uses Kotlin-based clients with coroutines,
+ * providing better async support and a cleaner API design.
+ */
+public class StreamingIngestV2 {
+
+    private static String database;
+    private static String table;
+    private static String mapping;
+    private static StreamingIngestClient streamingIngestClient;
+
+    public static void main(String[] args) {
+        try {
+            // Get configuration from system properties
+            String engineEndpoint = System.getProperty("clusterPath"); // "https://<cluster>.kusto.windows.net"
+            String appId = System.getProperty("app-id");
+            String appKey = System.getProperty("appKey");
+            String tenant = System.getProperty("tenant");
+
+            database = System.getProperty("dbName");
+            table = System.getProperty("tableName");
+            mapping = System.getProperty("dataMappingName");
+
+            ChainedTokenCredential credential;
+
+            // Create Azure AD credential
+            if (StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) {
+                credential = new ChainedTokenCredentialBuilder()
+                        .addFirst(new ClientSecretCredentialBuilder()
+                                .clientId(appId)
+                                .clientSecret(appKey)
+                                .tenantId(tenant)
+                                .build())
+                        .build();
+            } else {
+                credential = new ChainedTokenCredentialBuilder()
+                        .addFirst(new AzureCliCredentialBuilder().build())
+                        .build();
+            }
+
+            // Create streaming ingest client using the new v2 API
+            streamingIngestClient = StreamingIngestClientBuilder.create(engineEndpoint)
+                    .withAuthentication(credential)
+                    .build();
+
+            System.out.println("Streaming Ingest Client created successfully");
+
+            // Run ingestion examples
+            ingestFromStream();
+            ingestFromFile();
+
+            System.out.println("All ingestion operations completed");
+
+        } catch (Exception e) {
+            System.err.println("Error during ingestion: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Demonstrates ingestion from various stream sources including:
+     * - In-memory string data as CSV
+     * - Compressed file stream (CSV)
+     * - JSON file stream with mapping
+     */
+    static void ingestFromStream() throws Exception {
+        System.out.println("\n=== Ingesting from Streams ===");
+
+        // Example 1: Ingest from in-memory CSV string
"0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = new StreamSource( + csvInputStream, CompressionType.NONE, Format.csv, + UUID.randomUUID(), "csv-test-src", false); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting CSV data from string..."); + ExtendedIngestResponse ingestResponse = streamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); + System.out.println("CSV ingestion completed. Operation ID: " + ingestResponse.getIngestResponse().getIngestionOperationId()); + + // Example 2: Ingest from compressed CSV file + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + FileInputStream compressedCsvStream = new FileInputStream(resourcesDirectory + "dataset.csv.gz"); + + StreamSource compressedStreamSource = new StreamSource( + compressedCsvStream, + CompressionType.GZIP, + Format.csv, + UUID.randomUUID(), + "compressed-csv-stream", + false + ); + System.out.println("Ingesting compressed CSV file..."); + ExtendedIngestResponse compressedResponse = streamingIngestClient.ingestAsync(compressedStreamSource, csvProperties).get(); + System.out.println("Compressed CSV ingestion completed. Operation ID: " + compressedResponse.getIngestResponse().getIngestionOperationId()); + compressedCsvStream.close(); + + // Example 3: Ingest JSON with mapping + FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json"); + + StreamSource jsonStreamSource = new StreamSource( + jsonStream, + CompressionType.NONE, + Format.json, + UUID.randomUUID(), + "json-data-stream", + false + ); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting JSON file with mapping..."); + ExtendedIngestResponse jsonResponse = streamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); + System.out.println("JSON ingestion completed. Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); + jsonStream.close(); + } + + /** + * Demonstrates ingestion from file sources including: + * - CSV file + * - Compressed JSON file with mapping + */ + static void ingestFromFile() throws Exception { + System.out.println("\n=== Ingesting from Files ==="); + + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + + // Example 1: Ingest CSV file + FileSource csvFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE + ); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting CSV file..."); + ExtendedIngestResponse csvResponse = streamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); + System.out.println("CSV file ingestion completed. 
Operation ID: " + csvResponse.getIngestResponse().getIngestionOperationId()); + + // Example 2: Ingest compressed JSON file with mapping + FileSource jsonFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP + ); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder + .create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); + + System.out.println("Ingesting compressed JSON file with mapping..."); + ExtendedIngestResponse jsonResponse = streamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); + System.out.println("Compressed JSON file ingestion completed. Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); + } +} + diff --git a/samples/src/main/resources/create-table.kql b/samples/src/main/resources/create-table.kql new file mode 100644 index 000000000..891f650fb --- /dev/null +++ b/samples/src/main/resources/create-table.kql @@ -0,0 +1,88 @@ +// KQL Table Creation Script for the sample dataset +// This script creates a table matching the schema of dataset.csv + +.create table SampleData ( + rownumber: int, + rowguid: guid, + xdouble: real, + xfloat: real, + xbool: bool, + xint16: int, + xint32: int, + xint64: long, + xuint8: int, + xuint16: int, + xuint32: long, + xuint64: long, + xdate: datetime, + xsmalltext: string, + xtext: string, + xnumberAsText: string, + xtime: timespan, + xtextWithNulls: string, + xdynamicWithNulls: dynamic +) + +// Enable streaming ingestion (optional, for better performance) +.alter table SampleData policy streamingingestion enable + +// Create an ingestion mapping for CSV +.create table SampleData ingestion csv mapping 'SampleDataMapping' +``` +[ + {"column": "rownumber", "Properties": {"Ordinal": "0"}}, + {"column": "rowguid", "Properties": {"Ordinal": "1"}}, + {"column": "xdouble", "Properties": {"Ordinal": "2"}}, + {"column": "xfloat", "Properties": {"Ordinal": "3"}}, + {"column": "xbool", "Properties": {"Ordinal": "4"}}, + {"column": "xint16", "Properties": {"Ordinal": "5"}}, + {"column": "xint32", "Properties": {"Ordinal": "6"}}, + {"column": "xint64", "Properties": {"Ordinal": "7"}}, + {"column": "xuint8", "Properties": {"Ordinal": "8"}}, + {"column": "xuint16", "Properties": {"Ordinal": "9"}}, + {"column": "xuint32", "Properties": {"Ordinal": "10"}}, + {"column": "xuint64", "Properties": {"Ordinal": "11"}}, + {"column": "xdate", "Properties": {"Ordinal": "12"}}, + {"column": "xsmalltext", "Properties": {"Ordinal": "13"}}, + {"column": "xtext", "Properties": {"Ordinal": "14"}}, + {"column": "xnumberAsText", "Properties": {"Ordinal": "15"}}, + {"column": "xtime", "Properties": {"Ordinal": "16"}}, + {"column": "xtextWithNulls", "Properties": {"Ordinal": "17"}}, + {"column": "xdynamicWithNulls", "Properties": {"Ordinal": "18"}} +] +``` + +// Create an ingestion mapping for JSON (identity mapping - JSON property names match column names) +.create table SampleData ingestion json mapping 'SampleDataMapping' +``` +[ + {"column": "rownumber", "Properties": {"Path": "$.rownumber"}}, + {"column": "rowguid", "Properties": {"Path": "$.rowguid"}}, + {"column": "xdouble", "Properties": {"Path": "$.xdouble"}}, + {"column": "xfloat", "Properties": {"Path": "$.xfloat"}}, + {"column": "xbool", "Properties": {"Path": "$.xbool"}}, + {"column": "xint16", "Properties": {"Path": "$.xint16"}}, + {"column": "xint32", "Properties": {"Path": "$.xint32"}}, + {"column": "xint64", 
"Properties": {"Path": "$.xint64"}}, + {"column": "xuint8", "Properties": {"Path": "$.xuint8"}}, + {"column": "xuint16", "Properties": {"Path": "$.xuint16"}}, + {"column": "xuint32", "Properties": {"Path": "$.xuint32"}}, + {"column": "xuint64", "Properties": {"Path": "$.xuint64"}}, + {"column": "xdate", "Properties": {"Path": "$.xdate"}}, + {"column": "xsmalltext", "Properties": {"Path": "$.xsmalltext"}}, + {"column": "xtext", "Properties": {"Path": "$.xtext"}}, + {"column": "xnumberAsText", "Properties": {"Path": "$.xnumberAsText"}}, + {"column": "xtime", "Properties": {"Path": "$.xtime"}}, + {"column": "xtextWithNulls", "Properties": {"Path": "$.xtextWithNulls"}}, + {"column": "xdynamicWithNulls", "Properties": {"Path": "$.xdynamicWithNulls"}} +] +``` + +// Alternative: Create a table with retention policy (example: 90 days) +// .alter-merge table SampleData policy retention softdelete = 90d + +// Query examples after data ingestion: +// SampleData | take 10 +// SampleData | where xbool == true | project rownumber, xtext, xdate +// SampleData | extend ParsedJson = parse_json(xdynamicWithNulls) | project rownumber, ParsedJson.rowId, ParsedJson.arr + From 4a17c4923061f7d0d249e884f4faef3682d0ad31 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Mon, 29 Dec 2025 17:56:38 +0530 Subject: [PATCH 32/50] * Remove test for missing blob * Remove lints --- .../ingest/v2/StreamingIngestClientTest.kt | 18 +++++++++--------- .../ingestv2/ManagedStreamingIngestV2.java | 14 ++++---------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index 26f0fe250..55c019211 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -60,15 +60,15 @@ class StreamingIngestClientTest : false, publicBlobUrl, ), - Arguments.of( - "Blob based ingest- Invalid blob URL", - engineEndpoint, - // isException - true, - // isUnreachableHost - false, - "https://nonexistentaccount.blob.core.windows.net/container/file.json", - ), +// Arguments.of( +// "Blob based ingest- Invalid blob URL", +// engineEndpoint, +// // isException +// true, +// // isUnreachableHost +// false, +// "https://nonexistentaccount.blob.core.windows.net/container/file.json", +// ), ) } diff --git a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java index 6d243713f..c67757f34 100644 --- a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java +++ b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java @@ -30,6 +30,7 @@ import java.nio.file.Paths; import java.time.Duration; import java.util.List; +import java.util.Objects; import java.util.UUID; import java.util.concurrent.CompletableFuture; @@ -37,13 +38,11 @@ * Sample demonstrating managed streaming ingestion using the new ingest-v2 API. * This is the modern API that uses Kotlin-based clients with coroutines, * providing better async support and a cleaner API design. 
- * * Managed streaming ingestion intelligently chooses between streaming and queued ingestion: * - Small data (typically under 4MB) is ingested via streaming for low latency * - Large data automatically falls back to queued ingestion for reliability * - Server errors (like streaming disabled) trigger automatic fallback to queued * - Transient errors are retried according to the configured retry policy - * * This approach provides the best of both worlds: low latency for small data * and high reliability for all data sizes. */ @@ -117,7 +116,6 @@ public static void main(String[] args) { /** * Demonstrates ingestion from various stream sources. * Small data will typically use streaming ingestion for low latency. - * * Sources include: * - In-memory string data as CSV (small, will use streaming) * - Compressed file stream (CSV) @@ -189,7 +187,6 @@ static void ingestFromStream() throws Exception { * Demonstrates ingestion from file sources. * The client will automatically decide between streaming and queued * based on file size and other factors. - * * Sources include: * - CSV file * - Compressed JSON file with mapping @@ -238,17 +235,14 @@ static void ingestFromFile() throws Exception { /** * Demonstrates the automatic fallback to queued ingestion when data size exceeds * the streaming limit. - * - * This method creates a large in-memory dataset (~20MB uncompressed) to force the + * This method creates a large in-memory dataset (~20MB uncompressed) to force the * client to fall back to queued ingestion. Note that data is automatically compressed * before ingestion, so we use a larger size (20MB) to ensure the compressed data * still exceeds the streaming threshold (~4MB compressed). - * * This demonstrates: * - Automatic size-based decision making * - Fallback logging from the client ("Blob size is too big for streaming ingest") * - Operation tracking for queued ingestion - * * Note: Streaming ingestion operations are not tracked - they complete immediately * with success or throw an exception on failure. 
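     * (Illustrative arithmetic: if gzip happened to compress this CSV at 5:1, a 20MB
     * payload would land right at the ~4MB threshold; at the lower ratios produced by
     * deliberately varied data, it stays comfortably above it, so the queued fallback
     * is exercised reliably.)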
*/ @@ -304,7 +298,7 @@ static void demonstrateFallbackTracking() throws Exception { System.out.println("This demonstrates the automatic size-based routing.\n"); IngestionOperation operation = new IngestionOperation( - response.getIngestResponse().getIngestionOperationId(), + Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), database, table, response.getIngestionType() @@ -407,7 +401,7 @@ private static StatusResponse pollForCompletionManually( // Check if completed (no more in-progress items) Status summary = status.getStatus(); - if (summary != null && summary.getInProgress() == 0) { + if (summary != null && summary.getInProgress()!=null && summary.getInProgress() == 0) { System.out.println("Operation completed."); return status; } From 307ff6a037a525a27e756195f4d382afe33dc980 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Mon, 29 Dec 2025 18:16:52 +0530 Subject: [PATCH 33/50] * Rebase POM changes --- data/pom.xml | 18 +++++++++++++++++- ingest-v2/pom.xml | 13 +++++++++++++ ingest/pom.xml | 28 +++++++++++++++++----------- pom.xml | 43 ++++++++++++++++++------------------------- 4 files changed, 65 insertions(+), 37 deletions(-) diff --git a/data/pom.xml b/data/pom.xml index 2cd5c012c..4cb0d9e9c 100644 --- a/data/pom.xml +++ b/data/pom.xml @@ -18,6 +18,18 @@ ${revision} + + + + com.azure + azure-sdk-bom + ${azure-bom-version} + pom + import + + + + @@ -174,10 +186,12 @@ jackson-databind com.fasterxml.jackson.core + ${fasterxml.jackson.core.version} jackson-annotations com.fasterxml.jackson.core + ${fasterxml.jackson.core.version} org.slf4j @@ -230,10 +244,12 @@ jackson-core com.fasterxml.jackson.core + ${fasterxml.jackson.core.version} com.fasterxml.jackson.datatype jackson-datatype-jsr310 + ${fasterxml.jackson.core.version} io.projectreactor @@ -242,4 +258,4 @@ test - + \ No newline at end of file diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 6030e11d1..8bf0c74a8 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -10,6 +10,7 @@ EventHouse on Fabric + 1.2.37 4.3.0 official 2.2.10 @@ -17,6 +18,7 @@ 3.1.1 1.10.2 1.4.14 + 11 5.10.0 7.15.0 2.0.9 @@ -30,6 +32,17 @@ ${revision} + + + + com.azure + azure-sdk-bom + ${azure-bom-version} + pom + import + + + io.ktor diff --git a/ingest/pom.xml b/ingest/pom.xml index 9c9aeae48..f9f5397b4 100644 --- a/ingest/pom.xml +++ b/ingest/pom.xml @@ -18,6 +18,18 @@ ${revision} + + + + com.azure + azure-sdk-bom + ${azure-bom-version} + pom + import + + + + @@ -168,6 +180,7 @@ com.azure azure-core + org.slf4j slf4j-api @@ -176,6 +189,7 @@ com.fasterxml.jackson.core jackson-databind + ${fasterxml.jackson.core.version} jackson-annotations @@ -186,6 +200,7 @@ com.fasterxml.jackson.core jackson-annotations + ${fasterxml.jackson.core.version} com.univocity @@ -232,16 +247,7 @@ annotations ${annotations.version} - - io.github.resilience4j - resilience4j-retry - ${resilience4j.version} - - - io.vavr - vavr - ${io.vavr.version} - + io.projectreactor reactor-test @@ -267,4 +273,4 @@ test - + \ No newline at end of file diff --git a/pom.xml b/pom.xml index f104a62ef..3ad5144f8 100644 --- a/pom.xml +++ b/pom.xml @@ -33,10 +33,9 @@ 8.0.0 - 0.0.1-beta - UTF-8 - 11 - 1.2.37 + 0.0.1-beta UTF-8 + 1.8 + 1.2.28 @@ -64,10 +63,22 @@ 5.11.0 0.8.11 + + + + java8 + + [1.8,11) + + + 4.5.1 + + + + - data ingest - ingest-v2 + data samples quickstart @@ -112,22 +123,4 @@ - - - - com.azure - azure-sdk-bom - ${azure-bom-version} - pom - import - - - com.fasterxml.jackson - jackson-bom - ${fasterxml.jackson.core.version} - import - pom - - 
- - + \ No newline at end of file From e85af023695af81aa35d8e6794d8caeae21880b1 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Mon, 29 Dec 2025 18:28:03 +0530 Subject: [PATCH 34/50] * Rebase POM changes --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3ad5144f8..700b70939 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 8.0.0 0.0.1-beta UTF-8 - 1.8 + 11 1.2.28 From 1d0a6faf14d7fe31391750d814968e0113859474 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Mon, 29 Dec 2025 18:37:07 +0530 Subject: [PATCH 35/50] * Rebase POM changes --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 700b70939..25e0a50c0 100644 --- a/pom.xml +++ b/pom.xml @@ -78,6 +78,7 @@ ingest + ingest-v2 data samples quickstart From 3a31d4d00fc5cb78e20605eebb2a00cd1d5c549f Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Tue, 30 Dec 2025 10:14:27 +0530 Subject: [PATCH 36/50] * Add JACOCO for coverage --- ingest-v2/pom.xml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 8bf0c74a8..b8836ae28 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -328,6 +328,26 @@ + + org.jacoco + jacoco-maven-plugin + 0.8.11 + + + prepare-agent + + prepare-agent + + + + report + test + + report + + + + org.apache.maven.plugins maven-surefire-plugin From 4931ac089a25ca129f05fd36a2dd105a6b2cc451 Mon Sep 17 00:00:00 2001 From: Ramachandran A G <106139410+ag-ramachandran@users.noreply.github.com> Date: Wed, 31 Dec 2025 20:49:52 +0530 Subject: [PATCH 37/50] Feature/fix method signature orders (#452) * * Make signatures consistent for all sources --- .../models/IngestRequestPropertiesBuilder.kt | 15 - .../kusto/ingest/v2/source/BlobSource.kt | 2 +- .../kusto/ingest/v2/source/FileSource.kt | 3 +- .../kusto/ingest/v2/source/StreamSource.kt | 9 +- .../v2/uploader/ContainerUploaderBase.kt | 2 +- .../ManagedStreamingIngestClientJavaTest.java | 9 +- .../ingest/v2/QueuedIngestClientJavaTest.java | 6 +- .../v2/StreamingIngestClientJavaTest.java | 6 +- .../v2/ManagedStreamingIngestClientTest.kt | 3 +- .../kusto/ingest/v2/QueuedIngestClientTest.kt | 15 +- .../ingest/v2/StreamingIngestClientTest.kt | 20 +- .../azure/kusto/quickstart/SampleApp.java | 36 +- .../ingestv2/ManagedStreamingIngestV2.java | 487 ++++++++++-------- .../main/java/ingestv2/QueuedIngestV2.java | 438 +++++++++------- .../main/java/ingestv2/StreamingIngestV2.java | 217 ++++---- 15 files changed, 700 insertions(+), 568 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt index 605b583d0..99a90e738 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt @@ -63,21 +63,6 @@ private constructor(private val database: String, private val table: String) { } } - /** - * Sets the data format for ingestion. - * - * @param value The data format (e.g., Format.json, Format.csv) - * @deprecated Format is automatically extracted from the IngestionSource. - * This method is no longer needed. - */ - @Deprecated( - "Format is automatically extracted from the IngestionSource. 
This method is no longer needed.", - ) - fun withFormat(value: com.microsoft.azure.kusto.ingest.v2.models.Format) = - apply { - this.format = value - } - fun withEnableTracking(value: Boolean) = apply { this.enableTracking = value } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index fb83d7846..7ba44ba26 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -12,8 +12,8 @@ import java.util.UUID class BlobSource( val blobPath: String, format: Format = Format.csv, - compressionType: CompressionType = CompressionType.NONE, sourceId: UUID = UUID.randomUUID(), + compressionType: CompressionType = CompressionType.NONE, baseName: String? = null, ) : IngestionSource(format, compressionType, baseName, sourceId) { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt index dc91048d1..a7d5e7437 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt @@ -18,13 +18,14 @@ class FileSource( format: Format, sourceId: UUID = UUID.randomUUID(), compressionType: CompressionType? = null, + baseName: String? = null, ) : LocalSource( format, leaveOpen = false, compressionType = compressionType ?: detectCompressionFromPath(path), - baseName = path.fileName?.toString(), + baseName = baseName ?: path.fileName?.toString(), sourceId = sourceId, ) { override fun data(): InputStream { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt index 5803e1969..b7fb43f9d 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt @@ -9,15 +9,16 @@ import java.util.UUID /** Represents a stream-based ingestion source. */ class StreamSource( stream: InputStream, - sourceCompression: CompressionType, format: Format, + sourceCompression: CompressionType, sourceId: UUID = UUID.randomUUID(), - name: String? = null, + baseName: String? 
= null, leaveOpen: Boolean = false, -) : LocalSource(format, leaveOpen, sourceCompression, name, sourceId) { +) : LocalSource(format, leaveOpen, sourceCompression, baseName, sourceId) { + init { mStream = stream - initName(name) + initName(baseName) } override fun data(): InputStream { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt index 06763a43b..2058bf7c5 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -308,8 +308,8 @@ abstract class ContainerUploaderBase( return BlobSource( blobPath = blobUrl, format = local.format, - compressionType = effectiveCompressionType, sourceId = local.sourceId, + compressionType = effectiveCompressionType, ) .apply { blobExactSize = local.size() } } catch (e: Exception) { diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java index 08d215df9..224531051 100644 --- a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java @@ -64,8 +64,7 @@ public void testManagedStreamingIngestSmallData() throws Exception { StreamSource source = new StreamSource( dataStream, - CompressionType.NONE, - Format.json, + Format.json, CompressionType.NONE, UUID.randomUUID(), "java-managed-streaming-small", false @@ -135,8 +134,7 @@ public void testManagedStreamingIngestWithFallback() throws Exception { StreamSource source = new StreamSource( dataStream, - CompressionType.NONE, - Format.multijson, + Format.multijson, CompressionType.NONE, UUID.randomUUID(), "java-managed-streaming-fallback", false @@ -210,7 +208,8 @@ public void testManagedStreamingIngestFromFileSource() throws Exception { filePath, Format.multijson, UUID.randomUUID(), - CompressionType.NONE + CompressionType.NONE, + null ); IngestRequestProperties properties = IngestRequestPropertiesBuilder diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java index 83b79b1e3..27292632f 100644 --- a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java @@ -63,8 +63,7 @@ public void testBasicQueuedIngest() throws Exception { StreamSource source = new StreamSource( dataStream, - CompressionType.NONE, - Format.json, + Format.json, CompressionType.NONE, UUID.randomUUID(), "java-queued-test", false @@ -149,7 +148,8 @@ public void testQueuedIngestFromFileSource() throws Exception { filePath, Format.multijson, UUID.randomUUID(), - CompressionType.NONE + CompressionType.NONE, + null ); IngestRequestProperties properties = IngestRequestPropertiesBuilder diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java index 39782ca0b..9b23290ab 100644 --- 
a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java @@ -62,8 +62,7 @@ public void testBasicStreamingIngest() throws Exception { StreamSource source = new StreamSource( dataStream, - CompressionType.NONE, - Format.json, + Format.json, CompressionType.NONE, UUID.randomUUID(), "java-streaming-test", false @@ -122,8 +121,7 @@ public void testStreamingIngestWithCompression() throws Exception { StreamSource source = new StreamSource( fileStream, - CompressionType.GZIP, - Format.multijson, + Format.multijson, CompressionType.GZIP, UUID.randomUUID(), "java-compressed-stream-test", false diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt index 1285239b3..98a8e749a 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt @@ -191,8 +191,7 @@ class ManagedStreamingIngestClientTest : stream = ByteArrayInputStream(testData.toByteArray()), format = targetTestFormat, sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "test-custom-policy", + baseName = "test-custom-policy", ) val properties = diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt index 956e56e93..c53a30dd4 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt @@ -324,8 +324,7 @@ class QueuedIngestClientTest : stream = ByteArrayInputStream(data), format = Format.multijson, sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = name, + baseName = name, ) } @@ -727,8 +726,7 @@ test2,456,2024-01-02""" ), format = Format.json, sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "format_json.json", + baseName = "format_json.json", ), StreamSource( stream = @@ -737,8 +735,7 @@ test2,456,2024-01-02""" ), format = Format.csv, sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "format_csv.csv", + baseName = "format_csv.csv", ), StreamSource( stream = @@ -747,8 +744,7 @@ test2,456,2024-01-02""" ), format = Format.json, sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = "format_json2.json", + baseName = "format_json2.json", ), ) @@ -821,8 +817,7 @@ test2,456,2024-01-02""" ), format = targetFormat, sourceCompression = CompressionType.NONE, - sourceId = UUID.randomUUID(), - name = fileName, + baseName = fileName, ) else -> error("Unknown sourceType: $sourceType") } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index 55c019211..ab232e3e5 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -60,15 +60,16 @@ class StreamingIngestClientTest : false, publicBlobUrl, ), -// 
Arguments.of( -// "Blob based ingest- Invalid blob URL", -// engineEndpoint, -// // isException -// true, -// // isUnreachableHost -// false, -// "https://nonexistentaccount.blob.core.windows.net/container/file.json", -// ), + // Arguments.of( + // "Blob based ingest- Invalid blob URL", + // engineEndpoint, + // // isException + // true, + // // isUnreachableHost + // false, + // + // "https://nonexistentaccount.blob.core.windows.net/container/file.json", + // ), ) } @@ -177,7 +178,6 @@ class StreamingIngestClientTest : invalidData.toByteArray(), ), format = Format.json, - sourceId = UUID.randomUUID(), sourceCompression = CompressionType.NONE, ) diff --git a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java index 1c73dd446..5198549b2 100644 --- a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java +++ b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java @@ -717,7 +717,7 @@ private static void postIngestionQuerying(Client kustoClient, String databaseNam queryFirstTwoRows(kustoClient, databaseName, tableName); } - private static void runIngestV2Sample(ConfigJson config) { + private static void runIngestV2Sample(@NotNull ConfigJson config) { IngestV2QuickstartConfig ingestV2Config = config.getIngestV2Config(); String clusterPath = ingestV2Config.getClusterPath(); if (StringUtils.isBlank(clusterPath)) { @@ -747,7 +747,7 @@ private static void runIngestV2Sample(ConfigJson config) { } } - private static ChainedTokenCredential buildIngestV2Credential(IngestV2QuickstartConfig config) { + private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2QuickstartConfig config) { AuthenticationModeOptions mode = config.getAuthModeOverride(); if (mode == null) { mode = AuthenticationModeOptions.USER_PROMPT; @@ -768,15 +768,15 @@ private static ChainedTokenCredential buildIngestV2Credential(IngestV2Quickstart return builder.build(); } - private static List> ingestV2FromStreams(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - QueuedIngestClient queuedIngestClient) throws IOException { + private static @NotNull List> ingestV2FromStreams(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + @NotNull QueuedIngestClient queuedIngestClient) throws IOException { System.out.println("\n=== Queued ingestion from streams (ingest-v2) ==="); List> futures = new ArrayList<>(); IngestRequestProperties csvProps = buildIngestV2RequestProperties(config, ingestV2Config, null); String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; InputStream csvStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - StreamSource csvSource = new StreamSource(csvStream, CompressionType.NONE, Format.csv, UUID.randomUUID(), "csv-stream", false); + StreamSource csvSource = new StreamSource(csvStream, Format.csv, CompressionType.NONE, UUID.randomUUID(), "csv-stream", false); futures.add(queuedIngestClient.ingestAsync(csvSource, csvProps) .thenCompose(response -> { closeQuietly(csvStream); @@ -785,7 +785,7 @@ private static List> ingestV2FromStreams(ConfigJson conf })); InputStream jsonStream = Files.newInputStream(resolveQuickstartPath("dataset.json")); - StreamSource jsonSource = new StreamSource(jsonStream, CompressionType.NONE, Format.json, UUID.randomUUID(), "json-stream", false); + StreamSource jsonSource = new StreamSource(jsonStream, 
Format.json, CompressionType.NONE, UUID.randomUUID(), "json-stream", false); IngestRequestProperties jsonProps = buildIngestV2RequestProperties(config, ingestV2Config, ingestV2Config.getDataMappingName()); futures.add(queuedIngestClient.ingestAsync(jsonSource, jsonProps) .thenCompose(response -> { @@ -797,20 +797,20 @@ private static List> ingestV2FromStreams(ConfigJson conf return futures; } - private static List> ingestV2FromFiles(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - QueuedIngestClient queuedIngestClient) { + private static @NotNull List> ingestV2FromFiles(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + @NotNull QueuedIngestClient queuedIngestClient) { System.out.println("\n=== Queued ingestion from files (ingest-v2) ==="); List> futures = new ArrayList<>(); IngestRequestProperties csvProps = buildIngestV2RequestProperties(config, ingestV2Config, null); - FileSource csvFileSource = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + FileSource csvFileSource = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE, "csv-file"); futures.add(queuedIngestClient.ingestAsync(csvFileSource, csvProps) .thenCompose(response -> { System.out.println("CSV file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "CSV File"); })); - FileSource jsonFileSource = new FileSource(resolveQuickstartPath("dataset.json"), Format.json, UUID.randomUUID(), CompressionType.NONE); + FileSource jsonFileSource = new FileSource(resolveQuickstartPath("dataset.json"), Format.json, UUID.randomUUID(), CompressionType.NONE, "json-file"); IngestRequestProperties jsonProps = buildIngestV2RequestProperties(config, ingestV2Config, ingestV2Config.getDataMappingName()); futures.add(queuedIngestClient.ingestAsync(jsonFileSource, jsonProps) .thenCompose(response -> { @@ -821,11 +821,11 @@ private static List> ingestV2FromFiles(ConfigJson config return futures; } - private static CompletableFuture ingestV2BatchIngestion(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - QueuedIngestClient queuedIngestClient) { + private static @NotNull CompletableFuture ingestV2BatchIngestion(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, + @NotNull QueuedIngestClient queuedIngestClient) { System.out.println("\n=== Queued ingestion from multiple sources (ingest-v2 batch) ==="); - FileSource source1 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); - FileSource source2 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + FileSource source1 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE, "source-1"); + FileSource source2 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE, "source-2"); List sources = Arrays.asList(source1, source2); IngestRequestProperties props = buildIngestV2RequestProperties(config, ingestV2Config, null); @@ -837,7 +837,7 @@ private static CompletableFuture ingestV2BatchIngestion(ConfigJson config, }); } - private static IngestRequestProperties buildIngestV2RequestProperties(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, String mappingName) { + private static @NotNull IngestRequestProperties 
buildIngestV2RequestProperties(@NotNull ConfigJson config, @NotNull IngestV2QuickstartConfig ingestV2Config, String mappingName) { IngestRequestPropertiesBuilder builder = IngestRequestPropertiesBuilder .create(config.getDatabaseName(), config.getTableName()) .withEnableTracking(ingestV2Config.isTrackingEnabled()); @@ -847,8 +847,8 @@ private static IngestRequestProperties buildIngestV2RequestProperties(ConfigJson return builder.build(); } - private static CompletableFuture trackIngestV2Operation(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - QueuedIngestClient queuedIngestClient, ExtendedIngestResponse response, String operationName) { + private static @NotNull CompletableFuture trackIngestV2Operation(@NotNull ConfigJson config, @NotNull IngestV2QuickstartConfig ingestV2Config, + @NotNull QueuedIngestClient queuedIngestClient, @NotNull ExtendedIngestResponse response, String operationName) { IngestionOperation operation = new IngestionOperation( Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), config.getDatabaseName(), @@ -913,7 +913,7 @@ private static void printIngestV2StatusResponse(StatusResponse statusResponse) { } } - private static Path resolveQuickstartPath(String fileName) { + private static @NotNull Path resolveQuickstartPath(String fileName) { Path preferred = Paths.get("quickstart", fileName); if (Files.exists(preferred)) { return preferred; diff --git a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java index c67757f34..e370c03be 100644 --- a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java +++ b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java @@ -22,6 +22,7 @@ import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; import com.microsoft.azure.kusto.ingest.v2.source.FileSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import org.jetbrains.annotations.NotNull; import java.io.ByteArrayInputStream; import java.io.FileInputStream; @@ -35,16 +36,14 @@ import java.util.concurrent.CompletableFuture; /** - * Sample demonstrating managed streaming ingestion using the new ingest-v2 API. - * This is the modern API that uses Kotlin-based clients with coroutines, - * providing better async support and a cleaner API design. - * Managed streaming ingestion intelligently chooses between streaming and queued ingestion: - * - Small data (typically under 4MB) is ingested via streaming for low latency - * - Large data automatically falls back to queued ingestion for reliability - * - Server errors (like streaming disabled) trigger automatic fallback to queued - * - Transient errors are retried according to the configured retry policy - * This approach provides the best of both worlds: low latency for small data - * and high reliability for all data sizes. + * Sample demonstrating managed streaming ingestion using the new ingest-v2 API. This is the modern + * API that uses Kotlin-based clients with coroutines, providing better async support and a cleaner + * API design. 
Managed streaming ingestion intelligently chooses between streaming and queued + * ingestion: - Small data (typically under 4MB) is ingested via streaming for low latency - Large + * data automatically falls back to queued ingestion for reliability - Server errors (like streaming + * disabled) trigger automatic fallback to queued - Transient errors are retried according to the + * configured retry policy This approach provides the best of both worlds: low latency for small + * data and high reliability for all data sizes. */ public class ManagedStreamingIngestV2 { @@ -56,7 +55,8 @@ public class ManagedStreamingIngestV2 { public static void main(String[] args) { try { // Get configuration from system properties - String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String engineEndpoint = + System.getProperty("clusterPath"); // "https://.kusto.windows.net" String appId = System.getProperty("app-id"); String appKey = System.getProperty("appKey"); String tenant = System.getProperty("tenant"); @@ -68,32 +68,40 @@ public static void main(String[] args) { ChainedTokenCredential credential; // Create Azure AD credential - if (StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { - credential = new ChainedTokenCredentialBuilder() - .addFirst(new ClientSecretCredentialBuilder() - .clientId(appId) - .clientSecret(appKey) - .tenantId(tenant) - .build()) - .build(); + if (StringUtils.isNotBlank(appId) + && StringUtils.isNotBlank(appKey) + && StringUtils.isNotBlank(tenant)) { + credential = + new ChainedTokenCredentialBuilder() + .addFirst( + new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build()) + .build(); } else { - credential = new ChainedTokenCredentialBuilder() - .addFirst(new AzureCliCredentialBuilder().build()) - .build(); + credential = + new ChainedTokenCredentialBuilder() + .addFirst(new AzureCliCredentialBuilder().build()) + .build(); } if (engineEndpoint == null || engineEndpoint.isEmpty()) { - throw new IllegalArgumentException("Cluster endpoint (clusterPath) must be provided as a system property."); + throw new IllegalArgumentException( + "Cluster endpoint (clusterPath) must be provided as a system property."); } // Create managed streaming ingest client using the new v2 API // The client will automatically handle streaming vs queued ingestion decisions - managedStreamingIngestClient = ManagedStreamingIngestClientBuilder.create(engineEndpoint) - .withAuthentication(credential) - .build(); + managedStreamingIngestClient = + ManagedStreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .build(); System.out.println("Managed Streaming Ingest Client created successfully"); - System.out.println("This client automatically chooses between streaming and queued ingestion"); + System.out.println( + "This client automatically chooses between streaming and queued ingestion"); System.out.println("based on data size and server responses.\n"); // Run ingestion examples @@ -114,82 +122,89 @@ public static void main(String[] args) { } /** - * Demonstrates ingestion from various stream sources. - * Small data will typically use streaming ingestion for low latency. - * Sources include: - * - In-memory string data as CSV (small, will use streaming) - * - Compressed file stream (CSV) - * - JSON file stream with mapping + * Demonstrates ingestion from various stream sources. 
Small data will typically use streaming + * ingestion for low latency. Sources include: - In-memory string data as CSV (small, will use + * streaming) - Compressed file stream (CSV) - JSON file stream with mapping */ static void ingestFromStream() throws Exception { System.out.println("\n=== Managed Streaming Ingestion from Streams ==="); // Example 1: Ingest from in-memory CSV string (small data - will use streaming) - String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - - StreamSource csvStreamSource = new StreamSource( - csvInputStream, CompressionType.NONE, Format.csv, - UUID.randomUUID(), "csv-managed-stream", false); - - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + String csvData = + "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = + new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = + new StreamSource( + csvInputStream, + Format.csv, + CompressionType.NONE, + UUID.randomUUID(), + "csv-managed-stream", + false); + + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Ingesting small CSV data from string..."); - ExtendedIngestResponse csvResponse = managedStreamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); + ExtendedIngestResponse csvResponse = + managedStreamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); printIngestionResult("CSV String", csvResponse); // Example 2: Ingest from compressed CSV file String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - FileInputStream compressedCsvStream = new FileInputStream(resourcesDirectory + "dataset.csv.gz"); - - StreamSource compressedStreamSource = new StreamSource( - compressedCsvStream, - CompressionType.GZIP, - Format.csv, - UUID.randomUUID(), - "compressed-csv-managed-stream", - false - ); + FileInputStream compressedCsvStream = + new FileInputStream(resourcesDirectory + "dataset.csv.gz"); + + StreamSource compressedStreamSource = + new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + "compressed-csv-managed-stream", + false); System.out.println("Ingesting compressed CSV file..."); - ExtendedIngestResponse compressedResponse = managedStreamingIngestClient.ingestAsync(compressedStreamSource, csvProperties).get(); + ExtendedIngestResponse compressedResponse = + managedStreamingIngestClient + .ingestAsync(compressedStreamSource, csvProperties) + .get(); printIngestionResult("Compressed CSV", compressedResponse); compressedCsvStream.close(); // Example 3: Ingest JSON with mapping FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json"); - StreamSource jsonStreamSource = new StreamSource( - jsonStream, - CompressionType.NONE, - Format.json, - UUID.randomUUID(), - "json-managed-stream", - false - ); - - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + StreamSource jsonStreamSource = + new StreamSource( + 
jsonStream, + Format.json, + CompressionType.NONE, + UUID.randomUUID(), + "json-managed-stream", + false); + + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); System.out.println("Ingesting JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = managedStreamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); + ExtendedIngestResponse jsonResponse = + managedStreamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); printIngestionResult("JSON with Mapping", jsonResponse); jsonStream.close(); } /** - * Demonstrates ingestion from file sources. - * The client will automatically decide between streaming and queued - * based on file size and other factors. - * Sources include: - * - CSV file - * - Compressed JSON file with mapping + * Demonstrates ingestion from file sources. The client will automatically decide between + * streaming and queued based on file size and other factors. Sources include: - CSV file - + * Compressed JSON file with mapping */ static void ingestFromFile() throws Exception { System.out.println("\n=== Managed Streaming Ingestion from Files ==="); @@ -197,60 +212,62 @@ static void ingestFromFile() throws Exception { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Example 1: Ingest CSV file - FileSource csvFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE - ); - - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + FileSource csvFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE, + "m-ds-csv"); + + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Ingesting CSV file..."); - ExtendedIngestResponse csvResponse = managedStreamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); + ExtendedIngestResponse csvResponse = + managedStreamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); printIngestionResult("CSV File", csvResponse); // Example 2: Ingest compressed JSON file with mapping - FileSource jsonFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP - ); - - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + FileSource jsonFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP, + "m-ds-json-compressed"); + + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); System.out.println("Ingesting compressed JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = managedStreamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); + ExtendedIngestResponse jsonResponse = + managedStreamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); printIngestionResult("Compressed JSON File", jsonResponse); } /** - * 
Demonstrates the automatic fallback to queued ingestion when data size exceeds - * the streaming limit. - * This method creates a large in-memory dataset (~20MB uncompressed) to force the - * client to fall back to queued ingestion. Note that data is automatically compressed - * before ingestion, so we use a larger size (20MB) to ensure the compressed data - * still exceeds the streaming threshold (~4MB compressed). - * This demonstrates: - * - Automatic size-based decision making - * - Fallback logging from the client ("Blob size is too big for streaming ingest") - * - Operation tracking for queued ingestion - * Note: Streaming ingestion operations are not tracked - they complete immediately - * with success or throw an exception on failure. + * Demonstrates the automatic fallback to queued ingestion when data size exceeds the streaming + * limit. This method creates a large in-memory dataset (~20MB uncompressed) to force the client + * to fall back to queued ingestion. Note that data is automatically compressed before + * ingestion, so we use a larger size (20MB) to ensure the compressed data still exceeds the + * streaming threshold (~4MB compressed). This demonstrates: - Automatic size-based + * decision-making - Fallback logging from the client ("Blob size is too big for streaming + * ingest") - Operation tracking for queued ingestion Note: Streaming ingestion operations are + * not tracked - they complete immediately with success or throw an exception on failure. */ static void demonstrateFallbackTracking() throws Exception { System.out.println("\n=== Demonstrating Size-Based Fallback to Queued Ingestion ==="); - System.out.println("The ManagedStreamingIngestClient automatically falls back to queued ingestion"); + System.out.println( + "The ManagedStreamingIngestClient automatically falls back to queued ingestion"); System.out.println("when data size exceeds the streaming limit (~4MB compressed)."); - System.out.println("Since data is automatically compressed, we use a larger dataset (~20MB)"); + System.out.println( + "Since data is automatically compressed, we use a larger dataset (~20MB)"); System.out.println("to ensure the compressed size still exceeds the threshold.\n"); // Generate a large CSV dataset (~20MB uncompressed) that will exceed the streaming limit @@ -258,7 +275,7 @@ static void demonstrateFallbackTracking() throws Exception { int targetSizeBytes = 20 * 1024 * 1024; // 20MB String largeData = generateLargeCsvData(targetSizeBytes); byte[] dataBytes = largeData.getBytes(StandardCharsets.UTF_8); - + System.out.println("Generated large CSV dataset:"); System.out.println(" - Uncompressed data size: " + formatBytes(dataBytes.length)); System.out.println(" - Streaming limit: ~4MB (after compression)"); @@ -267,29 +284,31 @@ static void demonstrateFallbackTracking() throws Exception { System.out.println(); InputStream largeInputStream = new ByteArrayInputStream(dataBytes); - + // Mark the stream for potential retry (seekable stream) largeInputStream.mark(dataBytes.length); - StreamSource largeStreamSource = new StreamSource( - largeInputStream, - CompressionType.NONE, // Will be auto-compressed by the client - Format.csv, - UUID.randomUUID(), - "large-data-fallback-demo", - false - ); - - IngestRequestProperties properties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); - - System.out.println("Ingesting large dataset (" + formatBytes(dataBytes.length) + " uncompressed)..."); + StreamSource largeStreamSource = + new 
StreamSource( + largeInputStream, + Format.csv, + CompressionType.NONE, // Will be auto-compressed by the client + UUID.randomUUID(), + "large-data-fallback-demo", + false); + + IngestRequestProperties properties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); + + System.out.println( + "Ingesting large dataset (" + formatBytes(dataBytes.length) + " uncompressed)..."); System.out.println("(Watch for fallback log messages from ManagedStreamingIngestClient)"); System.out.println(); - - ExtendedIngestResponse response = managedStreamingIngestClient.ingestAsync(largeStreamSource, properties).get(); + + ExtendedIngestResponse response = + managedStreamingIngestClient.ingestAsync(largeStreamSource, properties).get(); printIngestionResult("Large Data Ingestion", response); // The large data should trigger queued fallback @@ -297,85 +316,120 @@ static void demonstrateFallbackTracking() throws Exception { System.out.println("SUCCESS: Large data correctly triggered QUEUED fallback!"); System.out.println("This demonstrates the automatic size-based routing.\n"); - IngestionOperation operation = new IngestionOperation( - Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), - database, - table, - response.getIngestionType() - ); + IngestionOperation operation = + new IngestionOperation( + Objects.requireNonNull( + response.getIngestResponse().getIngestionOperationId()), + database, + table, + response.getIngestionType()); // Get initial operation details - CompletableFuture detailsFuture = managedStreamingIngestClient.getOperationDetailsAsync(operation); + CompletableFuture detailsFuture = + managedStreamingIngestClient.getOperationDetailsAsync(operation); StatusResponse details = detailsFuture.get(); printStatusResponse("Initial Status", details); // Poll for completion using getOperationDetailsAsync - System.out.println("\nPolling for completion (checking every 30 seconds, timeout 2 minutes)..."); - StatusResponse finalStatus = pollForCompletionManually(operation, Duration.ofSeconds(30), Duration.ofMinutes(2)); + System.out.println( + "\nPolling for completion (checking every 30 seconds, timeout 2 minutes)..."); + StatusResponse finalStatus = + pollForCompletionManually( + operation, Duration.ofSeconds(30), Duration.ofMinutes(2)); printStatusResponse("Final Status", finalStatus); } else { System.out.println("NOTE: Data was ingested via STREAMING method."); - System.out.println("This might happen if compression was very effective. Try increasing"); + System.out.println( + "This might happen if compression was very effective. Try increasing"); System.out.println("the data size or using less compressible data patterns."); } } /** - * Generates a large CSV dataset of approximately the target size. - * The data follows the format expected by the sample table schema. - * Uses varied data to reduce compression effectiveness. + * Generates a large CSV dataset of approximately the target size. The data follows the format + * expected by the sample table schema. Uses varied data to reduce compression effectiveness. 
*/ - private static String generateLargeCsvData(int targetSizeBytes) { + private static @NotNull String generateLargeCsvData(int targetSizeBytes) { StringBuilder sb = new StringBuilder(); int rowCount = 0; - + // Generate varied data to make it less compressible java.util.Random random = new java.util.Random(42); // Fixed seed for reproducibility - + // Sample CSV row matching the expected schema - // Format: int,guid,int,int,int,int,int,int,int,int,int,int,datetime,string,string,int,timespan,null,null + // Format: + // int,guid,int,int,int,int,int,int,int,int,int,int,datetime,string,string,int,timespan,null,null while (sb.length() < targetSizeBytes) { // Use random values to reduce compression effectiveness - sb.append(rowCount).append(",") - .append(UUID.randomUUID()).append(",") // Random GUID - .append(random.nextInt(10000)).append(",") - .append(random.nextInt(100000)).append(",") - .append(random.nextLong()).append(",") - .append(random.nextDouble() * 1000000).append(",") - .append(random.nextInt()).append(",") - .append(random.nextInt(1000)).append(",") - .append(random.nextInt(5000)).append(",") - .append(random.nextInt(10000)).append(",") - .append(random.nextInt(100)).append(",") - .append(random.nextInt(50)).append(",") - .append("2024-").append(String.format("%02d", (rowCount % 12) + 1)) - .append("-").append(String.format("%02d", (rowCount % 28) + 1)) - .append("T").append(String.format("%02d", rowCount % 24)) - .append(":").append(String.format("%02d", rowCount % 60)) - .append(":").append(String.format("%02d", rowCount % 60)) - .append(".").append(String.format("%07d", random.nextInt(10000000))) - .append("Z").append(",") - .append("Row_").append(rowCount).append("_").append(random.nextInt(100000)).append(",") - .append("\"Description with random data: ").append(random.nextLong()) - .append(" and more: ").append(UUID.randomUUID()).append("\"").append(",") - .append(random.nextInt(100000)).append(",") - .append(String.format("%02d:%02d:%02d", - random.nextInt(24), random.nextInt(60), random.nextInt(60))).append(",") - .append(",") - .append("null") - .append("\n"); + sb.append(rowCount) + .append(",") + .append(UUID.randomUUID()) + .append(",") // Random GUID + .append(random.nextInt(10000)) + .append(",") + .append(random.nextInt(100000)) + .append(",") + .append(random.nextLong()) + .append(",") + .append(random.nextDouble() * 1000000) + .append(",") + .append(random.nextInt()) + .append(",") + .append(random.nextInt(1000)) + .append(",") + .append(random.nextInt(5000)) + .append(",") + .append(random.nextInt(10000)) + .append(",") + .append(random.nextInt(100)) + .append(",") + .append(random.nextInt(50)) + .append(",") + .append("2024-") + .append(String.format("%02d", (rowCount % 12) + 1)) + .append("-") + .append(String.format("%02d", (rowCount % 28) + 1)) + .append("T") + .append(String.format("%02d", rowCount % 24)) + .append(":") + .append(String.format("%02d", rowCount % 60)) + .append(":") + .append(String.format("%02d", rowCount % 60)) + .append(".") + .append(String.format("%07d", random.nextInt(10000000))) + .append("Z") + .append(",") + .append("Row_") + .append(rowCount) + .append("_") + .append(random.nextInt(100000)) + .append(",") + .append("\"Description with random data: ") + .append(random.nextLong()) + .append(" and more: ") + .append(UUID.randomUUID()) + .append("\"") + .append(",") + .append(random.nextInt(100000)) + .append(",") + .append( + String.format( + "%02d:%02d:%02d", + random.nextInt(24), random.nextInt(60), random.nextInt(60))) + 
.append(",") + .append(",") + .append("null") + .append("\n"); rowCount++; } - + System.out.println(" - Generated " + rowCount + " rows of varied data"); return sb.toString(); } - /** - * Formats bytes into a human-readable string (e.g., "10.00 MB"). - */ - private static String formatBytes(long bytes) { + /** Formats bytes into a human-readable string (e.g., "10.00 MB"). */ + private static @NotNull String formatBytes(long bytes) { if (bytes < 1024) return bytes + " B"; if (bytes < 1024 * 1024) return String.format("%.2f KB", bytes / 1024.0); if (bytes < 1024 * 1024 * 1024) return String.format("%.2f MB", bytes / (1024.0 * 1024.0)); @@ -383,59 +437,68 @@ private static String formatBytes(long bytes) { } /** - * Manually polls for completion by repeatedly calling getOperationDetailsAsync. - * This demonstrates how to implement polling when the ManagedStreamingIngestClient - * is used and queued fallback occurs. + * Manually polls for completion by repeatedly calling getOperationDetailsAsync. This + * demonstrates how to implement polling when the ManagedStreamingIngestClient is used and + * queued fallback occurs. */ private static StatusResponse pollForCompletionManually( - IngestionOperation operation, - Duration pollingInterval, - Duration timeout) throws Exception { - + IngestionOperation operation, @NotNull Duration pollingInterval, @NotNull Duration timeout) + throws Exception { + long startTime = System.currentTimeMillis(); long timeoutMillis = timeout.toMillis(); long intervalMillis = pollingInterval.toMillis(); - + while (System.currentTimeMillis() - startTime < timeoutMillis) { - StatusResponse status = managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); - + StatusResponse status = + managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); + // Check if completed (no more in-progress items) Status summary = status.getStatus(); - if (summary != null && summary.getInProgress()!=null && summary.getInProgress() == 0) { + if (summary != null + && summary.getInProgress() != null + && summary.getInProgress() == 0) { System.out.println("Operation completed."); return status; } - - System.out.println("Still in progress... (In Progress: " + - (summary != null ? summary.getInProgress() : "unknown") + ")"); - + + System.out.println( + "Still in progress... (In Progress: " + + (summary != null ? summary.getInProgress() : "unknown") + + ")"); + // Wait before next poll Thread.sleep(intervalMillis); } - + // Timeout reached, return latest status System.out.println("Polling timeout reached. Returning latest status."); return managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); } - /** - * Prints the ingestion result including which method (streaming or queued) was used. - */ - private static void printIngestionResult(String operationName, ExtendedIngestResponse response) { - String ingestionMethod = response.getIngestionType() == IngestKind.STREAMING ? "STREAMING" : "QUEUED"; - System.out.println("[" + operationName + "] Ingestion completed using " + ingestionMethod + " method."); - System.out.println(" Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + /** Prints the ingestion result including which method (streaming or queued) was used. */ + private static void printIngestionResult( + String operationName, @NotNull ExtendedIngestResponse response) { + String ingestionMethod = + response.getIngestionType() == IngestKind.STREAMING ? 
"STREAMING" : "QUEUED"; + System.out.println( + "[" + + operationName + + "] Ingestion completed using " + + ingestionMethod + + " method."); + System.out.println( + " Operation ID: " + response.getIngestResponse().getIngestionOperationId()); if (response.getIngestionType() == IngestKind.STREAMING) { System.out.println(" (Low latency - data available immediately)"); } else { - System.out.println(" (High reliability - data will be available after batch processing)"); + System.out.println( + " (High reliability - data will be available after batch processing)"); } System.out.println(); } - /** - * Prints detailed status information from a StatusResponse. - */ + /** Prints detailed status information from a StatusResponse. */ private static void printStatusResponse(String label, StatusResponse statusResponse) { if (statusResponse == null) { System.out.println(label + ": null"); diff --git a/samples/src/main/java/ingestv2/QueuedIngestV2.java b/samples/src/main/java/ingestv2/QueuedIngestV2.java index 9e31a0291..ed0dfb7f2 100644 --- a/samples/src/main/java/ingestv2/QueuedIngestV2.java +++ b/samples/src/main/java/ingestv2/QueuedIngestV2.java @@ -22,7 +22,6 @@ import com.microsoft.azure.kusto.ingest.v2.source.FileSource; import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -35,11 +34,10 @@ import java.util.concurrent.TimeUnit; /** - * Sample demonstrating queued ingestion using the new ingest-v2 API. - * This is the modern API that uses Kotlin-based clients with coroutines, - * providing better async support and a cleaner API design. - * Queued ingestion is asynchronous and provides reliable, high-throughput - * data ingestion with operation tracking capabilities. + * Sample demonstrating queued ingestion using the new ingest-v2 API. This is the modern API that + * uses Kotlin-based clients with coroutines, providing better async support and a cleaner API + * design. Queued ingestion is asynchronous and provides reliable, high-throughput data ingestion + * with operation tracking capabilities. 
*/ public class QueuedIngestV2 { @@ -51,7 +49,8 @@ public class QueuedIngestV2 { public static void main(String[] args) { try { // Get configuration from system properties - String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String engineEndpoint = + System.getProperty("clusterPath"); // "https://.kusto.windows.net" String appId = System.getProperty("app-id"); String appKey = System.getProperty("appKey"); String tenant = System.getProperty("tenant"); @@ -63,29 +62,36 @@ public static void main(String[] args) { ChainedTokenCredential credential; // Create Azure AD credential - if(StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { - credential = new ChainedTokenCredentialBuilder() - .addFirst(new ClientSecretCredentialBuilder() - .clientId(appId) - .clientSecret(appKey) - .tenantId(tenant) - .build()) - .build(); + if (StringUtils.isNotBlank(appId) + && StringUtils.isNotBlank(appKey) + && StringUtils.isNotBlank(tenant)) { + credential = + new ChainedTokenCredentialBuilder() + .addFirst( + new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build()) + .build(); } else { - credential = new ChainedTokenCredentialBuilder() - .addFirst(new AzureCliCredentialBuilder().build()) - .build(); + credential = + new ChainedTokenCredentialBuilder() + .addFirst(new AzureCliCredentialBuilder().build()) + .build(); } - if(engineEndpoint == null || engineEndpoint.isEmpty()) { - throw new IllegalArgumentException("Cluster endpoint (clusterPath) must be provided as a system property."); + if (engineEndpoint == null || engineEndpoint.isEmpty()) { + throw new IllegalArgumentException( + "Cluster endpoint (clusterPath) must be provided as a system property."); } // Create queued ingest client using the new v2 API - queuedIngestClient = QueuedIngestClientBuilder.create(engineEndpoint) - .withAuthentication(credential) - .withMaxConcurrency(10) // Set maximum concurrent uploads - .build(); + queuedIngestClient = + QueuedIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .withMaxConcurrency(10) // Set maximum concurrent uploads + .build(); System.out.println("Queued Ingest Client created successfully"); @@ -98,9 +104,8 @@ public static void main(String[] args) { allFutures.add(ingestMultipleSources()); // Wait for all operations to complete - CompletableFuture allOf = CompletableFuture.allOf( - allFutures.toArray(new CompletableFuture[0]) - ); + CompletableFuture allOf = + CompletableFuture.allOf(allFutures.toArray(new CompletableFuture[0])); System.out.println("\nWaiting for all ingestion operations to complete..."); allOf.get(5, TimeUnit.MINUTES); @@ -118,10 +123,8 @@ public static void main(String[] args) { } /** - * Demonstrates ingestion from various stream sources including: - * - In-memory string data as CSV - * - Compressed file stream (CSV) - * - JSON file stream with mapping + * Demonstrates ingestion from various stream sources including: - In-memory string data as CSV + * - Compressed file stream (CSV) - JSON file stream with mapping */ static List> ingestFromStream() throws Exception { System.out.println("\n=== Queued Ingestion from Streams ==="); @@ -129,83 +132,110 @@ static List> ingestFromStream() throws Exception { List> futures = new ArrayList<>(); // Example 1: Ingest from in-memory CSV string - String csvData = 
"0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - - StreamSource csvStreamSource = new StreamSource( - csvInputStream, CompressionType.NONE, Format.csv, - UUID.randomUUID(), "csv-queued-stream", false); - - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + String csvData = + "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = + new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = + new StreamSource( + csvInputStream, + Format.csv, + CompressionType.NONE, + UUID.randomUUID(), + "csv-queued-stream", + false); + + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Queueing CSV data from string..."); - CompletableFuture csvFuture = queuedIngestClient.ingestAsync(csvStreamSource, csvProperties) - .thenCompose(response -> { - System.out.println("CSV ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - return trackIngestionOperation(response, "CSV Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); + CompletableFuture csvFuture = + queuedIngestClient + .ingestAsync(csvStreamSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "CSV ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); futures.add(csvFuture); // Example 2: Ingest from compressed CSV file String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - InputStream compressedCsvStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.csv.gz")); - - StreamSource compressedStreamSource = new StreamSource( - compressedCsvStream, - CompressionType.GZIP, - Format.csv, - UUID.randomUUID(), - "compressed-csv-queued-stream", - false - ); + InputStream compressedCsvStream = + new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.csv.gz")); + + StreamSource compressedStreamSource = + new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + "compressed-csv-queued-stream", + false); System.out.println("Queueing compressed CSV file..."); - CompletableFuture compressedFuture = queuedIngestClient.ingestAsync(compressedStreamSource, csvProperties) - .thenCompose(response -> { - System.out.println("Compressed CSV ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - return trackIngestionOperation(response, "Compressed CSV Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); + CompletableFuture compressedFuture = + queuedIngestClient + .ingestAsync(compressedStreamSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "Compressed CSV ingestion queued. 
Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation( + response, "Compressed CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); futures.add(compressedFuture); // Example 3: Ingest JSON with mapping - InputStream jsonStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); - - StreamSource jsonStreamSource = new StreamSource( - jsonStream, - CompressionType.NONE, - Format.json, - UUID.randomUUID(), - "json-queued-stream", - false - ); - - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + InputStream jsonStream = + new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); + + StreamSource jsonStreamSource = + new StreamSource( + jsonStream, + Format.json, + CompressionType.NONE, + UUID.randomUUID(), + "json-queued-stream", + false); + + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); System.out.println("Queueing JSON file with mapping..."); - CompletableFuture jsonFuture = queuedIngestClient.ingestAsync(jsonStreamSource, jsonProperties) - .thenCompose(response -> { - System.out.println("JSON ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - return trackIngestionOperation(response, "JSON Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); + CompletableFuture jsonFuture = + queuedIngestClient + .ingestAsync(jsonStreamSource, jsonProperties) + .thenCompose( + response -> { + System.out.println( + "JSON ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "JSON Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); futures.add(jsonFuture); return futures; } /** - * Demonstrates ingestion from file sources including: - * - CSV file - * - Compressed JSON file with mapping + * Demonstrates ingestion from file sources including: - CSV file - Compressed JSON file with + * mapping */ static List> ingestFromFile() { System.out.println("\n=== Queued Ingestion from Files ==="); @@ -215,54 +245,69 @@ static List> ingestFromFile() { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Example 1: Ingest CSV file - FileSource csvFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE - ); - - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + FileSource csvFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE, + "dataset.csv"); + + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Queueing CSV file..."); - CompletableFuture csvFuture = queuedIngestClient.ingestAsync(csvFileSource, csvProperties) - .thenCompose(response -> { - System.out.println("CSV file ingestion queued. 
Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - return trackIngestionOperation(response, "CSV File"); - }); + CompletableFuture csvFuture = + queuedIngestClient + .ingestAsync(csvFileSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "CSV file ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "CSV File"); + }); futures.add(csvFuture); // Example 2: Ingest compressed JSON file with mapping - FileSource jsonFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP - ); - - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + FileSource jsonFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP, + "dataset.jsonz"); + + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); System.out.println("Queueing compressed JSON file with mapping..."); - CompletableFuture jsonFuture = queuedIngestClient.ingestAsync(jsonFileSource, jsonProperties) - .thenCompose(response -> { - System.out.println("Compressed JSON file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - return trackIngestionOperation(response, "Compressed JSON File"); - }); + CompletableFuture jsonFuture = + queuedIngestClient + .ingestAsync(jsonFileSource, jsonProperties) + .thenCompose( + response -> { + System.out.println( + "Compressed JSON file ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation( + response, "Compressed JSON File"); + }); futures.add(jsonFuture); return futures; } /** - * Demonstrates batch ingestion from multiple sources in a single operation. - * This is more efficient than ingesting sources one by one when you have multiple files. + * Demonstrates batch ingestion from multiple sources in a single operation. This is more + * efficient than ingesting sources one by one when you have multiple files. 
*/ static CompletableFuture ingestMultipleSources() { System.out.println("\n=== Queued Ingestion from Multiple Sources (Batch) ==="); @@ -270,83 +315,101 @@ static CompletableFuture ingestMultipleSources() { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Create multiple file sources - FileSource source1 = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE - ); - - FileSource source2 = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv.gz"), - Format.csv, - UUID.randomUUID(), - CompressionType.GZIP - ); + FileSource source1 = + new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE, + "dataset.csv"); + + FileSource source2 = + new FileSource( + Paths.get(resourcesDirectory + "dataset.csv.gz"), + Format.csv, + UUID.randomUUID(), + CompressionType.GZIP, + "dataset.csv.gz"); List sources = Arrays.asList(source1, source2); - IngestRequestProperties properties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + IngestRequestProperties properties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Queueing multiple sources in batch..."); - return queuedIngestClient.ingestAsync(sources, properties) - .thenCompose(response -> { - System.out.println("Batch ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - System.out.println("Number of sources in batch: " + sources.size()); - return trackIngestionOperation(response, "Batch Ingestion"); - }); + return queuedIngestClient + .ingestAsync(sources, properties) + .thenCompose( + response -> { + System.out.println( + "Batch ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + System.out.println("Number of sources in batch: " + sources.size()); + return trackIngestionOperation(response, "Batch Ingestion"); + }); } /** - * Tracks an ingestion operation by: - * 1. Getting operation details immediately after queueing - * 2. Polling for completion - * 3. Getting final operation details - * 4. Printing status information + * Tracks an ingestion operation by: 1. Getting operation details immediately after queueing 2. + * Polling for completion 3. Getting final operation details 4. 
Printing status information */ - private static CompletableFuture trackIngestionOperation(ExtendedIngestResponse response, String operationName) { - IngestionOperation operation = new IngestionOperation( - Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), - database, - table, - response.getIngestionType() - ); + private static CompletableFuture trackIngestionOperation( + ExtendedIngestResponse response, String operationName) { + IngestionOperation operation = + new IngestionOperation( + Objects.requireNonNull( + response.getIngestResponse().getIngestionOperationId()), + database, + table, + response.getIngestionType()); System.out.println("\n--- Tracking " + operationName + " ---"); // Get initial operation details - return queuedIngestClient.getOperationDetailsAsync(operation) - .thenCompose(initialDetails -> { - System.out.println("[" + operationName + "] Initial Operation Details:"); - printStatusResponse(initialDetails); - - // Poll for completion - System.out.println("[" + operationName + "] Polling for completion..."); - return queuedIngestClient.pollForCompletion(operation, Duration.ofSeconds(30),Duration.ofMinutes(2)); // 2 minutes timeout - }) - .thenCompose(finalStatus -> { - System.out.println("[" + operationName + "] Polling completed."); - // Get final operation details - return queuedIngestClient.getOperationDetailsAsync(operation); - }) - .thenAccept(finalDetails -> { - System.out.println("[" + operationName + "] Final Operation Details:"); - printStatusResponse(finalDetails); - System.out.println("[" + operationName + "] Operation tracking completed.\n"); - }) - .exceptionally(error -> { - System.err.println("[" + operationName + "] Error tracking operation: " + error.getMessage()); - error.printStackTrace(); - return null; - }); + return queuedIngestClient + .getOperationDetailsAsync(operation) + .thenCompose( + initialDetails -> { + System.out.println( + "[" + operationName + "] Initial Operation Details:"); + printStatusResponse(initialDetails); + + // Poll for completion + System.out.println("[" + operationName + "] Polling for completion..."); + return queuedIngestClient.pollForCompletion( + operation, + Duration.ofSeconds(30), + Duration.ofMinutes(2)); // 2 minutes timeout + }) + .thenCompose( + finalStatus -> { + System.out.println("[" + operationName + "] Polling completed."); + // Get final operation details + return queuedIngestClient.getOperationDetailsAsync(operation); + }) + .thenAccept( + finalDetails -> { + System.out.println("[" + operationName + "] Final Operation Details:"); + printStatusResponse(finalDetails); + System.out.println( + "[" + operationName + "] Operation tracking completed.\n"); + }) + .exceptionally( + error -> { + System.err.println( + "[" + + operationName + + "] Error tracking operation: " + + error.getMessage()); + error.printStackTrace(); + return null; + }); } - /** - * Prints detailed status information from a StatusResponse - */ + /** Prints detailed status information from a StatusResponse */ private static void printStatusResponse(StatusResponse statusResponse) { if (statusResponse == null) { System.out.println(" Status: null"); @@ -383,7 +446,8 @@ private static void printStatusResponse(StatusResponse statusResponse) { } } - private static byte[] readResourceBytes(String baseDirectory, String fileName) throws IOException { + private static byte[] readResourceBytes(String baseDirectory, String fileName) + throws IOException { return Files.readAllBytes(Paths.get(baseDirectory, fileName)); } diff --git 
a/samples/src/main/java/ingestv2/StreamingIngestV2.java b/samples/src/main/java/ingestv2/StreamingIngestV2.java index ddc774a30..351ab89aa 100644 --- a/samples/src/main/java/ingestv2/StreamingIngestV2.java +++ b/samples/src/main/java/ingestv2/StreamingIngestV2.java @@ -17,7 +17,6 @@ import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; import com.microsoft.azure.kusto.ingest.v2.source.FileSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; - import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.InputStream; @@ -26,9 +25,9 @@ import java.util.UUID; /** - * Sample demonstrating streaming ingestion using the new ingest-v2 API. - * This is the modern API that uses Kotlin-based clients with coroutines, - * providing better async support and a cleaner API design. + * Sample demonstrating streaming ingestion using the new ingest-v2 API. This is the modern API that + * uses Kotlin-based clients with coroutines, providing better async support and a cleaner API + * design. */ public class StreamingIngestV2 { @@ -40,7 +39,8 @@ public class StreamingIngestV2 { public static void main(String[] args) { try { // Get configuration from system properties - String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String engineEndpoint = + System.getProperty("clusterPath"); // "https://.kusto.windows.net" String appId = System.getProperty("app-id"); String appKey = System.getProperty("appKey"); String tenant = System.getProperty("tenant"); @@ -52,24 +52,30 @@ public static void main(String[] args) { ChainedTokenCredential credential; // Create Azure AD credential - if(StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { - credential = new ChainedTokenCredentialBuilder() - .addFirst(new ClientSecretCredentialBuilder() - .clientId(appId) - .clientSecret(appKey) - .tenantId(tenant) - .build()) - .build(); + if (StringUtils.isNotBlank(appId) + && StringUtils.isNotBlank(appKey) + && StringUtils.isNotBlank(tenant)) { + credential = + new ChainedTokenCredentialBuilder() + .addFirst( + new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build()) + .build(); } else { - credential = new ChainedTokenCredentialBuilder() - .addFirst(new AzureCliCredentialBuilder().build()) - .build(); + credential = + new ChainedTokenCredentialBuilder() + .addFirst(new AzureCliCredentialBuilder().build()) + .build(); } // Create streaming ingest client using the new v2 API - streamingIngestClient = StreamingIngestClientBuilder.create(engineEndpoint) - .withAuthentication(credential) - .build(); + streamingIngestClient = + StreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .build(); System.out.println("Streaming Ingest Client created successfully"); @@ -85,76 +91,90 @@ public static void main(String[] args) { } /** - * Demonstrates ingestion from various stream sources including: - * - In-memory string data as CSV - * - Compressed file stream (CSV) - * - JSON file stream with mapping + * Demonstrates ingestion from various stream sources including: - In-memory string data as CSV + * - Compressed file stream (CSV) - JSON file stream with mapping */ static void ingestFromStream() throws Exception { System.out.println("\n=== Ingesting from Streams ==="); // Example 1: Ingest from in-memory CSV string - String csvData = 
"0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - - StreamSource csvStreamSource = new StreamSource( - csvInputStream, CompressionType.NONE, Format.csv, - UUID.randomUUID(), "csv-test-src", false); - - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + String csvData = + "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = + new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = + new StreamSource( + csvInputStream, + Format.csv, + CompressionType.NONE, + UUID.randomUUID(), + "csv-test-src", + false); + + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Ingesting CSV data from string..."); - ExtendedIngestResponse ingestResponse = streamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); - System.out.println("CSV ingestion completed. Operation ID: " + ingestResponse.getIngestResponse().getIngestionOperationId()); + ExtendedIngestResponse ingestResponse = + streamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); + System.out.println( + "CSV ingestion completed. Operation ID: " + + ingestResponse.getIngestResponse().getIngestionOperationId()); // Example 2: Ingest from compressed CSV file String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - FileInputStream compressedCsvStream = new FileInputStream(resourcesDirectory + "dataset.csv.gz"); - - StreamSource compressedStreamSource = new StreamSource( - compressedCsvStream, - CompressionType.GZIP, - Format.csv, - UUID.randomUUID(), - "compressed-csv-stream", - false - ); + FileInputStream compressedCsvStream = + new FileInputStream(resourcesDirectory + "dataset.csv.gz"); + + StreamSource compressedStreamSource = + new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + "compressed-csv-stream", + false); System.out.println("Ingesting compressed CSV file..."); - ExtendedIngestResponse compressedResponse = streamingIngestClient.ingestAsync(compressedStreamSource, csvProperties).get(); - System.out.println("Compressed CSV ingestion completed. Operation ID: " + compressedResponse.getIngestResponse().getIngestionOperationId()); + ExtendedIngestResponse compressedResponse = + streamingIngestClient.ingestAsync(compressedStreamSource, csvProperties).get(); + System.out.println( + "Compressed CSV ingestion completed. 
Operation ID: " + + compressedResponse.getIngestResponse().getIngestionOperationId()); compressedCsvStream.close(); // Example 3: Ingest JSON with mapping FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json"); - StreamSource jsonStreamSource = new StreamSource( - jsonStream, - CompressionType.NONE, - Format.json, - UUID.randomUUID(), - "json-data-stream", - false - ); - - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + StreamSource jsonStreamSource = + new StreamSource( + jsonStream, + Format.json, + CompressionType.NONE, + UUID.randomUUID(), + "json-data-stream", + false); + + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); System.out.println("Ingesting JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = streamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); - System.out.println("JSON ingestion completed. Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); + ExtendedIngestResponse jsonResponse = + streamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); + System.out.println( + "JSON ingestion completed. Operation ID: " + + jsonResponse.getIngestResponse().getIngestionOperationId()); jsonStream.close(); } /** - * Demonstrates ingestion from file sources including: - * - CSV file - * - Compressed JSON file with mapping + * Demonstrates ingestion from file sources including: - CSV file - Compressed JSON file with + * mapping */ static void ingestFromFile() throws Exception { System.out.println("\n=== Ingesting from Files ==="); @@ -162,39 +182,46 @@ static void ingestFromFile() throws Exception { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Example 1: Ingest CSV file - FileSource csvFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE - ); - - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withEnableTracking(true) - .build(); + FileSource csvFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE, + "jcsv-file-source"); + + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withEnableTracking(true) + .build(); System.out.println("Ingesting CSV file..."); - ExtendedIngestResponse csvResponse = streamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); - System.out.println("CSV file ingestion completed. Operation ID: " + csvResponse.getIngestResponse().getIngestionOperationId()); + ExtendedIngestResponse csvResponse = + streamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); + System.out.println( + "CSV file ingestion completed. 
Operation ID: " + + csvResponse.getIngestResponse().getIngestionOperationId()); // Example 2: Ingest compressed JSON file with mapping - FileSource jsonFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP - ); - - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder - .create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + FileSource jsonFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP, + "sjson-compressed-file"); + + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create(database, table) + .withIngestionMappingReference(mapping) + .withEnableTracking(true) + .build(); System.out.println("Ingesting compressed JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = streamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); - System.out.println("Compressed JSON file ingestion completed. Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); + ExtendedIngestResponse jsonResponse = + streamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); + System.out.println( + "Compressed JSON file ingestion completed. Operation ID: " + + jsonResponse.getIngestResponse().getIngestionOperationId()); } } - From bca955e298c8a1f5ee5d65646ade8ebc4ec8b6c5 Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Thu, 8 Jan 2026 10:47:49 +0530 Subject: [PATCH 38/50] * Rebase changes with master * Reformat code * Add alias for tests --- .../azure/kusto/ingest/v2/IngestV2TestBase.kt | 2 ++ .../ingest/v2/ManagedStreamingIngestClientTest.kt | 3 +++ .../kusto/ingest/v2/QueuedIngestClientTest.kt | 2 ++ .../kusto/ingest/v2/StreamingIngestClientTest.kt | 1 + pom.xml | 14 -------------- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt index bc1c129c7..a469eb44b 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt @@ -121,11 +121,13 @@ abstract class IngestV2TestBase(testClass: Class<*>) { queryColumnName: String = "count", expectedResultsCount: Long, isManagementQuery: Boolean = false, + testName: String, ) { Awaitility.await() .atMost(Duration.of(3, ChronoUnit.MINUTES)) .pollInterval(Duration.of(5, ChronoUnit.SECONDS)) .ignoreExceptions() + .alias("Awaiting query result: $query on test: $testName") .untilAsserted { val results = if (isManagementQuery) { diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt index 98a8e749a..eec393ea9 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt @@ -136,6 +136,7 @@ class ManagedStreamingIngestClientTest : awaitAndQuery( query = "$targetTable | summarize count=count()", expectedResultsCount = 5, + testName = testName, ) } } catch (e: ConnectException) { @@ -238,6 +239,7 @@ class ManagedStreamingIngestClientTest : query = 
"$targetTable | where deviceId == '$targetUuid' | summarize count=count()", expectedResultsCount = 1, + testName = testName, ) } catch (e: ConnectException) { assumeTrue( @@ -293,6 +295,7 @@ class ManagedStreamingIngestClientTest : awaitAndQuery( query = "$targetTable | summarize count=count()", expectedResultsCount = 5, + testName = "FallbackToQueuedIngestionTest", ) } } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt index c53a30dd4..27704e376 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt @@ -285,6 +285,7 @@ class QueuedIngestClientTest : query = "$targetTable | where Type == '$filterType' | summarize count=count() by SourceLocation", expectedResultsCount = 5L, + testName = testName, ) } } @@ -647,6 +648,7 @@ class QueuedIngestClientTest : query = "$targetTable | where format == '$format' |summarize count=count() by format", expectedResultsCount = expectedRecordCount.toLong(), + testName = "$formatName format test", ) val extentDetailsResults = diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index ab232e3e5..08f4f7898 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -137,6 +137,7 @@ class StreamingIngestClientTest : awaitAndQuery( query = "$targetTable | summarize count=count()", expectedResultsCount = 5, + testName = testName, ) } } diff --git a/pom.xml b/pom.xml index 25e0a50c0..b493f99cb 100644 --- a/pom.xml +++ b/pom.xml @@ -63,19 +63,6 @@ 5.11.0 0.8.11 - - - - java8 - - [1.8,11) - - - 4.5.1 - - - - ingest ingest-v2 @@ -83,7 +70,6 @@ samples quickstart - From d6a66f77d3b730f4867a8618642c4c74415b858f Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Thu, 8 Jan 2026 12:59:31 +0530 Subject: [PATCH 39/50] Feature/add junits (#451) * Added Unit tests for coverage * Rebase changes base branch * Reformat tests --------- Co-authored-by: ag-ramachandran --- .../kusto/ingest/v2/source/BlobSource.kt | 4 +- .../kusto/ingest/v2/source/FileSource.kt | 4 +- .../kusto/ingest/v2/source/StreamSource.kt | 4 +- .../v2/uploader/ManagedUploaderBuilder.kt | 2 +- ...ManagedStreamingIngestClientBuilderTest.kt | 121 ++++ .../builders/QueuedIngestClientBuilderTest.kt | 199 +++++++ .../StreamingIngestClientBuilderTest.kt | 96 ++++ .../v2/client/IngestionOperationTest.kt | 160 ++++++ .../policy/ManagedStreamingPolicyTest.kt | 481 ++++++++++++++++ .../v2/common/BatchOperationResultTest.kt | 203 +++++++ .../kusto/ingest/v2/common/RetryPolicyTest.kt | 91 +++ .../common/exceptions/IngestExceptionTest.kt | 540 ++++++++++++++++++ .../v2/common/models/ClientDetailsTest.kt | 277 +++++++++ .../models/ExtendedResponseTypesTest.kt | 111 ++++ .../v2/common/models/mapping/MappingTest.kt | 456 +++++++++++++++ .../common/utils/IngestionResultUtilsTest.kt | 271 +++++++++ .../v2/common/utils/IngestionUtilsTest.kt | 161 ++++++ .../ingest/v2/common/utils/PathUtilsTest.kt | 187 ++++++ .../ingest/v2/source/SourceClassesTest.kt | 181 ++++++ .../v2/uploader/ManagedUploaderBuilderTest.kt | 180 
++++++ .../ingest/v2/uploader/UploaderModelsTest.kt | 203 +++++++ .../compression/CompressionStrategyTest.kt | 249 ++++++++ .../v2/uploader/models/UploadModelsTest.kt | 402 +++++++++++++ 23 files changed, 4579 insertions(+), 4 deletions(-) create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilderTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilderTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilderTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperationTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicyTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResultTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestExceptionTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetailsTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypesTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtilsTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtilsTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilderTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploaderModelsTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadModelsTest.kt diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index 7ba44ba26..10345abcf 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -9,7 +9,9 @@ import java.util.UUID * Represents a blob-based ingestion source. This source references data that * already exists in blob storage. 
 */
-class BlobSource(
+class BlobSource
+@JvmOverloads
+constructor(
     val blobPath: String,
     format: Format = Format.csv,
     sourceId: UUID = UUID.randomUUID(),
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt
index a7d5e7437..071a4210c 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt
@@ -13,7 +13,9 @@ import java.nio.file.Path
 import java.util.UUID
 
 /** Represents a file-based ingestion source. */
-class FileSource(
+class FileSource
+@JvmOverloads
+constructor(
     val path: Path,
     format: Format,
     sourceId: UUID = UUID.randomUUID(),
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt
index b7fb43f9d..527fabce4 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt
@@ -7,7 +7,9 @@ import java.io.InputStream
 import java.util.UUID
 
 /** Represents a stream-based ingestion source. */
-class StreamSource(
+class StreamSource
+@JvmOverloads
+constructor(
     stream: InputStream,
     format: Format,
     sourceCompression: CompressionType,
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt
index 085f37e8e..62115c700 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilder.kt
@@ -122,7 +122,7 @@ class ManagedUploaderBuilder private constructor() {
      * @throws IllegalStateException if required configuration is missing
      */
     fun build(): ManagedUploader {
-        requireNotNull(configurationCache) {
+        check(configurationCache != null) {
             "Configuration cache is required. Call withConfigurationCache() before build()"
         }
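The `@JvmOverloads` additions above are what make these Kotlin default arguments usable from Java: without the annotation, Java callers see only the single full constructor. A minimal interop sketch follows, assuming the constructor parameters not visible in this hunk also declare defaults; the blob URL is a placeholder.

```java
import com.microsoft.azure.kusto.ingest.v2.source.BlobSource;

public class SourceInteropSketch {
    public static void main(String[] args) {
        // @JvmOverloads generates Java overloads that drop trailing defaulted
        // parameters, so this call picks up Format.csv and a random sourceId
        // from the Kotlin defaults shown in the diff above.
        BlobSource source =
                new BlobSource("https://account.blob.core.windows.net/container/data.csv");
        System.out.println(source.getBlobPath());
    }
}
```

Note that `@JvmOverloads` only drops parameters from the end of the parameter list, so an overload that skips `format` while still passing `sourceId` is never generated.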
diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilderTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilderTest.kt
new file mode 100644
index 000000000..3d4c94d39
--- /dev/null
+++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilderTest.kt
@@ -0,0 +1,121 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.builders
+
+import com.azure.core.credential.TokenCredential
+import com.microsoft.azure.kusto.ingest.v2.client.policy.ManagedStreamingPolicy
+import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader
+import io.mockk.mockk
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertThrows
+import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
+
+class ManagedStreamingIngestClientBuilderTest {
+
+    private val validDmUrl = "https://ingest-test.kusto.windows.net"
+    private val mockTokenCredential: TokenCredential = mockk(relaxed = true)
+
+    @Test
+    fun `create with valid URL should succeed`() {
+        val builder = ManagedStreamingIngestClientBuilder.create(validDmUrl)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `create with blank URL should throw exception`() {
+        assertThrows<IllegalArgumentException> {
+            ManagedStreamingIngestClientBuilder.create("")
+        }
+    }
+
+    @Test
+    fun `create with whitespace URL should throw exception`() {
+        assertThrows<IllegalArgumentException> {
+            ManagedStreamingIngestClientBuilder.create(" ")
+        }
+    }
+
+    @Test
+    fun `build without authentication should throw exception`() {
+        val builder = ManagedStreamingIngestClientBuilder.create(validDmUrl)
+        assertThrows<IllegalStateException> { builder.build() }
+    }
+
+    @Test
+    fun `build with authentication should succeed`() {
+        val client =
+            ManagedStreamingIngestClientBuilder.create(validDmUrl)
+                .withAuthentication(mockTokenCredential)
+                .build()
+        assertNotNull(client)
+    }
+
+    @Test
+    fun `withUploader should accept custom uploader`() {
+        val mockUploader: IUploader = mockk(relaxed = true)
+        val builder =
+            ManagedStreamingIngestClientBuilder.create(validDmUrl)
+                .withUploader(mockUploader, true)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withManagedStreamingIngestPolicy should accept custom policy`() {
+        val mockPolicy: ManagedStreamingPolicy = mockk(relaxed = true)
+        val builder =
+            ManagedStreamingIngestClientBuilder.create(validDmUrl)
+                .withManagedStreamingIngestPolicy(mockPolicy)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `builder methods should return self for chaining`() {
+        val builder = ManagedStreamingIngestClientBuilder.create(validDmUrl)
+        val result =
+            builder.withAuthentication(mockTokenCredential)
+                .withClientDetails("TestApp", "1.0")
+
+        assertEquals(builder, result)
+    }
+
+    @Test
+    fun `withClientDetails should accept custom client details`() {
+        val builder =
+            ManagedStreamingIngestClientBuilder.create(validDmUrl)
+                .withClientDetails("TestApp", "1.0.0")
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `skipSecurityChecks should be accepted`() {
+        val builder =
+            ManagedStreamingIngestClientBuilder.create(validDmUrl)
+                .skipSecurityChecks()
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `build with all optional parameters should succeed`() {
+        val mockUploader: IUploader = mockk(relaxed = true)
+        val mockPolicy: ManagedStreamingPolicy = mockk(relaxed = true)
+
+        val client =
+            ManagedStreamingIngestClientBuilder.create(validDmUrl)
+                .withAuthentication(mockTokenCredential)
+                .withUploader(mockUploader, true)
+                .withManagedStreamingIngestPolicy(mockPolicy)
+                .withClientDetails("TestApp", "2.0")
+                .skipSecurityChecks()
+                .build()
+
+        assertNotNull(client)
+    }
+
+    @Test
+    fun `create should normalize engine URL to ingest URL`() {
+        val engineUrl = "https://test.kusto.windows.net"
+        val builder = ManagedStreamingIngestClientBuilder.create(engineUrl)
+        assertNotNull(builder)
+    }
+}
b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilderTest.kt new file mode 100644 index 000000000..c59461342 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilderTest.kt @@ -0,0 +1,199 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.builders + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader +import io.mockk.mockk +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class QueuedIngestClientBuilderTest { + + private val validDmUrl = "https://ingest-test.kusto.windows.net" + private val mockTokenCredential: TokenCredential = mockk(relaxed = true) + + @Test + fun `create with valid URL should succeed`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + assertNotNull(builder) + } + + @Test + fun `create with blank URL should throw exception`() { + assertThrows { + QueuedIngestClientBuilder.create("") + } + } + + @Test + fun `create with whitespace URL should throw exception`() { + assertThrows { + QueuedIngestClientBuilder.create(" ") + } + } + + @Test + fun `withMaxConcurrency with positive value should succeed`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withMaxConcurrency(10) + assertNotNull(builder) + } + + @Test + fun `withMaxConcurrency with zero should throw exception`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + assertThrows { builder.withMaxConcurrency(0) } + } + + @Test + fun `withMaxConcurrency with negative value should throw exception`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + assertThrows { + builder.withMaxConcurrency(-1) + } + } + + @Test + fun `withMaxDataSize with positive value should succeed`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withMaxDataSize(1024L) + assertNotNull(builder) + } + + @Test + fun `withMaxDataSize with zero should throw exception`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + assertThrows { builder.withMaxDataSize(0L) } + } + + @Test + fun `withMaxDataSize with negative value should throw exception`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + assertThrows { + builder.withMaxDataSize(-100L) + } + } + + @Test + fun `withIgnoreFileSize should accept true`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withIgnoreFileSize(true) + assertNotNull(builder) + } + + @Test + fun `withIgnoreFileSize should accept false`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withIgnoreFileSize(false) + assertNotNull(builder) + } + + @Test + fun `withUploader should accept custom uploader`() { + val mockUploader: IUploader = mockk(relaxed = true) + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withUploader(mockUploader, true) + assertNotNull(builder) + } + + @Test + fun `withConfiguration should accept custom configuration`() { + val mockConfig = + DefaultConfigurationCache( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = false, + clientDetails = ClientDetails.createDefault(), + ) + val builder = + 
QueuedIngestClientBuilder.create(validDmUrl) + .withConfiguration(mockConfig) + assertNotNull(builder) + } + + @Test + fun `build without authentication should throw exception`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + assertThrows { builder.build() } + } + + @Test + fun `build with authentication should succeed`() { + val client = + QueuedIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .build() + assertNotNull(client) + } + + @Test + fun `builder methods should return self for chaining`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + val result = + builder.withMaxConcurrency(5) + .withMaxDataSize(2048L) + .withIgnoreFileSize(true) + .withAuthentication(mockTokenCredential) + + assertEquals(builder, result) + } + + @Test + fun `create should normalize engine URL to ingest URL`() { + val engineUrl = "https://test.kusto.windows.net" + val builder = QueuedIngestClientBuilder.create(engineUrl) + assertNotNull(builder) + } + + @Test + fun `withClientDetails should accept custom client details`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withClientDetails("TestApp", "1.0.0") + assertNotNull(builder) + } + + @Test + fun `skipSecurityChecks should be accepted`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .skipSecurityChecks() + assertNotNull(builder) + } + + @Test + fun `build with all optional parameters should succeed`() { + val mockUploader: IUploader = mockk(relaxed = true) + val mockConfig = + DefaultConfigurationCache( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = false, + clientDetails = ClientDetails.createDefault(), + ) + + val client = + QueuedIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withMaxConcurrency(10) + .withMaxDataSize(4096L) + .withIgnoreFileSize(true) + .withUploader(mockUploader, true) + .withConfiguration(mockConfig) + .withClientDetails("TestApp", "2.0") + .skipSecurityChecks() + .build() + + assertNotNull(client) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilderTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilderTest.kt new file mode 100644 index 000000000..c8f870844 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/StreamingIngestClientBuilderTest.kt @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
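Taken together, the `QueuedIngestClientBuilder` tests above pin down a fluent surface: `create` validates the URL, the `with*` methods validate their arguments and return the same builder instance, and `build()` refuses to run without authentication. A hedged usage sketch follows; the `DefaultAzureCredentialBuilder` credential is an assumption (any `TokenCredential` would do) and the cluster URL is a placeholder:

```kotlin
import com.azure.identity.DefaultAzureCredentialBuilder
import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder

fun main() {
    // Assumed credential source (azure-identity); the builder only needs a TokenCredential.
    val credential = DefaultAzureCredentialBuilder().build()

    val client =
        QueuedIngestClientBuilder.create("https://ingest-mycluster.kusto.windows.net")
            .withAuthentication(credential) // required: build() fails without it, per the tests
            .withMaxConcurrency(10)         // validated: must be positive
            .withMaxDataSize(4096L)         // validated: must be positive
            .build()

    println(client)
}
```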
+package com.microsoft.azure.kusto.ingest.v2.builders
+
+import com.azure.core.credential.TokenCredential
+import io.mockk.mockk
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertThrows
+import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
+
+class StreamingIngestClientBuilderTest {
+
+    private val validClusterUrl = "https://test.kusto.windows.net"
+    private val mockTokenCredential: TokenCredential = mockk(relaxed = true)
+
+    @Test
+    fun `create with valid URL should succeed`() {
+        val builder = StreamingIngestClientBuilder.create(validClusterUrl)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `create with blank URL should throw exception`() {
+        assertThrows<IllegalArgumentException> {
+            StreamingIngestClientBuilder.create("")
+        }
+    }
+
+    @Test
+    fun `create with whitespace URL should throw exception`() {
+        assertThrows<IllegalArgumentException> {
+            StreamingIngestClientBuilder.create(" ")
+        }
+    }
+
+    @Test
+    fun `build without authentication should throw exception`() {
+        val builder = StreamingIngestClientBuilder.create(validClusterUrl)
+        assertThrows<IllegalStateException> { builder.build() }
+    }
+
+    @Test
+    fun `build with authentication should succeed`() {
+        val client =
+            StreamingIngestClientBuilder.create(validClusterUrl)
+                .withAuthentication(mockTokenCredential)
+                .build()
+        assertNotNull(client)
+    }
+
+    @Test
+    fun `builder methods should return self for chaining`() {
+        val builder = StreamingIngestClientBuilder.create(validClusterUrl)
+        val result =
+            builder.withAuthentication(mockTokenCredential)
+                .withClientDetails("TestApp", "1.0")
+
+        assertEquals(builder, result)
+    }
+
+    @Test
+    fun `create should normalize ingest URL to cluster URL`() {
+        val ingestUrl = "https://ingest-test.kusto.windows.net"
+        val builder = StreamingIngestClientBuilder.create(ingestUrl)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withClientDetails should accept custom client details`() {
+        val builder =
+            StreamingIngestClientBuilder.create(validClusterUrl)
+                .withClientDetails("TestApp", "1.0.0")
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `skipSecurityChecks should be accepted`() {
+        val builder =
+            StreamingIngestClientBuilder.create(validClusterUrl)
+                .skipSecurityChecks()
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `build with all optional parameters should succeed`() {
+        val client =
+            StreamingIngestClientBuilder.create(validClusterUrl)
+                .withAuthentication(mockTokenCredential)
+                .withClientDetails("TestApp", "2.0")
+                .skipSecurityChecks()
+                .build()
+
+        assertNotNull(client)
+    }
+}
diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperationTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperationTest.kt
new file mode 100644
index 000000000..ace8b3c57
--- /dev/null
+++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestionOperationTest.kt
@@ -0,0 +1,160 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
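The builder suites above assert URL normalization in opposite directions: the queued and managed builders accept an engine URL and derive the DM (`ingest-` prefixed) endpoint, while the streaming builder accepts an ingest URL and derives the engine endpoint. A sketch of the host-prefix convention those tests rely on; this helper is illustrative only, not the builders' internal code:

```kotlin
import java.net.URI

// Illustrates the "ingest-" host-prefix convention between engine and DM endpoints.
fun toIngestUrl(url: String): String {
    val uri = URI(url)
    val host = checkNotNull(uri.host) { "no host in $url" }
    return if (host.startsWith("ingest-")) url else "${uri.scheme}://ingest-$host"
}

fun toEngineUrl(url: String): String {
    val uri = URI(url)
    val host = checkNotNull(uri.host) { "no host in $url" }
    return "${uri.scheme}://${host.removePrefix("ingest-")}"
}

fun main() {
    check(toIngestUrl("https://test.kusto.windows.net") == "https://ingest-test.kusto.windows.net")
    check(toEngineUrl("https://ingest-test.kusto.windows.net") == "https://test.kusto.windows.net")
}
```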
+package com.microsoft.azure.kusto.ingest.v2.client + +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotEquals +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test + +class IngestionOperationTest { + + @Test + fun `IngestionOperation creates correctly with all fields`() { + val operation = + IngestionOperation( + operationId = "op-12345", + database = "TestDB", + table = "TestTable", + ingestKind = IngestKind.STREAMING, + ) + + assertEquals("op-12345", operation.operationId) + assertEquals("TestDB", operation.database) + assertEquals("TestTable", operation.table) + assertEquals(IngestKind.STREAMING, operation.ingestKind) + } + + @Test + fun `IngestionOperation creates correctly with QUEUED kind`() { + val operation = + IngestionOperation( + operationId = "op-67890", + database = "ProductionDB", + table = "Logs", + ingestKind = IngestKind.QUEUED, + ) + + assertEquals("op-67890", operation.operationId) + assertEquals("ProductionDB", operation.database) + assertEquals("Logs", operation.table) + assertEquals(IngestKind.QUEUED, operation.ingestKind) + } + + @Test + fun `IngestionOperation data class equality works correctly`() { + val op1 = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + val op2 = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + val op3 = + IngestionOperation("op-2", "db", "table", IngestKind.STREAMING) + val op4 = IngestionOperation("op-1", "db", "table", IngestKind.QUEUED) + + assertEquals(op1, op2) + assertNotEquals(op1, op3) + assertNotEquals(op1, op4) + } + + @Test + fun `IngestionOperation data class hashCode works correctly`() { + val op1 = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + val op2 = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + + assertEquals(op1.hashCode(), op2.hashCode()) + } + + @Test + fun `IngestionOperation data class copy works correctly`() { + val original = + IngestionOperation( + "op-1", + "db1", + "table1", + IngestKind.STREAMING, + ) + val copied = original.copy(database = "db2", table = "table2") + + assertEquals("op-1", copied.operationId) + assertEquals("db2", copied.database) + assertEquals("table2", copied.table) + assertEquals(IngestKind.STREAMING, copied.ingestKind) + } + + @Test + fun `IngestionOperation copy can change operationId`() { + val original = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + val copied = original.copy(operationId = "op-2") + + assertEquals("op-2", copied.operationId) + assertEquals(original.database, copied.database) + assertEquals(original.table, copied.table) + assertEquals(original.ingestKind, copied.ingestKind) + } + + @Test + fun `IngestionOperation copy can change ingestKind`() { + val original = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + val copied = original.copy(ingestKind = IngestKind.QUEUED) + + assertEquals(original.operationId, copied.operationId) + assertEquals(original.database, copied.database) + assertEquals(original.table, copied.table) + assertEquals(IngestKind.QUEUED, copied.ingestKind) + } + + @Test + fun `IngestionOperation toString contains all fields`() { + val operation = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + val stringRep = operation.toString() + + assertTrue(stringRep.contains("op-1")) + assertTrue(stringRep.contains("db")) + assertTrue(stringRep.contains("table")) + 
assertTrue(stringRep.contains("STREAMING")) + } + + @Test + fun `IngestionOperation handles special characters in fields`() { + val operation = + IngestionOperation( + operationId = "op-with-dashes-123", + database = "Database.With.Dots", + table = "Table_With_Underscores", + ingestKind = IngestKind.STREAMING, + ) + + assertEquals("op-with-dashes-123", operation.operationId) + assertEquals("Database.With.Dots", operation.database) + assertEquals("Table_With_Underscores", operation.table) + } + + @Test + fun `IngestionOperation handles empty strings`() { + val operation = IngestionOperation("", "", "", IngestKind.QUEUED) + + assertEquals("", operation.operationId) + assertEquals("", operation.database) + assertEquals("", operation.table) + assertEquals(IngestKind.QUEUED, operation.ingestKind) + } + + @Test + fun `IngestionOperation component functions work correctly`() { + val operation = + IngestionOperation("op-1", "db", "table", IngestKind.STREAMING) + + val (id, db, tbl, kind) = operation + + assertEquals("op-1", id) + assertEquals("db", db) + assertEquals("table", tbl) + assertEquals(IngestKind.STREAMING, kind) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicyTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicyTest.kt new file mode 100644 index 000000000..1ebde9b4f --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicyTest.kt @@ -0,0 +1,481 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.client.policy + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource +import java.io.ByteArrayInputStream +import java.time.Duration +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +/** Unit tests for managed streaming policy classes. 
+ */
+class ManagedStreamingPolicyTest {
+
+    private fun createTestSource() =
+        StreamSource(
+            ByteArrayInputStream("test".toByteArray()),
+            Format.csv,
+            CompressionType.NONE,
+        )
+
+    private fun createTestProps() = IngestRequestProperties(format = Format.csv)
+
+    // ==================== ManagedStreamingErrorCategory Tests ====================
+
+    @Test
+    fun `ManagedStreamingErrorCategory should have all expected values`() {
+        val values = ManagedStreamingErrorCategory.values()
+        assertEquals(6, values.size)
+        assertTrue(
+            values.contains(
+                ManagedStreamingErrorCategory
+                    .REQUEST_PROPERTIES_PREVENT_STREAMING,
+            ),
+        )
+        assertTrue(
+            values.contains(
+                ManagedStreamingErrorCategory
+                    .TABLE_CONFIGURATION_PREVENTS_STREAMING,
+            ),
+        )
+        assertTrue(
+            values.contains(
+                ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF,
+            ),
+        )
+        assertTrue(values.contains(ManagedStreamingErrorCategory.THROTTLED))
+        assertTrue(values.contains(ManagedStreamingErrorCategory.OTHER_ERRORS))
+        assertTrue(
+            values.contains(ManagedStreamingErrorCategory.UNKNOWN_ERRORS),
+        )
+    }
+
+    @Test
+    fun `ManagedStreamingErrorCategory valueOf should return correct enum`() {
+        assertEquals(
+            ManagedStreamingErrorCategory.THROTTLED,
+            ManagedStreamingErrorCategory.valueOf("THROTTLED"),
+        )
+        assertEquals(
+            ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF,
+            ManagedStreamingErrorCategory.valueOf("STREAMING_INGESTION_OFF"),
+        )
+    }
+
+    // ==================== ManagedStreamingRequestSuccessDetails Tests ====================
+
+    @Test
+    fun `ManagedStreamingRequestSuccessDetails should store duration`() {
+        val duration = Duration.ofSeconds(5)
+        val details = ManagedStreamingRequestSuccessDetails(duration)
+
+        assertEquals(duration, details.duration)
+    }
+
+    @Test
+    fun `ManagedStreamingRequestSuccessDetails should support data class features`() {
+        val details1 =
+            ManagedStreamingRequestSuccessDetails(Duration.ofSeconds(5))
+        val details2 =
+            ManagedStreamingRequestSuccessDetails(Duration.ofSeconds(5))
+
+        assertEquals(details1, details2)
+        assertEquals(details1.hashCode(), details2.hashCode())
+    }
+
+    @Test
+    fun `ManagedStreamingRequestSuccessDetails should support copy`() {
+        val original =
+            ManagedStreamingRequestSuccessDetails(Duration.ofSeconds(5))
+        val copied = original.copy(duration = Duration.ofSeconds(10))
+
+        assertEquals(Duration.ofSeconds(10), copied.duration)
+    }
+
+    // ==================== ManagedStreamingRequestFailureDetails Tests ====================
+
+    @Test
+    fun `ManagedStreamingRequestFailureDetails should store all properties`() {
+        val duration = Duration.ofSeconds(3)
+        val exception = RuntimeException("Test error")
+
+        val details =
+            ManagedStreamingRequestFailureDetails(
+                duration = duration,
+                isPermanent = true,
+                errorCategory = ManagedStreamingErrorCategory.THROTTLED,
+                exception = exception,
+            )
+
+        assertEquals(duration, details.duration)
+        assertTrue(details.isPermanent)
+        assertEquals(
+            ManagedStreamingErrorCategory.THROTTLED,
+            details.errorCategory,
+        )
+        assertEquals(exception, details.exception)
+    }
+
+    @Test
+    fun `ManagedStreamingRequestFailureDetails should have default values`() {
+        val exception = RuntimeException("Error")
+
+        val details =
+            ManagedStreamingRequestFailureDetails(
+                isPermanent = false,
+                exception = exception,
+            )
+
+        assertEquals(Duration.ZERO, details.duration)
+        assertEquals(
+            ManagedStreamingErrorCategory.OTHER_ERRORS,
+            details.errorCategory,
+        )
+        assertFalse(details.isPermanent)
+    }
+
+    @Test
+    fun `ManagedStreamingRequestFailureDetails errorCategory should be mutable`() {
+        val exception = RuntimeException("Error")
+        val details =
+            ManagedStreamingRequestFailureDetails(
+                isPermanent = false,
+                exception = exception,
+            )
+
+        details.errorCategory =
+            ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF
+
+        assertEquals(
+            ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF,
+            details.errorCategory,
+        )
+    }
+
+    @Test
+    fun `ManagedStreamingRequestFailureDetails should support data class features`() {
+        val exception = RuntimeException("Error")
+        val details1 =
+            ManagedStreamingRequestFailureDetails(
+                duration = Duration.ofSeconds(2),
+                isPermanent = true,
+                errorCategory = ManagedStreamingErrorCategory.OTHER_ERRORS,
+                exception = exception,
+            )
+        val details2 =
+            ManagedStreamingRequestFailureDetails(
+                duration = Duration.ofSeconds(2),
+                isPermanent = true,
+                errorCategory = ManagedStreamingErrorCategory.OTHER_ERRORS,
+                exception = exception,
+            )
+
+        assertEquals(details1, details2)
+        assertEquals(details1.hashCode(), details2.hashCode())
+    }
+
+    // ==================== DefaultManagedStreamingPolicy Tests ====================
+
+    @Test
+    fun `DefaultManagedStreamingPolicy should have correct default values`() {
+        val policy = DefaultManagedStreamingPolicy()
+
+        // Default value from constants - continueWhenStreamingIngestionUnavailable
+        // defaults to false
+        assertFalse(policy.continueWhenStreamingIngestionUnavailable)
+        assertEquals(1.0, policy.dataSizeFactor)
+        assertNotNull(policy.retryPolicy)
+    }
+
+    @Test
+    fun `DefaultManagedStreamingPolicy should accept custom values`() {
+        val policy =
+            DefaultManagedStreamingPolicy(
+                continueWhenStreamingIngestionUnavailable = false,
+                dataSizeFactor = 0.5,
+                throttleBackoffPeriod = Duration.ofMinutes(2),
+                timeUntilResumingStreamingIngest = Duration.ofMinutes(30),
+            )
+
+        assertFalse(policy.continueWhenStreamingIngestionUnavailable)
+        assertEquals(0.5, policy.dataSizeFactor)
+        assertEquals(Duration.ofMinutes(2), policy.throttleBackoffPeriod)
+        assertEquals(
+            Duration.ofMinutes(30),
+            policy.timeUntilResumingStreamingIngest,
+        )
+    }
+
+    @Test
+    fun `DefaultManagedStreamingPolicy shouldDefaultToQueuedIngestion returns false initially`() {
+        val policy = DefaultManagedStreamingPolicy()
+
+        val result =
+            policy.shouldDefaultToQueuedIngestion(
+                createTestSource(),
+                "testdb",
+                "testtable",
+                createTestProps(),
+            )
+
+        assertFalse(result)
+    }
+
+    @Test
+    fun `DefaultManagedStreamingPolicy should queue after streaming ingestion off error`() {
+        val policy =
+            DefaultManagedStreamingPolicy(
+                continueWhenStreamingIngestionUnavailable = true,
+                timeUntilResumingStreamingIngest = Duration.ofMinutes(1),
+            )
+
+        // Simulate streaming ingestion off error
+        policy.streamingErrorCallback(
+            createTestSource(),
+            "testdb",
+            "testtable",
+            createTestProps(),
+            ManagedStreamingRequestFailureDetails(
+                isPermanent = true,
+                errorCategory =
+                    ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF,
+                exception = RuntimeException("Streaming ingestion is off"),
+            ),
+        )
+
+        // Should now default to queued
+        val result =
+            policy.shouldDefaultToQueuedIngestion(
+                createTestSource(),
+                "testdb",
+                "testtable",
+                createTestProps(),
+            )
+        assertTrue(result)
+    }
+
+    @Test
+    fun `DefaultManagedStreamingPolicy should queue after table configuration prevents streaming`() {
+        val policy =
+            DefaultManagedStreamingPolicy(
+                timeUntilResumingStreamingIngest = Duration.ofMinutes(1),
+            )
+
+        policy.streamingErrorCallback(
+            createTestSource(),
+            "testdb",
+            "testtable",
createTestProps(), + ManagedStreamingRequestFailureDetails( + isPermanent = true, + errorCategory = + ManagedStreamingErrorCategory + .TABLE_CONFIGURATION_PREVENTS_STREAMING, + exception = + RuntimeException( + "Table config prevents streaming", + ), + ), + ) + + val result = + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ) + assertTrue(result) + } + + @Test + fun `DefaultManagedStreamingPolicy should queue after throttling`() { + val policy = + DefaultManagedStreamingPolicy( + throttleBackoffPeriod = Duration.ofSeconds(30), + ) + + policy.streamingErrorCallback( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ManagedStreamingRequestFailureDetails( + isPermanent = false, + errorCategory = ManagedStreamingErrorCategory.THROTTLED, + exception = RuntimeException("Throttled"), + ), + ) + + val result = + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ) + assertTrue(result) + } + + @Test + fun `DefaultManagedStreamingPolicy should not queue for other errors`() { + val policy = DefaultManagedStreamingPolicy() + + policy.streamingErrorCallback( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ManagedStreamingRequestFailureDetails( + isPermanent = false, + errorCategory = + ManagedStreamingErrorCategory.OTHER_ERRORS, + exception = RuntimeException("Some other error"), + ), + ) + + val result = + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ) + assertFalse(result) + } + + @Test + fun `DefaultManagedStreamingPolicy should track errors by database and table`() { + val policy = + DefaultManagedStreamingPolicy( + continueWhenStreamingIngestionUnavailable = true, + timeUntilResumingStreamingIngest = Duration.ofMinutes(1), + ) + + // Error on table1 + policy.streamingErrorCallback( + createTestSource(), + "db1", + "table1", + createTestProps(), + ManagedStreamingRequestFailureDetails( + isPermanent = true, + errorCategory = + ManagedStreamingErrorCategory + .STREAMING_INGESTION_OFF, + exception = RuntimeException("Off"), + ), + ) + + // table1 should queue (continueWhenStreamingIngestionUnavailable = true) + assertTrue( + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "db1", + "table1", + createTestProps(), + ), + ) + + // table2 should not queue + assertFalse( + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "db1", + "table2", + createTestProps(), + ), + ) + + // Different database should not queue + assertFalse( + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "db2", + "table1", + createTestProps(), + ), + ) + } + + @Test + fun `DefaultManagedStreamingPolicy streamingSuccessCallback should not throw`() { + val policy = DefaultManagedStreamingPolicy() + + // Should not throw + policy.streamingSuccessCallback( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ManagedStreamingRequestSuccessDetails(Duration.ofSeconds(1)), + ) + } + + @Test + fun `DefaultManagedStreamingPolicy should return false when streaming off and continueWhenUnavailable is false`() { + val policy = + DefaultManagedStreamingPolicy( + continueWhenStreamingIngestionUnavailable = false, + timeUntilResumingStreamingIngest = Duration.ofMinutes(1), + ) + + policy.streamingErrorCallback( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ManagedStreamingRequestFailureDetails( + isPermanent = true, + errorCategory = 
+ ManagedStreamingErrorCategory + .STREAMING_INGESTION_OFF, + exception = + RuntimeException("Streaming ingestion is off"), + ), + ) + + // Should return false (not queue, fail instead) when streaming is off and we don't continue + val result = + policy.shouldDefaultToQueuedIngestion( + createTestSource(), + "testdb", + "testtable", + createTestProps(), + ) + assertFalse(result) + } + + @Test + fun `DefaultManagedStreamingPolicy createDefaultRetryPolicy should return valid policy`() { + val retryPolicy = + DefaultManagedStreamingPolicy.createDefaultRetryPolicy() + + assertNotNull(retryPolicy) + // Should allow retries initially + val retry = retryPolicy.moveNext(0u) + assertTrue(retry.shouldRetry) + assertTrue(retry.interval > Duration.ZERO) + } + + @Test + fun `DefaultManagedStreamingPolicy DEFAULT_MANAGED_STREAMING_POLICY should be accessible`() { + val defaultPolicy = + DefaultManagedStreamingPolicy.DEFAULT_MANAGED_STREAMING_POLICY + + assertNotNull(defaultPolicy) + // Default value from constants is false + assertFalse(defaultPolicy.continueWhenStreamingIngestionUnavailable) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResultTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResultTest.kt new file mode 100644 index 000000000..2ff759dd3 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/BatchOperationResultTest.kt @@ -0,0 +1,203 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common + +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults +import java.time.Instant +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertTrue + +/** Unit tests for BatchOperationResult interface. 
+ */
+class BatchOperationResultTest {
+
+    // Helper class implementing BatchOperationResult
+    // (assumes the interface is generic over its element type)
+    data class TestBatchResult(
+        override val successes: List<String>,
+        override val failures: List<String>,
+    ) : BatchOperationResult<String>
+
+    @Test
+    fun `hasFailures should return true when there are failures`() {
+        val result =
+            TestBatchResult(
+                successes = listOf("success1"),
+                failures = listOf("failure1"),
+            )
+
+        assertTrue(result.hasFailures)
+    }
+
+    @Test
+    fun `hasFailures should return false when there are no failures`() {
+        val result =
+            TestBatchResult(
+                successes = listOf("success1", "success2"),
+                failures = emptyList(),
+            )
+
+        assertFalse(result.hasFailures)
+    }
+
+    @Test
+    fun `allSucceeded should return true when there are no failures`() {
+        val result =
+            TestBatchResult(
+                successes = listOf("success1", "success2"),
+                failures = emptyList(),
+            )
+
+        assertTrue(result.allSucceeded)
+    }
+
+    @Test
+    fun `allSucceeded should return false when there are failures`() {
+        val result =
+            TestBatchResult(
+                successes = listOf("success1"),
+                failures = listOf("failure1"),
+            )
+
+        assertFalse(result.allSucceeded)
+    }
+
+    @Test
+    fun `totalCount should return sum of successes and failures`() {
+        val result =
+            TestBatchResult(
+                successes = listOf("s1", "s2", "s3"),
+                failures = listOf("f1", "f2"),
+            )
+
+        assertEquals(5, result.totalCount)
+    }
+
+    @Test
+    fun `totalCount should return 0 for empty result`() {
+        val result =
+            TestBatchResult(
+                successes = emptyList(),
+                failures = emptyList(),
+            )
+
+        assertEquals(0, result.totalCount)
+    }
+
+    @Test
+    fun `totalCount should return only successes count when no failures`() {
+        val result =
+            TestBatchResult(
+                successes = listOf("s1", "s2"),
+                failures = emptyList(),
+            )
+
+        assertEquals(2, result.totalCount)
+    }
+
+    @Test
+    fun `totalCount should return only failures count when no successes`() {
+        val result =
+            TestBatchResult(
+                successes = emptyList(),
+                failures = listOf("f1", "f2", "f3"),
+            )
+
+        assertEquals(3, result.totalCount)
+    }
+
+    // Test with UploadResults which implements BatchOperationResult
+    @Test
+    fun `UploadResults should implement BatchOperationResult correctly`() {
+        val startTime = Instant.now()
+        val endTime = startTime.plusSeconds(10)
+
+        val successes =
+            listOf(
+                UploadResult.Success(
+                    "file1.csv",
+                    startTime,
+                    endTime,
+                    "https://blob1",
+                    100,
+                ),
+                UploadResult.Success(
+                    "file2.csv",
+                    startTime,
+                    endTime,
+                    "https://blob2",
+                    200,
+                ),
+            )
+
+        val failures =
+            listOf(
+                UploadResult.Failure(
+                    "file3.csv",
+                    startTime,
+                    endTime,
+                    UploadErrorCode.UPLOAD_FAILED,
+                    "Error",
+                    null,
+                ),
+            )
+
+        val results = UploadResults(successes, failures)
+
+        // Test BatchOperationResult interface methods
+        assertEquals(2, results.successes.size)
+        assertEquals(1, results.failures.size)
+        assertTrue(results.hasFailures)
+        assertFalse(results.allSucceeded)
+        assertEquals(3, results.totalCount)
+    }
+
+    @Test
+    fun `UploadResults with all successes should show allSucceeded true`() {
+        val startTime = Instant.now()
+        val endTime = startTime.plusSeconds(10)
+
+        val successes =
+            listOf(
+                UploadResult.Success(
+                    "file1.csv",
+                    startTime,
+                    endTime,
+                    "https://blob1",
+                    100,
+                ),
+            )
+
+        val results = UploadResults(successes, emptyList())
+
+        assertFalse(results.hasFailures)
+        assertTrue(results.allSucceeded)
+        assertEquals(1, results.totalCount)
+    }
+
+    @Test
+    fun `UploadResults with all failures should show hasFailures true`() {
+        val startTime = Instant.now()
+        val endTime = startTime.plusSeconds(10)
+
+        val failures =
+            listOf(
UploadResult.Failure( + "file1.csv", + startTime, + endTime, + UploadErrorCode.UPLOAD_FAILED, + "Error", + null, + ), + ) + + val results = UploadResults(emptyList(), failures) + + assertTrue(results.hasFailures) + assertFalse(results.allSucceeded) + assertEquals(1, results.totalCount) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyTest.kt new file mode 100644 index 000000000..782a77395 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/RetryPolicyTest.kt @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common + +import java.time.Duration +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertTrue + +/** Unit tests for retry policy implementations. */ +class RetryPolicyTest { + + @Test + fun `NoRetryPolicy should never retry`() { + val policy = NoRetryPolicy + + val retry1 = policy.moveNext(1u) + assertFalse(retry1.shouldRetry) + + val retry2 = policy.moveNext(5u) + assertFalse(retry2.shouldRetry) + } + + @Test + fun `SimpleRetryPolicy should retry with default intervals`() { + val policy = SimpleRetryPolicy() + + val retry1 = policy.moveNext(1u) + assertTrue(retry1.shouldRetry) + assertEquals(Duration.ofSeconds(10), retry1.interval) + + val retry2 = policy.moveNext(2u) + assertTrue(retry2.shouldRetry) + assertEquals(Duration.ofSeconds(10), retry2.interval) + } + + @Test + fun `SimpleRetryPolicy should respect total retries`() { + val policy = SimpleRetryPolicy(totalRetries = 3) + + assertTrue(policy.moveNext(1u).shouldRetry) + assertTrue(policy.moveNext(2u).shouldRetry) + assertTrue(policy.moveNext(3u).shouldRetry) + assertFalse(policy.moveNext(4u).shouldRetry) + } + + @Test + fun `SimpleRetryPolicy should use custom interval duration`() { + val customInterval = Duration.ofSeconds(5) + val policy = SimpleRetryPolicy(intervalDuration = customInterval) + + val retry = policy.moveNext(1u) + assertTrue(retry.shouldRetry) + assertEquals(customInterval, retry.interval) + } + + @Test + fun `CustomRetryPolicy should use provided intervals`() { + val intervals = + arrayOf( + Duration.ofSeconds(1), + Duration.ofSeconds(2), + Duration.ofSeconds(5), + ) + val policy = CustomRetryPolicy(intervals) + + assertEquals(Duration.ofSeconds(1), policy.moveNext(0u).interval) + assertEquals(Duration.ofSeconds(2), policy.moveNext(1u).interval) + assertEquals(Duration.ofSeconds(5), policy.moveNext(2u).interval) + assertFalse(policy.moveNext(3u).shouldRetry) + } + + @Test + fun `CustomRetryPolicy with empty intervals should not retry`() { + val policy = CustomRetryPolicy(arrayOf()) + + assertFalse(policy.moveNext(0u).shouldRetry) + } + + @Test + fun `Retry data class should hold correct values`() { + val retry1 = Retry(true, Duration.ofSeconds(5)) + assertTrue(retry1.shouldRetry) + assertEquals(Duration.ofSeconds(5), retry1.interval) + + val retry2 = Retry(false, Duration.ZERO) + assertFalse(retry2.shouldRetry) + assertEquals(Duration.ZERO, retry2.interval) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestExceptionTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestExceptionTest.kt new file mode 100644 index 000000000..21f4c5761 --- /dev/null +++ 
b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestExceptionTest.kt @@ -0,0 +1,540 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.exceptions + +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode +import java.io.IOException +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertSame +import kotlin.test.assertTrue + +/** Unit tests for IngestException and its subclasses. */ +class IngestExceptionTest { + + // ==================== IngestException Tests ==================== + + @Test + fun `IngestException should use provided message`() { + val exception = IngestException("Test error message") + assertEquals("Test error message", exception.message) + } + + @Test + fun `IngestException should use fallback message when null`() { + val exception = IngestException() + assertEquals( + "Something went wrong calling Kusto client library (fallback message).", + exception.message, + ) + } + + @Test + fun `IngestException should store cause`() { + val cause = IOException("Original error") + val exception = IngestException("Wrapper", cause) + + assertEquals("Wrapper", exception.message) + assertSame(cause, exception.cause) + } + + @Test + fun `IngestException should store failure code`() { + val exception = IngestException(failureCode = 500) + assertEquals(500, exception.failureCode) + } + + @Test + fun `IngestException should store failure sub code`() { + val exception = IngestException(failureSubCode = "SubCode123") + assertEquals("SubCode123", exception.failureSubCode) + } + + @Test + fun `IngestException should store isPermanent flag`() { + val permanentException = IngestException(isPermanent = true) + assertTrue(permanentException.isPermanent == true) + + val transientException = IngestException(isPermanent = false) + assertFalse(transientException.isPermanent == true) + } + + @Test + fun `IngestException toString should return message`() { + val exception = IngestException("Error message") + assertEquals("Error message", exception.toString()) + } + + // ==================== IngestRequestException Tests ==================== + + @Test + fun `IngestRequestException should format message correctly`() { + val exception = + IngestRequestException( + errorCode = "ERR001", + errorReason = "Bad Request", + errorMessage = "Invalid parameter", + ) + + assertTrue(exception.message.contains("Bad Request")) + assertTrue(exception.message.contains("ERR001")) + assertTrue(exception.message.contains("Invalid parameter")) + } + + @Test + fun `IngestRequestException should use custom message when provided`() { + val exception = + IngestRequestException( + errorCode = "ERR001", + errorReason = "Bad Request", + errorMessage = "Invalid parameter", + message = "Custom error message", + ) + + assertEquals("Custom error message", exception.message) + } + + @Test + fun `IngestRequestException should store all properties`() { + val cause = RuntimeException("Cause") + val exception = + IngestRequestException( + errorCode = "ERR001", + errorReason = "Bad Request", + errorMessage = "Invalid parameter", + dataSource = "test-source", + databaseName = "test-db", + clientRequestId = "req-123", + activityId = "act-456", + failureCode = 400, + failureSubCode = "SUB001", + isPermanent = true, + cause = cause, + ) + + assertEquals("ERR001", exception.errorCode) + assertEquals("Bad Request", exception.errorReason) + 
assertEquals("Invalid parameter", exception.errorMessage) + assertEquals("test-source", exception.dataSource) + assertEquals("test-db", exception.databaseName) + assertEquals("req-123", exception.clientRequestId) + assertEquals("act-456", exception.activityId) + assertEquals(400, exception.failureCode) + assertTrue(exception.isPermanent == true) + assertSame(cause, exception.cause) + } + + @Test + fun `IngestRequestException should default isPermanent to true`() { + val exception = IngestRequestException() + assertTrue(exception.isPermanent == true) + } + + // ==================== IngestServiceException Tests ==================== + + @Test + fun `IngestServiceException should format message correctly`() { + val exception = + IngestServiceException( + errorCode = "SVC001", + errorReason = "Service Unavailable", + errorMessage = "Server is busy", + ) + + assertTrue(exception.message.contains("Service Unavailable")) + assertTrue(exception.message.contains("SVC001")) + assertTrue(exception.message.contains("Server is busy")) + assertTrue(exception.message.contains("temporary")) + } + + @Test + fun `IngestServiceException should use custom message when provided`() { + val exception = + IngestServiceException( + errorCode = "SVC001", + message = "Custom service error", + ) + + assertEquals("Custom service error", exception.message) + } + + @Test + fun `IngestServiceException should store all properties`() { + val exception = + IngestServiceException( + errorCode = "SVC001", + errorReason = "Service Error", + errorMessage = "Internal error", + dataSource = "kusto-cluster", + clientRequestId = "client-req-1", + activityId = "activity-1", + failureCode = 503, + failureSubCode = "RETRY", + isPermanent = false, + ) + + assertEquals("SVC001", exception.errorCode) + assertEquals("Service Error", exception.errorReason) + assertEquals("Internal error", exception.errorMessage) + assertEquals("kusto-cluster", exception.dataSource) + assertEquals("client-req-1", exception.clientRequestId) + assertEquals("activity-1", exception.activityId) + assertEquals(503, exception.failureCode) + assertEquals("RETRY", exception.failureSubCode) + assertFalse(exception.isPermanent == true) + } + + @Test + fun `IngestServiceException should default failureCode to 500`() { + val exception = IngestServiceException() + assertEquals(500, exception.failureCode) + } + + // ==================== IngestClientException Tests ==================== + + @Test + fun `IngestClientException should format message correctly`() { + val exception = + IngestClientException( + ingestionSource = "test-file.csv", + error = "File not found", + ) + + assertTrue(exception.message.contains("test-file.csv")) + assertTrue(exception.message.contains("File not found")) + } + + @Test + fun `IngestClientException should use custom message when provided`() { + val exception = + IngestClientException( + ingestionSource = "test-file.csv", + error = "File not found", + message = "Custom client error", + ) + + assertEquals("Custom client error", exception.message) + } + + @Test + fun `IngestClientException should store all properties`() { + val exception = + IngestClientException( + ingestionSourceId = "source-123", + ingestionSource = "data.json", + error = "Parse error", + failureCode = 400, + failureSubCode = "PARSE", + isPermanent = true, + ) + + assertEquals("source-123", exception.ingestionSourceId) + assertEquals("data.json", exception.ingestionSource) + assertEquals("Parse error", exception.error) + assertEquals(400, exception.failureCode) + 
assertEquals("PARSE", exception.failureSubCode) + assertTrue(exception.isPermanent == true) + } + + @Test + fun `IngestClientException should default failureCode to 400`() { + val exception = IngestClientException() + assertEquals(400, exception.failureCode) + } + + // ==================== IngestSizeLimitExceededException Tests ==================== + + @Test + fun `IngestSizeLimitExceededException should format message correctly`() { + val exception = + IngestSizeLimitExceededException( + size = 1000000, + maxNumberOfBlobs = 500000, + ingestionSource = "large-file.csv", + ) + + assertTrue(exception.message.contains("large-file.csv")) + assertTrue(exception.message.contains("1000000")) + assertTrue(exception.message.contains("500000")) + } + + @Test + fun `IngestSizeLimitExceededException should use custom message when provided`() { + val exception = + IngestSizeLimitExceededException( + size = 1000000, + maxNumberOfBlobs = 500000, + message = "Custom size limit message", + ) + + assertEquals("Custom size limit message", exception.message) + } + + @Test + fun `IngestSizeLimitExceededException should store size properties`() { + val exception = + IngestSizeLimitExceededException( + size = 2000000, + maxNumberOfBlobs = 1000000, + ) + + assertEquals(2000000, exception.size) + assertEquals(1000000, exception.maxNumberOfBlobs) + } + + @Test + fun `IngestSizeLimitExceededException should default isPermanent to true`() { + val exception = + IngestSizeLimitExceededException( + size = 100, + maxNumberOfBlobs = 50, + ) + assertTrue(exception.isPermanent == true) + } + + // ==================== InvalidIngestionMappingException Tests ==================== + + @Test + fun `InvalidIngestionMappingException should format message correctly`() { + val exception = + InvalidIngestionMappingException( + ingestionSource = "data.json", + error = "Missing required column", + ) + + assertTrue(exception.message.contains("Ingestion mapping is invalid")) + } + + @Test + fun `InvalidIngestionMappingException should use custom message when provided`() { + val exception = + InvalidIngestionMappingException( + message = "Custom mapping error", + ) + + assertEquals("Custom mapping error", exception.message) + } + + @Test + fun `InvalidIngestionMappingException should default isPermanent to true`() { + val exception = InvalidIngestionMappingException() + assertTrue(exception.isPermanent == true) + } + + // ==================== MultipleIngestionMappingPropertiesException Tests ==================== + + @Test + fun `MultipleIngestionMappingPropertiesException should format message correctly`() { + val exception = MultipleIngestionMappingPropertiesException() + + assertTrue(exception.message.contains("At most one property")) + assertTrue(exception.message.contains("ingestion mapping")) + } + + @Test + fun `MultipleIngestionMappingPropertiesException should use custom message when provided`() { + val exception = + MultipleIngestionMappingPropertiesException( + message = "Custom multiple mapping error", + ) + + assertEquals("Custom multiple mapping error", exception.message) + } + + @Test + fun `MultipleIngestionMappingPropertiesException should default isPermanent to true`() { + val exception = MultipleIngestionMappingPropertiesException() + assertTrue(exception.isPermanent == true) + } + + // ==================== UploadFailedException Tests ==================== + + @Test + fun `UploadFailedException should format message correctly`() { + val exception = + UploadFailedException( + fileName = "test.csv", + blobName = 
"container/blob.csv", + failureSubCode = UploadErrorCode.UPLOAD_FAILED, + ) + + assertTrue(exception.message.contains("test.csv")) + assertTrue(exception.message.contains("container/blob.csv")) + } + + @Test + fun `UploadFailedException should use custom message when provided`() { + val exception = + UploadFailedException( + fileName = "test.csv", + blobName = "blob.csv", + failureSubCode = UploadErrorCode.UPLOAD_FAILED, + message = "Custom upload error", + ) + + assertEquals("Custom upload error", exception.message) + } + + @Test + fun `UploadFailedException should store all properties`() { + val cause = IOException("Network error") + val exception = + UploadFailedException( + fileName = "data.json", + blobName = "container/data.json", + failureCode = 500, + failureSubCode = UploadErrorCode.NETWORK_ERROR, + isPermanent = false, + cause = cause, + ) + + assertEquals("data.json", exception.fileName) + assertEquals("container/data.json", exception.blobName) + assertEquals(500, exception.failureCode) + assertEquals( + UploadErrorCode.NETWORK_ERROR.toString(), + exception.failureSubCode, + ) + assertFalse(exception.isPermanent == true) + assertSame(cause, exception.cause) + } + + // ==================== NoAvailableIngestContainersException Tests ==================== + + @Test + fun `NoAvailableIngestContainersException should format message correctly`() { + val exception = + NoAvailableIngestContainersException( + failureSubCode = UploadErrorCode.NO_CONTAINERS_AVAILABLE, + ) + + assertTrue(exception.message.contains("No available containers")) + } + + @Test + fun `NoAvailableIngestContainersException should use custom message when provided`() { + val exception = + NoAvailableIngestContainersException( + failureSubCode = + UploadErrorCode.NO_CONTAINERS_AVAILABLE, + message = "Custom no containers message", + ) + + assertEquals("Custom no containers message", exception.message) + } + + @Test + fun `NoAvailableIngestContainersException should default failureCode to 500`() { + val exception = + NoAvailableIngestContainersException( + failureSubCode = UploadErrorCode.NO_CONTAINERS_AVAILABLE, + ) + assertEquals(500, exception.failureCode) + } + + @Test + fun `NoAvailableIngestContainersException should default isPermanent to false`() { + val exception = + NoAvailableIngestContainersException( + failureSubCode = UploadErrorCode.NO_CONTAINERS_AVAILABLE, + ) + assertFalse(exception.isPermanent == true) + } + + // ==================== InvalidUploadStreamException Tests ==================== + + @Test + fun `InvalidUploadStreamException should format message correctly`() { + val exception = + InvalidUploadStreamException( + fileName = "empty.csv", + failureSubCode = UploadErrorCode.SOURCE_IS_EMPTY, + ) + + assertTrue(exception.message.contains("invalid")) + assertTrue( + exception.message.contains( + UploadErrorCode.SOURCE_IS_EMPTY.toString(), + ), + ) + } + + @Test + fun `InvalidUploadStreamException should use custom message when provided`() { + val exception = + InvalidUploadStreamException( + failureSubCode = UploadErrorCode.SOURCE_IS_EMPTY, + message = "Custom invalid stream message", + ) + + assertEquals("Custom invalid stream message", exception.message) + } + + @Test + fun `InvalidUploadStreamException should default isPermanent to true`() { + val exception = + InvalidUploadStreamException( + failureSubCode = UploadErrorCode.SOURCE_IS_NULL, + ) + assertTrue(exception.isPermanent == true) + } + + // ==================== UploadSizeLimitExceededException Tests ==================== + + @Test + fun 
`UploadSizeLimitExceededException should format message correctly`() { + val exception = + UploadSizeLimitExceededException( + size = 1000000, + maxSize = 500000, + fileName = "large.csv", + failureSubCode = + UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED, + ) + + assertTrue(exception.message.contains("large.csv")) + assertTrue(exception.message.contains("1000000")) + assertTrue(exception.message.contains("500000")) + } + + @Test + fun `UploadSizeLimitExceededException should use custom message when provided`() { + val exception = + UploadSizeLimitExceededException( + size = 1000000, + maxSize = 500000, + failureSubCode = + UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED, + message = "Custom size error", + ) + + assertEquals("Custom size error", exception.message) + } + + @Test + fun `UploadSizeLimitExceededException should store size properties`() { + val exception = + UploadSizeLimitExceededException( + size = 2000000, + maxSize = 1000000, + failureSubCode = + UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED, + ) + + assertEquals(2000000, exception.size) + assertEquals(1000000, exception.maxSize) + } + + @Test + fun `UploadSizeLimitExceededException should default isPermanent to true`() { + val exception = + UploadSizeLimitExceededException( + size = 100, + maxSize = 50, + failureSubCode = + UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED, + ) + assertTrue(exception.isPermanent == true) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetailsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetailsTest.kt new file mode 100644 index 000000000..f9d8483eb --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ClientDetailsTest.kt @@ -0,0 +1,277 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
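A pattern the exception tests above make explicit: every `IngestException` carries a nullable `isPermanent` flag, and the subclasses pick sensible defaults (mapping and size-limit errors default to permanent, container exhaustion to transient), so callers can key retry behavior off that flag. A hedged sketch; `ingestOnce` is a hypothetical stand-in for whatever ingest call the caller makes:

```kotlin
import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException

// Retries transient IngestExceptions, rethrows permanent ones immediately.
fun ingestWithRetry(maxAttempts: Int, ingestOnce: () -> Unit) {
    var attempt = 0
    while (true) {
        try {
            ingestOnce()
            return
        } catch (e: IngestException) {
            attempt++
            // isPermanent is nullable, so compare against true explicitly,
            // mirroring the tests' `exception.isPermanent == true` checks.
            if (e.isPermanent == true || attempt >= maxAttempts) throw e
        }
    }
}
```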
+package com.microsoft.azure.kusto.ingest.v2.common.models + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNotEquals +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test + +class ClientDetailsTest { + + @Test + fun `createDefault creates ClientDetails with default values`() { + val clientDetails = ClientDetails.createDefault() + + assertNotNull(clientDetails) + assertNotNull(clientDetails.applicationForTracing) + assertNotNull(clientDetails.userNameForTracing) + assertNull(clientDetails.clientVersionForTracing) + } + + @Test + fun `getApplicationForTracing returns provided value when not null`() { + val clientDetails = + ClientDetails( + applicationForTracing = "TestApp", + userNameForTracing = "TestUser", + clientVersionForTracing = null, + ) + + assertEquals("TestApp", clientDetails.getApplicationForTracing()) + } + + @Test + fun `getApplicationForTracing returns default when null`() { + val clientDetails = + ClientDetails( + applicationForTracing = null, + userNameForTracing = "TestUser", + clientVersionForTracing = null, + ) + + val result = clientDetails.getApplicationForTracing() + assertNotNull(result) + assertFalse(result.isBlank()) + } + + @Test + fun `getUserNameForTracing returns provided value when not null`() { + val clientDetails = + ClientDetails( + applicationForTracing = "TestApp", + userNameForTracing = "TestUser", + clientVersionForTracing = null, + ) + + assertEquals("TestUser", clientDetails.getUserNameForTracing()) + } + + @Test + fun `getUserNameForTracing returns default when null`() { + val clientDetails = + ClientDetails( + applicationForTracing = "TestApp", + userNameForTracing = null, + clientVersionForTracing = null, + ) + + val result = clientDetails.getUserNameForTracing() + assertNotNull(result) + } + + @Test + fun `getClientVersionForTracing returns default version when null`() { + val clientDetails = + ClientDetails( + applicationForTracing = "TestApp", + userNameForTracing = "TestUser", + clientVersionForTracing = null, + ) + + val version = clientDetails.getClientVersionForTracing() + assertNotNull(version) + assertTrue(version.contains("Kusto.Java.Client.V2")) + assertTrue(version.contains("Runtime")) + } + + @Test + fun `getClientVersionForTracing appends custom version when provided`() { + val clientDetails = + ClientDetails( + applicationForTracing = "TestApp", + userNameForTracing = "TestUser", + clientVersionForTracing = "CustomVersion:1.0.0", + ) + + val version = clientDetails.getClientVersionForTracing() + assertNotNull(version) + assertTrue(version.contains("Kusto.Java.Client.V2")) + assertTrue(version.contains("CustomVersion:1.0.0")) + } + + @Test + fun `fromConnectorDetails creates ClientDetails with basic info`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + ) + + assertNotNull(clientDetails) + assertNotNull(clientDetails.applicationForTracing) + assertTrue( + clientDetails.applicationForTracing!!.contains( + "Kusto.MyConnector", + ), + ) + assertTrue(clientDetails.applicationForTracing.contains("1.0.0")) + assertEquals(ClientDetails.NONE, clientDetails.userNameForTracing) + } + + @Test + fun `fromConnectorDetails includes user when sendUser is true`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = 
"MyConnector", + version = "1.0.0", + sendUser = true, + ) + + assertNotNull(clientDetails.userNameForTracing) + assertNotEquals(ClientDetails.NONE, clientDetails.userNameForTracing) + } + + @Test + fun `fromConnectorDetails uses override user when provided`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + sendUser = true, + overrideUser = "CustomUser@example.com", + ) + + assertEquals("CustomUser@example.com", clientDetails.userNameForTracing) + } + + @Test + fun `fromConnectorDetails includes appName and appVersion`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + appName = "MyApp", + appVersion = "2.0.0", + ) + + assertNotNull(clientDetails.applicationForTracing) + assertTrue(clientDetails.applicationForTracing!!.contains("MyApp")) + assertTrue(clientDetails.applicationForTracing.contains("2.0.0")) + } + + @Test + fun `fromConnectorDetails includes additional fields`() { + val additionalFields = mapOf("JobId" to "job-123", "RunId" to "run-456") + + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + additionalFields = additionalFields, + ) + + assertNotNull(clientDetails.applicationForTracing) + assertTrue(clientDetails.applicationForTracing!!.contains("JobId")) + assertTrue(clientDetails.applicationForTracing.contains("job-123")) + assertTrue(clientDetails.applicationForTracing.contains("RunId")) + assertTrue(clientDetails.applicationForTracing.contains("run-456")) + } + + @Test + fun `fromConnectorDetails formats fields with pipe separator`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + ) + + assertNotNull(clientDetails.applicationForTracing) + assertTrue(clientDetails.applicationForTracing!!.contains("|")) + } + + @Test + fun `fromConnectorDetails wraps values in curly braces`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + ) + + assertNotNull(clientDetails.applicationForTracing) + assertTrue(clientDetails.applicationForTracing!!.contains("{")) + assertTrue(clientDetails.applicationForTracing.contains("}")) + } + + @Test + fun `data class equality works correctly`() { + val client1 = ClientDetails("app1", "user1", "v1") + val client2 = ClientDetails("app1", "user1", "v1") + val client3 = ClientDetails("app2", "user1", "v1") + + assertEquals(client1, client2) + assertNotEquals(client1, client3) + } + + @Test + fun `data class hashCode works correctly`() { + val client1 = ClientDetails("app1", "user1", "v1") + val client2 = ClientDetails("app1", "user1", "v1") + + assertEquals(client1.hashCode(), client2.hashCode()) + } + + @Test + fun `data class copy works correctly`() { + val original = ClientDetails("app1", "user1", "v1") + val copied = original.copy(applicationForTracing = "app2") + + assertEquals("app2", copied.applicationForTracing) + assertEquals("user1", copied.userNameForTracing) + assertEquals("v1", copied.clientVersionForTracing) + } + + @Test + fun `fromConnectorDetails handles empty additional fields`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name = "MyConnector", + version = "1.0.0", + additionalFields = emptyMap(), + ) + + assertNotNull(clientDetails) + assertNotNull(clientDetails.applicationForTracing) + } + + @Test + fun `fromConnectorDetails handles null appVersion uses NONE`() { + val clientDetails = + ClientDetails.fromConnectorDetails( + name 
= "MyConnector", + version = "1.0.0", + appName = "MyApp", + appVersion = null, + ) + + assertNotNull(clientDetails.applicationForTracing) + assertTrue( + clientDetails.applicationForTracing!!.contains( + ClientDetails.NONE, + ), + ) + } + + @Test + fun `NONE constant has correct value`() { + assertEquals("[none]", ClientDetails.NONE) + } + + @Test + fun `DEFAULT_APP_NAME constant has correct value`() { + assertEquals("Kusto.Java.Client.V2", ClientDetails.DEFAULT_APP_NAME) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypesTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypesTest.kt new file mode 100644 index 000000000..a67bf517f --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/ExtendedResponseTypesTest.kt @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models + +import com.microsoft.azure.kusto.ingest.v2.models.IngestResponse +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotEquals +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test + +class ExtendedResponseTypesTest { + + @Test + fun `IngestKind enum has correct values`() { + assertEquals(2, IngestKind.entries.size) + assertNotNull(IngestKind.STREAMING) + assertNotNull(IngestKind.QUEUED) + } + + @Test + fun `IngestKind STREAMING has correct name`() { + assertEquals("STREAMING", IngestKind.STREAMING.name) + } + + @Test + fun `IngestKind QUEUED has correct name`() { + assertEquals("QUEUED", IngestKind.QUEUED.name) + } + + @Test + fun `IngestKind valueOf works correctly`() { + assertEquals(IngestKind.STREAMING, IngestKind.valueOf("STREAMING")) + assertEquals(IngestKind.QUEUED, IngestKind.valueOf("QUEUED")) + } + + @Test + fun `ExtendedIngestResponse creates correctly with STREAMING kind`() { + val ingestResponse = IngestResponse(ingestionOperationId = "op-123") + val extended = + ExtendedIngestResponse( + ingestResponse = ingestResponse, + ingestionType = IngestKind.STREAMING, + ) + + assertEquals(ingestResponse, extended.ingestResponse) + assertEquals(IngestKind.STREAMING, extended.ingestionType) + assertEquals("op-123", extended.ingestResponse.ingestionOperationId) + } + + @Test + fun `ExtendedIngestResponse creates correctly with QUEUED kind`() { + val ingestResponse = IngestResponse(ingestionOperationId = "op-456") + val extended = + ExtendedIngestResponse( + ingestResponse = ingestResponse, + ingestionType = IngestKind.QUEUED, + ) + + assertEquals(ingestResponse, extended.ingestResponse) + assertEquals(IngestKind.QUEUED, extended.ingestionType) + assertEquals("op-456", extended.ingestResponse.ingestionOperationId) + } + + @Test + fun `ExtendedIngestResponse data class equality works`() { + val response1 = IngestResponse(ingestionOperationId = "op-123") + val response2 = IngestResponse(ingestionOperationId = "op-123") + val response3 = IngestResponse(ingestionOperationId = "op-456") + + val extended1 = ExtendedIngestResponse(response1, IngestKind.STREAMING) + val extended2 = ExtendedIngestResponse(response2, IngestKind.STREAMING) + val extended3 = ExtendedIngestResponse(response3, IngestKind.STREAMING) + val extended4 = ExtendedIngestResponse(response1, IngestKind.QUEUED) + + assertEquals(extended1, extended2) + 
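+        // Equality is structural: ExtendedIngestResponse behaves as a plain
+        // two-field data class over (ingestResponse, ingestionType), so a
+        // difference in either field must break equality below.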
assertNotEquals(extended1, extended3) + assertNotEquals(extended1, extended4) + } + + @Test + fun `ExtendedIngestResponse data class hashCode works`() { + val response = IngestResponse(ingestionOperationId = "op-123") + val extended1 = ExtendedIngestResponse(response, IngestKind.STREAMING) + val extended2 = + ExtendedIngestResponse(response.copy(), IngestKind.STREAMING) + + assertEquals(extended1.hashCode(), extended2.hashCode()) + } + + @Test + fun `ExtendedIngestResponse data class copy works`() { + val response = IngestResponse(ingestionOperationId = "op-123") + val original = ExtendedIngestResponse(response, IngestKind.STREAMING) + val copied = original.copy(ingestionType = IngestKind.QUEUED) + + assertEquals(original.ingestResponse, copied.ingestResponse) + assertEquals(IngestKind.STREAMING, original.ingestionType) + assertEquals(IngestKind.QUEUED, copied.ingestionType) + } + + @Test + fun `ExtendedIngestResponse toString includes all fields`() { + val response = IngestResponse(ingestionOperationId = "op-123") + val extended = ExtendedIngestResponse(response, IngestKind.STREAMING) + + val stringRep = extended.toString() + assertTrue(stringRep.contains("ingestResponse")) + assertTrue(stringRep.contains("ingestionType")) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt new file mode 100644 index 000000000..c73fe205d --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt @@ -0,0 +1,456 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models.mapping + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertNull +import kotlin.test.assertTrue + +/** Unit tests for mapping classes. 
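+ * Covers ColumnMapping property accessors and per-format validation,
+ * TransformationMethod, InlineIngestionMapping, and MappingConstants.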
*/ +class MappingTest { + + // ==================== ColumnMapping Tests ==================== + + @Test + fun `ColumnMapping should store column name and type`() { + val mapping = + ColumnMapping(columnName = "TestColumn", columnType = "string") + + assertEquals("TestColumn", mapping.columnName) + assertEquals("string", mapping.columnType) + } + + @Test + fun `ColumnMapping should set and get path`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setPath("$.data.value") + assertEquals("$.data.value", mapping.getPath()) + } + + @Test + fun `ColumnMapping getPath should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getPath()) + } + + @Test + fun `ColumnMapping should set and get transform`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setTransform(TransformationMethod.SourceLineNumber) + assertEquals( + TransformationMethod.SourceLineNumber, + mapping.getTransform(), + ) + } + + @Test + fun `ColumnMapping getTransform should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getTransform()) + } + + @Test + fun `ColumnMapping getTransform should return null for blank transform`() { + val mapping = ColumnMapping("col1", "string") + mapping.properties[MappingConstants.Transform.name] = "" + assertNull(mapping.getTransform()) + } + + @Test + fun `ColumnMapping should set and get ordinal`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setOrdinal(5) + assertEquals(5, mapping.getOrdinal()) + } + + @Test + fun `ColumnMapping getOrdinal should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getOrdinal()) + } + + @Test + fun `ColumnMapping getOrdinal should return null for blank ordinal`() { + val mapping = ColumnMapping("col1", "string") + mapping.properties[MappingConstants.Ordinal.name] = "" + assertNull(mapping.getOrdinal()) + } + + @Test + fun `ColumnMapping should set and get constant value`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setConstantValue("constant-value") + assertEquals("constant-value", mapping.getConstantValue()) + } + + @Test + fun `ColumnMapping getConstantValue should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getConstantValue()) + } + + @Test + fun `ColumnMapping should set and get field`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setField("fieldName") + assertEquals("fieldName", mapping.getField()) + } + + @Test + fun `ColumnMapping getField should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getField()) + } + + @Test + fun `ColumnMapping should set and get columns`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setColumns("col1,col2,col3") + assertEquals("col1,col2,col3", mapping.getColumns()) + } + + @Test + fun `ColumnMapping getColumns should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getColumns()) + } + + @Test + fun `ColumnMapping should set and get storage data type`() { + val mapping = ColumnMapping("col1", "string") + + mapping.setStorageDataType("int64") + assertEquals("int64", mapping.getStorageDataType()) + } + + @Test + fun `ColumnMapping getStorageDataType should return null when not set`() { + val mapping = ColumnMapping("col1", "string") + assertNull(mapping.getStorageDataType()) + } + + // ==================== ColumnMapping isValid Tests 
==================== + + @Test + fun `ColumnMapping isValid for CSV should require non-blank columnName`() { + val validMapping = ColumnMapping("col1", "string") + assertTrue(validMapping.isValid(Format.csv)) + + val invalidMapping = ColumnMapping("", "string") + assertFalse(invalidMapping.isValid(Format.csv)) + } + + @Test + fun `ColumnMapping isValid for sstream should require non-blank columnName`() { + val validMapping = ColumnMapping("col1", "string") + assertTrue(validMapping.isValid(Format.sstream)) + + val invalidMapping = ColumnMapping("", "string") + assertFalse(invalidMapping.isValid(Format.sstream)) + } + + @Test + fun `ColumnMapping isValid for JSON should require columnName and path`() { + val validMapping = ColumnMapping("col1", "string") + validMapping.setPath("$.data") + assertTrue(validMapping.isValid(Format.json)) + + val invalidNoPath = ColumnMapping("col1", "string") + assertFalse(invalidNoPath.isValid(Format.json)) + + val invalidNoName = ColumnMapping("", "string") + invalidNoName.setPath("$.data") + assertFalse(invalidNoName.isValid(Format.json)) + } + + @Test + fun `ColumnMapping isValid for JSON should accept SourceLineNumber transform without path`() { + val mapping = ColumnMapping("col1", "long") + mapping.setTransform(TransformationMethod.SourceLineNumber) + assertTrue(mapping.isValid(Format.json)) + } + + @Test + fun `ColumnMapping isValid for JSON should accept SourceLocation transform without path`() { + val mapping = ColumnMapping("col1", "string") + mapping.setTransform(TransformationMethod.SourceLocation) + assertTrue(mapping.isValid(Format.json)) + } + + @Test + fun `ColumnMapping isValid for parquet should require columnName and path`() { + val validMapping = ColumnMapping("col1", "string") + validMapping.setPath("$.data") + assertTrue(validMapping.isValid(Format.parquet)) + + val invalidNoPath = ColumnMapping("col1", "string") + assertFalse(invalidNoPath.isValid(Format.parquet)) + } + + @Test + fun `ColumnMapping isValid for orc should require columnName and path`() { + val validMapping = ColumnMapping("col1", "string") + validMapping.setPath("$.data") + assertTrue(validMapping.isValid(Format.orc)) + } + + @Test + fun `ColumnMapping isValid for w3clogfile should require columnName and path`() { + val validMapping = ColumnMapping("col1", "string") + validMapping.setPath("$.field") + assertTrue(validMapping.isValid(Format.w3clogfile)) + } + + @Test + fun `ColumnMapping isValid for avro should require columnName and columns`() { + val validMapping = ColumnMapping("col1", "string") + validMapping.setColumns("avroCol1,avroCol2") + assertTrue(validMapping.isValid(Format.avro)) + + val invalidNoColumns = ColumnMapping("col1", "string") + assertFalse(invalidNoColumns.isValid(Format.avro)) + } + + @Test + fun `ColumnMapping isValid for apacheavro should require columnName and columns`() { + val validMapping = ColumnMapping("col1", "string") + validMapping.setColumns("avroCol1") + assertTrue(validMapping.isValid(Format.apacheavro)) + } + + @Test + fun `ColumnMapping isValid should return false for unsupported format`() { + val mapping = ColumnMapping("col1", "string") + // txt format doesn't have specific validation rules in the switch + assertFalse(mapping.isValid(Format.txt)) + } + + // ==================== ColumnMapping data class Tests ==================== + + @Test + fun `ColumnMapping should support equals and hashCode`() { + val mapping1 = ColumnMapping("col1", "string") + val mapping2 = ColumnMapping("col1", "string") + + assertEquals(mapping1, mapping2) 
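+        // Kotlin data classes generate equals and hashCode together, so
+        // equal mappings must also agree on hashCode: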
+ assertEquals(mapping1.hashCode(), mapping2.hashCode()) + } + + @Test + fun `ColumnMapping should support copy`() { + val original = ColumnMapping("col1", "string") + original.setPath("$.path") + + val copied = original.copy(columnName = "col2") + + assertEquals("col2", copied.columnName) + assertEquals("string", copied.columnType) + assertEquals("$.path", copied.getPath()) + } + + // ==================== TransformationMethod Tests ==================== + + @Test + fun `TransformationMethod should have all expected values`() { + val values = TransformationMethod.values() + assertTrue(values.contains(TransformationMethod.None)) + assertTrue( + values.contains( + TransformationMethod.PropertyBagArrayToDictionary, + ), + ) + assertTrue(values.contains(TransformationMethod.SourceLocation)) + assertTrue(values.contains(TransformationMethod.SourceLineNumber)) + assertTrue(values.contains(TransformationMethod.GetPathElement)) + assertTrue(values.contains(TransformationMethod.UnknownMethod)) + assertTrue( + values.contains(TransformationMethod.DateTimeFromUnixSeconds), + ) + assertTrue( + values.contains( + TransformationMethod.DateTimeFromUnixMilliseconds, + ), + ) + } + + @Test + fun `TransformationMethod valueOf should return correct enum`() { + assertEquals( + TransformationMethod.None, + TransformationMethod.valueOf("None"), + ) + assertEquals( + TransformationMethod.SourceLineNumber, + TransformationMethod.valueOf("SourceLineNumber"), + ) + } + + // ==================== InlineIngestionMapping Tests ==================== + + @Test + fun `InlineIngestionMapping should store column mappings and type`() { + val columnMappings = + listOf( + ColumnMapping("col1", "string"), + ColumnMapping("col2", "int"), + ) + + val mapping = + InlineIngestionMapping( + columnMappings = columnMappings, + ingestionMappingType = + InlineIngestionMapping.IngestionMappingType.JSON, + ) + + assertEquals(2, mapping.columnMappings?.size) + assertEquals( + InlineIngestionMapping.IngestionMappingType.JSON, + mapping.ingestionMappingType, + ) + } + + @Test + fun `InlineIngestionMapping should support null values`() { + val mapping = InlineIngestionMapping() + + assertNull(mapping.columnMappings) + assertNull(mapping.ingestionMappingType) + } + + @Test + fun `InlineIngestionMapping copy constructor should create deep copy`() { + val columnMappings = + listOf( + ColumnMapping("col1", "string").apply { + setPath("$.data") + }, + ) + + val original = + InlineIngestionMapping( + columnMappings = columnMappings, + ingestionMappingType = + InlineIngestionMapping.IngestionMappingType.JSON, + ) + + val copied = InlineIngestionMapping(original) + + assertEquals(original.columnMappings?.size, copied.columnMappings?.size) + assertEquals(original.ingestionMappingType, copied.ingestionMappingType) + assertEquals("col1", copied.columnMappings?.get(0)?.columnName) + } + + @Test + fun `InlineIngestionMapping copy constructor should handle null columnMappings`() { + val original = + InlineIngestionMapping( + columnMappings = null, + ingestionMappingType = + InlineIngestionMapping.IngestionMappingType.CSV, + ) + + val copied = InlineIngestionMapping(original) + + assertNull(copied.columnMappings) + assertEquals( + InlineIngestionMapping.IngestionMappingType.CSV, + copied.ingestionMappingType, + ) + } + + // ==================== IngestionMappingType Tests ==================== + + @Test + fun `IngestionMappingType CSV should have correct kusto value`() { + assertEquals( + "Csv", + InlineIngestionMapping.IngestionMappingType.CSV.kustoValue, + ) 
+ } + + @Test + fun `IngestionMappingType JSON should have correct kusto value`() { + assertEquals( + "Json", + InlineIngestionMapping.IngestionMappingType.JSON.kustoValue, + ) + } + + @Test + fun `IngestionMappingType AVRO should have correct kusto value`() { + assertEquals( + "Avro", + InlineIngestionMapping.IngestionMappingType.AVRO.kustoValue, + ) + } + + @Test + fun `IngestionMappingType PARQUET should have correct kusto value`() { + assertEquals( + "Parquet", + InlineIngestionMapping.IngestionMappingType.PARQUET.kustoValue, + ) + } + + @Test + fun `IngestionMappingType SSTREAM should have correct kusto value`() { + assertEquals( + "SStream", + InlineIngestionMapping.IngestionMappingType.SSTREAM.kustoValue, + ) + } + + @Test + fun `IngestionMappingType ORC should have correct kusto value`() { + assertEquals( + "Orc", + InlineIngestionMapping.IngestionMappingType.ORC.kustoValue, + ) + } + + @Test + fun `IngestionMappingType APACHEAVRO should have correct kusto value`() { + assertEquals( + "ApacheAvro", + InlineIngestionMapping.IngestionMappingType.APACHEAVRO + .kustoValue, + ) + } + + @Test + fun `IngestionMappingType W3CLOGFILE should have correct kusto value`() { + assertEquals( + "W3CLogFile", + InlineIngestionMapping.IngestionMappingType.W3CLOGFILE + .kustoValue, + ) + } + + @Test + fun `IngestionMappingType should have all expected values`() { + val values = InlineIngestionMapping.IngestionMappingType.values() + assertEquals(8, values.size) + } + + // ==================== MappingConstants Tests ==================== + + @Test + fun `MappingConstants should have expected constant names`() { + assertEquals("Path", MappingConstants.Path.name) + assertEquals("Transform", MappingConstants.Transform.name) + assertEquals("Ordinal", MappingConstants.Ordinal.name) + assertEquals("ConstValue", MappingConstants.ConstValue.name) + assertEquals("Field", MappingConstants.Field.name) + assertEquals("Columns", MappingConstants.Columns.name) + assertEquals("StorageDataType", MappingConstants.StorageDataType.name) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtilsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtilsTest.kt new file mode 100644 index 000000000..97f69b9f6 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionResultUtilsTest.kt @@ -0,0 +1,271 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
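+//
+// These tests assume IngestionResultUtils exposes pure predicates and
+// filters over blob statuses; a minimal sketch of that assumed surface
+// (names and nullability inferred from the calls below, not taken from
+// the actual source):
+//
+//   object IngestionResultUtils {
+//       fun hasFailedResults(results: List<BlobStatus>): Boolean
+//       fun isCompleted(results: List<BlobStatus>?): Boolean
+//       fun isInProgress(results: List<BlobStatus>?): Boolean
+//       fun getFailedResults(results: List<BlobStatus>?): List<BlobStatus>
+//       fun getSucceededResults(results: List<BlobStatus>?): List<BlobStatus>
+//   }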
+package com.microsoft.azure.kusto.ingest.v2.common.utils
+
+import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.assertFalse
+import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Test
+import java.time.OffsetDateTime
+
+class IngestionResultUtilsTest {
+
+    private fun createBlobStatus(
+        status: BlobStatus.Status,
+        sourceId: String = "test-id",
+    ): BlobStatus {
+        return BlobStatus(
+            sourceId = sourceId,
+            status = status,
+            startedAt = OffsetDateTime.now(),
+            lastUpdateTime = OffsetDateTime.now(),
+            errorCode = null,
+            failureStatus = null,
+            details = null,
+        )
+    }
+
+    @Test
+    fun `hasFailedResults returns true when list contains failed status`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.Failed),
+                createBlobStatus(BlobStatus.Status.Queued),
+            )
+
+        assertTrue(IngestionResultUtils.hasFailedResults(results))
+    }
+
+    @Test
+    fun `hasFailedResults returns false when list has no failed status`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.Queued),
+                createBlobStatus(BlobStatus.Status.InProgress),
+            )
+
+        assertFalse(IngestionResultUtils.hasFailedResults(results))
+    }
+
+    @Test
+    fun `hasFailedResults returns false for empty list`() {
+        val results = emptyList<BlobStatus>()
+
+        assertFalse(IngestionResultUtils.hasFailedResults(results))
+    }
+
+    @Test
+    fun `isCompleted returns true when all results are in terminal states`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.Failed),
+                createBlobStatus(BlobStatus.Status.Canceled),
+            )
+
+        assertTrue(IngestionResultUtils.isCompleted(results))
+    }
+
+    @Test
+    fun `isCompleted returns false when some results are in progress`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.InProgress),
+                createBlobStatus(BlobStatus.Status.Failed),
+            )
+
+        assertFalse(IngestionResultUtils.isCompleted(results))
+    }
+
+    @Test
+    fun `isCompleted returns false when some results are queued`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.Queued),
+            )
+
+        assertFalse(IngestionResultUtils.isCompleted(results))
+    }
+
+    @Test
+    fun `isCompleted returns false for null list`() {
+        assertFalse(IngestionResultUtils.isCompleted(null))
+    }
+
+    @Test
+    fun `isCompleted returns false for empty list`() {
+        assertFalse(IngestionResultUtils.isCompleted(emptyList()))
+    }
+
+    @Test
+    fun `isCompleted returns true when only succeeded results`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.Succeeded),
+            )
+
+        assertTrue(IngestionResultUtils.isCompleted(results))
+    }
+
+    @Test
+    fun `isCompleted returns true when only failed results`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Failed),
+                createBlobStatus(BlobStatus.Status.Failed),
+            )
+
+        assertTrue(IngestionResultUtils.isCompleted(results))
+    }
+
+    @Test
+    fun `isInProgress returns true when results contain queued status`() {
+        val results =
+            listOf(
+                createBlobStatus(BlobStatus.Status.Succeeded),
+                createBlobStatus(BlobStatus.Status.Queued),
+            )
+
+        assertTrue(IngestionResultUtils.isInProgress(results))
+    }
+
+    @Test
+    fun `isInProgress returns true when results contain in progress status`()
{ + val results = + listOf( + createBlobStatus(BlobStatus.Status.Succeeded), + createBlobStatus(BlobStatus.Status.InProgress), + ) + + assertTrue(IngestionResultUtils.isInProgress(results)) + } + + @Test + fun `isInProgress returns false when all results are terminal`() { + val results = + listOf( + createBlobStatus(BlobStatus.Status.Succeeded), + createBlobStatus(BlobStatus.Status.Failed), + createBlobStatus(BlobStatus.Status.Canceled), + ) + + assertFalse(IngestionResultUtils.isInProgress(results)) + } + + @Test + fun `isInProgress returns false for null list`() { + assertFalse(IngestionResultUtils.isInProgress(null)) + } + + @Test + fun `isInProgress returns false for empty list`() { + assertFalse(IngestionResultUtils.isInProgress(emptyList())) + } + + @Test + fun `getFailedResults returns only failed results`() { + val results = + listOf( + createBlobStatus(BlobStatus.Status.Succeeded, "id1"), + createBlobStatus(BlobStatus.Status.Failed, "id2"), + createBlobStatus(BlobStatus.Status.Failed, "id3"), + createBlobStatus(BlobStatus.Status.Queued, "id4"), + ) + + val failedResults = IngestionResultUtils.getFailedResults(results) + + assertEquals(2, failedResults.size) + assertTrue(failedResults.all { it.status == BlobStatus.Status.Failed }) + assertEquals("id2", failedResults[0].sourceId) + assertEquals("id3", failedResults[1].sourceId) + } + + @Test + fun `getFailedResults returns empty list when no failures`() { + val results = + listOf( + createBlobStatus(BlobStatus.Status.Succeeded), + createBlobStatus(BlobStatus.Status.Queued), + ) + + val failedResults = IngestionResultUtils.getFailedResults(results) + + assertTrue(failedResults.isEmpty()) + } + + @Test + fun `getFailedResults returns empty list for null input`() { + val failedResults = IngestionResultUtils.getFailedResults(null) + + assertTrue(failedResults.isEmpty()) + } + + @Test + fun `getSucceededResults returns only succeeded results`() { + val results = + listOf( + createBlobStatus(BlobStatus.Status.Succeeded, "id1"), + createBlobStatus(BlobStatus.Status.Failed, "id2"), + createBlobStatus(BlobStatus.Status.Succeeded, "id3"), + createBlobStatus(BlobStatus.Status.Queued, "id4"), + ) + + val succeededResults = IngestionResultUtils.getSucceededResults(results) + + assertEquals(2, succeededResults.size) + assertTrue( + succeededResults.all { + it.status == BlobStatus.Status.Succeeded + }, + ) + assertEquals("id1", succeededResults[0].sourceId) + assertEquals("id3", succeededResults[1].sourceId) + } + + @Test + fun `getSucceededResults returns empty list when no successes`() { + val results = + listOf( + createBlobStatus(BlobStatus.Status.Failed), + createBlobStatus(BlobStatus.Status.Queued), + ) + + val succeededResults = IngestionResultUtils.getSucceededResults(results) + + assertTrue(succeededResults.isEmpty()) + } + + @Test + fun `getSucceededResults returns empty list for null input`() { + val succeededResults = IngestionResultUtils.getSucceededResults(null) + + assertTrue(succeededResults.isEmpty()) + } + + @Test + fun `test mixed status scenarios`() { + val allStatuses = + listOf( + createBlobStatus(BlobStatus.Status.Succeeded, "id1"), + createBlobStatus(BlobStatus.Status.Failed, "id2"), + createBlobStatus(BlobStatus.Status.Queued, "id3"), + createBlobStatus(BlobStatus.Status.InProgress, "id4"), + createBlobStatus(BlobStatus.Status.Canceled, "id5"), + ) + + assertTrue(IngestionResultUtils.hasFailedResults(allStatuses)) + assertFalse(IngestionResultUtils.isCompleted(allStatuses)) + 
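+        // One Queued plus one InProgress entry keeps the batch in flight,
+        // so the remaining predicates and filters must agree with that: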
assertTrue(IngestionResultUtils.isInProgress(allStatuses)) + assertEquals(1, IngestionResultUtils.getFailedResults(allStatuses).size) + assertEquals( + 1, + IngestionResultUtils.getSucceededResults(allStatuses).size, + ) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtilsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtilsTest.kt new file mode 100644 index 000000000..04aea3271 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/IngestionUtilsTest.kt @@ -0,0 +1,161 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.utils + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class IngestionUtilsTest { + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for uncompressed avro`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.avro, + CompressionType.NONE, + ) + assertEquals(0.55, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for uncompressed apacheavro`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.apacheavro, + CompressionType.NONE, + ) + assertEquals(0.55, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for uncompressed csv`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.csv, + CompressionType.NONE, + ) + assertEquals(0.45, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for compressed csv`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.csv, + CompressionType.GZIP, + ) + assertEquals(3.6, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for uncompressed json`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.json, + CompressionType.NONE, + ) + assertEquals(0.33, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for compressed json`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.json, + CompressionType.GZIP, + ) + assertEquals(3.60, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for compressed multijson`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.multijson, + CompressionType.GZIP, + ) + assertEquals(5.15, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for uncompressed txt`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.txt, + CompressionType.NONE, + ) + assertEquals(0.15, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for compressed txt`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.txt, + CompressionType.GZIP, + ) + assertEquals(1.8, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for compressed psv`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.psv, + CompressionType.GZIP, + ) + assertEquals(1.5, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for uncompressed parquet`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.parquet, + 
CompressionType.NONE, + ) + assertEquals(3.35, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns default factor for unknown format`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.w3clogfile, + CompressionType.NONE, + ) + assertEquals(1.0, factor, 0.001) + } + + @Test + fun `getRowStoreEstimatedFactor returns default factor for null format with no compression`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + null, + CompressionType.NONE, + ) + assertEquals(0.45, factor, 0.001) // Defaults to CSV + } + + @Test + fun `getRowStoreEstimatedFactor returns correct factor for null format with compression`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + null, + CompressionType.GZIP, + ) + assertEquals(3.6, factor, 0.001) // Defaults to compressed CSV + } + + @Test + fun `getRowStoreEstimatedFactor handles ZIP compression type`() { + val factor = + IngestionUtils.getRowStoreEstimatedFactor( + Format.json, + CompressionType.ZIP, + ) + assertEquals(3.60, factor, 0.001) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt new file mode 100644 index 000000000..f18d2d7db --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt @@ -0,0 +1,187 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.utils + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotEquals +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import java.util.* + +class PathUtilsTest { + + @Test + fun `sanitizeFileName creates valid name with baseName and sourceId`() { + val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") + val result = PathUtils.sanitizeFileName("myfile.csv", sourceId) + + assertTrue(result.contains("e493b23d-684f-4f4c-8ba8-3edfaca09427")) + assertTrue(result.contains("myfile-csv")) + } + + @Test + fun `sanitizeFileName handles null baseName`() { + val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") + val result = PathUtils.sanitizeFileName(null, sourceId) + + assertEquals("e493b23d-684f-4f4c-8ba8-3edfaca09427", result) + } + + @Test + fun `sanitizeFileName handles empty baseName`() { + val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") + val result = PathUtils.sanitizeFileName("", sourceId) + + assertEquals("e493b23d-684f-4f4c-8ba8-3edfaca09427", result) + } + + @Test + fun `sanitizeFileName replaces forbidden characters`() { + val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") + val result = PathUtils.sanitizeFileName("my file@#\$%.csv", sourceId) + + assertTrue(result.contains("my-file")) + assertTrue(result.contains("csv")) + } + + @Test + fun `sanitizeFileName truncates long names`() { + val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") + val longName = "a".repeat(150) + ".csv" + val result = PathUtils.sanitizeFileName(longName, sourceId) + + assertTrue(result.contains("__trunc")) + assertTrue(result.length <= 160) + } + + @Test + fun `createFileNameForUpload generates valid format`() { + val name = "dataset.csv" + val result = PathUtils.createFileNameForUpload(name) + + 
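+        // Per the assertions below, generated names are assumed to follow
+        // "Ingest.V2.Java_<unique part>_<original name>", with the unique
+        // part presumably timestamp-based (see Thread.sleep in the next test).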
assertTrue(result.startsWith("Ingest.V2.Java_")) + assertTrue(result.endsWith("_dataset.csv")) + assertTrue(result.contains("_")) + } + + @Test + fun `createFileNameForUpload generates unique names`() { + val name = "test.json" + val result1 = PathUtils.createFileNameForUpload(name) + Thread.sleep(10) // Ensure different timestamps + val result2 = PathUtils.createFileNameForUpload(name) + + assertNotEquals(result1, result2) + } + + @Test + fun `getBasename extracts filename from windows path`() { + val result = PathUtils.getBasename("C:\\path\\to\\file.csv.gz") + assertEquals("file.csv.gz", result) + } + + @Test + fun `getBasename extracts filename from unix path`() { + val result = PathUtils.getBasename("/path/to/file.csv.gz") + assertEquals("file.csv.gz", result) + } + + @Test + fun `getBasename extracts filename from URL`() { + val result = + PathUtils.getBasename("https://example.com/path/to/file.csv.gz") + assertEquals("file.csv.gz", result) + } + + @Test + fun `getBasename handles URL with query parameters`() { + val result = + PathUtils.getBasename( + "https://example.com/path/file.csv?query=value", + ) + assertEquals("file.csv", result) + } + + @Test + fun `getBasename handles URL with fragment`() { + val result = + PathUtils.getBasename( + "https://example.com/path/file.csv#section", + ) + assertEquals("file.csv", result) + } + + @Test + fun `getBasename handles URL with semicolon`() { + val result = + PathUtils.getBasename( + "https://example.com/path/file.csv;jsessionid=123", + ) + assertEquals("file.csv", result) + } + + @Test + fun `getBasename returns null for null input`() { + val result = PathUtils.getBasename(null) + assertNull(result) + } + + @Test + fun `getBasename returns blank for blank input`() { + val result = PathUtils.getBasename(" ") + assertEquals(" ", result) + } + + @Test + fun `getBasename handles simple filename`() { + val result = PathUtils.getBasename("file.csv") + assertEquals("file.csv", result) + } + + @Test + fun `getBasename handles mixed path separators`() { + val result = PathUtils.getBasename("C:\\path/to\\file.csv") + assertEquals("file.csv", result) + } + + @Test + fun `getBasename handles path ending with separator`() { + val result = PathUtils.getBasename("/path/to/") + assertEquals("", result) + } + + @Test + fun `getBasename handles blob storage URL`() { + val result = + PathUtils.getBasename( + "https://account.blob.core.windows.net/container/file.csv.gz?sp=r&st=2024", + ) + assertEquals("file.csv.gz", result) + } + + @Test + fun `sanitizeFileName preserves hyphens and underscores`() { + val sourceId = UUID.randomUUID() + val result = PathUtils.sanitizeFileName("my-file_name.csv", sourceId) + + assertTrue(result.contains("my-file_name-csv")) + } + + @Test + fun `sanitizeFileName preserves alphanumeric characters`() { + val sourceId = UUID.randomUUID() + val result = PathUtils.sanitizeFileName("file123ABC.csv", sourceId) + + assertTrue(result.contains("file123ABC-csv")) + } + + @Test + fun `createFileNameForUpload handles special characters in name`() { + val name = "my-file@#\$.csv" + val result = PathUtils.createFileNameForUpload(name) + + assertTrue(result.startsWith("Ingest.V2.Java_")) + assertTrue(result.endsWith("_my-file@#\$.csv")) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt new file mode 100644 index 000000000..20d69168a --- /dev/null +++ 
b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt @@ -0,0 +1,181 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.source + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import org.junit.jupiter.api.io.TempDir +import java.io.ByteArrayInputStream +import java.nio.file.Files +import java.nio.file.Path +import java.util.UUID +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +/** Unit tests for Source classes (FileSource, StreamSource, BlobSource). */ +class SourceClassesTest { + + @Test + fun `FileSource should detect GZIP compression from file extension`( + @TempDir tempDir: Path, + ) { + val testFile = tempDir.resolve("test.json.gz") + Files.write(testFile, byteArrayOf(1, 2, 3)) + + val source = FileSource(testFile, Format.json) + + assertEquals(CompressionType.GZIP, source.compressionType) + assertEquals(Format.json, source.format) + assertNotNull(source.sourceId) + } + + @Test + fun `FileSource should detect ZIP compression from file extension`( + @TempDir tempDir: Path, + ) { + val testFile = tempDir.resolve("test.json.zip") + Files.write(testFile, byteArrayOf(1, 2, 3)) + + val source = FileSource(testFile, Format.json) + + assertEquals(CompressionType.ZIP, source.compressionType) + } + + @Test + fun `FileSource should default to NONE compression for uncompressed files`( + @TempDir tempDir: Path, + ) { + val testFile = tempDir.resolve("test.json") + Files.write(testFile, byteArrayOf(1, 2, 3)) + + val source = FileSource(testFile, Format.json) + + assertEquals(CompressionType.NONE, source.compressionType) + } + + @Test + fun `FileSource should allow explicit compression type override`( + @TempDir tempDir: Path, + ) { + val testFile = tempDir.resolve("test.json") + Files.write(testFile, byteArrayOf(1, 2, 3)) + + val source = + FileSource( + testFile, + Format.json, + compressionType = CompressionType.GZIP, + ) + + assertEquals(CompressionType.GZIP, source.compressionType) + } + + @Test + fun `FileSource should return file size`(@TempDir tempDir: Path) { + val testData = "Test data".toByteArray() + val testFile = tempDir.resolve("test.json") + Files.write(testFile, testData) + + val source = FileSource(testFile, Format.json) + + assertEquals(testData.size.toLong(), source.size()) + } + + @Test + fun `FileSource should use custom sourceId when provided`( + @TempDir tempDir: Path, + ) { + val testFile = tempDir.resolve("test.json") + Files.write(testFile, byteArrayOf(1, 2, 3)) + val customId = UUID.randomUUID() + + val source = + FileSource( + testFile, + Format.json, + customId, + CompressionType.NONE, + ) + + assertEquals(customId, source.sourceId) + } + + @Test + fun `StreamSource should create with basic parameters`() { + val testData = "Test data".toByteArray() + val stream = ByteArrayInputStream(testData) + + val source = + StreamSource( + stream = stream, + format = Format.json, + sourceCompression = CompressionType.NONE, + sourceId = UUID.randomUUID(), + baseName = "test-stream", + leaveOpen = false, + ) + + assertEquals(CompressionType.NONE, source.compressionType) + assertEquals(Format.json, source.format) + assertNotNull(source.data()) + } + + @Test + fun `BlobSource should create with blob path and format`() { + val blobPath = + "https://storage.blob.core.windows.net/container/blob.json" + + val source = + BlobSource( + blobPath = blobPath, + format = Format.json, + sourceId = 
UUID.randomUUID(), + compressionType = CompressionType.NONE, + ) + + assertEquals(blobPath, source.blobPath) + assertEquals(Format.json, source.format) + assertEquals(CompressionType.NONE, source.compressionType) + } + + @Test + fun `CompressionType enum should have expected values`() { + assertEquals(3, CompressionType.values().size) + assertTrue(CompressionType.values().contains(CompressionType.NONE)) + assertTrue(CompressionType.values().contains(CompressionType.GZIP)) + assertTrue(CompressionType.values().contains(CompressionType.ZIP)) + } + + @Test + fun `FileSource detectCompressionFromPath should handle various extensions`() { + val gzPath = Path.of("test.json.gz") + assertEquals( + CompressionType.GZIP, + FileSource.detectCompressionFromPath(gzPath), + ) + + val zipPath = Path.of("test.json.zip") + assertEquals( + CompressionType.ZIP, + FileSource.detectCompressionFromPath(zipPath), + ) + + val plainPath = Path.of("test.json") + assertEquals( + CompressionType.NONE, + FileSource.detectCompressionFromPath(plainPath), + ) + } + + @Test + fun `FormatUtil should correctly identify binary formats`() { + assertTrue(FormatUtil.isBinaryFormat(Format.avro)) + assertTrue(FormatUtil.isBinaryFormat(Format.parquet)) + assertTrue(FormatUtil.isBinaryFormat(Format.orc)) + + assertTrue(!FormatUtil.isBinaryFormat(Format.json)) + assertTrue(!FormatUtil.isBinaryFormat(Format.csv)) + assertTrue(!FormatUtil.isBinaryFormat(Format.tsv)) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilderTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilderTest.kt new file mode 100644 index 000000000..7f6975b56 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderBuilderTest.kt @@ -0,0 +1,180 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
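+//
+// A minimal sketch of the fluent surface these tests drive (values are
+// illustrative; the constraints are the ones asserted below):
+//
+//   ManagedUploaderBuilder.create()
+//       .withIgnoreSizeLimit(true)
+//       .withMaxConcurrency(10)        // rejects values <= 0
+//       .withMaxDataSize(4096L)        // rejects values <= 0
+//       .withConfigurationCache(cache) // required: build() throws without it
+//       .withUploadMethod(UploadMethod.STORAGE)
+//       .withRetryPolicy(policy)
+//       .withTokenCredential(credential)
+//       .build()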
+package com.microsoft.azure.kusto.ingest.v2.uploader
+
+import com.azure.core.credential.TokenCredential
+import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache
+import com.microsoft.azure.kusto.ingest.v2.common.IngestRetryPolicy
+import io.mockk.mockk
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertThrows
+import kotlin.test.assertNotNull
+
+class ManagedUploaderBuilderTest {
+
+    private val mockConfigurationCache: ConfigurationCache =
+        mockk(relaxed = true)
+    private val mockTokenCredential: TokenCredential = mockk(relaxed = true)
+    private val mockRetryPolicy: IngestRetryPolicy = mockk(relaxed = true)
+
+    @Test
+    fun `create should return builder instance`() {
+        val builder = ManagedUploaderBuilder.create()
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withIgnoreSizeLimit true should succeed`() {
+        val builder = ManagedUploaderBuilder.create().withIgnoreSizeLimit(true)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withIgnoreSizeLimit false should succeed`() {
+        val builder = ManagedUploaderBuilder.create().withIgnoreSizeLimit(false)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withMaxConcurrency with positive value should succeed`() {
+        val builder = ManagedUploaderBuilder.create().withMaxConcurrency(10)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withMaxConcurrency with zero should throw exception`() {
+        val builder = ManagedUploaderBuilder.create()
+        assertThrows<IllegalArgumentException> { builder.withMaxConcurrency(0) }
+    }
+
+    @Test
+    fun `withMaxConcurrency with negative value should throw exception`() {
+        val builder = ManagedUploaderBuilder.create()
+        assertThrows<IllegalArgumentException> {
+            builder.withMaxConcurrency(-1)
+        }
+    }
+
+    @Test
+    fun `withMaxDataSize with positive value should succeed`() {
+        val builder = ManagedUploaderBuilder.create().withMaxDataSize(1024L)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withMaxDataSize with zero should throw exception`() {
+        val builder = ManagedUploaderBuilder.create()
+        assertThrows<IllegalArgumentException> { builder.withMaxDataSize(0L) }
+    }
+
+    @Test
+    fun `withMaxDataSize with negative value should throw exception`() {
+        val builder = ManagedUploaderBuilder.create()
+        assertThrows<IllegalArgumentException> {
+            builder.withMaxDataSize(-100L)
+        }
+    }
+
+    @Test
+    fun `withConfigurationCache should accept configuration`() {
+        val builder =
+            ManagedUploaderBuilder.create()
+                .withConfigurationCache(mockConfigurationCache)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withUploadMethod STORAGE should succeed`() {
+        val builder =
+            ManagedUploaderBuilder.create()
+                .withUploadMethod(UploadMethod.STORAGE)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withUploadMethod LAKE should succeed`() {
+        val builder =
+            ManagedUploaderBuilder.create()
+                .withUploadMethod(UploadMethod.LAKE)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withUploadMethod DEFAULT should succeed`() {
+        val builder =
+            ManagedUploaderBuilder.create()
+                .withUploadMethod(UploadMethod.DEFAULT)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withRetryPolicy should accept custom policy`() {
+        val builder =
+            ManagedUploaderBuilder.create().withRetryPolicy(mockRetryPolicy)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `withTokenCredential should accept credential`() {
+        val builder =
+            ManagedUploaderBuilder.create()
+                .withTokenCredential(mockTokenCredential)
+        assertNotNull(builder)
+    }
+
+    @Test
+    fun `build without configuration cache should throw exception`() {
+        val builder = ManagedUploaderBuilder.create()
+        assertThrows<IllegalStateException> { builder.build() }
+    }
+
+    @Test
+    fun `build with configuration cache should succeed`() {
+        val uploader =
+ ManagedUploaderBuilder.create() + .withConfigurationCache(mockConfigurationCache) + .build() + assertNotNull(uploader) + } + + @Test + fun `builder methods should return self for chaining`() { + val builder = ManagedUploaderBuilder.create() + val result = + builder.withIgnoreSizeLimit(true) + .withMaxConcurrency(5) + .withMaxDataSize(2048L) + .withConfigurationCache(mockConfigurationCache) + .withUploadMethod(UploadMethod.STORAGE) + .withRetryPolicy(mockRetryPolicy) + .withTokenCredential(mockTokenCredential) + + assertNotNull(result) + } + + @Test + fun `build with all optional parameters should succeed`() { + val uploader = + ManagedUploaderBuilder.create() + .withIgnoreSizeLimit(true) + .withMaxConcurrency(10) + .withMaxDataSize(4096L) + .withConfigurationCache(mockConfigurationCache) + .withUploadMethod(UploadMethod.LAKE) + .withRetryPolicy(mockRetryPolicy) + .withTokenCredential(mockTokenCredential) + .build() + + assertNotNull(uploader) + } + + @Test + fun `build with minimal parameters should succeed`() { + val uploader = + ManagedUploaderBuilder.create() + .withConfigurationCache(mockConfigurationCache) + .build() + + assertNotNull(uploader) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploaderModelsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploaderModelsTest.kt new file mode 100644 index 000000000..1643ac3b9 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/UploaderModelsTest.kt @@ -0,0 +1,203 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader + +import com.microsoft.azure.kusto.ingest.v2.models.ContainerInfo +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNotEquals +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test + +class UploaderModelsTest { + + // UploadMethod enum tests + @Test + fun `UploadMethod enum has correct number of values`() { + assertEquals(3, UploadMethod.values().size) + } + + @Test + fun `UploadMethod DEFAULT exists`() { + assertNotNull(UploadMethod.DEFAULT) + assertEquals("DEFAULT", UploadMethod.DEFAULT.name) + } + + @Test + fun `UploadMethod STORAGE exists`() { + assertNotNull(UploadMethod.STORAGE) + assertEquals("STORAGE", UploadMethod.STORAGE.name) + } + + @Test + fun `UploadMethod LAKE exists`() { + assertNotNull(UploadMethod.LAKE) + assertEquals("LAKE", UploadMethod.LAKE.name) + } + + @Test + fun `UploadMethod valueOf works correctly`() { + assertEquals(UploadMethod.DEFAULT, UploadMethod.valueOf("DEFAULT")) + assertEquals(UploadMethod.STORAGE, UploadMethod.valueOf("STORAGE")) + assertEquals(UploadMethod.LAKE, UploadMethod.valueOf("LAKE")) + } + + @Test + fun `UploadMethod values returns all enum constants`() { + val values = UploadMethod.entries.toTypedArray() + assertTrue(values.contains(UploadMethod.DEFAULT)) + assertTrue(values.contains(UploadMethod.STORAGE)) + assertTrue(values.contains(UploadMethod.LAKE)) + } + + // ExtendedContainerInfo tests + @Test + fun `ExtendedContainerInfo creates correctly with DEFAULT method`() { + val containerInfo = + ContainerInfo( + path = + "https://example.blob.core.windows.net/container?sv=2020-08-04&st=...", + ) + val extended = + ExtendedContainerInfo(containerInfo, UploadMethod.DEFAULT) + + assertEquals(containerInfo, extended.containerInfo) + 
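+        // ExtendedContainerInfo is exercised throughout as a data class
+        // pairing a ContainerInfo with an UploadMethod; both components
+        // are asserted here.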
assertEquals(UploadMethod.DEFAULT, extended.uploadMethod) + } + + @Test + fun `ExtendedContainerInfo creates correctly with STORAGE method`() { + val containerInfo = + ContainerInfo( + path = + "https://storage.blob.core.windows.net/data?sv=2020-08-04&st=...", + ) + val extended = + ExtendedContainerInfo(containerInfo, UploadMethod.STORAGE) + + assertEquals(containerInfo, extended.containerInfo) + assertEquals(UploadMethod.STORAGE, extended.uploadMethod) + } + + @Test + fun `ExtendedContainerInfo creates correctly with LAKE method`() { + val containerInfo = + ContainerInfo( + path = + "https://onelake.dfs.fabric.microsoft.com/workspace/lakehouse?sv=2020-08-04&st=...", + ) + val extended = ExtendedContainerInfo(containerInfo, UploadMethod.LAKE) + + assertEquals(containerInfo, extended.containerInfo) + assertEquals(UploadMethod.LAKE, extended.uploadMethod) + } + + @Test + fun `ExtendedContainerInfo data class equality works`() { + val containerInfo1 = + ContainerInfo("https://url1.blob.core.windows.net?token1") + val containerInfo2 = + ContainerInfo("https://url1.blob.core.windows.net?token1") + val containerInfo3 = + ContainerInfo("https://url2.blob.core.windows.net?token2") + + val extended1 = + ExtendedContainerInfo(containerInfo1, UploadMethod.DEFAULT) + val extended2 = + ExtendedContainerInfo(containerInfo2, UploadMethod.DEFAULT) + val extended3 = + ExtendedContainerInfo(containerInfo3, UploadMethod.DEFAULT) + val extended4 = + ExtendedContainerInfo(containerInfo1, UploadMethod.STORAGE) + + assertEquals(extended1, extended2) + assertNotEquals(extended1, extended3) + assertNotEquals(extended1, extended4) + } + + @Test + fun `ExtendedContainerInfo data class hashCode works`() { + val containerInfo = + ContainerInfo("https://url.blob.core.windows.net?token") + val extended1 = + ExtendedContainerInfo(containerInfo, UploadMethod.DEFAULT) + val extended2 = + ExtendedContainerInfo(containerInfo, UploadMethod.DEFAULT) + + assertEquals(extended1.hashCode(), extended2.hashCode()) + } + + @Test + fun `ExtendedContainerInfo data class copy works`() { + val containerInfo = + ContainerInfo("https://url.blob.core.windows.net?token") + val original = + ExtendedContainerInfo(containerInfo, UploadMethod.DEFAULT) + val copied = original.copy(uploadMethod = UploadMethod.LAKE) + + assertEquals(original.containerInfo, copied.containerInfo) + assertEquals(UploadMethod.DEFAULT, original.uploadMethod) + assertEquals(UploadMethod.LAKE, copied.uploadMethod) + } + + @Test + fun `ExtendedContainerInfo copy can change containerInfo`() { + val containerInfo1 = + ContainerInfo("https://url1.blob.core.windows.net?token1") + val containerInfo2 = + ContainerInfo("https://url2.blob.core.windows.net?token2") + val original = + ExtendedContainerInfo(containerInfo1, UploadMethod.DEFAULT) + val copied = original.copy(containerInfo = containerInfo2) + + assertEquals(containerInfo2, copied.containerInfo) + assertEquals(original.uploadMethod, copied.uploadMethod) + } + + @Test + fun `ExtendedContainerInfo toString contains all fields`() { + val containerInfo = + ContainerInfo("https://url.blob.core.windows.net?token") + val extended = + ExtendedContainerInfo(containerInfo, UploadMethod.STORAGE) + + val stringRep = extended.toString() + assertTrue(stringRep.contains("containerInfo")) + assertTrue(stringRep.contains("uploadMethod")) + } + + @Test + fun `ExtendedContainerInfo component functions work`() { + val containerInfo = + ContainerInfo("https://url.blob.core.windows.net?token") + val extended = 
ExtendedContainerInfo(containerInfo, UploadMethod.LAKE) + + val (info, method) = extended + + assertEquals(containerInfo, info) + assertEquals(UploadMethod.LAKE, method) + } + + @Test + fun `ExtendedContainerInfo works with different upload methods`() { + val containerInfo = + ContainerInfo("https://url.blob.core.windows.net?token") + + val default = ExtendedContainerInfo(containerInfo, UploadMethod.DEFAULT) + val storage = ExtendedContainerInfo(containerInfo, UploadMethod.STORAGE) + val lake = ExtendedContainerInfo(containerInfo, UploadMethod.LAKE) + + assertNotEquals(default, storage) + assertNotEquals(storage, lake) + assertNotEquals(default, lake) + } + + @Test + fun `ExtendedContainerInfo handles empty SAS token`() { + val containerInfo = ContainerInfo("https://url.blob.core.windows.net") + val extended = + ExtendedContainerInfo(containerInfo, UploadMethod.DEFAULT) + + assertNotNull(extended.containerInfo.path) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt new file mode 100644 index 000000000..419253a48 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt @@ -0,0 +1,249 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader.compression + +import kotlinx.coroutines.runBlocking +import java.io.ByteArrayInputStream +import java.io.IOException +import java.io.InputStream +import java.util.zip.GZIPInputStream +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFailsWith +import kotlin.test.assertSame +import kotlin.test.assertTrue + +/** Unit tests for compression strategy implementations. */ +class CompressionStrategyTest { + + // ==================== GzipCompressionStrategy Tests ==================== + + @Test + fun `GzipCompressionStrategy should have correct compression type`() { + val strategy = GzipCompressionStrategy() + assertEquals("gzip", strategy.compressionType) + } + + @Test + fun `GzipCompressionStrategy should compress data correctly`() = + runBlocking { + val strategy = GzipCompressionStrategy() + val originalData = + "Hello, World! This is a test string for compression." 
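+            // Round-trip: compress via the strategy, then decompress with
+            // java.util.zip.GZIPInputStream and compare against the input.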
+            val inputStream =
+                ByteArrayInputStream(originalData.toByteArray())
+
+            val compressedStream =
+                strategy.compress(
+                    inputStream,
+                    originalData.length.toLong(),
+                )
+
+            // Verify we can decompress and get original data
+            val decompressedData =
+                GZIPInputStream(compressedStream)
+                    .bufferedReader()
+                    .readText()
+            assertEquals(originalData, decompressedData)
+        }
+
+    @Test
+    fun `GzipCompressionStrategy should compress empty stream`() = runBlocking {
+        val strategy = GzipCompressionStrategy()
+        val inputStream = ByteArrayInputStream(ByteArray(0))
+
+        val compressedStream = strategy.compress(inputStream, 0)
+
+        // Verify we can decompress empty data
+        val decompressedData =
+            GZIPInputStream(compressedStream).bufferedReader().readText()
+        assertEquals("", decompressedData)
+    }
+
+    @Test
+    fun `GzipCompressionStrategy should handle large data`() = runBlocking {
+        val strategy = GzipCompressionStrategy()
+        val largeData = "A".repeat(100_000)
+        val inputStream = ByteArrayInputStream(largeData.toByteArray())
+
+        val compressedStream =
+            strategy.compress(inputStream, largeData.length.toLong())
+
+        // Verify compressed size is smaller than original
+        val compressedBytes = compressedStream.readBytes()
+        assertTrue(compressedBytes.size < largeData.length)
+
+        // Verify we can decompress and get original data
+        val decompressedData =
+            GZIPInputStream(ByteArrayInputStream(compressedBytes))
+                .bufferedReader()
+                .readText()
+        assertEquals(largeData, decompressedData)
+    }
+
+    @Test
+    fun `GzipCompressionStrategy should work with unknown estimated size`() =
+        runBlocking {
+            val strategy = GzipCompressionStrategy()
+            val originalData = "Test data without estimated size"
+            val inputStream =
+                ByteArrayInputStream(originalData.toByteArray())
+
+            // Use 0 as estimated size (unknown)
+            val compressedStream = strategy.compress(inputStream, 0)
+
+            val decompressedData =
+                GZIPInputStream(compressedStream)
+                    .bufferedReader()
+                    .readText()
+            assertEquals(originalData, decompressedData)
+        }
+
+    @Test
+    fun `GzipCompressionStrategy should work with negative estimated size`() =
+        runBlocking {
+            val strategy = GzipCompressionStrategy()
+            val originalData = "Test data with negative estimated size"
+            val inputStream =
+                ByteArrayInputStream(originalData.toByteArray())
+
+            // Use negative as estimated size (invalid)
+            val compressedStream = strategy.compress(inputStream, -1)
+
+            val decompressedData =
+                GZIPInputStream(compressedStream)
+                    .bufferedReader()
+                    .readText()
+            assertEquals(originalData, decompressedData)
+        }
+
+    @Test
+    fun `GzipCompressionStrategy should handle binary data`() = runBlocking {
+        val strategy = GzipCompressionStrategy()
+        val binaryData = ByteArray(256) { it.toByte() }
+        val inputStream = ByteArrayInputStream(binaryData)
+
+        val compressedStream =
+            strategy.compress(inputStream, binaryData.size.toLong())
+
+        val decompressedData = GZIPInputStream(compressedStream).readBytes()
+        assertTrue(binaryData.contentEquals(decompressedData))
+    }
+
+    @Test
+    fun `GzipCompressionStrategy should throw CompressionException on IO error`() =
+        runBlocking<Unit> {
+            val strategy = GzipCompressionStrategy()
+            val failingStream =
+                object : InputStream() {
+                    override fun read(): Int {
+                        throw IOException("Simulated IO error")
+                    }
+                }
+
+            assertFailsWith<CompressionException> {
+                strategy.compress(failingStream, 100)
+            }
+        }
+
+    // ==================== NoCompressionStrategy Tests ====================
+
+    @Test
+    fun `NoCompressionStrategy should have correct compression type`() {
+        val strategy = NoCompressionStrategy()
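+        // NoCompressionStrategy is a pass-through; the tests below assert
+        // that compress() returns the very same stream instance.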
assertEquals("none", strategy.compressionType) + } + + @Test + fun `NoCompressionStrategy should return same stream`() = runBlocking { + val strategy = NoCompressionStrategy() + val originalData = "Test data" + val inputStream = ByteArrayInputStream(originalData.toByteArray()) + + val resultStream = + strategy.compress(inputStream, originalData.length.toLong()) + + // Should be the exact same stream instance + assertSame(inputStream, resultStream) + } + + @Test + fun `NoCompressionStrategy should pass through data unchanged`() = + runBlocking { + val strategy = NoCompressionStrategy() + val originalData = "Unchanged data" + val inputStream = + ByteArrayInputStream(originalData.toByteArray()) + + val resultStream = + strategy.compress( + inputStream, + originalData.length.toLong(), + ) + val resultData = resultStream.bufferedReader().readText() + + assertEquals(originalData, resultData) + } + + @Test + fun `NoCompressionStrategy INSTANCE should be singleton`() { + val instance1 = NoCompressionStrategy.INSTANCE + val instance2 = NoCompressionStrategy.INSTANCE + + assertSame(instance1, instance2) + } + + @Test + fun `NoCompressionStrategy should handle empty stream`() = runBlocking { + val strategy = NoCompressionStrategy() + val inputStream = ByteArrayInputStream(ByteArray(0)) + + val resultStream = strategy.compress(inputStream, 0) + + assertSame(inputStream, resultStream) + assertEquals(0, resultStream.available()) + } + + @Test + fun `NoCompressionStrategy should ignore estimated size`() = runBlocking { + val strategy = NoCompressionStrategy() + val originalData = "Test" + val inputStream = ByteArrayInputStream(originalData.toByteArray()) + + // Any estimated size should work the same + val resultStream = strategy.compress(inputStream, 999999) + + assertSame(inputStream, resultStream) + } + + // ==================== CompressionException Tests ==================== + + @Test + fun `CompressionException should store message`() { + val exception = CompressionException("Test error message") + assertEquals("Test error message", exception.message) + } + + @Test + fun `CompressionException should store cause`() { + val cause = IOException("Original error") + val exception = CompressionException("Wrapper message", cause) + + assertEquals("Wrapper message", exception.message) + assertSame(cause, exception.cause) + } + + @Test + fun `CompressionException should allow null cause`() { + val exception = CompressionException("Message only") + + assertEquals("Message only", exception.message) + assertEquals(null, exception.cause) + } + + @Test + fun `CompressionException should be RuntimeException`() { + val exception = CompressionException("Test") + assertTrue(exception is RuntimeException) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadModelsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadModelsTest.kt new file mode 100644 index 000000000..b64371f4b --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/models/UploadModelsTest.kt @@ -0,0 +1,402 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader.models + +import java.time.Instant +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertNull +import kotlin.test.assertTrue + +/** Unit tests for upload models. 
*/ +class UploadModelsTest { + + // ==================== UploadErrorCode Tests ==================== + + @Test + fun `UploadErrorCode SOURCE_IS_NULL should have correct code and description`() { + val errorCode = UploadErrorCode.SOURCE_IS_NULL + assertEquals("UploadError_SourceIsNull", errorCode.code) + assertEquals("Upload source is null", errorCode.description) + assertEquals("UploadError_SourceIsNull", errorCode.toString()) + } + + @Test + fun `UploadErrorCode SOURCE_NOT_FOUND should have correct code and description`() { + val errorCode = UploadErrorCode.SOURCE_NOT_FOUND + assertEquals("UploadError_SourceNotFound", errorCode.code) + assertEquals("Upload source not found", errorCode.description) + } + + @Test + fun `UploadErrorCode SOURCE_NOT_READABLE should have correct code and description`() { + val errorCode = UploadErrorCode.SOURCE_NOT_READABLE + assertEquals("UploadError_SourceNotReadable", errorCode.code) + assertEquals("Upload source is not readable", errorCode.description) + } + + @Test + fun `UploadErrorCode SOURCE_IS_EMPTY should have correct code and description`() { + val errorCode = UploadErrorCode.SOURCE_IS_EMPTY + assertEquals("UploadError_SourceIsEmpty", errorCode.code) + assertEquals("Upload source is empty", errorCode.description) + } + + @Test + fun `UploadErrorCode SOURCE_SIZE_LIMIT_EXCEEDED should have correct code and description`() { + val errorCode = UploadErrorCode.SOURCE_SIZE_LIMIT_EXCEEDED + assertEquals("UploadError_SourceSizeLimitExceeded", errorCode.code) + assertEquals( + "Upload source exceeds maximum allowed size", + errorCode.description, + ) + } + + @Test + fun `UploadErrorCode UPLOAD_FAILED should have correct code and description`() { + val errorCode = UploadErrorCode.UPLOAD_FAILED + assertEquals("UploadError_Failed", errorCode.code) + assertEquals("Upload operation failed", errorCode.description) + } + + @Test + fun `UploadErrorCode NO_CONTAINERS_AVAILABLE should have correct code and description`() { + val errorCode = UploadErrorCode.NO_CONTAINERS_AVAILABLE + assertEquals("UploadError_NoContainersAvailable", errorCode.code) + assertEquals("No upload containers available", errorCode.description) + } + + @Test + fun `UploadErrorCode CONTAINER_UNAVAILABLE should have correct code and description`() { + val errorCode = UploadErrorCode.CONTAINER_UNAVAILABLE + assertEquals("UploadError_ContainerUnavailable", errorCode.code) + assertEquals("Upload container is unavailable", errorCode.description) + } + + @Test + fun `UploadErrorCode NETWORK_ERROR should have correct code and description`() { + val errorCode = UploadErrorCode.NETWORK_ERROR + assertEquals("UploadError_NetworkError", errorCode.code) + assertEquals("Network error during upload", errorCode.description) + } + + @Test + fun `UploadErrorCode AUTHENTICATION_FAILED should have correct code and description`() { + val errorCode = UploadErrorCode.AUTHENTICATION_FAILED + assertEquals("UploadError_AuthenticationFailed", errorCode.code) + assertEquals("Authentication failed for upload", errorCode.description) + } + + @Test + fun `UploadErrorCode UNKNOWN should have correct code and description`() { + val errorCode = UploadErrorCode.UNKNOWN + assertEquals("UploadError_Unknown", errorCode.code) + assertEquals("Unknown upload error", errorCode.description) + } + + @Test + fun `UploadErrorCode values should return all error codes`() { + val values = UploadErrorCode.values() + assertEquals(11, values.size) + assertTrue(values.contains(UploadErrorCode.SOURCE_IS_NULL)) + 
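+        // Spot-check the first and last declared entries.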
assertTrue(values.contains(UploadErrorCode.UNKNOWN)) + } + + @Test + fun `UploadErrorCode valueOf should return correct enum`() { + assertEquals( + UploadErrorCode.SOURCE_IS_NULL, + UploadErrorCode.valueOf("SOURCE_IS_NULL"), + ) + assertEquals( + UploadErrorCode.UPLOAD_FAILED, + UploadErrorCode.valueOf("UPLOAD_FAILED"), + ) + } + + // ==================== UploadResult.Success Tests ==================== + + @Test + fun `UploadResult Success should store all properties`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val success = + UploadResult.Success( + sourceName = "test-file.csv", + startedAt = startTime, + completedAt = endTime, + blobUrl = + "https://storage.blob.core.windows.net/container/blob", + sizeBytes = 1024, + ) + + assertEquals("test-file.csv", success.sourceName) + assertEquals(startTime, success.startedAt) + assertEquals(endTime, success.completedAt) + assertEquals( + "https://storage.blob.core.windows.net/container/blob", + success.blobUrl, + ) + assertEquals(1024, success.sizeBytes) + } + + @Test + fun `UploadResult Success should support data class copy`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val original = + UploadResult.Success( + sourceName = "original.csv", + startedAt = startTime, + completedAt = endTime, + blobUrl = "https://original.blob", + sizeBytes = 100, + ) + + val copied = original.copy(sourceName = "copied.csv", sizeBytes = 200) + + assertEquals("copied.csv", copied.sourceName) + assertEquals(200, copied.sizeBytes) + assertEquals(original.blobUrl, copied.blobUrl) + } + + @Test + fun `UploadResult Success should support equals and hashCode`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val success1 = + UploadResult.Success( + sourceName = "file.csv", + startedAt = startTime, + completedAt = endTime, + blobUrl = "https://blob", + sizeBytes = 100, + ) + + val success2 = + UploadResult.Success( + sourceName = "file.csv", + startedAt = startTime, + completedAt = endTime, + blobUrl = "https://blob", + sizeBytes = 100, + ) + + assertEquals(success1, success2) + assertEquals(success1.hashCode(), success2.hashCode()) + } + + // ==================== UploadResult.Failure Tests ==================== + + @Test + fun `UploadResult Failure should store all properties`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(5) + val exception = RuntimeException("Test error") + + val failure = + UploadResult.Failure( + sourceName = "failed-file.csv", + startedAt = startTime, + completedAt = endTime, + errorCode = UploadErrorCode.UPLOAD_FAILED, + errorMessage = "Upload failed due to network error", + exception = exception, + isPermanent = true, + ) + + assertEquals("failed-file.csv", failure.sourceName) + assertEquals(startTime, failure.startedAt) + assertEquals(endTime, failure.completedAt) + assertEquals(UploadErrorCode.UPLOAD_FAILED, failure.errorCode) + assertEquals("Upload failed due to network error", failure.errorMessage) + assertEquals(exception, failure.exception) + assertTrue(failure.isPermanent) + } + + @Test + fun `UploadResult Failure should have default isPermanent false`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(5) + + val failure = + UploadResult.Failure( + sourceName = "file.csv", + startedAt = startTime, + completedAt = endTime, + errorCode = UploadErrorCode.NETWORK_ERROR, + errorMessage = "Network timeout", + exception = null, + ) + + assertFalse(failure.isPermanent) + } + + @Test + fun 
`UploadResult Failure should allow null exception`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(5) + + val failure = + UploadResult.Failure( + sourceName = "file.csv", + startedAt = startTime, + completedAt = endTime, + errorCode = UploadErrorCode.SOURCE_NOT_FOUND, + errorMessage = "File not found", + exception = null, + isPermanent = true, + ) + + assertNull(failure.exception) + } + + @Test + fun `UploadResult Failure should support data class copy`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(5) + + val original = + UploadResult.Failure( + sourceName = "original.csv", + startedAt = startTime, + completedAt = endTime, + errorCode = UploadErrorCode.UPLOAD_FAILED, + errorMessage = "Original error", + exception = null, + isPermanent = false, + ) + + val copied = + original.copy(errorMessage = "Copied error", isPermanent = true) + + assertEquals("Copied error", copied.errorMessage) + assertTrue(copied.isPermanent) + assertEquals(original.sourceName, copied.sourceName) + } + + // ==================== UploadResults Tests ==================== + + @Test + fun `UploadResults should store successes and failures`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val successes = + listOf( + UploadResult.Success( + "file1.csv", + startTime, + endTime, + "https://blob1", + 100, + ), + UploadResult.Success( + "file2.csv", + startTime, + endTime, + "https://blob2", + 200, + ), + ) + + val failures = + listOf( + UploadResult.Failure( + "file3.csv", + startTime, + endTime, + UploadErrorCode.UPLOAD_FAILED, + "Error", + null, + ), + ) + + val results = UploadResults(successes, failures) + + assertEquals(2, results.successes.size) + assertEquals(1, results.failures.size) + assertEquals("file1.csv", results.successes[0].sourceName) + assertEquals("file3.csv", results.failures[0].sourceName) + } + + @Test + fun `UploadResults should support empty lists`() { + val results = UploadResults(emptyList(), emptyList()) + + assertTrue(results.successes.isEmpty()) + assertTrue(results.failures.isEmpty()) + } + + @Test + fun `UploadResults should support only successes`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val successes = + listOf( + UploadResult.Success( + "file.csv", + startTime, + endTime, + "https://blob", + 100, + ), + ) + + val results = UploadResults(successes, emptyList()) + + assertEquals(1, results.successes.size) + assertTrue(results.failures.isEmpty()) + } + + @Test + fun `UploadResults should support only failures`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val failures = + listOf( + UploadResult.Failure( + "file.csv", + startTime, + endTime, + UploadErrorCode.UPLOAD_FAILED, + "Error", + null, + ), + ) + + val results = UploadResults(emptyList(), failures) + + assertTrue(results.successes.isEmpty()) + assertEquals(1, results.failures.size) + } + + @Test + fun `UploadResults should support data class equality`() { + val startTime = Instant.now() + val endTime = startTime.plusSeconds(10) + + val successes = + listOf( + UploadResult.Success( + "file.csv", + startTime, + endTime, + "https://blob", + 100, + ), + ) + + val results1 = UploadResults(successes, emptyList()) + val results2 = UploadResults(successes, emptyList()) + + assertEquals(results1, results2) + assertEquals(results1.hashCode(), results2.hashCode()) + } +} From 4ebf781c1fe845aa88ff273e0e7a5d8626688fad Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Thu, 8 Jan 2026 
13:04:23 +0530
Subject: [PATCH 40/50] * Remove unused test assertion

---
 .../v2/uploader/compression/CompressionStrategyTest.kt | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt
index 419253a48..0a23ac0d4 100644
--- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt
+++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt
@@ -240,10 +240,4 @@ class CompressionStrategyTest {
         assertEquals("Message only", exception.message)
         assertEquals(null, exception.cause)
     }
-
-    @Test
-    fun `CompressionException should be RuntimeException`() {
-        val exception = CompressionException("Test")
-        assertTrue(exception is RuntimeException)
-    }
 }

From d3428ec7b93b1b19e818a34d286364e4ffd3f05c Mon Sep 17 00:00:00 2001
From: ag-ramachandran
Date: Thu, 8 Jan 2026 14:26:37 +0530
Subject: [PATCH 41/50] * Remove @JvmOverloads and add log suppression for
 ByteBuddy in MockK and unit tests

---
 ingest-v2/pom.xml                                             | 2 ++
 .../com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt  | 4 +---
 .../com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt  | 1 -
 .../microsoft/azure/kusto/ingest/v2/source/StreamSource.kt    | 4 +---
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml
index b8836ae28..f06c611e5 100644
--- a/ingest-v2/pom.xml
+++ b/ingest-v2/pom.xml
@@ -358,6 +358,8 @@
             8
             1
             true
+            
+            -Dorg.slf4j.simpleLogger.log.io.mockk.proxy.jvm.transformation.InliningClassTransformer=error
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt
index 10345abcf..7ba44ba26 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt
@@ -9,9 +9,7 @@ import java.util.UUID
  * Represents a blob-based ingestion source. This source references data that
  * already exists in blob storage.
  */
-class BlobSource
-@JvmOverloads
-constructor(
+class BlobSource(
     val blobPath: String,
     format: Format = Format.csv,
     sourceId: UUID = UUID.randomUUID(),
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt
index 071a4210c..8346141ed 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt
@@ -14,7 +14,6 @@ import java.util.UUID
 
 /** Represents a file-based ingestion source. */
 class FileSource
-@JvmOverloads
 constructor(
     val path: Path,
     format: Format,
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt
index 527fabce4..b7fb43f9d 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt
@@ -7,9 +7,7 @@ import java.io.InputStream
 import java.util.UUID
 
 /** Represents a stream-based ingestion source. 
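+ * Wraps an [InputStream] together with its format and compression type.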
*/ -class StreamSource -@JvmOverloads -constructor( +class StreamSource( stream: InputStream, format: Format, sourceCompression: CompressionType, From bdce36687db23318d82e5280e4d58c522a7f89bf Mon Sep 17 00:00:00 2001 From: ag-ramachandran Date: Thu, 8 Jan 2026 14:34:55 +0530 Subject: [PATCH 42/50] * Fix lint in tests --- .../ingest/v2/uploader/compression/CompressionStrategyTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt index 0a23ac0d4..fcedf5fb1 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt @@ -131,7 +131,7 @@ class CompressionStrategyTest { } @Test - fun `GzipCompressionStrategy should throw CompressionException on IO error`() = + fun `GzipCompressionStrategy should throw CompressionException on IO error`(): Unit = runBlocking { val strategy = GzipCompressionStrategy() val failingStream = From b9ccf14c5d435f78915eb583fa4814c4ba8efb88 Mon Sep 17 00:00:00 2001 From: Ramachandran A G <106139410+ag-ramachandran@users.noreply.github.com> Date: Fri, 9 Jan 2026 12:13:20 +0530 Subject: [PATCH 43/50] * Minor changes to Streaming ingest tests (#454) --- .../kusto/ingest/v2/source/FileSource.kt | 3 +- .../azure/kusto/ingest/v2/IngestV2TestBase.kt | 5 +- .../ingest/v2/StreamingIngestClientTest.kt | 70 +++++++++++-------- .../compression/CompressionStrategyTest.kt | 21 +++--- .../src/test/resources/ingest/simple.json | 5 ++ 5 files changed, 59 insertions(+), 45 deletions(-) create mode 100644 ingest-v2/src/test/resources/ingest/simple.json diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt index 8346141ed..a7d5e7437 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt @@ -13,8 +13,7 @@ import java.nio.file.Path import java.util.UUID /** Represents a file-based ingestion source. 
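+ * Wraps a [Path] on the local file system together with its format.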
*/ -class FileSource -constructor( +class FileSource( val path: Path, format: Format, sourceId: UUID = UUID.randomUUID(), diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt index a469eb44b..25bf6dc5f 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/IngestV2TestBase.kt @@ -33,8 +33,7 @@ abstract class IngestV2TestBase(testClass: Class<*>) { protected val targetTestFormat = Format.json protected val engineEndpoint: String = dmEndpoint.replace("https://ingest-", "https://") - protected val targetTable: String = - "V2_Java_Tests_Sensor_${UUID.randomUUID().toString().replace("-", "").take(8)}" + lateinit var targetTable: String protected val columnNamesToTypes: Map = mapOf( "timestamp" to "datetime", @@ -50,6 +49,8 @@ abstract class IngestV2TestBase(testClass: Class<*>) { @BeforeEach fun createTables() { + targetTable = + "IngestV2Test_${UUID.randomUUID().toString().replace("-", "_")}" val createTableScript = """ .create-merge table $targetTable ( diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index 08f4f7898..6e8518a97 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -49,7 +49,7 @@ class StreamingIngestClientTest : // isUnreachableHost false, // blobUrl - null, + publicBlobUrl, ), Arguments.of( "Blob based ingest - success", @@ -58,24 +58,23 @@ class StreamingIngestClientTest : false, // isUnreachableHost false, - publicBlobUrl, + null, + ), + Arguments.of( + "Blob based ingest- Invalid blob URL", + engineEndpoint, + // isException + true, + // isUnreachableHost + false, + "https://nonexistentaccount.blob.core.windows.net/container/file.json", ), - // Arguments.of( - // "Blob based ingest- Invalid blob URL", - // engineEndpoint, - // // isException - // true, - // // isUnreachableHost - // false, - // - // "https://nonexistentaccount.blob.core.windows.net/container/file.json", - // ), ) } @ParameterizedTest(name = "{0}") @MethodSource("testParameters") - fun `run streaming ingest test using builder pattern`( + fun `run streaming ingest`( testName: String, cluster: String, isException: Boolean, @@ -105,7 +104,10 @@ class StreamingIngestClientTest : val exception = assertThrows { val ingestionSource = - BlobSource(blobUrl, format = Format.json) + BlobSource( + blobUrl, + format = targetTestFormat, + ) client.ingestAsync( source = ingestionSource, ingestRequestProperties = ingestProps, @@ -124,22 +126,30 @@ class StreamingIngestClientTest : } } } else { - if (blobUrl != null) { - val ingestionSource = BlobSource(blobUrl, format = Format.json) - client.ingestAsync( - source = ingestionSource, - ingestRequestProperties = ingestProps, - ) + val ingestionSource = + if (blobUrl != null) { + BlobSource(blobUrl, format = targetTestFormat) + } else { + val ingestFile = "src/test/resources/ingest/simple.json" + FileSource( + path = Paths.get(ingestFile), + format = targetTestFormat, + sourceId = UUID.randomUUID(), + ) + } + client.ingestAsync( + source = ingestionSource, + ingestRequestProperties = ingestProps, + ) - logger.info( - "Blob-based streaming ingestion 
submitted successfully (builder)", - ) - awaitAndQuery( - query = "$targetTable | summarize count=count()", - expectedResultsCount = 5, - testName = testName, - ) - } + logger.info( + "Blob-based streaming ingestion submitted successfully (builder)", + ) + awaitAndQuery( + query = "$targetTable | summarize count=count()", + expectedResultsCount = 5L, + testName = testName, + ) } } @@ -178,7 +188,7 @@ class StreamingIngestClientTest : ByteArrayInputStream( invalidData.toByteArray(), ), - format = Format.json, + format = targetTestFormat, sourceCompression = CompressionType.NONE, ) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt index fcedf5fb1..2f0eb80ce 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/compression/CompressionStrategyTest.kt @@ -131,20 +131,19 @@ class CompressionStrategyTest { } @Test - fun `GzipCompressionStrategy should throw CompressionException on IO error`(): Unit = - runBlocking { - val strategy = GzipCompressionStrategy() - val failingStream = - object : InputStream() { - override fun read(): Int { - throw IOException("Simulated IO error") - } + fun `GzipCompressionStrategy should throw CompressionException on IO error`(): Unit = runBlocking { + val strategy = GzipCompressionStrategy() + val failingStream = + object : InputStream() { + override fun read(): Int { + throw IOException("Simulated IO error") } - - assertFailsWith { - strategy.compress(failingStream, 100) } + + assertFailsWith { + strategy.compress(failingStream, 100) } + } // ==================== NoCompressionStrategy Tests ==================== diff --git a/ingest-v2/src/test/resources/ingest/simple.json b/ingest-v2/src/test/resources/ingest/simple.json new file mode 100644 index 000000000..c11d10aa9 --- /dev/null +++ b/ingest-v2/src/test/resources/ingest/simple.json @@ -0,0 +1,5 @@ +{ "timestamp": "2019-05-02 15:23:50.0000000", "deviceId": "ddbc1bf5-096f-42c0-a771-bc3dca77ac71", "messageId": "7f316225-839a-4593-92b5-1812949279b3", "temperature": 31.0301639051317, "humidity": 62.0791099602725 } +{ "timestamp": "2019-05-02 15:23:51.0000000", "deviceId": "ddbc1bf5-096f-42c0-a771-bc3dca77ac71", "messageId": "57de2821-7581-40e4-861e-ea3bde102364", "temperature": 33.7529423105311, "humidity": 75.4787976739364 } +{ "timestamp": "2019-05-02 15:23:52.0000000", "deviceId": "ce2a9367-f01d-4d42-b5b0-1841f2965a8e", "messageId": "3c9754ed-839e-4448-bced-ea32b2a557fc", "temperature": 31.8848099801153, "humidity": 72.9356174231207 } +{ "timestamp": "2019-05-02 15:23:53.0000000", "deviceId": "ce2a9367-f01d-4d42-b5b0-1841f2965a8e", "messageId": "dbbd46f2-bc50-4f79-833e-edeae533e786", "temperature": 34.1053494201532, "humidity": 63.0325346454198 } +{ "timestamp": "2019-05-02 15:23:54.0000000", "deviceId": "ce2a9367-f01d-4d42-b5b0-1841f2965a8e", "messageId": "cbfc7c31-4c08-4abf-83ef-718f5d75169a", "temperature": 29.0367494332775, "humidity": 64.7741715818523 } \ No newline at end of file From ce36f64b40b9ae5acbf12aa9e17ac25308f2bac6 Mon Sep 17 00:00:00 2001 From: Ramachandran A G <106139410+ag-ramachandran@users.noreply.github.com> Date: Fri, 16 Jan 2026 01:17:53 +0530 Subject: [PATCH 44/50] Users/ramacg/fix review comments (#457) * * Fix review comments - Make the IngestRequestProperties 
optional * * Fix review comments - Make sure some of the validations like passing IngestionReference and mapping cannot be passed together * * Fix review comments - Make sure some of the validations like passing IngestionReference and mapping cannot be passed together * Add tests for QueuedIngest with inline mapping * * Fix breaking changes to IngestRequestProperties * * Add tests for IngestRequestProperties being optional * * Remove baseName parameter * * Fix review comments * * Fix review comments * * Fix review comments * * Fix review comments --- .../kusto/ingest/v2/client/IngestClient.kt | 8 +- .../v2/client/ManagedStreamingIngestClient.kt | 56 +-- .../ingest/v2/client/QueuedIngestClient.kt | 77 +++- .../ingest/v2/client/StreamingIngestClient.kt | 54 ++- .../policy/DefaultManagedStreamingPolicy.kt | 7 +- .../client/policy/ManagedStreamingPolicy.kt | 6 +- .../v2/common/exceptions/IngestException.kt | 24 +- .../models/IngestRequestPropertiesBuilder.kt | 86 ++--- .../IngestRequestPropertiesExtensions.kt | 28 -- .../common/models/mapping/IngestionMapping.kt | 71 ++++ .../models/mapping/InlineIngestionMapping.kt | 35 -- .../kusto/ingest/v2/common/utils/PathUtils.kt | 11 +- .../kusto/ingest/v2/source/BlobSource.kt | 3 +- .../kusto/ingest/v2/source/FileSource.kt | 2 - .../kusto/ingest/v2/source/IngestionSource.kt | 7 +- .../kusto/ingest/v2/source/LocalSource.kt | 3 +- .../kusto/ingest/v2/source/StreamSource.kt | 4 +- .../kusto/ingest/v2/IngestV2JavaTestBase.java | 11 +- .../ManagedStreamingIngestClientJavaTest.java | 126 ++++--- .../ingest/v2/QueuedIngestClientJavaTest.java | 102 +++--- .../v2/StreamingIngestClientJavaTest.java | 55 +-- .../v2/ManagedStreamingIngestClientTest.kt | 13 +- .../kusto/ingest/v2/QueuedIngestClientTest.kt | 181 ++++++--- .../ingest/v2/StreamingIngestClientTest.kt | 21 +- .../IngestRequestPropertiesBuilderTest.kt | 217 +++++++++++ .../v2/common/models/mapping/MappingTest.kt | 69 +--- .../ingest/v2/common/utils/PathUtilsTest.kt | 52 +-- .../ingest/v2/source/SourceClassesTest.kt | 21 +- .../src/test/resources/compression/sample.csv | 2 + .../azure/kusto/quickstart/SampleApp.java | 49 +-- .../ingestv2/ManagedStreamingIngestV2.java | 238 +++++------- .../main/java/ingestv2/QueuedIngestV2.java | 346 ++++++++---------- .../main/java/ingestv2/StreamingIngestV2.java | 173 ++++----- 33 files changed, 1200 insertions(+), 958 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/IngestionMapping.kt delete mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilderTest.kt create mode 100644 ingest-v2/src/test/resources/compression/sample.csv diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt index 1cf788a33..4799f850d 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt @@ -35,8 +35,10 @@ interface IngestClient : Closeable { * status of the ingestion. 
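+     * Database and table are now passed explicitly, and the request
+     * properties may be null, in which case defaults are applied.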
 */
     suspend fun ingestAsync(
+        database: String,
+        table: String,
         source: IngestionSource,
-        ingestRequestProperties: IngestRequestProperties,
+        ingestRequestProperties: IngestRequestProperties?,
     ): ExtendedIngestResponse
 
     /**
@@ -87,8 +89,10 @@ interface MultiIngestClient : IngestClient {
      * status of the ingestion.
      */
     suspend fun ingestAsync(
+        database: String,
+        table: String,
         sources: List,
-        ingestRequestProperties: IngestRequestProperties,
+        ingestRequestProperties: IngestRequestProperties?,
     ): ExtendedIngestResponse
 
     /**
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt
index e738ba071..f63fc534c 100644
--- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/ManagedStreamingIngestClient.kt
@@ -12,8 +12,8 @@ import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestClientExcepti
 import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException
 import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse
 import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind
-import com.microsoft.azure.kusto.ingest.v2.common.models.database
-import com.microsoft.azure.kusto.ingest.v2.common.models.table
+import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder
+import com.microsoft.azure.kusto.ingest.v2.common.models.withFormatFromSource
 import com.microsoft.azure.kusto.ingest.v2.common.runWithRetry
 import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties
 import com.microsoft.azure.kusto.ingest.v2.models.Status
@@ -85,32 +85,38 @@ internal constructor(
     }
 
     override suspend fun ingestAsync(
+        database: String,
+        table: String,
         source: IngestionSource,
-        ingestRequestProperties: IngestRequestProperties,
+        ingestRequestProperties: IngestRequestProperties?,
     ): ExtendedIngestResponse {
-        // Extract database and table from properties
-        val database = ingestRequestProperties.database
-        val table = ingestRequestProperties.table
         require(database.isNotBlank()) { "database cannot be blank" }
         require(table.isNotBlank()) { "table cannot be blank" }
+        val effectiveProperties =
+            ingestRequestProperties?.withFormatFromSource(source)
+                ?: IngestRequestPropertiesBuilder.create()
+                    .build()
+                    .withFormatFromSource(source)
+
         return when (source) {
             is BlobSource ->
                 ingestBlobAsync(
                     source,
                     database,
                     table,
-                    ingestRequestProperties,
+                    effectiveProperties,
                 )
             is LocalSource ->
                 ingestLocalAsync(
                     source,
                     database,
                     table,
-                    ingestRequestProperties,
+                    effectiveProperties,
                 )
             else ->
                 throw IllegalArgumentException(
@@ -174,11 +180,13 @@
      */
     @JvmName("ingestAsync")
     fun ingestAsyncJava(
+        database: String,
+        table: String,
         source: IngestionSource,
-        ingestRequestProperties: IngestRequestProperties,
+        ingestRequestProperties: IngestRequestProperties?,
     ): CompletableFuture =
         CoroutineScope(Dispatchers.IO).future {
-            ingestAsync(source, ingestRequestProperties)
+            ingestAsync(database, table, source, ingestRequestProperties)
         }
 
     /**
@@ -216,7 +224,7 @@
         blobSource: BlobSource,
         database: String,
         table: String,
-        ingestRequestProperties: IngestRequestProperties,
+        ingestRequestProperties: IngestRequestProperties?,
     ): ExtendedIngestResponse {
         if (
             shouldUseQueuedIngestByPolicy(
@@ -227,6 
+235,8 @@ internal constructor( ) ) { return invokeQueuedIngestionAsync( + database, + table, blobSource, ingestRequestProperties, ) @@ -243,7 +253,7 @@ internal constructor( source: LocalSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): ExtendedIngestResponse { val stream = source.data() if (!stream.isValidForIngest()) { @@ -266,7 +276,7 @@ internal constructor( props, ) ) { - return invokeQueuedIngestionAsync(source, props) + return invokeQueuedIngestionAsync(database, table, source, props) } return invokeStreamingIngestionAsync(source, database, table, props) } @@ -292,7 +302,7 @@ internal constructor( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): ExtendedIngestResponse { var startTime: Long var currentAttempt = 1u @@ -306,6 +316,8 @@ internal constructor( currentAttempt = attempt val result = streamingIngestClient.ingestAsync( + database, + table, source, props, ) @@ -361,7 +373,7 @@ internal constructor( currentAttempt, lastException?.message, ) - return invokeQueuedIngestionAsync(source, props) + return invokeQueuedIngestionAsync(database, table, source, props) } private fun resetLocalSourceIfPossible(source: IngestionSource) { @@ -381,7 +393,7 @@ internal constructor( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, isPermanent: Boolean, ex: Exception, ): RetryDecision { @@ -416,17 +428,19 @@ internal constructor( } private suspend fun invokeQueuedIngestionAsync( + database: String, + table: String, source: IngestionSource, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): ExtendedIngestResponse { - return queuedIngestClient.ingestAsync(source, props) + return queuedIngestClient.ingestAsync(database, table, source, props) } private fun shouldUseQueuedIngestByPolicy( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): Boolean { if ( managedStreamingPolicy.shouldDefaultToQueuedIngestion( @@ -449,7 +463,7 @@ internal constructor( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ex: Exception, ) { val failureDetails = @@ -484,7 +498,7 @@ internal constructor( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ex: Exception, ) { logger.error("Unexpected error occurred during streaming ingestion", ex) @@ -508,7 +522,7 @@ internal constructor( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): Boolean { val failureDetails = ManagedStreamingRequestFailureDetails( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt index c289145e7..038ac94dc 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt @@ -10,8 +10,7 @@ import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestSizeLimitExceededException import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse 
import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind -import com.microsoft.azure.kusto.ingest.v2.common.models.database -import com.microsoft.azure.kusto.ingest.v2.common.models.table +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder import com.microsoft.azure.kusto.ingest.v2.common.models.withFormatFromSource import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionResultUtils import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse @@ -79,20 +78,34 @@ internal constructor( * suspend function for Kotlin callers. */ override suspend fun ingestAsync( + database: String, + table: String, sources: List, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse = - ingestAsyncInternal(sources, ingestRequestProperties) + ingestAsyncInternal( + database, + table, + sources, + ingestRequestProperties, + ) /** * Ingests data from a single source with the given properties. This is the * suspend function for Kotlin callers. */ override suspend fun ingestAsync( + database: String, + table: String, source: IngestionSource, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse = - ingestAsyncSingleInternal(source, ingestRequestProperties) + ingestAsyncSingleInternal( + database, + table, + source, + ingestRequestProperties, + ) /** * Ingests data from multiple sources with the given properties. This is the @@ -100,11 +113,18 @@ internal constructor( */ @JvmName("ingestAsync") fun ingestAsyncJava( + database: String, + table: String, sources: List, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): CompletableFuture = CoroutineScope(Dispatchers.IO).future { - ingestAsyncInternal(sources, ingestRequestProperties) + ingestAsyncInternal( + database, + table, + sources, + ingestRequestProperties, + ) } /** @@ -113,11 +133,18 @@ internal constructor( */ @JvmName("ingestAsync") fun ingestAsyncJava( + database: String, + table: String, source: IngestionSource, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): CompletableFuture = CoroutineScope(Dispatchers.IO).future { - ingestAsyncSingleInternal(source, ingestRequestProperties) + ingestAsyncSingleInternal( + database, + table, + source, + ingestRequestProperties, + ) } /** @@ -172,13 +199,12 @@ internal constructor( /** Internal implementation of ingestAsync for multiple sources. 
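+     * All sources must share the same format; the batch is submitted as a
+     * single queued ingest request.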
*/ private suspend fun ingestAsyncInternal( + database: String, + table: String, sources: List, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse { // Extract database and table from properties - val database = ingestRequestProperties.database - val table = ingestRequestProperties.table - // Validate sources list is not empty require(sources.isNotEmpty()) { "sources list cannot be empty" } val maxBlobsPerBatch = getMaxSourcesPerMultiIngest() @@ -240,7 +266,10 @@ internal constructor( // Extract format from the first source (all sources have same format as validated above) val effectiveProperties = - ingestRequestProperties.withFormatFromSource(sources.first()) + ingestRequestProperties?.withFormatFromSource(sources.first()) + ?: IngestRequestPropertiesBuilder.create() + .build() + .withFormatFromSource(sources.first()) val ingestRequest = IngestRequest( @@ -282,20 +311,32 @@ internal constructor( /** Internal implementation of ingestAsync for a single source. */ private suspend fun ingestAsyncSingleInternal( + database: String, + table: String, source: IngestionSource, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse { when (source) { is BlobSource -> { // Pass the source to multi-source method which will extract format - return ingestAsync(listOf(source), ingestRequestProperties) + return ingestAsync( + database, + table, + listOf(source), + ingestRequestProperties, + ) } is LocalSource -> { // Upload the local source to blob storage, then ingest // Note: We pass the original LocalSource to preserve format information val blobSource = uploader.uploadAsync(source) // Use the original source's format - return ingestAsync(listOf(blobSource), ingestRequestProperties) + return ingestAsync( + database, + table, + listOf(blobSource), + ingestRequestProperties, + ) } else -> { throw IngestClientException( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt index a6fdc9f77..7883cb411 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/StreamingIngestClient.kt @@ -9,8 +9,7 @@ import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestRequestExcept import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestServiceException import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind -import com.microsoft.azure.kusto.ingest.v2.common.models.database -import com.microsoft.azure.kusto.ingest.v2.common.models.table +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder import com.microsoft.azure.kusto.ingest.v2.common.models.withFormatFromSource import com.microsoft.azure.kusto.ingest.v2.common.utils.IngestionUtils import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse @@ -104,10 +103,17 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { * ingestion kind. 
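+     * A null [ingestRequestProperties] falls back to built defaults, with
+     * the format always taken from the source.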
*/ override suspend fun ingestAsync( + database: String, + table: String, source: IngestionSource, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse = - ingestAsyncInternal(source, ingestRequestProperties) + ingestAsyncInternal( + database, + table, + source, + ingestRequestProperties, + ) /** * Ingests data from the specified source with the given properties. This is @@ -122,11 +128,18 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { */ @JvmName("ingestAsync") fun ingestAsyncJava( + database: String, + table: String, source: IngestionSource, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): CompletableFuture = CoroutineScope(Dispatchers.IO).future { - ingestAsyncInternal(source, ingestRequestProperties) + ingestAsyncInternal( + database, + table, + source, + ingestRequestProperties, + ) } /** @@ -170,17 +183,17 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { * versions call. */ private suspend fun ingestAsyncInternal( + database: String, + table: String, source: IngestionSource, - ingestRequestProperties: IngestRequestProperties, + ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse { // Inject format from source into properties val effectiveProperties = - ingestRequestProperties.withFormatFromSource(source) - - // Extract database and table from properties - val database = effectiveProperties.database - val table = effectiveProperties.table - + ingestRequestProperties?.withFormatFromSource(source) + ?: IngestRequestPropertiesBuilder.create() + .build() + .withFormatFromSource(source) // Streaming ingestion processes one source at a time val maxSize = getMaxStreamingIngestSize( @@ -264,7 +277,7 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { database: String, table: String, data: ByteArray, - ingestProperties: IngestRequestProperties, + ingestProperties: IngestRequestProperties?, blobUrl: String? 
= null, compressionType: CompressionType, ) { @@ -306,10 +319,11 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { this.apiClient.api.postStreamingIngest( database = database, table = table, - streamFormat = ingestProperties.format, + streamFormat = + ingestProperties?.format ?: Format.csv, body = bodyContent, mappingName = - ingestProperties.ingestionMappingReference, + ingestProperties?.ingestionMappingReference, sourceKind = sourceKind, host = host, acceptEncoding = null, @@ -441,11 +455,11 @@ internal constructor(private val apiClient: KustoBaseApiClient) : IngestClient { ) } else { throw IngestServiceException( - errorCode = errorDetails?.code, - errorReason = errorDetails?.type, + errorCode = errorDetails.code, + errorReason = errorDetails.type, errorMessage = - errorDetails?.description - ?: errorDetails?.message, + errorDetails.description + ?: errorDetails.message, failureCode = failureCode, isPermanent = false, message = errorMessage, diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt index 83f452f18..8a17c610d 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/DefaultManagedStreamingPolicy.kt @@ -58,7 +58,7 @@ class DefaultManagedStreamingPolicy( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): Boolean { val key = "$database-$table" @@ -83,11 +83,10 @@ class DefaultManagedStreamingPolicy( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, failureDetails: ManagedStreamingRequestFailureDetails, ) { val key = "$database-$table" - when (failureDetails.errorCategory) { ManagedStreamingErrorCategory.STREAMING_INGESTION_OFF, ManagedStreamingErrorCategory @@ -119,7 +118,7 @@ class DefaultManagedStreamingPolicy( source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, successDetails: ManagedStreamingRequestSuccessDetails, ) { // Default implementation does nothing diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt index f254cbb4c..bdc1fba64 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/policy/ManagedStreamingPolicy.kt @@ -95,7 +95,7 @@ interface ManagedStreamingPolicy { source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, ): Boolean /** This callback will be called when a streaming error occurs. 
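+     * Implementations may use it to divert subsequent requests to queued
+     * ingestion.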
*/ @@ -103,7 +103,7 @@ interface ManagedStreamingPolicy { source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, failureDetails: ManagedStreamingRequestFailureDetails, ) @@ -112,7 +112,7 @@ interface ManagedStreamingPolicy { source: IngestionSource, database: String, table: String, - props: IngestRequestProperties, + props: IngestRequestProperties?, successDetails: ManagedStreamingRequestSuccessDetails, ) } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt index 803728cb2..a41bb5b50 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/exceptions/IngestException.kt @@ -78,9 +78,27 @@ open class IngestClientException( cause: Throwable? = null, ) : IngestException(message, cause, failureCode, failureSubCode, isPermanent) { override val message: String - get() = - creationMessage - ?: "An error occurred for source: '${ingestionSource ?: ""}'. Error: '${error ?: ""}'" + get() { + creationMessage?.let { + return it + } + // Fallback message with all fields + return buildString { + append("IngestClientException occurred") + if (!ingestionSourceId.isNullOrBlank()) { + append(" [ID: $ingestionSourceId]") + } + if (!ingestionSource.isNullOrBlank()) { + append(" for source: '$ingestionSource'") + } + if (!error.isNullOrBlank()) append(". Error: '$error'") + if (failureCode != null) append(" (Failure code: $failureCode") + if (!failureSubCode.isNullOrBlank()) { + append(", Sub-code: $failureSubCode") + } + if (failureCode != null) append(")") + } + } } class IngestSizeLimitExceededException( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt index 99a90e738..a79ee40c4 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilder.kt @@ -2,6 +2,9 @@ // Licensed under the MIT License. package com.microsoft.azure.kusto.ingest.v2.common.models +import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestClientException +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping +import com.microsoft.azure.kusto.ingest.v2.models.Format import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties import java.time.OffsetDateTime @@ -13,18 +16,15 @@ import java.time.OffsetDateTime * * Example usage: * ```kotlin - * val properties = IngestRequestPropertiesBuilder.create(database = "db", table = "table") - * .withFormat(Format.json) + * val properties = IngestRequestPropertiesBuilder.create() * .withDropByTags(listOf("tag1", "tag2")) * .withIngestByTags(listOf("tag3")) * .withEnableTracking(true) * .build() * ``` */ -class IngestRequestPropertiesBuilder -private constructor(private val database: String, private val table: String) { - private var format: com.microsoft.azure.kusto.ingest.v2.models.Format? = - null +class IngestRequestPropertiesBuilder private constructor() { + private var format: Format? = null private var enableTracking: Boolean? 
= null private var additionalTags: List? = null private var dropByTags: List? = null @@ -33,7 +33,8 @@ private constructor(private val database: String, private val table: String) { private var skipBatching: Boolean? = null private var deleteAfterDownload: Boolean? = null private var ingestionMappingReference: String? = null - private var ingestionMapping: String? = null + private var inlineIngestionMapping: String? = null + private var ingestionMapping: IngestionMapping? = null private var validationPolicy: String? = null private var ignoreSizeLimit: Boolean? = null private var ignoreFirstRecord: Boolean? = null @@ -44,22 +45,9 @@ private constructor(private val database: String, private val table: String) { private var recreateSchema: Boolean? = null companion object { - internal const val DATABASE_KEY = "_database" - internal const val TABLE_KEY = "_table" - - /** - * Creates a new builder for IngestRequestProperties. - * - * @param database The target database name - * @param table The target table name - * @return A new IngestRequestPropertiesBuilder instance - */ @JvmStatic - fun create( - database: String, - table: String, - ): IngestRequestPropertiesBuilder { - return IngestRequestPropertiesBuilder(database, table) + fun create(): IngestRequestPropertiesBuilder { + return IngestRequestPropertiesBuilder() } } @@ -99,12 +87,20 @@ private constructor(private val database: String, private val table: String) { this.deleteAfterDownload = value } - fun withIngestionMappingReference(value: String) = apply { - this.ingestionMappingReference = value - } - - fun withIngestionMapping(value: String) = apply { + fun withIngestionMapping(value: IngestionMapping) = apply { this.ingestionMapping = value + // Set format from mapping type if not already set + if (this.format == null) { + this.format = value.ingestionMappingType.format + } + // Only set reference OR inline mapping, not both + if (value.ingestionMappingReference.isNotBlank()) { + this.ingestionMappingReference = value.ingestionMappingReference + this.inlineIngestionMapping = null + } else if (value.columnMappings.isNotEmpty()) { + this.inlineIngestionMapping = value.serializeColumnMappingsToJson() + this.ingestionMappingReference = null + } } fun withValidationPolicy(value: String) = apply { @@ -136,9 +132,8 @@ private constructor(private val database: String, private val table: String) { } /** - * Builds the - * [com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties] with - * combined tags from dropByTags, ingestByTags, and additionalTags. + * Builds the [IngestRequestProperties] with combined tags from dropByTags, + * ingestByTags, and additionalTags. * * The built properties will have database and table information stored in * the underlying map for retrieval by client implementations. @@ -149,21 +144,31 @@ private constructor(private val database: String, private val table: String) { * build and will be overridden with the actual source format. * * @return The built IngestRequestProperties + * @throws IngestClientException if both ingestionMappingReference and + * inlineIngestionMapping are set */ fun build(): IngestRequestProperties { + // Validate that both mapping reference and inline mapping are not set simultaneously + if ( + !ingestionMappingReference.isNullOrBlank() && + !inlineIngestionMapping.isNullOrBlank() + ) { + throw IngestClientException( + message = + "Both mapping reference and column mappings were defined. 
" + + "Please provide either a mapping reference OR column mappings, not both.", + isPermanent = true, + failureCode = 400, + ) + } + // Combine all tags: additional tags + prefixed ingest-by tags + prefixed drop-by tags val combinedTags = mutableListOf() - additionalTags?.let { combinedTags.addAll(it) } - ingestByTags?.forEach { tag -> combinedTags.add("ingest-by:$tag") } - dropByTags?.forEach { tag -> combinedTags.add("drop-by:$tag") } - // Use format if explicitly set, otherwise use placeholder (will be overridden from source) - val effectiveFormat = - format ?: com.microsoft.azure.kusto.ingest.v2.models.Format.csv - + val effectiveFormat = format ?: Format.csv val properties = IngestRequestProperties( format = effectiveFormat, @@ -173,7 +178,7 @@ private constructor(private val database: String, private val table: String) { skipBatching = skipBatching, deleteAfterDownload = deleteAfterDownload, ingestionMappingReference = ingestionMappingReference, - ingestionMapping = ingestionMapping, + ingestionMapping = inlineIngestionMapping, validationPolicy = validationPolicy, ignoreSizeLimit = ignoreSizeLimit, ignoreFirstRecord = ignoreFirstRecord, @@ -183,11 +188,6 @@ private constructor(private val database: String, private val table: String) { extendSchema = extendSchema, recreateSchema = recreateSchema, ) - - // Store database and table in the HashMap for retrieval - properties.put(DATABASE_KEY, database) - properties.put(TABLE_KEY, table) - return properties } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt index c6b4878a2..c10abc326 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesExtensions.kt @@ -4,34 +4,6 @@ package com.microsoft.azure.kusto.ingest.v2.common.models import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties -/** - * Extension properties and functions for - * [com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties]. - * - * These extensions provide convenient access to database, table, and tag - * information stored in the IngestRequestProperties. - */ - -/** - * Extension property to extract the database name from IngestRequestProperties. - */ -val IngestRequestProperties.database: String - get() = - this.get(IngestRequestPropertiesBuilder.DATABASE_KEY) as? String - ?: throw IllegalStateException( - "Database not set in IngestRequestProperties", - ) - -/** - * Extension property to extract the table name from IngestRequestProperties. - */ -val IngestRequestProperties.table: String - get() = - this.get(IngestRequestPropertiesBuilder.TABLE_KEY) as? String - ?: throw IllegalStateException( - "Table not set in IngestRequestProperties", - ) - /** * Extension property to extract drop-by tags from the combined tags list. * Returns all tags that start with "drop-by:" prefix. 
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/IngestionMapping.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/IngestionMapping.kt new file mode 100644 index 000000000..453799383 --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/IngestionMapping.kt @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models.mapping + +import com.microsoft.azure.kusto.ingest.v2.models.Format +import kotlinx.serialization.json.Json +import kotlin.collections.emptyList +import kotlinx.serialization.Serializable as KSerializable + +@KSerializable +class IngestionMapping +private constructor( + val ingestionMappingReference: String, + val columnMappings: List, + val ingestionMappingType: IngestionMappingType, +) { + constructor( + other: IngestionMapping, + ) : this( + other.ingestionMappingReference, + other.columnMappings.map { + ColumnMapping( + it.columnName, + columnType = it.columnType, + properties = it.properties, + ) + }, + other.ingestionMappingType, + ) + + constructor( + ingestionMappingReference: String, + ingestionMappingType: IngestionMappingType, + ) : this( + columnMappings = emptyList(), + ingestionMappingReference = ingestionMappingReference, + ingestionMappingType = ingestionMappingType, + ) + + constructor( + columnMappings: List, + ingestionMappingType: IngestionMappingType, + ) : this( + columnMappings = columnMappings, + ingestionMappingReference = "", + ingestionMappingType = ingestionMappingType, + ) + + /** + * Serializes the column mappings to a JSON string representation. + * + * @return JSON string representation of the column mappings + */ + fun serializeColumnMappingsToJson(): String { + return Json.encodeToString(columnMappings) + } + + enum class IngestionMappingType( + val kustoValue: String, + val format: Format, + ) { + CSV("Csv", Format.csv), + JSON("Json", Format.json), + AVRO("Avro", Format.avro), + PARQUET("Parquet", Format.parquet), + SSTREAM("SStream", Format.sstream), + ORC("Orc", Format.orc), + APACHEAVRO("ApacheAvro", Format.apacheavro), + W3CLOGFILE("W3CLogFile", Format.w3clogfile), + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt deleted file mode 100644 index d645b5566..000000000 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/InlineIngestionMapping.kt +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -package com.microsoft.azure.kusto.ingest.v2.common.models.mapping - -import kotlinx.serialization.Serializable as KSerializable - -@KSerializable -data class InlineIngestionMapping( - var columnMappings: List? = null, - var ingestionMappingType: IngestionMappingType? 
= null, -) { - constructor( - other: InlineIngestionMapping, - ) : this( - other.columnMappings?.map { - ColumnMapping( - it.columnName, - columnType = it.columnType, - properties = it.properties, - ) - }, - other.ingestionMappingType, - ) - - enum class IngestionMappingType(val kustoValue: String) { - CSV("Csv"), - JSON("Json"), - AVRO("Avro"), - PARQUET("Parquet"), - SSTREAM("SStream"), - ORC("Orc"), - APACHEAVRO("ApacheAvro"), - W3CLOGFILE("W3CLogFile"), - } -} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt index fd0fea81c..95d784afa 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtils.kt @@ -17,15 +17,8 @@ object PathUtils { private val FORBIDDEN_CHARS = Pattern.compile("[^\\w-]", Pattern.CASE_INSENSITIVE) - fun sanitizeFileName(baseName: String?, sourceId: UUID): String { - val base = getBasename(baseName) - val fileNameSegment = sanitize(base, FILE_NAME_SEGMENT_MAX_LENGTH) - val baseNamePart = - if (!base.isNullOrEmpty()) "_$fileNameSegment" else "" - return sanitize( - sourceId.toString(), - TOTAL_TWO_SEGMENT_MAX_LENGTH - fileNameSegment.length, - ) + baseNamePart + fun sanitizeFileName(sourceId: UUID): String { + return sanitize(sourceId.toString(), TOTAL_TWO_SEGMENT_MAX_LENGTH) } private fun sanitize(name: String?, maxSize: Int): String { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt index 7ba44ba26..8115f2baf 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/BlobSource.kt @@ -14,8 +14,7 @@ class BlobSource( format: Format = Format.csv, sourceId: UUID = UUID.randomUUID(), compressionType: CompressionType = CompressionType.NONE, - baseName: String? = null, -) : IngestionSource(format, compressionType, baseName, sourceId) { +) : IngestionSource(format, compressionType, sourceId) { /** * The exact size of the blob in bytes if available. This is only set when diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt index a7d5e7437..359f1e8cd 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt @@ -18,14 +18,12 @@ class FileSource( format: Format, sourceId: UUID = UUID.randomUUID(), compressionType: CompressionType? = null, - baseName: String? 
= null, ) : LocalSource( format, leaveOpen = false, compressionType = compressionType ?: detectCompressionFromPath(path), - baseName = baseName ?: path.fileName?.toString(), sourceId = sourceId, ) { override fun data(): InputStream { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt index bb73f9dc4..a4722f847 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/IngestionSource.kt @@ -10,7 +10,6 @@ import java.util.UUID abstract class IngestionSource( open val format: Format, open val compressionType: CompressionType, - baseName: String? = null, open val sourceId: UUID = UUID.randomUUID(), ) : Closeable { @@ -18,18 +17,18 @@ abstract class IngestionSource( private set init { - name = initName(baseName) + name = initName() } override fun close() { // No-op by default, override if needed } - protected fun initName(baseName: String? = null): String { + protected fun initName(): String { val type = this::class.simpleName?.removeSuffix("Source")?.lowercase() ?: "source" - return "${type}_${PathUtils.sanitizeFileName(baseName, sourceId)}${format.value}$compressionType" + return "${type}_${PathUtils.sanitizeFileName(sourceId)}${format.value}$compressionType" } override fun toString(): String { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt index 1d1e36ecb..95e762c6e 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/LocalSource.kt @@ -11,9 +11,8 @@ abstract class LocalSource( format: Format, val leaveOpen: Boolean, compressionType: CompressionType = CompressionType.NONE, - baseName: String? = null, sourceId: UUID = UUID.randomUUID(), -) : IngestionSource(format, compressionType, baseName, sourceId) { +) : IngestionSource(format, compressionType, sourceId) { protected var mStream: InputStream? = null /** diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt index b7fb43f9d..3b3e88a73 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt @@ -12,13 +12,11 @@ class StreamSource( format: Format, sourceCompression: CompressionType, sourceId: UUID = UUID.randomUUID(), - baseName: String? 
= null, leaveOpen: Boolean = false, -) : LocalSource(format, leaveOpen, sourceCompression, baseName, sourceId) { +) : LocalSource(format, leaveOpen, sourceCompression, sourceId) { init { mStream = stream - initName(baseName) } override fun data(): InputStream { diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java index 348c02d24..b4acdc240 100644 --- a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/IngestV2JavaTestBase.java @@ -34,7 +34,7 @@ public abstract class IngestV2JavaTestBase { protected final String database; protected final String dmEndpoint; protected final String engineEndpoint; - protected final String targetTable; + protected String targetTable; protected final Map columnNamesToTypes; protected Client adminClusterClient; @@ -54,10 +54,7 @@ public IngestV2JavaTestBase(Class testClass) { this.engineEndpoint = dmEndpoint.replace("https://ingest-", "https://"); - // Generate unique table name for this test run - this.targetTable = "V2_Java_Tests_Sensor_" + - UUID.randomUUID().toString().replace("-", "").substring(0, 8); - + // Define table schema this.columnNamesToTypes = new LinkedHashMap<>(); columnNamesToTypes.put("timestamp", "datetime"); @@ -72,6 +69,10 @@ public IngestV2JavaTestBase(Class testClass) { @BeforeEach public void createTables() throws Exception { + // Generate unique table name for this test run + this.targetTable = "V2_Java_Tests_Sensor_" + + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + // Build create table script StringBuilder columnsBuilder = new StringBuilder(); boolean first = true; diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java index 224531051..c77d0bc3e 100644 --- a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java +++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientJavaTest.java @@ -8,14 +8,16 @@ import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping; import com.microsoft.azure.kusto.ingest.v2.models.Format; import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; import com.microsoft.azure.kusto.ingest.v2.source.FileSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; import java.io.InputStream; @@ -46,8 +48,9 @@ public ManagedStreamingIngestClientJavaTest() { * - Small data triggers streaming ingestion * - Data appears in the table after ingestion */ - @Test - public void testManagedStreamingIngestSmallData() throws Exception { + @ParameterizedTest(name = "Managed Streaming Ingest Small Data - 
useIngestRequestProperties={0}")
+    @ValueSource(booleans = {true, false})
+    public void testManagedStreamingIngestSmallData(boolean useIngestRequestProperties) throws Exception {
         logger.info("Running Java managed streaming ingest (small data) regression test");
 
         // Enable streaming ingestion on the table
@@ -66,34 +69,38 @@ public void testManagedStreamingIngestSmallData() throws Exception {
                 dataStream,
                 Format.json,
                 CompressionType.NONE,
                 UUID.randomUUID(),
-                "java-managed-streaming-small",
                 false
         );
 
-        IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
-                .withIngestionMappingReference(targetTable + "_mapping")
+        IngestionMapping mappingReference = new IngestionMapping(targetTable + "_mapping",
+                IngestionMapping.IngestionMappingType.JSON);
+
+        IngestRequestProperties properties = useIngestRequestProperties ? IngestRequestPropertiesBuilder
+                .create()
+                .withIngestionMapping(mappingReference)
                 .withEnableTracking(true)
-                .build();
+                .build() : null;
 
         // Ingest data (should use streaming for small data)
         logger.info("Ingesting small data via managed streaming...");
-        ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, source, properties).get();
 
         assertNotNull(response, "Response should not be null");
-        assertNotNull(response.getIngestResponse().getIngestionOperationId(),
-                "Operation ID should not be null");
-
-        // Verify it used streaming ingestion
-        IngestKind ingestionType = response.getIngestionType();
-        logger.info("Ingest completed using {} method. Operation ID: {}",
-                ingestionType, response.getIngestResponse().getIngestionOperationId());
-
-        // Small data typically uses streaming, but fallback to queued is acceptable
-        assertTrue(
-                ingestionType == IngestKind.STREAMING || ingestionType == IngestKind.QUEUED,
-                "Ingestion type should be either STREAMING or QUEUED"
-        );
+        if (useIngestRequestProperties) {
+            assertNotNull(response.getIngestResponse().getIngestionOperationId(),
+                    "Operation ID should not be null");
+
+            // Verify it used streaming ingestion
+            IngestKind ingestionType = response.getIngestionType();
+            logger.info("Ingest completed using {} method. Operation ID: {}",
+                    ingestionType, response.getIngestResponse().getIngestionOperationId());
+
+            // Small data typically uses streaming, but fallback to queued is acceptable
+            assertTrue(
+                    ingestionType == IngestKind.STREAMING || ingestionType == IngestKind.QUEUED,
+                    "Ingestion type should be either STREAMING or QUEUED"
+            );
+        }
 
         // Verify data appeared in table
         String query = String.format("%s | summarize count=count()", targetTable);
@@ -110,8 +117,9 @@
      * - Larger data automatically falls back to queued ingestion
      * - Fallback mechanism works correctly from Java
      */
-    @Test
-    public void testManagedStreamingIngestWithFallback() throws Exception {
+    @ParameterizedTest(name = "Managed Streaming Ingest with Fallback - useIngestRequestProperties={0}")
+    @ValueSource(booleans = {true, false})
+    public void testManagedStreamingIngestWithFallback(boolean useIngestRequestProperties) throws Exception {
         logger.info("Running Java managed streaming ingest with fallback test");
 
         alterTableToEnableStreaming();
@@ -136,35 +144,39 @@
                 dataStream,
                 Format.multijson,
                 CompressionType.NONE,
                 UUID.randomUUID(),
-                "java-managed-streaming-fallback",
                 false
         );
 
-        IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
-                .withIngestionMappingReference(targetTable + "_mapping")
+        IngestionMapping mappingReference = new IngestionMapping(targetTable + "_mapping",
+                IngestionMapping.IngestionMappingType.JSON);
+
+        IngestRequestProperties properties = useIngestRequestProperties ? IngestRequestPropertiesBuilder
+                .create()
+                .withIngestionMapping(mappingReference)
                 .withEnableTracking(true)
-                .build();
+                .build() : null;
 
         logger.info("Ingesting larger data via managed streaming (may trigger fallback)...");
-        ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, source, properties).get();
 
         assertNotNull(response, "Response should not be null");
 
-        IngestKind ingestionType = response.getIngestionType();
-        logger.info("Ingestion completed using {} method. Operation ID: {}",
-                ingestionType, response.getIngestResponse().getIngestionOperationId());
-
-        // Both streaming and queued are valid outcomes
-        assertTrue(
-                ingestionType == IngestKind.STREAMING || ingestionType == IngestKind.QUEUED,
-                "Ingestion type should be either STREAMING or QUEUED"
-        );
-
-        if (ingestionType == IngestKind.QUEUED) {
-            logger.info("Fallback to QUEUED ingestion triggered (expected for larger data)");
-        } else {
-            logger.info("Data ingested via STREAMING (compression may have kept size small)");
+        if (useIngestRequestProperties) {
+            IngestKind ingestionType = response.getIngestionType();
+            logger.info("Ingestion completed using {} method. Operation ID: {}",
+                    ingestionType, response.getIngestResponse().getIngestionOperationId());
+
+            // Both streaming and queued are valid outcomes
+            assertTrue(
+                    ingestionType == IngestKind.STREAMING || ingestionType == IngestKind.QUEUED,
+                    "Ingestion type should be either STREAMING or QUEUED"
+            );
+
+            if (ingestionType == IngestKind.QUEUED) {
+                logger.info("Fallback to QUEUED ingestion triggered (expected for larger data)");
+            } else {
+                logger.info("Data ingested via STREAMING (compression may have kept size small)");
+            }
         }
 
         String query = String.format("%s | summarize count=count()", targetTable);
@@ -179,8 +191,9 @@
      * Test managed streaming with file source from Java.
      * Verifies that file-based ingestion works correctly with managed streaming.
      */
-    @Test
-    public void testManagedStreamingIngestFromFileSource() throws Exception {
+    @ParameterizedTest(name = "Managed Streaming Ingest from File - useIngestRequestProperties={0}")
+    @ValueSource(booleans = {true, false})
+    public void testManagedStreamingIngestFromFileSource(boolean useIngestRequestProperties) throws Exception {
         logger.info("Running Java managed streaming ingest from file source test");
 
         alterTableToEnableStreaming();
@@ -208,24 +221,25 @@
                 filePath,
                 Format.multijson,
                 UUID.randomUUID(),
-                CompressionType.NONE,
-                null
+                CompressionType.NONE
         );
 
-        IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
+        IngestRequestProperties properties = useIngestRequestProperties ? IngestRequestPropertiesBuilder
+                .create()
                 .withEnableTracking(true)
-                .build();
+                .build() : null;
 
         logger.info("Ingesting file via managed streaming...");
-        ExtendedIngestResponse response = client.ingestAsync(fileSource, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, fileSource, properties).get();
 
         assertNotNull(response, "Response should not be null");
-        assertNotNull(response.getIngestResponse().getIngestionOperationId(), "Operation ID should not be null");
+        if (useIngestRequestProperties) {
+            assertNotNull(response.getIngestResponse().getIngestionOperationId(), "Operation ID should not be null");
 
-        IngestKind ingestionType = response.getIngestionType();
-        logger.info("File ingestion completed using {} method. Operation ID: {}",
-                ingestionType, response.getIngestResponse().getIngestionOperationId());
+            IngestKind ingestionType = response.getIngestionType();
+            logger.info("File ingestion completed using {} method. Operation ID: {}",
+                    ingestionType, response.getIngestResponse().getIngestionOperationId());
+        }
 
         String query = String.format("%s | summarize count=count()", targetTable);
         awaitAndQuery(query, 1);
diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java
index 27292632f..9f0de901c 100644
--- a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java
+++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientJavaTest.java
@@ -8,14 +8,18 @@
 import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient;
 import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse;
 import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder;
+import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping;
 import com.microsoft.azure.kusto.ingest.v2.models.Format;
 import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties;
 import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse;
 import com.microsoft.azure.kusto.ingest.v2.source.CompressionType;
+import com.microsoft.azure.kusto.ingest.v2.source.FileSource;
 import com.microsoft.azure.kusto.ingest.v2.source.StreamSource;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.parallel.Execution;
 import org.junit.jupiter.api.parallel.ExecutionMode;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.io.ByteArrayInputStream;
 import java.io.InputStream;
@@ -47,8 +51,9 @@
      * - Operation can be tracked
      * - Data appears in the table after processing
      */
-    @Test
-    public void testBasicQueuedIngest() throws Exception {
+    @ParameterizedTest(name = "Queued Ingest Basic Test - useIngestRequestProperties={0}")
+    @ValueSource(booleans = {true, false})
+    public void testBasicQueuedIngest(boolean useIngestRequestProperties) throws Exception {
         logger.info("Running Java queued ingest regression test");
 
         // Create queued client
@@ -65,54 +70,58 @@
                 dataStream,
                 Format.json,
                 CompressionType.NONE,
                 UUID.randomUUID(),
-                "java-queued-test",
                 false
         );
 
-        IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
-                .withIngestionMappingReference(targetTable + "_mapping")
+        IngestionMapping mappingReference = new IngestionMapping(targetTable + "_mapping",
+                IngestionMapping.IngestionMappingType.JSON);
+
+        IngestRequestProperties properties = useIngestRequestProperties ? IngestRequestPropertiesBuilder
+                .create()
+                .withIngestionMapping(mappingReference)
                 .withEnableTracking(true)
-                .build();
+                .build() : null;
 
         // Queue data for ingestion
         logger.info("Queueing data for ingestion...");
-        ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, source, properties).get();
 
         assertNotNull(response, "Response should not be null");
-        assertNotNull(response.getIngestResponse().getIngestionOperationId(),
-                "Operation ID should not be null");
-
-        logger.info("Data queued. Operation ID: {}",
-                response.getIngestResponse().getIngestionOperationId());
-
-        // Track the operation
-        IngestionOperation operation = new IngestionOperation(
-                response.getIngestResponse().getIngestionOperationId(),
-                database,
-                targetTable,
-                response.getIngestionType()
-        );
-
-        // Get initial status
-        StatusResponse initialStatus = client.getOperationDetailsAsync(operation).get();
-        assertNotNull(initialStatus, "Initial status should not be null");
-        logger.info("Initial status retrieved");
-
-        // Poll for completion
-        logger.info("Polling for completion...");
-        StatusResponse finalStatus = client.pollForCompletion(
-                operation,
-                Duration.ofSeconds(30),
-                Duration.ofMinutes(2)
-        ).get();
-
-        assertNotNull(finalStatus, "Final status should not be null");
-        assertNotNull(finalStatus.getStatus(), "Final status summary should not be null");
-        assertEquals(0, finalStatus.getStatus().getFailed(), "Ingestion should not record failures");
-        assertTrue(finalStatus.getStatus().getSucceeded()!=null && finalStatus.getStatus().getSucceeded() >= 1, "At least one ingestion should succeed");
-        logger.info("Polling completed");
-
+        if (useIngestRequestProperties) {
+
+            assertNotNull(response.getIngestResponse().getIngestionOperationId(),
+                    "Operation ID should not be null");
+
+            logger.info("Data queued. Operation ID: {}",
+                    response.getIngestResponse().getIngestionOperationId());
+
+            // Track the operation
+            IngestionOperation operation = new IngestionOperation(
+                    response.getIngestResponse().getIngestionOperationId(),
+                    database,
+                    targetTable,
+                    response.getIngestionType()
+            );
+
+            // Get initial status
+            StatusResponse initialStatus = client.getOperationDetailsAsync(operation).get();
+            assertNotNull(initialStatus, "Initial status should not be null");
+            logger.info("Initial status retrieved");
+
+            // Poll for completion
+            logger.info("Polling for completion...");
+            StatusResponse finalStatus = client.pollForCompletion(
+                    operation,
+                    Duration.ofSeconds(30),
+                    Duration.ofMinutes(2)
+            ).get();
+
+            assertNotNull(finalStatus, "Final status should not be null");
+            assertNotNull(finalStatus.getStatus(), "Final status summary should not be null");
+            assertEquals(0, finalStatus.getStatus().getFailed(), "Ingestion should not record failures");
+            assertTrue(finalStatus.getStatus().getSucceeded() != null && finalStatus.getStatus().getSucceeded() >= 1, "At least one ingestion should succeed");
+            logger.info("Polling completed");
+        }
         // Verify data appeared in table
         String query = String.format("%s | summarize count=count()", targetTable);
         awaitAndQuery(query, 1);
@@ -143,22 +152,21 @@
             return;
         }
 
-        com.microsoft.azure.kusto.ingest.v2.source.FileSource fileSource =
-                new com.microsoft.azure.kusto.ingest.v2.source.FileSource(
+        FileSource fileSource =
+                new FileSource(
                         filePath,
                         Format.multijson,
                         UUID.randomUUID(),
-                        CompressionType.NONE,
-                        null
+                        CompressionType.NONE
                 );
 
         IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
+                .create()
                 .withEnableTracking(true)
                 .build();
 
         logger.info("Queueing file for ingestion...");
-        ExtendedIngestResponse response = client.ingestAsync(fileSource, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, fileSource, properties).get();
 
         assertNotNull(response, "Response should not be null");
         logger.info("File queued. Operation ID: {}",
                 response.getIngestResponse().getIngestionOperationId());
diff --git a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java
index 9b23290ab..16dd6f126 100644
--- a/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java
+++ b/ingest-v2/src/test/java/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientJavaTest.java
@@ -7,17 +7,20 @@
 import com.microsoft.azure.kusto.ingest.v2.client.StreamingIngestClient;
 import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse;
 import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder;
+import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping;
 import com.microsoft.azure.kusto.ingest.v2.models.Format;
 import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties;
 import com.microsoft.azure.kusto.ingest.v2.source.CompressionType;
 import com.microsoft.azure.kusto.ingest.v2.source.StreamSource;
-import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.parallel.Execution;
 import org.junit.jupiter.api.parallel.ExecutionMode;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.UUID;
 import java.util.concurrent.ExecutionException;
 
@@ -42,8 +45,9 @@
      * - Simple CSV data can be ingested via streaming
      * - Data appears in the table after ingestion
      */
-    @Test
-    public void testBasicStreamingIngest() throws Exception {
+    @ParameterizedTest(name = "Streaming Ingest Basic Test - useIngestRequestProperties={0}")
+    @ValueSource(booleans = {true, false})
+    public void testBasicStreamingIngest(boolean useIngestRequestProperties) throws Exception {
         logger.info("Running Java streaming ingest regression test");
 
         // Enable streaming ingestion on the table
@@ -64,25 +68,28 @@
                 dataStream,
                 Format.json,
                 CompressionType.NONE,
                 UUID.randomUUID(),
-                "java-streaming-test",
                 false
         );
+        IngestionMapping mappingReference = new IngestionMapping(targetTable + "_mapping",
+                IngestionMapping.IngestionMappingType.JSON);
 
-        IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
-                .withIngestionMappingReference(targetTable + "_mapping")
-                .build();
+        IngestRequestProperties properties = useIngestRequestProperties ? IngestRequestPropertiesBuilder
+                .create()
+                .withIngestionMapping(mappingReference)
+                .build() : null;
 
         // Ingest data
         logger.info("Ingesting data via streaming...");
-        ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, source, properties).get();
 
         assertNotNull(response, "Response should not be null");
-        assertNotNull(response.getIngestResponse().getIngestionOperationId(),
-                "Operation ID should not be null");
+        if (useIngestRequestProperties) {
+            assertNotNull(response.getIngestResponse().getIngestionOperationId(),
+                    "Operation ID should not be null");
 
-        logger.info("Streaming ingestion completed. Operation ID: {}",
-                response.getIngestResponse().getIngestionOperationId());
+            logger.info("Streaming ingestion completed. Operation ID: {}",
+                    response.getIngestResponse().getIngestionOperationId());
+        }
 
         // Verify data appeared in table
         String query = String.format("%s | summarize count=count()", targetTable);
@@ -97,8 +104,9 @@
      * Test streaming ingestion with compressed data from Java.
      * Verifies that compression handling works correctly from Java.
      */
-    @Test
-    public void testStreamingIngestWithCompression() throws Exception {
+    @ParameterizedTest(name = "Streaming Ingest with Compression - useIngestRequestProperties={0}")
+    @ValueSource(booleans = {true, false})
+    public void testStreamingIngestWithCompression(boolean useIngestRequestProperties) throws Exception {
         logger.info("Running Java streaming ingest with compression test");
 
         alterTableToEnableStreaming();
@@ -117,26 +125,27 @@
             return;
         }
 
-        InputStream fileStream = java.nio.file.Files.newInputStream(filePath);
+        InputStream fileStream = Files.newInputStream(filePath);
 
         StreamSource source = new StreamSource(
                 fileStream,
                 Format.multijson,
                 CompressionType.GZIP,
                 UUID.randomUUID(),
-                "java-compressed-stream-test",
                 false
         );
 
-        IngestRequestProperties properties = IngestRequestPropertiesBuilder
-                .create(database, targetTable)
-                .build();
+        IngestRequestProperties properties = useIngestRequestProperties ? IngestRequestPropertiesBuilder
+                .create()
+                .build() : null;
 
         logger.info("Ingesting compressed data...");
-        ExtendedIngestResponse response = client.ingestAsync(source, properties).get();
+        ExtendedIngestResponse response = client.ingestAsync(database, targetTable, source, properties).get();
 
         assertNotNull(response, "Response should not be null");
-        logger.info("Compressed streaming ingestion completed. Operation ID: {}",
-                response.getIngestResponse().getIngestionOperationId());
+        if (useIngestRequestProperties) {
+            logger.info("Compressed streaming ingestion completed. 
Operation ID: {}", + response.getIngestResponse().getIngestionOperationId()); + } fileStream.close(); diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt index eec393ea9..6771562f7 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/ManagedStreamingIngestClientTest.kt @@ -77,13 +77,15 @@ class ManagedStreamingIngestClientTest : } val testSources = BlobSource(blobUrl, format = format) val ingestRequestProperties = - IngestRequestPropertiesBuilder.create(database, targetTable) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build() try { // Ingest data - should attempt streaming first val ingestionResponse = managedClient.ingestAsync( + database = database, + table = targetTable, source = testSources, ingestRequestProperties = ingestRequestProperties, ) @@ -192,17 +194,18 @@ class ManagedStreamingIngestClientTest : stream = ByteArrayInputStream(testData.toByteArray()), format = targetTestFormat, sourceCompression = CompressionType.NONE, - baseName = "test-custom-policy", ) val properties = - IngestRequestPropertiesBuilder.create(database, targetTable) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build() try { val ingestionResponse = customManagedClient.ingestAsync( + database = database, + table = targetTable, source = source, ingestRequestProperties = properties, ) @@ -275,11 +278,13 @@ class ManagedStreamingIngestClientTest : format = Format.multijson, ) val ingestRequestProperties = - IngestRequestPropertiesBuilder.create(database, targetTable) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build() val ingestionResponse = customManagedClient.ingestAsync( + database = database, + table = targetTable, source = testSource, ingestRequestProperties = ingestRequestProperties, ) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt index 27704e376..12cbd5eec 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt @@ -10,7 +10,7 @@ import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.ColumnMapping -import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.InlineIngestionMapping +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.TransformationMethod import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus @@ -154,9 +154,14 @@ class QueuedIngestClientTest : val properties = if (useMappingReference) { - IngestRequestPropertiesBuilder.create(database, targetTable) - .withIngestionMappingReference( - "${targetTable}_mapping", + IngestRequestPropertiesBuilder.create() + .withIngestionMapping( + IngestionMapping( + "${targetTable}_mapping", + 
IngestionMapping
+                            .IngestionMappingType
+                            .JSON,
+                    ),
                 )
                 .withEnableTracking(true)
                 .build()
@@ -198,24 +203,20 @@
                             .apply { setPath("$.$col") }
                     }
                 }
-            val inlineIngestionMappingInline =
-                InlineIngestionMapping(
+            val inlineIngestionMapping =
+                IngestionMapping(
                     columnMappings = ingestionColumnMappings,
                     ingestionMappingType =
-                        InlineIngestionMapping
+                        IngestionMapping
                             .IngestionMappingType
                             .JSON,
                 )
-            val ingestionMappingString =
-                jsonPrinter.encodeToString(
-                    inlineIngestionMappingInline.columnMappings,
-                )
-            IngestRequestPropertiesBuilder.create(database, targetTable)
-                .withIngestionMapping(ingestionMappingString)
+            IngestRequestPropertiesBuilder.create()
+                .withIngestionMapping(inlineIngestionMapping)
                 .withEnableTracking(true)
                 .build()
         } else {
-            IngestRequestPropertiesBuilder.create(database, targetTable)
+            IngestRequestPropertiesBuilder.create()
                 .withEnableTracking(true)
                 .build()
         }
@@ -223,6 +224,8 @@
         try {
             val ingestionResponse =
                 ingestClient.ingestAsync(
+                    database = database,
+                    table = targetTable,
                     sources = testSources,
                     ingestRequestProperties = properties,
                 )
@@ -325,7 +328,6 @@
                 stream = ByteArrayInputStream(data),
                 format = Format.multijson,
                 sourceCompression = CompressionType.NONE,
-                baseName = name,
             )
         }
 
@@ -342,12 +344,11 @@
                 createTestStreamSource(1024, "combined_small.json")
             val smallResponse =
                 queuedIngestClient.ingestAsync(
+                    database = database,
+                    table = targetTable,
                     source = smallSource,
                     ingestRequestProperties =
-                        IngestRequestPropertiesBuilder.create(
-                            database,
-                            targetTable,
-                        )
+                        IngestRequestPropertiesBuilder.create()
                             .withEnableTracking(true)
                             .build(),
                 )
@@ -382,12 +383,11 @@
             )
             val largeResponse =
                 queuedIngestClient.ingestAsync(
+                    database = database,
+                    table = targetTable,
                    source = largeSource,
                     ingestRequestProperties =
-                        IngestRequestPropertiesBuilder.create(
-                            database,
-                            targetTable,
-                        )
+                        IngestRequestPropertiesBuilder.create()
                             .withEnableTracking(true)
                             .build(),
                 )
@@ -424,12 +424,11 @@
             }
             val batchResponse =
                 queuedIngestClient.ingestAsync(
+                    database = database,
+                    table = targetTable,
                     sources = batchSources,
                     ingestRequestProperties =
-                        IngestRequestPropertiesBuilder.create(
-                            database,
-                            targetTable,
-                        )
+                        IngestRequestPropertiesBuilder.create()
                             .withEnableTracking(true)
                             .build(),
                 )
@@ -480,12 +479,11 @@
             val startTime = System.currentTimeMillis()
             val response =
                 queuedIngestClient.ingestAsync(
+                    database = database,
+                    table = targetTable,
                     sources = sources,
                     ingestRequestProperties =
-                        IngestRequestPropertiesBuilder.create(
-                            database,
-                            targetTable,
-                        )
+                        IngestRequestPropertiesBuilder.create()
                             .withEnableTracking(true)
                             .build(),
                 )
@@ -548,6 +546,7 @@
         "json,compression/sample.json,NONE,3",
         "parquet,compression/sample.parquet,NONE,1",
         "avro,compression/sample.avro,NONE,1",
+        "csv,compression/sample.csv,NONE,1",
     )
     fun `E2E - compression format tests`(
         formatName: String,
@@ -597,23 +596,35 @@
                 .minusHours((1..5L).random())
             val extentTags =
                 listOf("ingest-by:i-tag") + listOf("drop-by:d-tag")
+
+            // CSV mappings are ordinal-based, so also exercise an inline CSV
+            // mapping here alongside the path-based JSON mappings above.
+
+            val irpBuilder =
+                IngestRequestPropertiesBuilder.create()
+                    .withEnableTracking(true)
+                    .withIngestByTags(listOf("i-tag"))
+                    .withDropByTags(listOf("d-tag"))
+                    .withCreationTime(createdTimeTag)
+            val 
irp = + if (formatName == Format.csv.value) { + // use inline mapping and ignore the first record + irpBuilder + .withIngestionMapping( + getInlineCsvMapping(), + ) + .withIgnoreFirstRecord(true) + } else { + irpBuilder + } + .build() + val response = queuedIngestClient.ingestAsync( + database = database, + table = targetTable, sources = listOf(source), - ingestRequestProperties = - IngestRequestPropertiesBuilder.create( - database, - targetTable, - ) - .withEnableTracking(true) - .withIngestByTags( - listOf("i-tag"), - ) - .withDropByTags(listOf("d-tag")) - .withCreationTime( - createdTimeTag, - ) - .build(), + ingestRequestProperties = irp, ) val operationId = @@ -644,9 +655,17 @@ class QueuedIngestClientTest : logger.info( "$formatName format test: passed ($succeededCount succeeded)", ) + + val targetQuery = + if (formatName == Format.csv.value) { + "$targetTable | where format == '$format' and Type == 'IngestionMapping' and " + + "isnotempty(SourceLocation) | summarize count=count() by format" + } else { + "$targetTable | where format == '$format' | summarize count=count() by format" + } + awaitAndQuery( - query = - "$targetTable | where format == '$format' |summarize count=count() by format", + query = targetQuery, expectedResultsCount = expectedRecordCount.toLong(), testName = "$formatName format test", ) @@ -706,6 +725,52 @@ class QueuedIngestClientTest : } } + private fun getInlineCsvMapping(): IngestionMapping { + var ordinal = 0 + columnNamesToTypes.keys + .map { col -> + when (col) { + "SourceLocation" -> + ColumnMapping( + columnName = col, + columnType = "string", + ) + .apply { + setTransform( + TransformationMethod + .SourceLocation, + ) + } + + "Type" -> + ColumnMapping( + columnName = col, + columnType = "string", + ) + .apply { + setConstantValue("IngestionMapping") + } + + else -> + ColumnMapping( + columnName = col, + columnType = + columnNamesToTypes[ + col, + ]!!, + ) + .apply { setOrdinal(ordinal++) } + } + } + .let { ingestionColumnMappings -> + return IngestionMapping( + columnMappings = ingestionColumnMappings, + ingestionMappingType = + IngestionMapping.IngestionMappingType.CSV, + ) + } + } + @Test fun `E2E - format mismatch and mixed format batch`(): Unit = runBlocking { logger.info("E2E: Testing format mismatch detection with mixed formats") @@ -728,7 +793,6 @@ test2,456,2024-01-02""" ), format = Format.json, sourceCompression = CompressionType.NONE, - baseName = "format_json.json", ), StreamSource( stream = @@ -737,7 +801,6 @@ test2,456,2024-01-02""" ), format = Format.csv, sourceCompression = CompressionType.NONE, - baseName = "format_csv.csv", ), StreamSource( stream = @@ -746,7 +809,6 @@ test2,456,2024-01-02""" ), format = Format.json, sourceCompression = CompressionType.NONE, - baseName = "format_json2.json", ), ) @@ -756,12 +818,11 @@ test2,456,2024-01-02""" val exception = assertThrows { client.ingestAsync( + database = database, + table = targetTable, sources = sources, ingestRequestProperties = - IngestRequestPropertiesBuilder.create( - database, - targetTable, - ) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build(), ) @@ -819,19 +880,20 @@ test2,456,2024-01-02""" ), format = targetFormat, sourceCompression = CompressionType.NONE, - baseName = fileName, ) else -> error("Unknown sourceType: $sourceType") } val queuedIngestClient = createTestClient() val properties = - IngestRequestPropertiesBuilder.create(database, targetTable) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build() val ingestionResponse = 
queuedIngestClient.ingestAsync( + database = database, + table = targetTable, sources = listOf(source), ingestRequestProperties = properties, ) @@ -905,12 +967,11 @@ test2,456,2024-01-02""" try { val response = oneLakeIngestClient.ingestAsync( + database = database, + table = targetTable, source = source, ingestRequestProperties = - IngestRequestPropertiesBuilder.create( - database, - targetTable, - ) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build(), ) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt index 6e8518a97..17823f76e 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/StreamingIngestClientTest.kt @@ -91,9 +91,7 @@ class StreamingIngestClientTest : .withClientDetails("BuilderStreamingE2ETest", "1.0") .build() - val ingestProps = - IngestRequestPropertiesBuilder.create(database, targetTable) - .build() + val ingestProps = IngestRequestPropertiesBuilder.create().build() if (isException) { if (blobUrl != null) { logger.info( @@ -109,6 +107,8 @@ class StreamingIngestClientTest : format = targetTestFormat, ) client.ingestAsync( + database = database, + table = targetTable, source = ingestionSource, ingestRequestProperties = ingestProps, ) @@ -138,6 +138,8 @@ class StreamingIngestClientTest : ) } client.ingestAsync( + database = database, + table = targetTable, source = ingestionSource, ingestRequestProperties = ingestProps, ) @@ -173,12 +175,7 @@ class StreamingIngestClientTest : .withClientDetails("ErrorParsingE2ETest", "1.0") .build() - val properties = - IngestRequestPropertiesBuilder.create( - database, - targetTable, - ) - .build() + val properties = IngestRequestPropertiesBuilder.create().build() // Send invalid text data claiming to be JSON - this triggers a data format error val invalidData = "this is not valid json { broken" @@ -195,6 +192,8 @@ class StreamingIngestClientTest : val exception = assertThrows { client.ingestAsync( + database = database, + table = targetTable, source = streamSource, ingestRequestProperties = properties, ) @@ -263,12 +262,14 @@ class StreamingIngestClientTest : ) val properties = - IngestRequestPropertiesBuilder.create(database, targetTable) + IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build() val response = client.ingestAsync( + database = database, + table = targetTable, source = fileSource, ingestRequestProperties = properties, ) diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilderTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilderTest.kt new file mode 100644 index 000000000..ab7a05522 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/IngestRequestPropertiesBuilderTest.kt @@ -0,0 +1,217 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package com.microsoft.azure.kusto.ingest.v2.common.models
+
+import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.ColumnMapping
+import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping
+import com.microsoft.azure.kusto.ingest.v2.models.Format
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.assertNotNull
+import org.junit.jupiter.api.Assertions.assertNull
+import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Test
+
+class IngestRequestPropertiesBuilderTest {
+
+    @Test
+    fun `build should keep only the last mapping when both reference and inline are set`() {
+        // Create an IngestionMapping with a reference
+        val mappingWithReference =
+            IngestionMapping(
+                ingestionMappingReference = "my_mapping_ref",
+                ingestionMappingType =
+                    IngestionMapping.IngestionMappingType.CSV,
+            )
+
+        // Create an IngestionMapping with column mappings
+        val col1 = ColumnMapping("col1", "string")
+        col1.setPath("$.col1")
+        val col2 = ColumnMapping("col2", "int")
+        col2.setPath("$.col2")
+
+        val mappingWithColumns =
+            IngestionMapping(
+                columnMappings = listOf(col1, col2),
+                ingestionMappingType =
+                    IngestionMapping.IngestionMappingType.CSV,
+            )
+
+        // Scenario 1: First set reference, then set inline mapping
+        // This should work because withIngestionMapping clears the opposite mapping type
+        val properties1 =
+            IngestRequestPropertiesBuilder.create()
+                .withIngestionMapping(mappingWithReference)
+                .withIngestionMapping(mappingWithColumns)
+                .build()
+
+        // The last call should have cleared the reference and set inline mapping
+        assertNull(properties1.ingestionMappingReference)
+        assertNotNull(properties1.ingestionMapping)
+
+        // Scenario 2: First set inline mapping, then set reference
+        // This should also work because withIngestionMapping clears the opposite mapping type
+        val properties2 =
+            IngestRequestPropertiesBuilder.create()
+                .withIngestionMapping(mappingWithColumns)
+                .withIngestionMapping(mappingWithReference)
+                .build()
+
+        // The last call should have cleared the inline mapping and set reference
+        assertEquals("my_mapping_ref", properties2.ingestionMappingReference)
+        assertNull(properties2.ingestionMapping)
+    }
+
+    @Test
+    fun `build should succeed when only mapping reference is set`() {
+        val mappingWithReference =
+            IngestionMapping(
+                ingestionMappingReference = "my_mapping_ref",
+                ingestionMappingType =
+                    IngestionMapping.IngestionMappingType.CSV,
+            )
+
+        val properties =
+            IngestRequestPropertiesBuilder.create()
+                .withIngestionMapping(mappingWithReference)
+                .build()
+
+        assertEquals("my_mapping_ref", properties.ingestionMappingReference)
+        assertNull(properties.ingestionMapping)
+    }
+
+    @Test
+    fun `build should succeed when only inline mapping is set`() {
+        val col1 = ColumnMapping("col1", "string")
+        col1.setPath("$.col1")
+        val col2 = ColumnMapping("col2", "int")
+        col2.setPath("$.col2")
+
+        val mappingWithColumns =
+            IngestionMapping(
+                columnMappings = listOf(col1, col2),
+                ingestionMappingType =
+                    IngestionMapping.IngestionMappingType.CSV,
+            )
+
+        val properties =
+            IngestRequestPropertiesBuilder.create()
+                .withIngestionMapping(mappingWithColumns)
+                .build()
+
+        assertNull(properties.ingestionMappingReference)
+        assertNotNull(properties.ingestionMapping)
+        assertTrue(properties.ingestionMapping!!.contains("col1"))
+        assertTrue(properties.ingestionMapping.contains("col2"))
+    }
+
+    @Test
+    fun `withIngestionMapping should override previous mapping correctly`() {
+        val 
mappingWithReference = + IngestionMapping( + ingestionMappingReference = "my_mapping_ref", + ingestionMappingType = + IngestionMapping.IngestionMappingType.CSV, + ) + + val col1 = ColumnMapping("col1", "string") + col1.setPath("$.col1") + + val mappingWithColumns = + IngestionMapping( + columnMappings = listOf(col1), + ingestionMappingType = + IngestionMapping.IngestionMappingType.CSV, + ) + + // The last withIngestionMapping call should take precedence + // and clear the opposite mapping type + val builder = + IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mappingWithReference) + + // At this point, ingestionMappingReference is set, inlineIngestionMapping is null + + builder.withIngestionMapping(mappingWithColumns) + + // Now inlineIngestionMapping should be set, ingestionMappingReference should be null + val properties = builder.build() + + assertNull(properties.ingestionMappingReference) + assertNotNull(properties.ingestionMapping) + } + + @Test + fun `build should combine tags correctly`() { + val properties = + IngestRequestPropertiesBuilder.create() + .withAdditionalTags(listOf("tag1", "tag2")) + .withDropByTags(listOf("drop1")) + .withIngestByTags(listOf("ingest1", "ingest2")) + .build() + + val tags = properties.tags!! + assertEquals(5, tags.size) + assertTrue(tags.contains("tag1")) + assertTrue(tags.contains("tag2")) + assertTrue(tags.contains("drop-by:drop1")) + assertTrue(tags.contains("ingest-by:ingest1")) + assertTrue(tags.contains("ingest-by:ingest2")) + } + + @Test + fun `build should set format from mapping type`() { + val mappingWithReference = + IngestionMapping( + ingestionMappingReference = "my_mapping_ref", + ingestionMappingType = + IngestionMapping.IngestionMappingType.JSON, + ) + + val properties = + IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mappingWithReference) + .build() + + assertEquals(Format.json, properties.format) + } + + @Test + fun `build should use placeholder format when no mapping is set`() { + val properties = + IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build() + + assertEquals(Format.csv, properties.format) + } + + @Test + fun `build should set all properties correctly`() { + val properties = + IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .withSkipBatching(true) + .withDeleteAfterDownload(true) + .withIgnoreSizeLimit(true) + .withIgnoreFirstRecord(true) + .withIgnoreLastRecordIfInvalid(true) + .withExtendSchema(true) + .withRecreateSchema(true) + .withZipPattern("*.gz") + .withValidationPolicy("ValidateOnly") + .withIngestIfNotExists(listOf("tag1")) + .build() + + assertEquals(true, properties.enableTracking) + assertEquals(true, properties.skipBatching) + assertEquals(true, properties.deleteAfterDownload) + assertEquals(true, properties.ignoreSizeLimit) + assertEquals(true, properties.ignoreFirstRecord) + assertEquals(true, properties.ignoreLastRecordIfInvalid) + assertEquals(true, properties.extendSchema) + assertEquals(true, properties.recreateSchema) + assertEquals("*.gz", properties.zipPattern) + assertEquals("ValidateOnly", properties.validationPolicy) + assertEquals(listOf("tag1"), properties.ingestIfNotExists) + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt index c73fe205d..d6545e4e1 100644 --- 
a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/mapping/MappingTest.kt @@ -64,7 +64,6 @@ class MappingTest { @Test fun `ColumnMapping should set and get ordinal`() { val mapping = ColumnMapping("col1", "string") - mapping.setOrdinal(5) assertEquals(5, mapping.getOrdinal()) } @@ -261,7 +260,7 @@ class MappingTest { @Test fun `TransformationMethod should have all expected values`() { - val values = TransformationMethod.values() + val values = TransformationMethod.entries.toTypedArray() assertTrue(values.contains(TransformationMethod.None)) assertTrue( values.contains( @@ -305,27 +304,19 @@ class MappingTest { ) val mapping = - InlineIngestionMapping( + IngestionMapping( columnMappings = columnMappings, ingestionMappingType = - InlineIngestionMapping.IngestionMappingType.JSON, + IngestionMapping.IngestionMappingType.JSON, ) - assertEquals(2, mapping.columnMappings?.size) + assertEquals(2, mapping.columnMappings.size) assertEquals( - InlineIngestionMapping.IngestionMappingType.JSON, + IngestionMapping.IngestionMappingType.JSON, mapping.ingestionMappingType, ) } - @Test - fun `InlineIngestionMapping should support null values`() { - val mapping = InlineIngestionMapping() - - assertNull(mapping.columnMappings) - assertNull(mapping.ingestionMappingType) - } - @Test fun `InlineIngestionMapping copy constructor should create deep copy`() { val columnMappings = @@ -336,44 +327,25 @@ class MappingTest { ) val original = - InlineIngestionMapping( + IngestionMapping( columnMappings = columnMappings, ingestionMappingType = - InlineIngestionMapping.IngestionMappingType.JSON, + IngestionMapping.IngestionMappingType.JSON, ) - val copied = InlineIngestionMapping(original) + val copied = IngestionMapping(original) - assertEquals(original.columnMappings?.size, copied.columnMappings?.size) + assertEquals(original.columnMappings.size, copied.columnMappings.size) assertEquals(original.ingestionMappingType, copied.ingestionMappingType) - assertEquals("col1", copied.columnMappings?.get(0)?.columnName) - } - - @Test - fun `InlineIngestionMapping copy constructor should handle null columnMappings`() { - val original = - InlineIngestionMapping( - columnMappings = null, - ingestionMappingType = - InlineIngestionMapping.IngestionMappingType.CSV, - ) - - val copied = InlineIngestionMapping(original) - - assertNull(copied.columnMappings) - assertEquals( - InlineIngestionMapping.IngestionMappingType.CSV, - copied.ingestionMappingType, - ) + assertEquals("col1", copied.columnMappings[0].columnName) } // ==================== IngestionMappingType Tests ==================== - @Test fun `IngestionMappingType CSV should have correct kusto value`() { assertEquals( "Csv", - InlineIngestionMapping.IngestionMappingType.CSV.kustoValue, + IngestionMapping.IngestionMappingType.CSV.kustoValue, ) } @@ -381,7 +353,7 @@ class MappingTest { fun `IngestionMappingType JSON should have correct kusto value`() { assertEquals( "Json", - InlineIngestionMapping.IngestionMappingType.JSON.kustoValue, + IngestionMapping.IngestionMappingType.JSON.kustoValue, ) } @@ -389,7 +361,7 @@ class MappingTest { fun `IngestionMappingType AVRO should have correct kusto value`() { assertEquals( "Avro", - InlineIngestionMapping.IngestionMappingType.AVRO.kustoValue, + IngestionMapping.IngestionMappingType.AVRO.kustoValue, ) } @@ -397,7 +369,7 @@ class MappingTest { fun `IngestionMappingType PARQUET should have correct kusto value`() 
{ assertEquals( "Parquet", - InlineIngestionMapping.IngestionMappingType.PARQUET.kustoValue, + IngestionMapping.IngestionMappingType.PARQUET.kustoValue, ) } @@ -405,7 +377,7 @@ class MappingTest { fun `IngestionMappingType SSTREAM should have correct kusto value`() { assertEquals( "SStream", - InlineIngestionMapping.IngestionMappingType.SSTREAM.kustoValue, + IngestionMapping.IngestionMappingType.SSTREAM.kustoValue, ) } @@ -413,7 +385,7 @@ class MappingTest { fun `IngestionMappingType ORC should have correct kusto value`() { assertEquals( "Orc", - InlineIngestionMapping.IngestionMappingType.ORC.kustoValue, + IngestionMapping.IngestionMappingType.ORC.kustoValue, ) } @@ -421,8 +393,7 @@ class MappingTest { fun `IngestionMappingType APACHEAVRO should have correct kusto value`() { assertEquals( "ApacheAvro", - InlineIngestionMapping.IngestionMappingType.APACHEAVRO - .kustoValue, + IngestionMapping.IngestionMappingType.APACHEAVRO.kustoValue, ) } @@ -430,14 +401,14 @@ class MappingTest { fun `IngestionMappingType W3CLOGFILE should have correct kusto value`() { assertEquals( "W3CLogFile", - InlineIngestionMapping.IngestionMappingType.W3CLOGFILE - .kustoValue, + IngestionMapping.IngestionMappingType.W3CLOGFILE.kustoValue, ) } @Test fun `IngestionMappingType should have all expected values`() { - val values = InlineIngestionMapping.IngestionMappingType.values() + val values = + IngestionMapping.IngestionMappingType.entries.toTypedArray() assertEquals(8, values.size) } diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt index f18d2d7db..e813fb2bb 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/utils/PathUtilsTest.kt @@ -12,49 +12,21 @@ import java.util.* class PathUtilsTest { @Test - fun `sanitizeFileName creates valid name with baseName and sourceId`() { + fun `sanitizeFileName creates valid name with sourceId`() { val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") - val result = PathUtils.sanitizeFileName("myfile.csv", sourceId) + val result = PathUtils.sanitizeFileName(sourceId) assertTrue(result.contains("e493b23d-684f-4f4c-8ba8-3edfaca09427")) - assertTrue(result.contains("myfile-csv")) } @Test - fun `sanitizeFileName handles null baseName`() { + fun `sanitizeFileName returns sanitized sourceId`() { val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") - val result = PathUtils.sanitizeFileName(null, sourceId) + val result = PathUtils.sanitizeFileName(sourceId) assertEquals("e493b23d-684f-4f4c-8ba8-3edfaca09427", result) } - @Test - fun `sanitizeFileName handles empty baseName`() { - val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") - val result = PathUtils.sanitizeFileName("", sourceId) - - assertEquals("e493b23d-684f-4f4c-8ba8-3edfaca09427", result) - } - - @Test - fun `sanitizeFileName replaces forbidden characters`() { - val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") - val result = PathUtils.sanitizeFileName("my file@#\$%.csv", sourceId) - - assertTrue(result.contains("my-file")) - assertTrue(result.contains("csv")) - } - - @Test - fun `sanitizeFileName truncates long names`() { - val sourceId = UUID.fromString("e493b23d-684f-4f4c-8ba8-3edfaca09427") - val longName = "a".repeat(150) + ".csv" - val result = 
PathUtils.sanitizeFileName(longName, sourceId) - - assertTrue(result.contains("__trunc")) - assertTrue(result.length <= 160) - } - @Test fun `createFileNameForUpload generates valid format`() { val name = "dataset.csv" @@ -160,22 +132,6 @@ class PathUtilsTest { assertEquals("file.csv.gz", result) } - @Test - fun `sanitizeFileName preserves hyphens and underscores`() { - val sourceId = UUID.randomUUID() - val result = PathUtils.sanitizeFileName("my-file_name.csv", sourceId) - - assertTrue(result.contains("my-file_name-csv")) - } - - @Test - fun `sanitizeFileName preserves alphanumeric characters`() { - val sourceId = UUID.randomUUID() - val result = PathUtils.sanitizeFileName("file123ABC.csv", sourceId) - - assertTrue(result.contains("file123ABC-csv")) - } - @Test fun `createFileNameForUpload handles special characters in name`() { val name = "my-file@#\$.csv" diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt index 20d69168a..fb8961baa 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/source/SourceClassesTest.kt @@ -112,7 +112,6 @@ class SourceClassesTest { format = Format.json, sourceCompression = CompressionType.NONE, sourceId = UUID.randomUUID(), - baseName = "test-stream", leaveOpen = false, ) @@ -141,10 +140,22 @@ class SourceClassesTest { @Test fun `CompressionType enum should have expected values`() { - assertEquals(3, CompressionType.values().size) - assertTrue(CompressionType.values().contains(CompressionType.NONE)) - assertTrue(CompressionType.values().contains(CompressionType.GZIP)) - assertTrue(CompressionType.values().contains(CompressionType.ZIP)) + assertEquals(3, CompressionType.entries.size) + assertTrue( + CompressionType.entries + .toTypedArray() + .contains(CompressionType.NONE), + ) + assertTrue( + CompressionType.entries + .toTypedArray() + .contains(CompressionType.GZIP), + ) + assertTrue( + CompressionType.entries + .toTypedArray() + .contains(CompressionType.ZIP), + ) } @Test diff --git a/ingest-v2/src/test/resources/compression/sample.csv b/ingest-v2/src/test/resources/compression/sample.csv new file mode 100644 index 000000000..e4a770c77 --- /dev/null +++ b/ingest-v2/src/test/resources/compression/sample.csv @@ -0,0 +1,2 @@ +timestamp,deviceId,messageId,temperature,humidity,format +2024-01-01T00:00:00Z,00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000001,25.5,60,csv diff --git a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java index 5198549b2..44591c4db 100644 --- a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java +++ b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java @@ -7,6 +7,7 @@ import com.azure.identity.ClientSecretCredentialBuilder; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.json.JsonMapper; import com.microsoft.azure.kusto.data.Client; import com.microsoft.azure.kusto.data.ClientFactory; import com.microsoft.azure.kusto.data.StringUtils; @@ -34,6 +35,7 @@ import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; import io.opentelemetry.semconv.ResourceAttributes; import org.jetbrains.annotations.NotNull; 
+import org.jetbrains.annotations.Nullable; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -62,7 +64,7 @@ enum SourceType { this.source = source; } - public static SourceType valueOfLabel(String label) { + public static @Nullable SourceType valueOfLabel(String label) { for (SourceType e : values()) { if (e.source.equals(label)) { return e; @@ -71,6 +73,7 @@ public static SourceType valueOfLabel(String label) { return null; } } + /** * AuthenticationModeOptions - represents the different options to authenticate to the system */ @@ -476,8 +479,7 @@ private static void enableDistributedTracing() { private static ConfigJson loadConfigs() { Path configPath = locateConfigFile(); try { - ObjectMapper mapper = com.microsoft.azure.kusto.data.Utils.getObjectMapper(); - mapper.configure(ACCEPT_CASE_INSENSITIVE_ENUMS, true); + ObjectMapper mapper = JsonMapper.builder().configure(ACCEPT_CASE_INSENSITIVE_ENUMS, true).build(); return mapper.readValue(configPath.toFile(), ConfigJson.class); } catch (Exception e) { Utils.errorHandler(String.format("Couldn't read config file from file '%s'", configPath), e); @@ -769,15 +771,15 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q } private static @NotNull List> ingestV2FromStreams(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - @NotNull QueuedIngestClient queuedIngestClient) throws IOException { + @NotNull QueuedIngestClient queuedIngestClient) throws IOException { System.out.println("\n=== Queued ingestion from streams (ingest-v2) ==="); List> futures = new ArrayList<>(); IngestRequestProperties csvProps = buildIngestV2RequestProperties(config, ingestV2Config, null); String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; InputStream csvStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - StreamSource csvSource = new StreamSource(csvStream, Format.csv, CompressionType.NONE, UUID.randomUUID(), "csv-stream", false); - futures.add(queuedIngestClient.ingestAsync(csvSource, csvProps) + StreamSource csvSource = new StreamSource(csvStream, Format.csv, CompressionType.NONE, UUID.randomUUID(), false); + futures.add(queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), csvSource, csvProps) .thenCompose(response -> { closeQuietly(csvStream); System.out.println("CSV stream ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); @@ -785,9 +787,9 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q })); InputStream jsonStream = Files.newInputStream(resolveQuickstartPath("dataset.json")); - StreamSource jsonSource = new StreamSource(jsonStream, Format.json, CompressionType.NONE, UUID.randomUUID(), "json-stream", false); + StreamSource jsonSource = new StreamSource(jsonStream, Format.json, CompressionType.NONE, UUID.randomUUID(), false); IngestRequestProperties jsonProps = buildIngestV2RequestProperties(config, ingestV2Config, ingestV2Config.getDataMappingName()); - futures.add(queuedIngestClient.ingestAsync(jsonSource, jsonProps) + futures.add(queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), jsonSource, jsonProps) .thenCompose(response -> { closeQuietly(jsonStream); System.out.println("JSON stream ingestion queued. 
Operation ID: " + response.getIngestResponse().getIngestionOperationId()); @@ -798,21 +800,21 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q } private static @NotNull List> ingestV2FromFiles(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - @NotNull QueuedIngestClient queuedIngestClient) { + @NotNull QueuedIngestClient queuedIngestClient) { System.out.println("\n=== Queued ingestion from files (ingest-v2) ==="); List> futures = new ArrayList<>(); IngestRequestProperties csvProps = buildIngestV2RequestProperties(config, ingestV2Config, null); - FileSource csvFileSource = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE, "csv-file"); - futures.add(queuedIngestClient.ingestAsync(csvFileSource, csvProps) + FileSource csvFileSource = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + futures.add(queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), csvFileSource, csvProps) .thenCompose(response -> { System.out.println("CSV file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "CSV File"); })); - FileSource jsonFileSource = new FileSource(resolveQuickstartPath("dataset.json"), Format.json, UUID.randomUUID(), CompressionType.NONE, "json-file"); + FileSource jsonFileSource = new FileSource(resolveQuickstartPath("dataset.json"), Format.json, UUID.randomUUID(), CompressionType.NONE); IngestRequestProperties jsonProps = buildIngestV2RequestProperties(config, ingestV2Config, ingestV2Config.getDataMappingName()); - futures.add(queuedIngestClient.ingestAsync(jsonFileSource, jsonProps) + futures.add(queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), jsonFileSource, jsonProps) .thenCompose(response -> { System.out.println("JSON file ingestion queued. Operation ID: " + response.getIngestResponse().getIngestionOperationId()); return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "JSON File"); @@ -822,14 +824,14 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q } private static @NotNull CompletableFuture ingestV2BatchIngestion(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - @NotNull QueuedIngestClient queuedIngestClient) { + @NotNull QueuedIngestClient queuedIngestClient) { System.out.println("\n=== Queued ingestion from multiple sources (ingest-v2 batch) ==="); - FileSource source1 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE, "source-1"); - FileSource source2 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE, "source-2"); + FileSource source1 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); + FileSource source2 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); List sources = Arrays.asList(source1, source2); IngestRequestProperties props = buildIngestV2RequestProperties(config, ingestV2Config, null); - return queuedIngestClient.ingestAsync(sources, props) + return queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), sources, props) .thenCompose(response -> { System.out.println("Batch ingestion queued. 
Operation ID: " + response.getIngestResponse().getIngestionOperationId()); System.out.println("Number of sources in batch: " + sources.size()); @@ -837,18 +839,23 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q }); } - private static @NotNull IngestRequestProperties buildIngestV2RequestProperties(@NotNull ConfigJson config, @NotNull IngestV2QuickstartConfig ingestV2Config, String mappingName) { + private static @NotNull IngestRequestProperties buildIngestV2RequestProperties(@NotNull ConfigJson config, @NotNull IngestV2QuickstartConfig ingestV2Config, + String mappingName) { IngestRequestPropertiesBuilder builder = IngestRequestPropertiesBuilder - .create(config.getDatabaseName(), config.getTableName()) + .create() .withEnableTracking(ingestV2Config.isTrackingEnabled()); if (StringUtils.isNotBlank(mappingName)) { - builder.withIngestionMappingReference(mappingName); + // Only JSON samples are shown in the sample + com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping mapping = new com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping( + mappingName, + com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping.IngestionMappingType.JSON); + builder.withIngestionMapping(mapping); } return builder.build(); } private static @NotNull CompletableFuture trackIngestV2Operation(@NotNull ConfigJson config, @NotNull IngestV2QuickstartConfig ingestV2Config, - @NotNull QueuedIngestClient queuedIngestClient, @NotNull ExtendedIngestResponse response, String operationName) { + @NotNull QueuedIngestClient queuedIngestClient, @NotNull ExtendedIngestResponse response, String operationName) { IngestionOperation operation = new IngestionOperation( Objects.requireNonNull(response.getIngestResponse().getIngestionOperationId()), config.getDatabaseName(), diff --git a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java index e370c03be..0ea238f62 100644 --- a/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java +++ b/samples/src/main/java/ingestv2/ManagedStreamingIngestV2.java @@ -3,9 +3,8 @@ package ingestv2; +import com.azure.core.credential.TokenCredential; import com.azure.identity.AzureCliCredentialBuilder; -import com.azure.identity.ChainedTokenCredential; -import com.azure.identity.ChainedTokenCredentialBuilder; import com.azure.identity.ClientSecretCredentialBuilder; import com.microsoft.azure.kusto.data.StringUtils; import com.microsoft.azure.kusto.ingest.v2.builders.ManagedStreamingIngestClientBuilder; @@ -14,6 +13,7 @@ import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestKind; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping; import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus; import com.microsoft.azure.kusto.ingest.v2.models.Format; import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; @@ -49,42 +49,34 @@ public class ManagedStreamingIngestV2 { private static String database; private static String table; - private static String mapping; + private static String mappingName; private static ManagedStreamingIngestClient managedStreamingIngestClient; public static void main(String[] args) { try { // Get configuration from system properties - String engineEndpoint = - 
System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" String appId = System.getProperty("app-id"); String appKey = System.getProperty("appKey"); String tenant = System.getProperty("tenant"); database = System.getProperty("dbName"); table = System.getProperty("tableName"); - mapping = System.getProperty("dataMappingName"); + mappingName = System.getProperty("dataMappingName"); - ChainedTokenCredential credential; + TokenCredential credential; // Create Azure AD credential if (StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { - credential = - new ChainedTokenCredentialBuilder() - .addFirst( - new ClientSecretCredentialBuilder() - .clientId(appId) - .clientSecret(appKey) - .tenantId(tenant) - .build()) - .build(); + credential = new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build(); } else { - credential = - new ChainedTokenCredentialBuilder() - .addFirst(new AzureCliCredentialBuilder().build()) - .build(); + credential = new AzureCliCredentialBuilder().build(); } if (engineEndpoint == null || engineEndpoint.isEmpty()) { @@ -94,10 +86,9 @@ public static void main(String[] args) { // Create managed streaming ingest client using the new v2 API // The client will automatically handle streaming vs queued ingestion decisions - managedStreamingIngestClient = - ManagedStreamingIngestClientBuilder.create(engineEndpoint) - .withAuthentication(credential) - .build(); + managedStreamingIngestClient = ManagedStreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .build(); System.out.println("Managed Streaming Ingest Client created successfully"); System.out.println( @@ -130,73 +121,59 @@ static void ingestFromStream() throws Exception { System.out.println("\n=== Managed Streaming Ingestion from Streams ==="); // Example 1: Ingest from in-memory CSV string (small data - will use streaming) - String csvData = - "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = - new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - - StreamSource csvStreamSource = - new StreamSource( - csvInputStream, - Format.csv, - CompressionType.NONE, - UUID.randomUUID(), - "csv-managed-stream", - false); - - IngestRequestProperties csvProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = new StreamSource( + csvInputStream, + Format.csv, + CompressionType.NONE, + UUID.randomUUID(), + false); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Ingesting small CSV data from string..."); - ExtendedIngestResponse csvResponse = - managedStreamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); + ExtendedIngestResponse csvResponse = managedStreamingIngestClient.ingestAsync(database, table, csvStreamSource, csvProperties).get(); printIngestionResult("CSV String", csvResponse); // Example 2: Ingest from compressed CSV file String 
resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - FileInputStream compressedCsvStream = - new FileInputStream(resourcesDirectory + "dataset.csv.gz"); - - StreamSource compressedStreamSource = - new StreamSource( - compressedCsvStream, - Format.csv, - CompressionType.GZIP, - UUID.randomUUID(), - "compressed-csv-managed-stream", - false); + FileInputStream compressedCsvStream = new FileInputStream(resourcesDirectory + "dataset.csv.gz"); + + StreamSource compressedStreamSource = new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + false); System.out.println("Ingesting compressed CSV file..."); - ExtendedIngestResponse compressedResponse = - managedStreamingIngestClient - .ingestAsync(compressedStreamSource, csvProperties) - .get(); + ExtendedIngestResponse compressedResponse = managedStreamingIngestClient + .ingestAsync(database, table, compressedStreamSource, csvProperties) + .get(); printIngestionResult("Compressed CSV", compressedResponse); compressedCsvStream.close(); // Example 3: Ingest JSON with mapping FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json"); - StreamSource jsonStreamSource = - new StreamSource( - jsonStream, - Format.json, - CompressionType.NONE, - UUID.randomUUID(), - "json-managed-stream", - false); - - IngestRequestProperties jsonProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + StreamSource jsonStreamSource = new StreamSource( + jsonStream, + Format.json, + CompressionType.NONE, + UUID.randomUUID(), + false); + IngestionMapping ingestionMapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(ingestionMapping) + .withEnableTracking(true) + .build(); System.out.println("Ingesting JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = - managedStreamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); + ExtendedIngestResponse jsonResponse = managedStreamingIngestClient.ingestAsync(database, table, jsonStreamSource, jsonProperties).get(); printIngestionResult("JSON with Mapping", jsonResponse); jsonStream.close(); } @@ -212,42 +189,35 @@ static void ingestFromFile() throws Exception { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Example 1: Ingest CSV file - FileSource csvFileSource = - new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE, - "m-ds-csv"); - - IngestRequestProperties csvProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + FileSource csvFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Ingesting CSV file..."); - ExtendedIngestResponse csvResponse = - managedStreamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); + ExtendedIngestResponse csvResponse = managedStreamingIngestClient.ingestAsync(database, table, csvFileSource, csvProperties).get(); printIngestionResult("CSV File", csvResponse); // Example 2: Ingest compressed JSON file with mapping - 
FileSource jsonFileSource = - new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP, - "m-ds-json-compressed"); - - IngestRequestProperties jsonProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + FileSource jsonFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP); + + IngestionMapping ingestionMapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(ingestionMapping) + .withEnableTracking(true) + .build(); System.out.println("Ingesting compressed JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = - managedStreamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); + ExtendedIngestResponse jsonResponse = managedStreamingIngestClient.ingestAsync(database, table, jsonFileSource, jsonProperties).get(); printIngestionResult("Compressed JSON File", jsonResponse); } @@ -288,27 +258,23 @@ static void demonstrateFallbackTracking() throws Exception { // Mark the stream for potential retry (seekable stream) largeInputStream.mark(dataBytes.length); - StreamSource largeStreamSource = - new StreamSource( - largeInputStream, - Format.csv, - CompressionType.NONE, // Will be auto-compressed by the client - UUID.randomUUID(), - "large-data-fallback-demo", - false); + StreamSource largeStreamSource = new StreamSource( + largeInputStream, + Format.csv, + CompressionType.NONE, // Will be auto-compressed by the client + UUID.randomUUID(), + false); - IngestRequestProperties properties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + IngestRequestProperties properties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println( "Ingesting large dataset (" + formatBytes(dataBytes.length) + " uncompressed)..."); System.out.println("(Watch for fallback log messages from ManagedStreamingIngestClient)"); System.out.println(); - ExtendedIngestResponse response = - managedStreamingIngestClient.ingestAsync(largeStreamSource, properties).get(); + ExtendedIngestResponse response = managedStreamingIngestClient.ingestAsync(database, table, largeStreamSource, properties).get(); printIngestionResult("Large Data Ingestion", response); // The large data should trigger queued fallback @@ -316,26 +282,23 @@ static void demonstrateFallbackTracking() throws Exception { System.out.println("SUCCESS: Large data correctly triggered QUEUED fallback!"); System.out.println("This demonstrates the automatic size-based routing.\n"); - IngestionOperation operation = - new IngestionOperation( - Objects.requireNonNull( - response.getIngestResponse().getIngestionOperationId()), - database, - table, - response.getIngestionType()); + IngestionOperation operation = new IngestionOperation( + Objects.requireNonNull( + response.getIngestResponse().getIngestionOperationId()), + database, + table, + response.getIngestionType()); // Get initial operation details - CompletableFuture detailsFuture = - managedStreamingIngestClient.getOperationDetailsAsync(operation); + CompletableFuture detailsFuture = managedStreamingIngestClient.getOperationDetailsAsync(operation); StatusResponse details = detailsFuture.get(); 
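// The blocking get() above keeps this walkthrough linear. Because getOperationDetailsAsync
// returns a CompletableFuture, the same status check can also be composed without blocking;
// a minimal sketch using only objects already in scope (illustrative, not part of the
// original sample flow):
detailsFuture.thenAccept(d -> printStatusResponse("Async Status", d));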
printStatusResponse("Initial Status", details); // Poll for completion using getOperationDetailsAsync System.out.println( "\nPolling for completion (checking every 30 seconds, timeout 2 minutes)..."); - StatusResponse finalStatus = - pollForCompletionManually( - operation, Duration.ofSeconds(30), Duration.ofMinutes(2)); + StatusResponse finalStatus = pollForCompletionManually( + operation, Duration.ofSeconds(30), Duration.ofMinutes(2)); printStatusResponse("Final Status", finalStatus); } else { @@ -430,9 +393,12 @@ static void demonstrateFallbackTracking() throws Exception { /** Formats bytes into a human-readable string (e.g., "10.00 MB"). */ private static @NotNull String formatBytes(long bytes) { - if (bytes < 1024) return bytes + " B"; - if (bytes < 1024 * 1024) return String.format("%.2f KB", bytes / 1024.0); - if (bytes < 1024 * 1024 * 1024) return String.format("%.2f MB", bytes / (1024.0 * 1024.0)); + if (bytes < 1024) + return bytes + " B"; + if (bytes < 1024 * 1024) + return String.format("%.2f KB", bytes / 1024.0); + if (bytes < 1024 * 1024 * 1024) + return String.format("%.2f MB", bytes / (1024.0 * 1024.0)); return String.format("%.2f GB", bytes / (1024.0 * 1024.0 * 1024.0)); } @@ -450,8 +416,7 @@ private static StatusResponse pollForCompletionManually( long intervalMillis = pollingInterval.toMillis(); while (System.currentTimeMillis() - startTime < timeoutMillis) { - StatusResponse status = - managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); + StatusResponse status = managedStreamingIngestClient.getOperationDetailsAsync(operation).get(); // Check if completed (no more in-progress items) Status summary = status.getStatus(); @@ -479,8 +444,7 @@ private static StatusResponse pollForCompletionManually( /** Prints the ingestion result including which method (streaming or queued) was used. */ private static void printIngestionResult( String operationName, @NotNull ExtendedIngestResponse response) { - String ingestionMethod = - response.getIngestionType() == IngestKind.STREAMING ? "STREAMING" : "QUEUED"; + String ingestionMethod = response.getIngestionType() == IngestKind.STREAMING ? 
"STREAMING" : "QUEUED"; System.out.println( "[" + operationName diff --git a/samples/src/main/java/ingestv2/QueuedIngestV2.java b/samples/src/main/java/ingestv2/QueuedIngestV2.java index ed0dfb7f2..317b34d08 100644 --- a/samples/src/main/java/ingestv2/QueuedIngestV2.java +++ b/samples/src/main/java/ingestv2/QueuedIngestV2.java @@ -3,9 +3,8 @@ package ingestv2; +import com.azure.core.credential.TokenCredential; import com.azure.identity.AzureCliCredentialBuilder; -import com.azure.identity.ChainedTokenCredential; -import com.azure.identity.ChainedTokenCredentialBuilder; import com.azure.identity.ClientSecretCredentialBuilder; import com.microsoft.azure.kusto.data.StringUtils; import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; @@ -13,15 +12,14 @@ import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; -import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus; -import com.microsoft.azure.kusto.ingest.v2.models.Format; -import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; -import com.microsoft.azure.kusto.ingest.v2.models.Status; -import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse; +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping; +import com.microsoft.azure.kusto.ingest.v2.models.*; import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; import com.microsoft.azure.kusto.ingest.v2.source.FileSource; import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import org.jetbrains.annotations.NotNull; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -43,42 +41,35 @@ public class QueuedIngestV2 { private static String database; private static String table; - private static String mapping; + private static String mappingName; private static QueuedIngestClient queuedIngestClient; public static void main(String[] args) { try { // Get configuration from system properties - String engineEndpoint = - System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" String appId = System.getProperty("app-id"); String appKey = System.getProperty("appKey"); String tenant = System.getProperty("tenant"); database = System.getProperty("dbName"); table = System.getProperty("tableName"); - mapping = System.getProperty("dataMappingName"); + mappingName = System.getProperty("dataMappingName"); - ChainedTokenCredential credential; + TokenCredential credential; // Create Azure AD credential if (StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { - credential = - new ChainedTokenCredentialBuilder() - .addFirst( - new ClientSecretCredentialBuilder() - .clientId(appId) - .clientSecret(appKey) - .tenantId(tenant) - .build()) - .build(); + credential = new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build(); } else { - credential = - new ChainedTokenCredentialBuilder() - .addFirst(new AzureCliCredentialBuilder().build()) - .build(); + // If there is no app credentials were passed, use AzCli be used for auth + credential = new AzureCliCredentialBuilder().build(); } if (engineEndpoint == null || 
engineEndpoint.isEmpty()) { @@ -87,11 +78,10 @@ public static void main(String[] args) { } // Create queued ingest client using the new v2 API - queuedIngestClient = - QueuedIngestClientBuilder.create(engineEndpoint) - .withAuthentication(credential) - .withMaxConcurrency(10) // Set maximum concurrent uploads - .build(); + queuedIngestClient = QueuedIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .withMaxConcurrency(10) // Set maximum concurrent uploads + .build(); System.out.println("Queued Ingest Client created successfully"); @@ -104,8 +94,7 @@ public static void main(String[] args) { allFutures.add(ingestMultipleSources()); // Wait for all operations to complete - CompletableFuture allOf = - CompletableFuture.allOf(allFutures.toArray(new CompletableFuture[0])); + CompletableFuture allOf = CompletableFuture.allOf(allFutures.toArray(new CompletableFuture[0])); System.out.println("\nWaiting for all ingestion operations to complete..."); allOf.get(5, TimeUnit.MINUTES); @@ -132,102 +121,87 @@ static List> ingestFromStream() throws Exception { List> futures = new ArrayList<>(); // Example 1: Ingest from in-memory CSV string - String csvData = - "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = - new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - - StreamSource csvStreamSource = - new StreamSource( - csvInputStream, - Format.csv, - CompressionType.NONE, - UUID.randomUUID(), - "csv-queued-stream", - false); - - IngestRequestProperties csvProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = new StreamSource( + csvInputStream, + Format.csv, + CompressionType.NONE, + UUID.randomUUID(), + false); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Queueing CSV data from string..."); - CompletableFuture csvFuture = - queuedIngestClient - .ingestAsync(csvStreamSource, csvProperties) - .thenCompose( - response -> { - System.out.println( - "CSV ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation(response, "CSV Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); + CompletableFuture csvFuture = queuedIngestClient + .ingestAsync(database, table, csvStreamSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "CSV ingestion queued. 
Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); futures.add(csvFuture); // Example 2: Ingest from compressed CSV file String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - InputStream compressedCsvStream = - new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.csv.gz")); - - StreamSource compressedStreamSource = - new StreamSource( - compressedCsvStream, - Format.csv, - CompressionType.GZIP, - UUID.randomUUID(), - "compressed-csv-queued-stream", - false); + InputStream compressedCsvStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.csv.gz")); + + StreamSource compressedStreamSource = new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + false); System.out.println("Queueing compressed CSV file..."); - CompletableFuture compressedFuture = - queuedIngestClient - .ingestAsync(compressedStreamSource, csvProperties) - .thenCompose( - response -> { - System.out.println( - "Compressed CSV ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation( - response, "Compressed CSV Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); + CompletableFuture compressedFuture = queuedIngestClient + .ingestAsync(database, table, compressedStreamSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "Compressed CSV ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation( + response, "Compressed CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); futures.add(compressedFuture); // Example 3: Ingest JSON with mapping - InputStream jsonStream = - new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); - - StreamSource jsonStreamSource = - new StreamSource( - jsonStream, - Format.json, - CompressionType.NONE, - UUID.randomUUID(), - "json-queued-stream", - false); - - IngestRequestProperties jsonProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + InputStream jsonStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); + + StreamSource jsonStreamSource = new StreamSource( + jsonStream, + Format.json, + CompressionType.NONE, + UUID.randomUUID(), + false); + IngestionMapping mapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mapping) + .withEnableTracking(true) + .build(); System.out.println("Queueing JSON file with mapping..."); - CompletableFuture jsonFuture = - queuedIngestClient - .ingestAsync(jsonStreamSource, jsonProperties) - .thenCompose( - response -> { - System.out.println( - "JSON ingestion queued. 
Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation(response, "JSON Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); + CompletableFuture jsonFuture = queuedIngestClient + .ingestAsync(database, table, jsonStreamSource, jsonProperties) + .thenCompose( + response -> { + System.out.println( + "JSON ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "JSON Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); futures.add(jsonFuture); return futures; @@ -245,61 +219,53 @@ static List> ingestFromFile() { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Example 1: Ingest CSV file - FileSource csvFileSource = - new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE, - "dataset.csv"); - - IngestRequestProperties csvProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + FileSource csvFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Queueing CSV file..."); - CompletableFuture csvFuture = - queuedIngestClient - .ingestAsync(csvFileSource, csvProperties) - .thenCompose( - response -> { - System.out.println( - "CSV file ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation(response, "CSV File"); - }); + CompletableFuture csvFuture = queuedIngestClient + .ingestAsync(database, table, csvFileSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "CSV file ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "CSV File"); + }); futures.add(csvFuture); // Example 2: Ingest compressed JSON file with mapping - FileSource jsonFileSource = - new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP, - "dataset.jsonz"); - - IngestRequestProperties jsonProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + FileSource jsonFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP); + IngestionMapping mapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mapping) + .withEnableTracking(true) + .build(); System.out.println("Queueing compressed JSON file with mapping..."); - CompletableFuture jsonFuture = - queuedIngestClient - .ingestAsync(jsonFileSource, jsonProperties) - .thenCompose( - response -> { - System.out.println( - "Compressed JSON file ingestion queued. 
Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation( - response, "Compressed JSON File"); - }); + CompletableFuture jsonFuture = queuedIngestClient + .ingestAsync(database, table, jsonFileSource, jsonProperties) + .thenCompose( + response -> { + System.out.println( + "Compressed JSON file ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation( + response, "Compressed JSON File"); + }); futures.add(jsonFuture); return futures; @@ -309,38 +275,33 @@ static List> ingestFromFile() { * Demonstrates batch ingestion from multiple sources in a single operation. This is more * efficient than ingesting sources one by one when you have multiple files. */ - static CompletableFuture ingestMultipleSources() { + static @NotNull CompletableFuture ingestMultipleSources() { System.out.println("\n=== Queued Ingestion from Multiple Sources (Batch) ==="); String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Create multiple file sources - FileSource source1 = - new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE, - "dataset.csv"); - - FileSource source2 = - new FileSource( - Paths.get(resourcesDirectory + "dataset.csv.gz"), - Format.csv, - UUID.randomUUID(), - CompressionType.GZIP, - "dataset.csv.gz"); + FileSource source1 = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE); + + FileSource source2 = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv.gz"), + Format.csv, + UUID.randomUUID(), + CompressionType.GZIP); List sources = Arrays.asList(source1, source2); - IngestRequestProperties properties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + IngestRequestProperties properties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Queueing multiple sources in batch..."); return queuedIngestClient - .ingestAsync(sources, properties) + .ingestAsync(database, table, sources, properties) .thenCompose( response -> { System.out.println( @@ -358,13 +319,12 @@ static CompletableFuture ingestMultipleSources() { */ private static CompletableFuture trackIngestionOperation( ExtendedIngestResponse response, String operationName) { - IngestionOperation operation = - new IngestionOperation( - Objects.requireNonNull( - response.getIngestResponse().getIngestionOperationId()), - database, - table, - response.getIngestionType()); + IngestionOperation operation = new IngestionOperation( + Objects.requireNonNull( + response.getIngestResponse().getIngestionOperationId()), + database, + table, + response.getIngestionType()); System.out.println("\n--- Tracking " + operationName + " ---"); @@ -409,7 +369,9 @@ private static CompletableFuture trackIngestionOperation( }); } - /** Prints detailed status information from a StatusResponse */ + /** + * Prints detailed status information from a StatusResponse + */ private static void printStatusResponse(StatusResponse statusResponse) { if (statusResponse == null) { System.out.println(" Status: null"); diff --git a/samples/src/main/java/ingestv2/StreamingIngestV2.java b/samples/src/main/java/ingestv2/StreamingIngestV2.java index 351ab89aa..83269d715 100644 --- a/samples/src/main/java/ingestv2/StreamingIngestV2.java +++ 
b/samples/src/main/java/ingestv2/StreamingIngestV2.java @@ -3,15 +3,15 @@ package ingestv2; +import com.azure.core.credential.TokenCredential; import com.azure.identity.AzureCliCredentialBuilder; -import com.azure.identity.ChainedTokenCredential; -import com.azure.identity.ChainedTokenCredentialBuilder; import com.azure.identity.ClientSecretCredentialBuilder; import com.microsoft.azure.kusto.data.StringUtils; import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder; import com.microsoft.azure.kusto.ingest.v2.client.StreamingIngestClient; import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping; import com.microsoft.azure.kusto.ingest.v2.models.Format; import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; @@ -33,49 +33,40 @@ public class StreamingIngestV2 { private static String database; private static String table; - private static String mapping; + private static String mappingName; private static StreamingIngestClient streamingIngestClient; public static void main(String[] args) { try { // Get configuration from system properties - String engineEndpoint = - System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" String appId = System.getProperty("app-id"); String appKey = System.getProperty("appKey"); String tenant = System.getProperty("tenant"); database = System.getProperty("dbName"); table = System.getProperty("tableName"); - mapping = System.getProperty("dataMappingName"); + mappingName = System.getProperty("dataMappingName"); - ChainedTokenCredential credential; + TokenCredential credential; // Create Azure AD credential if (StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(appKey) && StringUtils.isNotBlank(tenant)) { - credential = - new ChainedTokenCredentialBuilder() - .addFirst( - new ClientSecretCredentialBuilder() - .clientId(appId) - .clientSecret(appKey) - .tenantId(tenant) - .build()) - .build(); + credential = new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build(); } else { - credential = - new ChainedTokenCredentialBuilder() - .addFirst(new AzureCliCredentialBuilder().build()) - .build(); + credential = new AzureCliCredentialBuilder().build(); } // Create streaming ingest client using the new v2 API - streamingIngestClient = - StreamingIngestClientBuilder.create(engineEndpoint) - .withAuthentication(credential) - .build(); + streamingIngestClient = StreamingIngestClientBuilder.create(engineEndpoint) + .withAuthentication(credential) + .build(); System.out.println("Streaming Ingest Client created successfully"); @@ -98,48 +89,38 @@ static void ingestFromStream() throws Exception { System.out.println("\n=== Ingesting from Streams ==="); // Example 1: Ingest from in-memory CSV string - String csvData = - "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = - new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - - StreamSource csvStreamSource = - new StreamSource( - csvInputStream, - Format.csv, - CompressionType.NONE, - UUID.randomUUID(), - "csv-test-src", - false); - - 
IngestRequestProperties csvProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource csvStreamSource = new StreamSource( + csvInputStream, + Format.csv, + CompressionType.NONE, + UUID.randomUUID(), + false); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Ingesting CSV data from string..."); - ExtendedIngestResponse ingestResponse = - streamingIngestClient.ingestAsync(csvStreamSource, csvProperties).get(); + ExtendedIngestResponse ingestResponse = streamingIngestClient.ingestAsync(database, table, csvStreamSource, csvProperties).get(); System.out.println( "CSV ingestion completed. Operation ID: " + ingestResponse.getIngestResponse().getIngestionOperationId()); // Example 2: Ingest from compressed CSV file String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - FileInputStream compressedCsvStream = - new FileInputStream(resourcesDirectory + "dataset.csv.gz"); - - StreamSource compressedStreamSource = - new StreamSource( - compressedCsvStream, - Format.csv, - CompressionType.GZIP, - UUID.randomUUID(), - "compressed-csv-stream", - false); + FileInputStream compressedCsvStream = new FileInputStream(resourcesDirectory + "dataset.csv.gz"); + + StreamSource compressedStreamSource = new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + false); System.out.println("Ingesting compressed CSV file..."); - ExtendedIngestResponse compressedResponse = - streamingIngestClient.ingestAsync(compressedStreamSource, csvProperties).get(); + ExtendedIngestResponse compressedResponse = streamingIngestClient.ingestAsync(database, table, compressedStreamSource, csvProperties).get(); System.out.println( "Compressed CSV ingestion completed. Operation ID: " + compressedResponse.getIngestResponse().getIngestionOperationId()); @@ -148,24 +129,20 @@ static void ingestFromStream() throws Exception { // Example 3: Ingest JSON with mapping FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json"); - StreamSource jsonStreamSource = - new StreamSource( - jsonStream, - Format.json, - CompressionType.NONE, - UUID.randomUUID(), - "json-data-stream", - false); - - IngestRequestProperties jsonProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + StreamSource jsonStreamSource = new StreamSource( + jsonStream, + Format.json, + CompressionType.NONE, + UUID.randomUUID(), + false); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON)) + .withEnableTracking(true) + .build(); System.out.println("Ingesting JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = - streamingIngestClient.ingestAsync(jsonStreamSource, jsonProperties).get(); + ExtendedIngestResponse jsonResponse = streamingIngestClient.ingestAsync(database, table, jsonStreamSource, jsonProperties).get(); System.out.println( "JSON ingestion completed. 
Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); @@ -182,44 +159,36 @@ static void ingestFromFile() throws Exception { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; // Example 1: Ingest CSV file - FileSource csvFileSource = - new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE, - "jcsv-file-source"); - - IngestRequestProperties csvProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withEnableTracking(true) - .build(); + FileSource csvFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.csv"), + Format.csv, + UUID.randomUUID(), + CompressionType.NONE); + + IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Ingesting CSV file..."); - ExtendedIngestResponse csvResponse = - streamingIngestClient.ingestAsync(csvFileSource, csvProperties).get(); + ExtendedIngestResponse csvResponse = streamingIngestClient.ingestAsync(database, table, csvFileSource, csvProperties).get(); System.out.println( "CSV file ingestion completed. Operation ID: " + csvResponse.getIngestResponse().getIngestionOperationId()); // Example 2: Ingest compressed JSON file with mapping - FileSource jsonFileSource = - new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP, - "sjson-compressed-file"); - - IngestRequestProperties jsonProperties = - IngestRequestPropertiesBuilder.create(database, table) - .withIngestionMappingReference(mapping) - .withEnableTracking(true) - .build(); + FileSource jsonFileSource = new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON)) + .withEnableTracking(true) + .build(); System.out.println("Ingesting compressed JSON file with mapping..."); - ExtendedIngestResponse jsonResponse = - streamingIngestClient.ingestAsync(jsonFileSource, jsonProperties).get(); + ExtendedIngestResponse jsonResponse = streamingIngestClient.ingestAsync(database, table, jsonFileSource, jsonProperties).get(); System.out.println( "Compressed JSON file ingestion completed. 
Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); From 8a41876a310c7de0fa23e875c79fa00d0ec51eca Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:00:24 +0530 Subject: [PATCH 45/50] added code for wellknown kusto endpoints (#456) * added code for wellknown kusto endpoints * addressed review comments --- ingest-v2/pom.xml | 29 ++ .../kusto/ingest/v2/KustoBaseApiClient.kt | 10 + .../v2/auth/endpoints/FastSuffixMatcher.kt | 116 ++++++++ .../auth/endpoints/KustoTrustedEndpoints.kt | 216 ++++++++++++++ .../endpoints/WellKnownKustoEndpointsData.kt | 59 ++++ ...oClientInvalidConnectionStringException.kt | 17 ++ .../v2/TrustedEndpointValidationTest.kt | 272 ++++++++++++++++++ 7 files changed, 719 insertions(+) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/exceptions/KustoClientInvalidConnectionStringException.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index f06c611e5..767e62081 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -20,6 +20,7 @@ 1.4.14 11 5.10.0 + 3.3.1 7.15.0 2.0.9 2.46.1 @@ -183,6 +184,34 @@ + + + + org.apache.maven.plugins + maven-resources-plugin + ${maven.resources.plugin.version} + + + copy-well-known-endpoints + process-resources + + copy-resources + + + ${project.build.outputDirectory} + true + + + ${project.basedir}/../data/src/main/resources + + WellKnownKustoEndpoints.json + + + + + + + kotlin-maven-plugin org.jetbrains.kotlin diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 5e1830bdf..a96f3c94b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -5,6 +5,7 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential import com.azure.core.credential.TokenRequestContext import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi +import com.microsoft.azure.kusto.ingest.v2.auth.endpoints.KustoTrustedEndpoints import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer import io.ktor.client.HttpClientConfig @@ -37,6 +38,15 @@ open class KustoBaseApiClient( ) { private val logger = LoggerFactory.getLogger(KustoBaseApiClient::class.java) + init { + // Validate endpoint is trusted unless security checks are skipped + // Note: dmUrl might be empty/null in some test scenarios (e.g., mocked clients) + // The null check is required for Java interop - Java callers can pass null despite Kotlin's non-null type + if (!skipSecurityChecks && dmUrl != null && dmUrl.isNotBlank()) { + KustoTrustedEndpoints.validateTrustedEndpoint(dmUrl) + } + } + protected val setupConfig: (HttpClientConfig<*>) -> Unit = { config -> getClientConfig(config) } diff --git 
a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt
new file mode 100644
index 000000000..efeb67864
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt
@@ -0,0 +1,116 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.auth.endpoints
+
+/**
+ * Represents a matching rule for endpoint validation.
+ * @param suffix The suffix or hostname to match
+ * @param exact If true, the candidate must exactly match the suffix. If false, candidate must end with the suffix.
+ */
+data class MatchRule(
+    val suffix: String,
+    val exact: Boolean,
+) {
+    val suffixLength: Int
+        get() = suffix.length
+}
+
+/**
+ * Result of a match operation.
+ * @param isMatch Whether the candidate matched
+ * @param matchedRule The rule that matched, or null if no match
+ */
+data class MatchResult(
+    val isMatch: Boolean,
+    val matchedRule: MatchRule?,
+)
+
+/**
+ * A fast suffix matcher that efficiently matches hostnames against a set of rules.
+ * Uses a map indexed by suffix tail for O(1) lookup.
+ */
+class FastSuffixMatcher private constructor(
+    private val suffixLength: Int,
+    private val rules: Map<String, List<MatchRule>>,
+) {
+    companion object {
+        /**
+         * Creates a new matcher with the provided matching rules.
+         * @param rules One or more matching rules to apply when match is called
+         * @return FastSuffixMatcher
+         */
+        fun create(rules: List<MatchRule>): FastSuffixMatcher {
+            require(rules.isNotEmpty()) { "Rules cannot be empty" }
+
+            val minRuleLength = rules.minOfOrNull { it.suffixLength } ?: 0
+            require(minRuleLength > 0) {
+                "Cannot have a match rule whose length is zero"
+            }
+
+            val processedRules = mutableMapOf<String, MutableList<MatchRule>>()
+            for (rule in rules) {
+                val suffix = rule.suffix.takeLast(minRuleLength).lowercase()
+                processedRules.getOrPut(suffix) { mutableListOf() }.add(rule.copy())
+            }
+
+            return FastSuffixMatcher(minRuleLength, processedRules)
+        }
+
+        /**
+         * Creates a new matcher with the provided matching rules, extending an
+         * existing matcher.
+         * @param existing An existing matcher whose rules are taken as the baseline
+         * @param rules One or more matching rules to apply when match is called
+         * @return FastSuffixMatcher
+         */
+        fun create(
+            existing: FastSuffixMatcher?,
+            rules: List<MatchRule>,
+        ): FastSuffixMatcher {
+            if (existing == null || existing.rules.isEmpty()) {
+                return create(rules)
+            }
+
+            if (rules.isEmpty()) {
+                return existing
+            }
+
+            val combinedRules =
+                rules + existing.rules.values.flatten()
+            return create(combinedRules)
+        }
+    }
+
+    /**
+     * Checks if a candidate string matches any of the rules.
+     * @param candidate A string to match to the list of match rules
+     * @return true if at least one of the rules matched
+     */
+    fun isMatch(candidate: String): Boolean = match(candidate).isMatch
+
+    /**
+     * Matches an input string to the list of match rules.
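+     * For example, given the single rule MatchRule(".kusto.windows.net", exact = false),
+     * match("mycluster.kusto.windows.net") returns isMatch = true, while
+     * match("kusto.windows.net.evil.example") returns isMatch = false because the
+     * candidate does not end with the registered suffix.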
+     * @param candidate A string to match
+     * @return MatchResult with match status and the matched rule if any
+     */
+    fun match(candidate: String): MatchResult {
+        if (candidate.length < suffixLength) {
+            return MatchResult(false, null)
+        }
+
+        val tail = candidate.takeLast(suffixLength).lowercase()
+        val matchRules = rules[tail]
+
+        if (matchRules != null) {
+            for (rule in matchRules) {
+                if (candidate.endsWith(rule.suffix, ignoreCase = true)) {
+                    if (candidate.length == rule.suffix.length || !rule.exact) {
+                        return MatchResult(true, rule)
+                    }
+                }
+            }
+        }
+
+        return MatchResult(false, null)
+    }
+}
\ No newline at end of file
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt
new file mode 100644
index 000000000..33efe6911
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt
@@ -0,0 +1,216 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.auth.endpoints
+
+import com.microsoft.azure.kusto.ingest.v2.exceptions.KustoClientInvalidConnectionStringException
+import org.slf4j.LoggerFactory
+import java.net.URI
+import java.net.URISyntaxException
+
+/**
+ * A helper class to determine which DNS names are "well-known/trusted"
+ * Kusto endpoints. Untrusted endpoints might require additional configuration
+ * before they can be used, for security reasons.
+ */
+object KustoTrustedEndpoints {
+    private val logger = LoggerFactory.getLogger(KustoTrustedEndpoints::class.java)
+
+    /**
+     * Global flag to enable/disable endpoint validation.
+     * When false, untrusted endpoints will only log a warning instead of
+     * throwing an exception.
+     */
+    @JvmField
+    @Volatile
+    var enableWellKnownKustoEndpointsValidation: Boolean = true
+
+    private val matchers: MutableMap<String, FastSuffixMatcher> = mutableMapOf()
+
+    @Volatile
+    private var additionalMatcher: FastSuffixMatcher? = null
+
+    @Volatile
+    private var overrideMatcher: ((String) -> Boolean)? = null
+
+    // Default login endpoint for public cloud
+    private const val DEFAULT_PUBLIC_LOGIN_ENDPOINT =
+        "https://login.microsoftonline.com"
+
+    init {
+        loadEndpointsFromJson()
+    }
+
+    private fun loadEndpointsFromJson() {
+        try {
+            val endpointsData = WellKnownKustoEndpointsData.getInstance()
+
+            endpointsData.allowedEndpointsByLogin.forEach { (loginEndpoint, allowedEndpoints) ->
+                val rules = mutableListOf<MatchRule>()
+
+                // Add suffix rules (exact = false)
+                allowedEndpoints.allowedKustoSuffixes.forEach { suffix ->
+                    rules.add(MatchRule(suffix, exact = false))
+                }
+
+                // Add hostname rules (exact = true)
+                allowedEndpoints.allowedKustoHostnames.forEach { hostname ->
+                    rules.add(MatchRule(hostname, exact = true))
+                }
+
+                if (rules.isNotEmpty()) {
+                    matchers[loginEndpoint.lowercase()] = FastSuffixMatcher.create(rules)
+                }
+            }
+
+            logger.debug(
+                "Loaded {} login endpoint configurations from WellKnownKustoEndpoints.json",
+                matchers.size,
+            )
+        } catch (ex: Exception) {
+            logger.error("Failed to load WellKnownKustoEndpoints.json", ex)
+            throw ex
+        }
+    }
+
+    /**
+     * Sets an override policy for endpoint validation.
+     * @param matcher Rules that determine if a hostname is a valid/trusted
+     *   Kusto endpoint (replaces existing rules)
+     */
+    fun setOverridePolicy(matcher: ((String) -> Boolean)?) {
+        overrideMatcher = matcher
+    }
+
+    /**
+     * Adds additional trusted hosts to the matcher.
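+     * For example, addTrustedHosts(listOf(MatchRule(".contoso.com", exact = false)), true)
+     * additionally trusts any hostname ending in ".contoso.com" (a hypothetical
+     * suffix, shown here only for illustration).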
+     * @param rules A set of rules
+     * @param replace If true, nullifies the last added rules
+     */
+    fun addTrustedHosts(
+        rules: List<MatchRule>?,
+        replace: Boolean,
+    ) {
+        if (rules.isNullOrEmpty()) {
+            if (replace) {
+                additionalMatcher = null
+            }
+            return
+        }
+
+        additionalMatcher =
+            FastSuffixMatcher.create(if (replace) null else additionalMatcher, rules)
+    }
+
+    /**
+     * Validates that the endpoint is trusted.
+     * @param uri Kusto endpoint URI string
+     * @param loginEndpoint The login endpoint to check against (optional, defaults to public cloud)
+     * @throws KustoClientInvalidConnectionStringException if the endpoint is not trusted
+     */
+    fun validateTrustedEndpoint(
+        uri: String,
+        loginEndpoint: String = DEFAULT_PUBLIC_LOGIN_ENDPOINT,
+    ) {
+        try {
+            validateTrustedEndpoint(URI(uri), loginEndpoint)
+        } catch (ex: URISyntaxException) {
+            throw KustoClientInvalidConnectionStringException(uri, ex.message ?: "Invalid URI", ex)
+        }
+    }
+
+    /**
+     * Validates that the endpoint is trusted.
+     * @param uri Kusto endpoint URI
+     * @param loginEndpoint The login endpoint to check against
+     * @throws KustoClientInvalidConnectionStringException if the endpoint is not trusted
+     */
+    fun validateTrustedEndpoint(
+        uri: URI,
+        loginEndpoint: String,
+    ) {
+        val host = uri.host ?: uri.toString()
+        validateHostnameIsTrusted(host, loginEndpoint)
+    }
+
+    /**
+     * Validates that a hostname is trusted.
+     * @param hostname The hostname to validate
+     * @param loginEndpoint The login endpoint to check against
+     * @throws KustoClientInvalidConnectionStringException if the hostname is not trusted
+     */
+    private fun validateHostnameIsTrusted(
+        hostname: String,
+        loginEndpoint: String,
+    ) {
+        // Loopback addresses are unconditionally allowed (we trust ourselves)
+        if (isLocalAddress(hostname)) {
+            return
+        }
+
+        // Check override matcher first
+        val override = overrideMatcher
+        if (override != null) {
+            if (override(hostname)) {
+                return
+            }
+        } else {
+            // Check against login-specific matchers
+            val matcher = matchers[loginEndpoint.lowercase()]
+            if (matcher != null && matcher.isMatch(hostname)) {
+                return
+            }
+        }
+
+        // Check additional matchers
+        val additional = additionalMatcher
+        if (additional != null && additional.isMatch(hostname)) {
+            return
+        }
+
+        // Not trusted
+        if (!enableWellKnownKustoEndpointsValidation) {
+            logger.warn(
+                "Can't communicate with '{}' as this hostname is currently not trusted; " +
+                    "please see https://aka.ms/kustotrustedendpoints.",
+                hostname,
+            )
+            return
+        }
+
+        throw KustoClientInvalidConnectionStringException(
+            "\$\$ALERT[ValidateHostnameIsTrusted]: Can't communicate with '$hostname' " +
+                "as this hostname is currently not trusted; " +
+                "please see https://aka.ms/kustotrustedendpoints",
+        )
+    }
+
+    /**
+     * Checks if the hostname is a local/loopback address.
+     */
+    private fun isLocalAddress(hostname: String): Boolean {
+        val lowerHost = hostname.lowercase()
+        return lowerHost == "localhost" ||
+            lowerHost == "127.0.0.1" ||
+            lowerHost == "::1" ||
+            lowerHost == "[::1]" ||
+            lowerHost.startsWith("localhost:")
+    }
+
+    /**
+     * Checks if a hostname is trusted without throwing an exception.
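+     * For example, isTrusted("mycluster.kusto.windows.net") returns true against
+     * the default public cloud login endpoint, while isTrusted("random.example.com")
+     * returns false.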
+     * @param hostname The hostname to check
+     * @param loginEndpoint The login endpoint to check against
+     * @return true if the hostname is trusted
+     */
+    fun isTrusted(
+        hostname: String,
+        loginEndpoint: String = DEFAULT_PUBLIC_LOGIN_ENDPOINT,
+    ): Boolean {
+        return try {
+            validateHostnameIsTrusted(hostname, loginEndpoint)
+            true
+        } catch (_: KustoClientInvalidConnectionStringException) {
+            false
+        }
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt
new file mode 100644
index 000000000..978c7608a
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.auth.endpoints
+
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable as KSerializable
+import kotlinx.serialization.json.Json
+import org.slf4j.LoggerFactory
+
+/**
+ * Data class representing the structure of WellKnownKustoEndpoints.json
+ */
+@KSerializable
+data class AllowedEndpoints(
+    @SerialName("AllowedKustoSuffixes")
+    val allowedKustoSuffixes: List<String> = emptyList(),
+    @SerialName("AllowedKustoHostnames")
+    val allowedKustoHostnames: List<String> = emptyList(),
+)
+
+@KSerializable
+data class WellKnownKustoEndpointsData(
+    @SerialName("_Comments")
+    val comments: List<String> = emptyList(),
+    @SerialName("AllowedEndpointsByLogin")
+    val allowedEndpointsByLogin: Map<String, AllowedEndpoints> = emptyMap(),
+) {
+    companion object {
+        private val logger = LoggerFactory.getLogger(WellKnownKustoEndpointsData::class.java)
+
+        @Volatile
+        private var instance: WellKnownKustoEndpointsData? = null
+
+        private val json = Json {
+            ignoreUnknownKeys = true
+            isLenient = true
+        }
+
+        fun getInstance(): WellKnownKustoEndpointsData {
+            return instance ?: synchronized(this) {
+                instance ?: readInstance().also { instance = it }
+            }
+        }
+
+        private fun readInstance(): WellKnownKustoEndpointsData {
+            return try {
+                val resourceStream = WellKnownKustoEndpointsData::class.java
+                    .getResourceAsStream("/WellKnownKustoEndpoints.json")
+                    ?: throw RuntimeException("WellKnownKustoEndpoints.json not found in classpath")
+
+                val content = resourceStream.bufferedReader().use { it.readText() }
+                json.decodeFromString(content)
+            } catch (ex: Exception) {
+                logger.error("Failed to read WellKnownKustoEndpoints.json", ex)
+                throw RuntimeException("Failed to read WellKnownKustoEndpoints.json", ex)
+            }
+        }
+    }
+}
diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/exceptions/KustoClientInvalidConnectionStringException.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/exceptions/KustoClientInvalidConnectionStringException.kt
new file mode 100644
index 000000000..7bda6f3fe
--- /dev/null
+++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/exceptions/KustoClientInvalidConnectionStringException.kt
@@ -0,0 +1,17 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+package com.microsoft.azure.kusto.ingest.v2.exceptions
+
+/**
+ * Exception thrown when a connection string is invalid or the endpoint is not
+ * trusted.
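+ * Raised, for example, by KustoTrustedEndpoints.validateTrustedEndpoint when a
+ * hostname does not appear in the well-known endpoints list; see
+ * https://aka.ms/kustotrustedendpoints.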
+ */ +class KustoClientInvalidConnectionStringException : RuntimeException { + constructor(message: String) : super(message) + + constructor( + uri: String, + message: String, + cause: Throwable, + ) : super("Invalid connection string for URI '$uri': $message", cause) +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt new file mode 100644 index 000000000..e2b1c489b --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt @@ -0,0 +1,272 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2 + +import com.azure.core.credential.AccessToken +import com.azure.core.credential.TokenCredential +import com.azure.core.credential.TokenRequestContext +import com.microsoft.azure.kusto.ingest.v2.auth.endpoints.KustoTrustedEndpoints +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.exceptions.KustoClientInvalidConnectionStringException +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertDoesNotThrow +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.api.parallel.Execution +import org.junit.jupiter.api.parallel.ExecutionMode +import reactor.core.publisher.Mono +import java.time.OffsetDateTime +import kotlin.test.assertTrue + +/** + * Tests for endpoint validation functionality in ingest-v2. + * + * These tests verify that: + * 1. Adhoc/untrusted Kusto endpoints are blocked by default + * 2. The skipSecurityChecks() method allows untrusted endpoints + * 3. Trusted Kusto endpoints are allowed by default + * + * The validation is now implemented natively in ingest-v2 using: + * - WellKnownKustoEndpoints.json (copied from data module during build) + * - KustoTrustedEndpoints object for validation logic + * - KustoClientInvalidConnectionStringException for untrusted endpoints + * + * Note: This test class uses SAME_THREAD execution mode to prevent race conditions + * when modifying the global enableWellKnownKustoEndpointsValidation flag. 
+ */ +@Execution(ExecutionMode.SAME_THREAD) +class TrustedEndpointValidationTest { + + // Mock token credential for testing + private val mockTokenCredential = TokenCredential { _: TokenRequestContext -> + Mono.just(AccessToken("mock-token", OffsetDateTime.now().plusHours(1))) + } + + // Example of an adhoc/untrusted endpoint + private val untrustedEndpoint = "https://my-random-adhoc-cluster.example.com" + + // Example of a trusted Kusto endpoint (public cloud) + private val trustedEndpoint = "https://mycluster.kusto.windows.net" + + // Store original validation state to restore after tests + private var originalValidationState: Boolean = true + + @BeforeEach + fun setUp() { + // Save original state and ensure validation is enabled + originalValidationState = + KustoTrustedEndpoints.enableWellKnownKustoEndpointsValidation + KustoTrustedEndpoints.enableWellKnownKustoEndpointsValidation = true + } + + @AfterEach + fun tearDown() { + // Restore original validation state + KustoTrustedEndpoints.enableWellKnownKustoEndpointsValidation = + originalValidationState + } + + // ============================================================================ + // UNTRUSTED ENDPOINT TESTS - Should throw exception + // ============================================================================ + + @Test + @DisplayName("StreamingIngestClient: Untrusted endpoint throws exception without skipSecurityChecks") + fun `streaming client - untrusted endpoint throws without skip security checks`() { + val exception = + assertThrows { + StreamingIngestClientBuilder.create(untrustedEndpoint) + .withAuthentication(mockTokenCredential) + // Note: NOT calling skipSecurityChecks() + .build() + } + + assertTrue( + exception.message?.contains("not trusted") == true || + exception.message?.contains("kustotrustedendpoints") == true, + "Exception should indicate endpoint is not trusted. Actual: ${exception.message}", + ) + } + + @Test + @DisplayName("QueuedIngestClient: Untrusted endpoint throws exception without skipSecurityChecks") + fun `queued client - untrusted endpoint throws without skip security checks`() { + val exception = + assertThrows { + QueuedIngestClientBuilder.create(untrustedEndpoint) + .withAuthentication(mockTokenCredential) + // Note: NOT calling skipSecurityChecks() + .build() + } + + assertTrue( + exception.message?.contains("not trusted") == true || + exception.message?.contains("kustotrustedendpoints") == true, + "Exception should indicate endpoint is not trusted. 
Actual: ${exception.message}", + ) + } + + // ============================================================================ + // SKIP SECURITY CHECKS TESTS - Should work with the flag + // ============================================================================ + + @Test + @DisplayName("StreamingIngestClient: Untrusted endpoint works with skipSecurityChecks") + fun `streaming client - untrusted endpoint works with skip security checks`() { + assertDoesNotThrow { + StreamingIngestClientBuilder.create(untrustedEndpoint) + .withAuthentication(mockTokenCredential) + .skipSecurityChecks() + .build() + } + } + + @Test + @DisplayName("QueuedIngestClient: Untrusted endpoint works with skipSecurityChecks") + fun `queued client - untrusted endpoint works with skip security checks`() { + assertDoesNotThrow { + QueuedIngestClientBuilder.create(untrustedEndpoint) + .withAuthentication(mockTokenCredential) + .skipSecurityChecks() + .build() + } + } + + // ============================================================================ + // TRUSTED ENDPOINT TESTS - Should work without skipSecurityChecks + // ============================================================================ + + @Test + @DisplayName("StreamingIngestClient: Trusted Kusto endpoint works without skipSecurityChecks") + fun `streaming client - trusted endpoint works without skip security checks`() { + assertDoesNotThrow { + StreamingIngestClientBuilder.create(trustedEndpoint) + .withAuthentication(mockTokenCredential) + .build() + } + } + + @Test + @DisplayName("QueuedIngestClient: Trusted Kusto endpoint works without skipSecurityChecks") + fun `queued client - trusted endpoint works without skip security checks`() { + assertDoesNotThrow { + QueuedIngestClientBuilder.create(trustedEndpoint) + .withAuthentication(mockTokenCredential) + .build() + } + } + + // ============================================================================ + // GLOBAL FLAG TESTS + // ============================================================================ + + @Test + @DisplayName("Global validation flag can disable endpoint checks") + fun `global validation flag disables endpoint checks`() { + // Disable validation globally + KustoTrustedEndpoints.enableWellKnownKustoEndpointsValidation = false + + try { + // Now untrusted endpoints should work even without skipSecurityChecks + assertDoesNotThrow { + StreamingIngestClientBuilder.create(untrustedEndpoint) + .withAuthentication(mockTokenCredential) + .build() + } + } finally { + // Re-enable for other tests + KustoTrustedEndpoints.enableWellKnownKustoEndpointsValidation = true + } + } + + // ============================================================================ + // CLOUD-SPECIFIC ENDPOINT TESTS + // ============================================================================ + + @Test + @DisplayName("Various cloud-specific endpoints are trusted") + fun `cloud specific endpoints are trusted`() { + val trustedEndpoints = + listOf( + // Public cloud + "https://mycluster.kusto.windows.net", + "https://mycluster.kustodev.windows.net", + "https://mycluster.kustomfa.windows.net", + // Fabric + "https://mycluster.kusto.fabric.microsoft.com", + // Synapse + "https://mycluster.kusto.azuresynapse.net", + ) + + trustedEndpoints.forEach { endpoint -> + assertDoesNotThrow("Endpoint $endpoint should be trusted") { + StreamingIngestClientBuilder.create(endpoint) + .withAuthentication(mockTokenCredential) + .build() + } + } + } + + @Test + @DisplayName("Localhost endpoints are always trusted") + fun `localhost 
endpoints are trusted`() { + val localhostEndpoints = + listOf( + "https://localhost:8080", + "https://127.0.0.1:8080", + "https://localhost", + ) + + localhostEndpoints.forEach { endpoint -> + assertDoesNotThrow("Localhost endpoint $endpoint should be trusted") { + StreamingIngestClientBuilder.create(endpoint) + .withAuthentication(mockTokenCredential) + .build() + } + } + } + + // ============================================================================ + // DIRECT API TESTS - Test KustoTrustedEndpoints directly + // ============================================================================ + + @Test + @DisplayName("KustoTrustedEndpoints.isTrusted returns correct values") + fun `isTrusted returns correct values`() { + assertTrue( + KustoTrustedEndpoints.isTrusted("mycluster.kusto.windows.net"), + "Public cloud endpoint should be trusted", + ) + assertTrue( + KustoTrustedEndpoints.isTrusted("mycluster.kusto.fabric.microsoft.com"), + "Fabric endpoint should be trusted", + ) + assertTrue( + KustoTrustedEndpoints.isTrusted("localhost"), + "Localhost should be trusted", + ) + assertTrue( + !KustoTrustedEndpoints.isTrusted("random.example.com"), + "Random endpoint should not be trusted", + ) + } + + @Test + @DisplayName("KustoTrustedEndpoints.validateTrustedEndpoint throws for untrusted") + fun `validateTrustedEndpoint throws for untrusted endpoints`() { + assertThrows { + KustoTrustedEndpoints.validateTrustedEndpoint("https://evil.example.com") + } + } + + @Test + @DisplayName("KustoTrustedEndpoints.validateTrustedEndpoint passes for trusted") + fun `validateTrustedEndpoint passes for trusted endpoints`() { + assertDoesNotThrow { + KustoTrustedEndpoints.validateTrustedEndpoint("https://mycluster.kusto.windows.net") + } + } +} \ No newline at end of file From 66de6e78d50ecd0e278780fb13a1f7f756434c06 Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Tue, 20 Jan 2026 16:43:42 +0530 Subject: [PATCH 46/50] Users/tanmayapanda/address review comments (#458) * fixed review comments --------- Co-authored-by: ag-ramachandran --- .../kusto/ingest/v2/client/IngestClient.kt | 25 +- .../ingest/v2/client/QueuedIngestClient.kt | 96 +--- .../ingest/v2/common/ConfigurationCache.kt | 53 ++ .../kusto/ingest/v2/source/FileSource.kt | 4 +- .../kusto/ingest/v2/source/StreamSource.kt | 6 +- .../v2/uploader/ContainerUploaderBase.kt | 180 +++--- .../ingest/v2/uploader/ICustomUploader.kt | 99 ++++ .../kusto/ingest/v2/QueuedIngestClientTest.kt | 167 +++--- .../azure/kusto/quickstart/SampleApp.java | 92 ++- .../ingestv2/AzureBlobRestCustomUploader.java | 541 ++++++++++++++++++ .../main/java/ingestv2/QueuedIngestV2.java | 431 +++++++++----- .../main/java/ingestv2/StreamingIngestV2.java | 24 +- 12 files changed, 1339 insertions(+), 379 deletions(-) create mode 100644 ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt create mode 100644 samples/src/main/java/ingestv2/AzureBlobRestCustomUploader.java diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt index 4799f850d..5f0535dee 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/IngestClient.kt @@ -80,18 +80,33 @@ interface IngestClient : Closeable { interface MultiIngestClient : IngestClient { /** - * Ingest data from multiple 
sources. + * Ingest data from multiple blob sources. * - * @param sources The sources to ingest. + * **Important:** Multi-blob ingestion only supports [BlobSource]. This + * design avoids partial failure scenarios where some local sources might be + * uploaded successfully while others fail, leaving the user in an + * inconsistent state. + * + * **For local files/streams**, you have two options: + * 1. **Single-source ingestion**: Use `ingestAsync(source, properties)` + * with a single [com.microsoft.azure.kusto.ingest.v2.source.LocalSource] + * (FileSource or StreamSource). The client handles upload internally. + * 2. **Multi-source ingestion**: Use + * [com.microsoft.azure.kusto.ingest.v2.uploader.IUploader] to upload + * local sources to blob storage first, then call this method with the + * resulting [BlobSource] objects. + * + * @param sources The blob sources to ingest. All sources must be + * [BlobSource] instances. * @param ingestRequestProperties Ingestion properties containing database, * table, format, and other settings. - * @return An [IngestionOperation] object that can be used to track the - * status of the ingestion. + * @return An [ExtendedIngestResponse] containing the ingestion operation + * details. */ suspend fun ingestAsync( database: String, table: String, - sources: List, + sources: List, ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt index 038ac94dc..e4aaf0908 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/client/QueuedIngestClient.kt @@ -28,9 +28,6 @@ import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader import io.ktor.http.HttpStatusCode import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.async -import kotlinx.coroutines.awaitAll -import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.delay import kotlinx.coroutines.future.future import kotlinx.coroutines.withTimeoutOrNull @@ -74,13 +71,17 @@ internal constructor( private val logger = LoggerFactory.getLogger(QueuedIngestClient::class.java) /** - * Ingests data from multiple sources with the given properties. This is the - * suspend function for Kotlin callers. + * Ingests data from multiple blob sources with the given properties. This + * is the suspend function for Kotlin callers. + * + * Multi-blob ingestion only supports [BlobSource]. The blobs are assumed to + * already exist in blob storage, so no upload is performed - the request is + * sent directly to the Data Management service. */ override suspend fun ingestAsync( database: String, table: String, - sources: List, + sources: List, ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse = ingestAsyncInternal( @@ -108,14 +109,18 @@ internal constructor( ) /** - * Ingests data from multiple sources with the given properties. This is the - * Java-friendly version that returns a CompletableFuture. + * Ingests data from multiple blob sources with the given properties. This + * is the Java-friendly version that returns a CompletableFuture. + * + * Multi-blob ingestion only supports [BlobSource]. 
The blobs are assumed to + * already exist in blob storage, so no upload is performed - the request is + * sent directly to the Data Management service. */ @JvmName("ingestAsync") fun ingestAsyncJava( database: String, table: String, - sources: List, + sources: List, ingestRequestProperties: IngestRequestProperties?, ): CompletableFuture = CoroutineScope(Dispatchers.IO).future { @@ -197,14 +202,18 @@ internal constructor( ) } - /** Internal implementation of ingestAsync for multiple sources. */ + /** + * Internal implementation of ingestAsync for multiple blob sources. + * + * This method only accepts [BlobSource] - no upload is performed. The blobs + * are assumed to already exist in blob storage. + */ private suspend fun ingestAsyncInternal( database: String, table: String, - sources: List, + sources: List, ingestRequestProperties: IngestRequestProperties?, ): ExtendedIngestResponse { - // Extract database and table from properties // Validate sources list is not empty require(sources.isNotEmpty()) { "sources list cannot be empty" } val maxBlobsPerBatch = getMaxSourcesPerMultiIngest() @@ -228,17 +237,14 @@ internal constructor( differentFormatBlob.joinToString(", "), ) throw IngestClientException( - "All blobs in the request must have the same format. All blobs in the request must have the same format.Received formats: $differentFormatBlob", + message = + "All blobs in the request must have the same format. Received formats: $differentFormatBlob", ) } - // Split sources and upload local sources in parallel - val blobSources = uploadLocalSourcesAsync(sources) - // Check for duplicate blob URLs val duplicates = - blobSources - .groupBy { sanitizeBlobUrl(it.blobPath) } + sources.groupBy { sanitizeBlobUrl(it.blobPath) } .filter { it.value.size > 1 } if (duplicates.isNotEmpty()) { @@ -251,12 +257,14 @@ internal constructor( "{Url: $url, Source Ids: [$sourceIds]}" } throw IngestClientException( + message = "Duplicate blob sources detected in the request: [$duplicateInfo]", ) } + // Create blob objects for the request val blobs = - blobSources.map { + sources.map { Blob( it.blobPath, sourceId = it.sourceId.toString(), @@ -340,6 +348,7 @@ internal constructor( } else -> { throw IngestClientException( + message = "Unsupported ingestion source type: ${source::class.simpleName}", ) } @@ -382,55 +391,6 @@ internal constructor( } } - /** - * Splits sources into BlobSources and LocalSources, uploads LocalSources in - * parallel, and returns a unified list of BlobSources. 
- * - * @param sources The list of ingestion sources to process - * @return A list of BlobSources including both original BlobSources and - * uploaded LocalSources - * @throws IngestClientException if an unsupported source type is - * encountered - */ - private suspend fun uploadLocalSourcesAsync( - sources: List, - ): List { - // Split sources into BlobSources and LocalSources - val blobSources = mutableListOf() - val localSources = mutableListOf() - - sources.forEach { source -> - when (source) { - is BlobSource -> blobSources.add(source) - is LocalSource -> localSources.add(source) - else -> - throw IngestClientException( - "Unsupported ingestion source type: ${source::class.simpleName}", - ) - } - } - - // Upload LocalSources in parallel and collect the resulting BlobSources - if (localSources.isNotEmpty()) { - logger.info( - "Uploading ${localSources.size} local source(s) to blob storage", - ) - val uploadedBlobs = coroutineScope { - localSources - .map { localSource -> - async { uploader.uploadAsync(localSource) } - } - .awaitAll() - } - blobSources.addAll(uploadedBlobs) - logger.info( - "Successfully uploaded ${uploadedBlobs.size} local source(s)", - ) - } - - return blobSources - } - /** * Sanitizes a blob URL by removing the SAS token and query parameters to * allow proper duplicate detection. diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index b716a7bb1..d9530f546 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -69,6 +69,59 @@ class DefaultConfigurationCache( val clientDetails: ClientDetails, val configurationProvider: (suspend () -> ConfigurationResponse)? = null, ) : ConfigurationCache { + companion object { + /** + * Creates a DefaultConfigurationCache for Java callers. + * + * This factory method provides a convenient way to create a cache from + * Java without dealing with Kotlin named parameters. + * + * @param dmUrl Data management endpoint URL + * @param tokenCredential Authentication credentials + * @param clientDetails Client identification details for tracking + * @return A new DefaultConfigurationCache instance + */ + @JvmStatic + fun create( + dmUrl: String, + tokenCredential: TokenCredential, + clientDetails: ClientDetails, + ): DefaultConfigurationCache = + DefaultConfigurationCache( + dmUrl = dmUrl, + tokenCredential = tokenCredential, + clientDetails = clientDetails, + ) + + /** + * Creates a DefaultConfigurationCache with all options for Java + * callers. 
+ * + * @param dmUrl Data management endpoint URL + * @param tokenCredential Authentication credentials + * @param skipSecurityChecks Whether to skip security validation + * @param clientDetails Client identification details for tracking + * @param refreshInterval Duration after which cached configuration is + * stale + * @return A new DefaultConfigurationCache instance + */ + @JvmStatic + fun create( + dmUrl: String, + tokenCredential: TokenCredential, + skipSecurityChecks: Boolean, + clientDetails: ClientDetails, + refreshInterval: Duration, + ): DefaultConfigurationCache = + DefaultConfigurationCache( + refreshInterval = refreshInterval, + dmUrl = dmUrl, + tokenCredential = tokenCredential, + skipSecurityChecks = skipSecurityChecks, + clientDetails = clientDetails, + ) + } + init { if ( configurationProvider == null && diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt index 359f1e8cd..35743acad 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/FileSource.kt @@ -13,7 +13,9 @@ import java.nio.file.Path import java.util.UUID /** Represents a file-based ingestion source. */ -class FileSource( +class FileSource +@JvmOverloads +constructor( val path: Path, format: Format, sourceId: UUID = UUID.randomUUID(), diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt index 3b3e88a73..63f255d92 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/source/StreamSource.kt @@ -7,10 +7,12 @@ import java.io.InputStream import java.util.UUID /** Represents a stream-based ingestion source. 
*/ -class StreamSource( +class StreamSource +@JvmOverloads +constructor( stream: InputStream, format: Format, - sourceCompression: CompressionType, + sourceCompression: CompressionType = CompressionType.NONE, sourceId: UUID = UUID.randomUUID(), leaveOpen: Boolean = false, ) : LocalSource(format, leaveOpen, sourceCompression, sourceId) { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt index 2058bf7c5..e673ea11b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -26,11 +26,15 @@ import com.microsoft.azure.kusto.ingest.v2.uploader.compression.CompressionExcep import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults +import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.async import kotlinx.coroutines.awaitAll import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.delay +import kotlinx.coroutines.future.future +import kotlinx.coroutines.sync.Semaphore +import kotlinx.coroutines.sync.withPermit import kotlinx.coroutines.withContext import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -41,6 +45,8 @@ import java.io.PipedOutputStream import java.time.Clock import java.time.Duration import java.time.Instant +import java.util.concurrent.CompletableFuture +import java.util.concurrent.atomic.AtomicInteger import java.util.zip.GZIPOutputStream /** Represents an abstract base class for uploaders to storage containers. */ @@ -59,6 +65,12 @@ abstract class ContainerUploaderBase( private val effectiveMaxConcurrency: Int = minOf(maxConcurrency, Runtime.getRuntime().availableProcessors()) + /** + * Atomic counter for round-robin container selection. Increments on each + * upload to distribute load evenly across containers. + */ + private val containerIndexCounter = AtomicInteger(0) + override var ignoreSizeLimit: Boolean = false override fun close() { @@ -251,9 +263,10 @@ abstract class ContainerUploaderBase( ) /** - * Uploads a stream with retry logic and container cycling. Randomly selects - * a starting container and cycles through containers on each retry. For - * example, with 2 containers and 3 retries: 1->2->1 or 2->1->2 + * Uploads a stream with retry logic and container cycling. Uses an + * incrementing counter (mod container count) for round-robin container + * selection, ensuring even load distribution across containers on each + * retry. 
For example, with 2 containers and 3 retries: 0->1->0 or 1->0->1 */ private suspend fun uploadWithRetries( local: LocalSource, @@ -262,11 +275,12 @@ abstract class ContainerUploaderBase( containers: List, effectiveCompressionType: CompressionType = local.compressionType, ): BlobSource { - // Select random starting container index - var containerIndex = (0 until containers.size).random() + // Select container using incrementing counter for round-robin distribution + var containerIndex = + containerIndexCounter.getAndIncrement() % containers.size logger.debug( - "Starting upload with {} containers, random start index: {}", + "Starting upload with {} containers, round-robin index: {}", containers.size, containerIndex, ) @@ -373,73 +387,80 @@ abstract class ContainerUploaderBase( localSources.size, maxConcurrency, ) - // Process sources in chunks to respect maxConcurrency at file level + // TODO check and validate failure scenarios + // Use semaphore for true streaming parallelism + // This allows up to maxConcurrency concurrent uploads, starting new ones as soon as slots + // are available + val semaphore = Semaphore(maxConcurrency) + + // Launch all uploads concurrently, but semaphore limits actual concurrent execution val results = - localSources.chunked(maxConcurrency).flatMap { chunk -> - chunk.map { source -> + localSources + .map { source -> async { - val startedAt = - Instant.now(Clock.systemUTC()) - try { - val blobSource = uploadAsync(source) - val completedAt = - Instant.now(Clock.systemUTC()) - UploadResult.Success( - sourceName = source.name, - startedAt = startedAt, - completedAt = completedAt, - blobUrl = blobSource.blobPath, - sizeBytes = source.size() ?: -1, - ) - } catch (e: Exception) { - val completedAt = + semaphore.withPermit { + val startedAt = Instant.now(Clock.systemUTC()) - val errorCode = - when { - e.message?.contains( - "size", - ) == true -> - UploadErrorCode - .SOURCE_SIZE_LIMIT_EXCEEDED - e.message?.contains( - "readable", - ) == true -> - UploadErrorCode - .SOURCE_NOT_READABLE - e.message?.contains( - "empty", - ) == true -> - UploadErrorCode - .SOURCE_IS_EMPTY - e.message?.contains( - "container", - ) == true -> - UploadErrorCode - .NO_CONTAINERS_AVAILABLE - else -> - UploadErrorCode - .UPLOAD_FAILED - } + try { + val blobSource = uploadAsync(source) + val completedAt = + Instant.now(Clock.systemUTC()) + UploadResult.Success( + sourceName = source.name, + startedAt = startedAt, + completedAt = completedAt, + blobUrl = blobSource.blobPath, + sizeBytes = source.size() ?: -1, + ) + } catch (e: Exception) { + val completedAt = + Instant.now(Clock.systemUTC()) + val errorCode = + when { + e.message?.contains( + "size", + ) == true -> + UploadErrorCode + .SOURCE_SIZE_LIMIT_EXCEEDED + e.message?.contains( + "readable", + ) == true -> + UploadErrorCode + .SOURCE_NOT_READABLE + e.message?.contains( + "empty", + ) == true -> + UploadErrorCode + .SOURCE_IS_EMPTY + e.message?.contains( + "container", + ) == true -> + UploadErrorCode + .NO_CONTAINERS_AVAILABLE + else -> + UploadErrorCode + .UPLOAD_FAILED + } - UploadResult.Failure( - sourceName = source.name, - startedAt = startedAt, - completedAt = completedAt, - errorCode = errorCode, - errorMessage = - e.message - ?: "Upload failed", - exception = e, - isPermanent = - e is IngestException && - e.isPermanent == - true, - ) + UploadResult.Failure( + sourceName = source.name, + startedAt = startedAt, + completedAt = completedAt, + errorCode = errorCode, + errorMessage = + e.message + ?: "Upload failed", + exception = e, + 
isPermanent = + e is IngestException && + e.isPermanent == + true, + ) + } } } } - .awaitAll() - } + .awaitAll() val successes = results.filterIsInstance() val failures = results.filterIsInstance() @@ -705,6 +726,33 @@ abstract class ContainerUploaderBase( } } + /** + * Uploads the specified local source asynchronously. This is the + * Java-compatible version that returns a CompletableFuture. + * + * @param local The local source to upload. + * @return A CompletableFuture that will complete with the uploaded blob + * source. + */ + @JvmName("uploadAsync") + fun uploadAsyncJava(local: LocalSource): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { uploadAsync(local) } + + /** + * Uploads the specified local sources asynchronously. This is the + * Java-compatible version that returns a CompletableFuture. + * + * @param localSources List of the local sources to upload. + * @return A CompletableFuture that will complete with the upload results. + */ + @JvmName("uploadManyAsync") + fun uploadManyAsyncJava( + localSources: List, + ): CompletableFuture = + CoroutineScope(Dispatchers.IO).future { + uploadManyAsync(localSources) + } + /** * Selects the appropriate containers for upload based on the provided * configuration cache and upload method. diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt new file mode 100644 index 000000000..f2bd6bc9a --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader + +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults +import kotlinx.coroutines.future.await +import java.io.Closeable +import java.util.concurrent.CompletableFuture + +/** + * Java-compatible interface for creating custom uploaders. + * + * This interface uses [CompletableFuture] instead of Kotlin coroutines, + * allowing Java developers to implement custom upload logic without needing to + * understand Kotlin suspend functions. + */ +interface ICustomUploader : Closeable { + /** + * Indicates whether to ignore the max data size allowed during upload. + * Default should be false. + */ + fun getIgnoreSizeLimit(): Boolean + + /** Sets whether to ignore the max data size limit. */ + fun setIgnoreSizeLimit(value: Boolean) + + /** + * Uploads the specified local source asynchronously. + * + * @param local The local source to upload. + * @return A CompletableFuture that completes with the uploaded blob source. + */ + fun uploadAsync(local: LocalSource): CompletableFuture + + /** + * Uploads the specified local sources asynchronously. + * + * @param localSources List of the local sources to upload. + * @return A CompletableFuture that completes with the upload results. + */ + fun uploadManyAsync( + localSources: List, + ): CompletableFuture +} + +/** + * Extension function to convert [ICustomUploader] to [IUploader]. + * + * Kotlin users can use this as: `myCustomUploader.asUploader()` + */ +fun ICustomUploader.asUploader(): IUploader = CustomUploaderAdapter(this) + +/** + * Static helper methods for [ICustomUploader]. + * + * Provides Java-friendly static methods to work with custom uploaders. 
+ */
+object CustomUploaderHelper {
+    /**
+     * Wraps an [ICustomUploader] with an adapter to create an [IUploader].
+     *
+     * This is the Java-friendly way to convert a custom uploader, for example:
+     * `IUploader uploader = CustomUploaderHelper.asUploader(myCustomUploader);`
+     */
+    @JvmStatic
+    fun asUploader(customUploader: ICustomUploader): IUploader =
+        CustomUploaderAdapter(customUploader)
+}
+
+/**
+ * Adapter that wraps an [ICustomUploader] to implement the [IUploader]
+ * interface.
+ *
+ * This allows Java-implemented uploaders to be used anywhere an [IUploader] is
+ * expected, such as with QueuedIngestClient or ManagedStreamingIngestClient.
+ */
+class CustomUploaderAdapter(private val customUploader: ICustomUploader) :
+    IUploader {
+    override var ignoreSizeLimit: Boolean
+        get() = customUploader.getIgnoreSizeLimit()
+        set(value) {
+            customUploader.setIgnoreSizeLimit(value)
+        }
+
+    override suspend fun uploadAsync(local: LocalSource): BlobSource {
+        return customUploader.uploadAsync(local).await()
+    }
+
+    override suspend fun uploadManyAsync(
+        localSources: List<LocalSource>,
+    ): UploadResults {
+        return customUploader.uploadManyAsync(localSources).await()
+    }
+
+    override fun close() {
+        customUploader.close()
+    }
+}
diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt
index 12cbd5eec..ef64f6995 100644
--- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt
+++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/QueuedIngestClientTest.kt
@@ -412,47 +412,6 @@ class QueuedIngestClientTest :
             logger.info(
                 "Large file upload completed: $largeSucceeded succeeded",
             )
-
-            // Batch upload (5 files)
-            logger.info("Testing batch upload (5 files)")
-            val batchSources =
-                (1..5).map { i ->
-                    createTestStreamSource(
-                        1024 * i,
-                        "combined_batch_$i.json",
-                    )
-                }
-            val batchResponse =
-                queuedIngestClient.ingestAsync(
-                    database = database,
-                    table = targetTable,
-                    sources = batchSources,
-                    ingestRequestProperties =
-                        IngestRequestPropertiesBuilder.create()
-                            .withEnableTracking(true)
-                            .build(),
-                )
-            assertNotNull(batchResponse.ingestResponse.ingestionOperationId)
-            val batchStatus =
-                queuedIngestClient.pollUntilCompletion(
-                    database = database,
-                    table = targetTable,
-                    operationId =
-                        batchResponse.ingestResponse
-                            .ingestionOperationId,
-                    pollingInterval = pollInterval,
-                    timeout = pollTimeout,
-                )
-            val batchSucceeded =
-                batchStatus.details?.count {
-                    it.status == BlobStatus.Status.Succeeded
-                } ?: 0
-            assert(batchSucceeded == batchSources.size) {
-                "Expected all batch files to succeed"
-            }
-            logger.info(
-                "Batch upload completed: $batchSucceeded/${batchSources.size} succeeded",
-            )
         } catch (e: ConnectException) {
             assumeTrue(false, "Skipping test: ${e.message}")
         } catch (e: Exception) {
@@ -465,37 +424,43 @@ class QueuedIngestClientTest :
     }
 
     @Test
-    fun `E2E - parallel processing with maxConcurrency`() = runBlocking {
-        logger.info("E2E: Testing parallel processing with maxConcurrency=5")
+    fun `E2E - multi-blob batch ingestion`() = runBlocking {
+        logger.info("E2E: Testing multi-blob batch ingestion using BlobSource")
 
-        val queuedIngestClient = createTestClient(maxConcurrency = 5)
+        val queuedIngestClient = createTestClient()
 
-        val sources =
-            (1..10).map { i ->
-                createTestStreamSource(512 * 1024, "parallel_$i.json")
+        // Multi-source API only accepts BlobSource - blobs already exist in storage,
+        // no upload needed, all blob URLs are submitted in a single request.
+ val sampleJsonFiles = + listOf( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json", + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", + ) + // Use multijson to handle different JSON structures + val blobSources = + sampleJsonFiles.map { url -> + BlobSource(url, format = Format.multijson) } try { - val startTime = System.currentTimeMillis() val response = queuedIngestClient.ingestAsync( database = database, table = targetTable, - sources = sources, + sources = blobSources, ingestRequestProperties = IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build(), ) - val uploadDuration = System.currentTimeMillis() - startTime val operationId = assertValidIngestionResponse( response, - "E2E - parallel processing", + "E2E - multi-blob batch ingestion", ) logger.info( - "Parallel upload submitted in ${uploadDuration}ms with operation ID: $operationId", + "Multi-blob batch submitted with operation ID: $operationId", ) val finalStatus = @@ -512,10 +477,10 @@ class QueuedIngestClientTest : it.status == BlobStatus.Status.Succeeded } ?: 0 logger.info( - "Parallel upload: $succeededCount/${sources.size} succeeded (avg ${uploadDuration / sources.size}ms per file)", + "Multi-blob batch: $succeededCount/${blobSources.size} blobs succeeded", ) - assert(succeededCount == sources.size) { - "Expected parallel uploads to succeed" + assert(succeededCount == blobSources.size) { + "Expected all blobs in batch to be ingested successfully" } } catch (e: ConnectException) { assumeTrue(false, "Skipping test: ${e.message}") @@ -623,7 +588,7 @@ class QueuedIngestClientTest : queuedIngestClient.ingestAsync( database = database, table = targetTable, - sources = listOf(source), + source = source, ingestRequestProperties = irp, ) @@ -771,49 +736,81 @@ class QueuedIngestClientTest : } } + @Test + fun `E2E - duplicate blob URLs should be rejected`(): Unit = runBlocking { + logger.info("E2E: Testing duplicate blob URL detection") + + val client = createTestClient() + + // Create sources with duplicate blob URLs (same URL used multiple times) + val duplicateBlobUrl = + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json" + val sources = + listOf( + BlobSource(duplicateBlobUrl, format = Format.json), + BlobSource( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", + format = Format.json, + ), + // Duplicate! + BlobSource(duplicateBlobUrl, format = Format.json), + ) + + logger.info( + "Ingesting ${sources.size} blob sources with duplicate URLs (should fail)", + ) + val exception = + assertThrows { + client.ingestAsync( + database = database, + table = targetTable, + sources = sources, + ingestRequestProperties = + IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(), + ) + } + assertNotNull( + exception, + "Duplicate blob URLs should throw IngestClientException", + ) + assert( + exception.message?.contains( + "Duplicate blob sources detected", + ) == true, + ) { + "Exception message should indicate duplicate blob sources. 
Got: ${exception.message}" + } + logger.info( + "Duplicate blob URL detection test passed: ${exception.message}", + ) + } + @Test fun `E2E - format mismatch and mixed format batch`(): Unit = runBlocking { logger.info("E2E: Testing format mismatch detection with mixed formats") val client = createTestClient() - val jsonContent = - """{"name":"test","value":123,"timestamp":"2024-01-01"}""" - val csvContent = - """name,value,timestamp -test,123,2024-01-01 -test2,456,2024-01-02""" - val sources = listOf( - StreamSource( - stream = - ByteArrayInputStream( - jsonContent.toByteArray(), - ), + BlobSource( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json", format = Format.json, - sourceCompression = CompressionType.NONE, ), - StreamSource( - stream = - ByteArrayInputStream( - csvContent.toByteArray(), - ), + BlobSource( + "https://kustosamplefiles.blob.core.windows.net/csvsamplefiles/simple.csv", format = Format.csv, - sourceCompression = CompressionType.NONE, ), - StreamSource( - stream = - ByteArrayInputStream( - jsonContent.toByteArray(), - ), + BlobSource( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/multilined.json", format = Format.json, - sourceCompression = CompressionType.NONE, ), ) logger.info( - "Uploading ${sources.size} sources with mixed formats (JSON and CSV)", + "Ingesting ${sources.size} blob sources with mixed formats (JSON and CSV)", ) val exception = assertThrows { @@ -831,6 +828,9 @@ test2,456,2024-01-02""" exception, "Mixed formats are not permitted for ingestion", ) + assert(exception.message?.contains("same format") == true) { + "Exception message should indicate format mismatch. Got: ${exception.message}" + } } @ParameterizedTest( @@ -890,11 +890,12 @@ test2,456,2024-01-02""" .withEnableTracking(true) .build() + // Use single-source API for LocalSource val ingestionResponse = queuedIngestClient.ingestAsync( database = database, table = targetTable, - sources = listOf(source), + source = source, ingestRequestProperties = properties, ) diff --git a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java index 44591c4db..8dd330750 100644 --- a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java +++ b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java @@ -20,13 +20,22 @@ import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; import com.microsoft.azure.kusto.ingest.v2.client.IngestionOperation; import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache; +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache; +import com.microsoft.azure.kusto.ingest.v2.common.SimpleRetryPolicy; +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails; import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; import com.microsoft.azure.kusto.ingest.v2.models.*; +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource; import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; import com.microsoft.azure.kusto.ingest.v2.source.FileSource; -import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource; +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import 
com.microsoft.azure.kusto.ingest.v2.uploader.ManagedUploader; +import com.microsoft.azure.kusto.ingest.v2.uploader.UploadMethod; +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult; +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.exporters.logging.LoggingSpanExporter; import io.opentelemetry.sdk.OpenTelemetrySdk; @@ -73,7 +82,6 @@ enum SourceType { return null; } } - /** * AuthenticationModeOptions - represents the different options to authenticate to the system */ @@ -824,18 +832,82 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q } private static @NotNull CompletableFuture ingestV2BatchIngestion(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, - @NotNull QueuedIngestClient queuedIngestClient) { - System.out.println("\n=== Queued ingestion from multiple sources (ingest-v2 batch) ==="); + @NotNull QueuedIngestClient queuedIngestClient) { + System.out.println("\n=== Queued batch ingestion: Upload local files to blob, then ingest (ingest-v2) ==="); + String clusterPath = ingestV2Config.getClusterPath(); + ChainedTokenCredential credential = buildIngestV2Credential(ingestV2Config); + + ConfigurationCache configCache = DefaultConfigurationCache.create( + clusterPath, + credential, + new ClientDetails("SampleApp", "1.0", "quickstart-sample")); + + ManagedUploader uploader = ManagedUploader.builder() + .withConfigurationCache(configCache) + .withRetryPolicy(new SimpleRetryPolicy()) + .withMaxConcurrency(ingestV2Config.getMaxConcurrency()) + .withMaxDataSize(4L * 1024 * 1024 * 1024) // 4GB max size + .withUploadMethod(UploadMethod.STORAGE) + .withTokenCredential(credential) + .build(); + + System.out.println("ManagedUploader created for batch upload"); + FileSource source1 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); FileSource source2 = new FileSource(resolveQuickstartPath("dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); - List sources = Arrays.asList(source1, source2); + List localSources = Arrays.asList(source1, source2); + + System.out.println("Prepared " + localSources.size() + " local files for upload:"); + for (LocalSource source : localSources) { + System.out.println(" - " + source.getName() + " (format: " + source.getFormat() + ")"); + } IngestRequestProperties props = buildIngestV2RequestProperties(config, ingestV2Config, null); - return queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), sources, props) - .thenCompose(response -> { - System.out.println("Batch ingestion queued. 
Operation ID: " + response.getIngestResponse().getIngestionOperationId()); - System.out.println("Number of sources in batch: " + sources.size()); - return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "Batch Ingestion"); + + + System.out.println("Uploading " + localSources.size() + " files to blob storage..."); + + return uploader.uploadManyAsync(localSources) + .thenCompose(uploadResults -> { + System.out.println("Upload completed:"); + System.out.println(" Successes: " + uploadResults.getSuccesses().size()); + System.out.println(" Failures: " + uploadResults.getFailures().size()); + + for (UploadResult.Failure failure : uploadResults.getFailures()) { + System.err.println(" Upload failed for " + failure.getSourceName() + + ": " + failure.getErrorMessage()); + } + + List blobSources = new ArrayList<>(); + for (UploadResult.Success success : uploadResults.getSuccesses()) { + System.out.println(" Uploaded: " + success.getSourceName() + + " -> " + success.getBlobUrl().split("\\?")[0]); // Hide SAS token in log + + BlobSource blobSource = new BlobSource( + success.getBlobUrl(), + Format.csv, // All our files are CSV format + UUID.randomUUID(), + CompressionType.GZIP); + blobSources.add(blobSource); + } + + if (blobSources.isEmpty()) { + return CompletableFuture.failedFuture( + new RuntimeException("All uploads failed - nothing to ingest")); + } + + System.out.println("Ingesting " + blobSources.size() + " blobs as a batch..."); + return queuedIngestClient.ingestAsync(config.getDatabaseName(), config.getTableName(), blobSources, props) + .thenCompose(response -> { + System.out.println("Batch ingestion queued. Operation ID: " + + response.getIngestResponse().getIngestionOperationId()); + System.out.println("Number of sources in batch: " + blobSources.size()); + return trackIngestV2Operation(config, ingestV2Config, queuedIngestClient, response, "Batch Upload & Ingest"); + }); + }) + .whenComplete((unused, throwable) -> { + uploader.close(); + System.out.println("ManagedUploader closed"); }); } diff --git a/samples/src/main/java/ingestv2/AzureBlobRestCustomUploader.java b/samples/src/main/java/ingestv2/AzureBlobRestCustomUploader.java new file mode 100644 index 000000000..b0ca14f8f --- /dev/null +++ b/samples/src/main/java/ingestv2/AzureBlobRestCustomUploader.java @@ -0,0 +1,541 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package ingestv2; + +import com.azure.core.credential.TokenCredential; +import com.azure.identity.AzureCliCredentialBuilder; +import com.azure.identity.ChainedTokenCredential; +import com.azure.identity.ChainedTokenCredentialBuilder; +import com.azure.identity.ClientSecretCredentialBuilder; +import com.microsoft.azure.kusto.data.StringUtils; +import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping; +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource; +import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; +import com.microsoft.azure.kusto.ingest.v2.source.FileSource; +import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource; +import com.microsoft.azure.kusto.ingest.v2.uploader.ICustomUploader; +import com.microsoft.azure.kusto.ingest.v2.uploader.CustomUploaderHelper; +import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader; +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResult; +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadResults; +import com.microsoft.azure.kusto.ingest.v2.uploader.models.UploadErrorCode; +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; +import com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; + +import java.io.*; +import java.net.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.time.Instant; +import java.util.*; +import java.util.concurrent.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * Custom uploader implementation using Azure Blob REST API with SAS tokens from DM config. + * + * This demonstrates end-to-end ICustomUploader usage: + * 1. Gets container URLs with SAS tokens from DM cluster's config API + * 2. Uses standard HTTP PUT to upload blobs (same approach works for S3/GCS) + * 3. Returns BlobSource with the uploaded blob URL for ingestion + * + * ICustomUploader pattern works end-to-end and can be adapted for: + * - AWS S3 (use AWS SDK or S3 REST API) + * - Google Cloud Storage (use GCS SDK or REST API) + * - Any HTTP-based blob storage + */ +public class AzureBlobRestCustomUploader implements ICustomUploader { + + private final String containerUrlWithSas; // Container URL with SAS token from DM config + private final ExecutorService executor; + private boolean ignoreSizeLimit = false; + + /** + * Creates a custom uploader using a container URL with SAS token. + * + * The container URL is obtained from DM cluster's config API: + * - ConfigurationResponse.containerSettings.containers[0].path + * + * @param containerUrlWithSas Full container URL including SAS token + * Example: https://account.blob.core.windows.net/container?sv=...&sig=... 
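+ *
+ * <p>Construction sketch (the URL below is a placeholder, not a working SAS):
+ * <pre>
+ * ICustomUploader uploader =
+ *         new AzureBlobRestCustomUploader("https://account.blob.core.windows.net/container?sv=...");
+ * </pre>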
+ */ + public AzureBlobRestCustomUploader(String containerUrlWithSas) { + this.containerUrlWithSas = containerUrlWithSas; + this.executor = Executors.newFixedThreadPool(4); + } + + @Override + public boolean getIgnoreSizeLimit() { + return ignoreSizeLimit; + } + + @Override + public void setIgnoreSizeLimit(boolean value) { + this.ignoreSizeLimit = value; + } + + @Override + public CompletableFuture uploadAsync(LocalSource local) { + return CompletableFuture.supplyAsync(() -> { + try { + return uploadBlobUsingRest(local); + } catch (Exception e) { + throw new RuntimeException("Upload failed: " + e.getMessage(), e); + } + }, executor); + } + + @Override + public CompletableFuture uploadManyAsync(List localSources) { + return CompletableFuture.supplyAsync(() -> { + List successes = new ArrayList<>(); + List failures = new ArrayList<>(); + + for (LocalSource source : localSources) { + Instant startedAt = Instant.now(); + try { + BlobSource result = uploadBlobUsingRest(source); + Long size = source.size(); + successes.add(new UploadResult.Success( + source.getName(), + startedAt, + Instant.now(), + result.getBlobPath(), + size != null ? size : -1 + )); + } catch (Exception e) { + failures.add(new UploadResult.Failure( + source.getName(), + startedAt, + Instant.now(), + UploadErrorCode.UPLOAD_FAILED, + e.getMessage(), + e, + false + )); + } + } + + return new UploadResults(successes, failures); + }, executor); + } + + @Override + public void close() throws IOException { + executor.shutdown(); + } + + /** + * Uploads a blob using HTTP PUT with SAS token authentication. + * + * This is the core upload logic that can be adapted for any REST/SDK based storage + */ + private BlobSource uploadBlobUsingRest(LocalSource local) throws Exception { + String blobName = generateBlobName(local); + + // Read data from source using Kotlin's data() method + byte[] data; + try (InputStream inputStream = local.data()) { + data = inputStream.readAllBytes(); + } + + // Parse container URL and SAS + String[] parts = containerUrlWithSas.split("\\?", 2); + String containerUrl = parts[0]; + String sasToken = parts.length > 1 ? parts[1] : ""; + + // Build full blob URL with SAS + String blobUrl = containerUrl + "/" + blobName; + if (!sasToken.isEmpty()) { + blobUrl += "?" 
+ sasToken; + } + + System.out.println("Uploading to: " + containerUrl + "/" + blobName); + + // Create HTTP PUT request + URL url = new URL(blobUrl); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("PUT"); + connection.setDoOutput(true); + connection.setConnectTimeout(30000); + connection.setReadTimeout(60000); + + // Set required headers for Azure Blob REST API + connection.setRequestProperty("x-ms-blob-type", "BlockBlob"); + connection.setRequestProperty("x-ms-version", "2021-06-08"); + connection.setRequestProperty("Content-Type", getContentType(local.getFormat())); + connection.setRequestProperty("Content-Length", String.valueOf(data.length)); + + // Upload the data + try (OutputStream out = connection.getOutputStream()) { + out.write(data); + out.flush(); + } + + // Check response + int responseCode = connection.getResponseCode(); + if (responseCode < 200 || responseCode >= 300) { + String errorBody = ""; + try (InputStream errorStream = connection.getErrorStream()) { + if (errorStream != null) { + errorBody = new String(errorStream.readAllBytes(), StandardCharsets.UTF_8); + } + } catch (Exception ignored) {} + throw new RuntimeException( + "Upload failed with status " + responseCode + ": " + errorBody + ); + } + + System.out.println("Successfully uploaded blob: " + blobName + " (Status: " + responseCode + ", Size: " + data.length + " bytes)"); + + // Return BlobSource with the blob URL (including SAS for Kusto to access) + String resultBlobUrl = containerUrl + "/" + blobName; + if (!sasToken.isEmpty()) { + resultBlobUrl += "?" + sasToken; + } + + return new BlobSource( + resultBlobUrl, + local.getFormat(), + local.getSourceId(), + local.getCompressionType() + ); + } + + private String generateBlobName(LocalSource source) { + String name = source.getName(); + if (name == null || name.isEmpty()) { + name = UUID.randomUUID().toString(); + } + String extension = getExtension(source.getFormat(), source.getCompressionType()); + if (!name.endsWith(extension)) { + name = name + extension; + } + return "custom-upload/" + UUID.randomUUID().toString().substring(0, 8) + "_" + name; + } + + private String getExtension(Format format, CompressionType compression) { + String formatExt; + switch (format) { + case json: formatExt = ".json"; break; + case csv: formatExt = ".csv"; break; + case parquet: formatExt = ".parquet"; break; + case avro: formatExt = ".avro"; break; + case orc: formatExt = ".orc"; break; + default: formatExt = ".dat"; break; + } + + if (compression == CompressionType.GZIP) { + return formatExt + ".gz"; + } + return formatExt; + } + + private String getContentType(Format format) { + switch (format) { + case json: return "application/json"; + case csv: return "text/csv"; + default: return "application/octet-stream"; + } + } + + // ========================================== + // End-to-end demo using system properties (like other samples) + // ========================================== + + public static void main(String[] args) throws Exception { + // Get configuration from system properties (consistent with other samples) + String engineEndpoint = System.getProperty("clusterPath"); // "https://.kusto.windows.net" + String appId = System.getProperty("app-id"); + String appKey = System.getProperty("appKey"); + String tenant = System.getProperty("tenant"); + + String database = System.getProperty("dbName"); + String table = System.getProperty("tableName"); + String mapping = System.getProperty("dataMappingName"); + + // 
Container URL with SAS token (in production, get from DM config API) + String containerUrlWithSas = System.getProperty("containerUrl"); + + if (engineEndpoint == null || engineEndpoint.isEmpty()) { + System.out.println("=== Azure Blob REST Custom Uploader Demo ==="); + System.out.println(); + System.out.println("This demonstrates ICustomUploader end-to-end with Azure Blob REST API."); + System.out.println(); + System.out.println("Usage:"); + System.out.println(" mvn exec:java -pl samples -Dexec.mainClass=\"ingestv2.AzureBlobRestCustomUploader\" \\"); + System.out.println(" -DclusterPath=https://mycluster.region.kusto.windows.net \\"); + System.out.println(" -DdbName=MyDatabase \\"); + System.out.println(" -DtableName=MyTable \\"); + System.out.println(" -DdataMappingName=MyMapping \\"); + System.out.println(" -DcontainerUrl=https://account.blob.core.windows.net/container?sas=..."); + System.out.println(); + System.out.println("Optional authentication (defaults to Azure CLI credential):"); + System.out.println(" -Dapp-id= -DappKey= -Dtenant="); + System.out.println(); + System.out.println("Usage pattern:"); + System.out.println("```java"); + System.out.println("// 1. Get container with SAS from DM config API or provide your own"); + System.out.println("String containerUrlWithSas = ...; // e.g., from ConfigurationResponse"); + System.out.println(); + System.out.println("// 2. Create custom uploader"); + System.out.println("ICustomUploader customUploader = new AzureBlobRestCustomUploader(containerUrlWithSas);"); + System.out.println(); + System.out.println("// 3. Convert to IUploader using CustomUploaderHelper"); + System.out.println("IUploader uploader = CustomUploaderHelper.asUploader(customUploader);"); + System.out.println(); + System.out.println("// 4. Create QueuedIngestClient with the custom uploader"); + System.out.println("QueuedIngestClient client = QueuedIngestClientBuilder.create(dmUrl)"); + System.out.println(" .withAuthentication(credential)"); + System.out.println(" .withUploader(uploader, true) // true = client manages uploader lifecycle"); + System.out.println(" .build();"); + System.out.println(); + System.out.println("// 5. Ingest - the custom uploader handles the upload!"); + System.out.println("client.ingestAsync(fileSource, properties).join();"); + System.out.println("```"); + return; + } + + System.out.println("=== Running End-to-End Azure Blob REST Custom Uploader Test ==="); + System.out.println("Engine Endpoint: " + engineEndpoint); + System.out.println("Database: " + database); + System.out.println("Table: " + table); + System.out.println("Mapping: " + mapping); + System.out.println("Container URL: " + (containerUrlWithSas != null ? 
"[provided]" : "[not provided - using default uploader]")); + + // Create Azure AD credential + ChainedTokenCredential credential; + if (StringUtils.isNotBlank(appId) + && StringUtils.isNotBlank(appKey) + && StringUtils.isNotBlank(tenant)) { + System.out.println("Using Service Principal authentication"); + credential = new ChainedTokenCredentialBuilder() + .addFirst(new ClientSecretCredentialBuilder() + .clientId(appId) + .clientSecret(appKey) + .tenantId(tenant) + .build()) + .build(); + } else { + System.out.println("Using Azure CLI authentication"); + credential = new ChainedTokenCredentialBuilder() + .addFirst(new AzureCliCredentialBuilder().build()) + .build(); + } + + // Build the ingest URL (DM endpoint) + // The correct pattern: https://cluster.region.kusto.windows.net -> https://ingest-cluster.region.kusto.windows.net + String dmUrl = engineEndpoint; + if (engineEndpoint.startsWith("https://")) { + dmUrl = engineEndpoint.replace("https://", "https://ingest-"); + } else if (engineEndpoint.startsWith("http://")) { + dmUrl = engineEndpoint.replace("http://", "http://ingest-"); + } + System.out.println("DM Endpoint: " + dmUrl); + + QueuedIngestClient queuedIngestClient = null; + try { + // Create the QueuedIngestClient with or without custom uploader + QueuedIngestClientBuilder builder = QueuedIngestClientBuilder.create(dmUrl) + .withAuthentication(credential) + .withMaxConcurrency(10); + + // If container URL is not provided, fetch it from the cluster's configuration API + if (containerUrlWithSas == null || containerUrlWithSas.isEmpty()) { + System.out.println("\n1. Fetching container URL from Kusto cluster configuration API..."); + containerUrlWithSas = fetchContainerUrlFromKustoConfig(dmUrl, credential); + System.out.println(" Retrieved container URL from cluster configuration"); + } + + // Now create our custom uploader with the container URL + if (containerUrlWithSas != null && !containerUrlWithSas.isEmpty()) { + System.out.println("\n2. Creating AzureBlobRestCustomUploader with container URL"); + + // create the custom uploader + IUploader uploader = CustomUploaderHelper.asUploader(new AzureBlobRestCustomUploader(containerUrlWithSas)); + + // Configure the builder to use our custom uploader + // true = client will manage the uploader lifecycle (close it when done) + builder.withUploader(uploader, true); + + System.out.println(" Custom uploader configured successfully!"); + } else { + System.out.println("\n2. No container URL available - using default managed uploader"); + } + + // Build the client + queuedIngestClient = builder.build(); + System.out.println("\n3. QueuedIngestClient created successfully"); + + // Demonstrate ingestion using a file source + String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + + // Try CSV file first + FileSource fileSource = new FileSource(Paths.get(resourcesDirectory + "dataset.csv"), Format.csv); + + IngestionMapping ingestionMapping = new IngestionMapping(mapping, IngestionMapping.IngestionMappingType.CSV); + + IngestRequestProperties properties; + if (mapping != null && !mapping.isEmpty()) { + properties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(ingestionMapping) + .withEnableTracking(true) + .build(); + } else { + properties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); + } + + System.out.println("\n4. 
Ingesting file: " + resourcesDirectory + "dataset.csv"); + System.out.println(" (This will use the custom uploader to upload to Azure Blob Storage!)"); + + // Perform ingestion - the custom uploader handles the upload! + var response = queuedIngestClient.ingestAsync(database, table, fileSource, properties).get(); + + System.out.println("\n5. Ingestion queued successfully!"); + System.out.println(" Operation ID: " + response.getIngestResponse().getIngestionOperationId()); + System.out.println(" Ingestion Type: " + response.getIngestionType()); + + IngestionMapping jsonMapping = new IngestionMapping(mapping, IngestionMapping.IngestionMappingType.JSON); + // Also demonstrate JSON ingestion with mapping + if (mapping != null && !mapping.isEmpty()) { + FileSource jsonFileSource = new FileSource(Paths.get(resourcesDirectory + "dataset.json"), Format.json); + + IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() + .withIngestionMapping(jsonMapping) + .withEnableTracking(true) + .build(); + + System.out.println("\n6. Ingesting JSON file with mapping: " + resourcesDirectory + "dataset.json"); + + var jsonResponse = queuedIngestClient.ingestAsync(database, table, jsonFileSource, jsonProperties).get(); + + System.out.println(" JSON Ingestion queued successfully!"); + System.out.println(" Operation ID: " + jsonResponse.getIngestResponse().getIngestionOperationId()); + } + + // Demonstrate stream ingestion + String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + + StreamSource streamSource = new StreamSource(csvInputStream, Format.csv); + + IngestRequestProperties streamProperties = IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); + + System.out.println("\n7. Ingesting from stream (CSV data)"); + + var streamResponse = queuedIngestClient.ingestAsync(database, table,streamSource, streamProperties).get(); + + System.out.println(" Stream Ingestion queued successfully!"); + System.out.println(" Operation ID: " + streamResponse.getIngestResponse().getIngestionOperationId()); + + System.out.println("\n=== Azure Blob REST Custom Uploader Demo Complete ==="); + System.out.println(); + System.out.println("Key Integration Points:"); + System.out.println(" 1. AzureBlobRestCustomUploader implements ICustomUploader"); + System.out.println(" 2. CustomUploaderHelper.asUploader() converts to IUploader"); + System.out.println(" 3. QueuedIngestClientBuilder.withUploader() configures the custom uploader"); + System.out.println(" 4. client.ingestAsync() internally uses the custom uploader!"); + System.out.println(); + System.out.println("The same pattern works for any other source such as S3/GCP - just implement ICustomUploader!"); + + } catch (Exception e) { + System.err.println("Error during ingestion: " + e.getMessage()); + e.printStackTrace(); + } finally { + if (queuedIngestClient != null) { + queuedIngestClient.close(); + } + } + } + + /** + * Fetches container URL with SAS token from Kusto cluster's configuration API. + * + * The configuration API endpoint: {dmUrl}/v1/rest/ingestion/configuration + * Returns JSON with containerSettings.containers[0].path containing the container URL with SAS. 
+ * + * @param dmUrl The DM (Data Management) cluster URL + * @param credential Azure credential for authentication + * @return Container URL with SAS token, or null if not available + */ + private static String fetchContainerUrlFromKustoConfig(String dmUrl, TokenCredential credential) throws Exception { + String configUrl = dmUrl + "/v1/rest/ingestion/configuration"; + System.out.println(" Fetching configuration from: " + configUrl); + + // Get access token for the Kusto resource + String scope = dmUrl + "/.default"; + com.azure.core.credential.AccessToken token = credential.getToken( + new com.azure.core.credential.TokenRequestContext().addScopes(scope) + ).block(); + + if (token == null) { + throw new RuntimeException("Failed to get access token for " + scope); + } + + // Create HTTP GET request + URL url = new URL(configUrl); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.setRequestProperty("Authorization", "Bearer " + token.getToken()); + connection.setRequestProperty("Accept", "application/json"); + connection.setConnectTimeout(30000); + connection.setReadTimeout(60000); + + // Read response + int responseCode = connection.getResponseCode(); + if (responseCode < 200 || responseCode >= 300) { + String errorBody = ""; + try (InputStream errorStream = connection.getErrorStream()) { + if (errorStream != null) { + errorBody = new String(errorStream.readAllBytes(), StandardCharsets.UTF_8); + } + } catch (Exception ignored) {} + throw new RuntimeException( + "Failed to get configuration from " + configUrl + " (status " + responseCode + "): " + errorBody + ); + } + + String responseBody; + try (InputStream inputStream = connection.getInputStream()) { + responseBody = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); + } + + // Parse JSON response to get container URL + ObjectMapper mapper = new ObjectMapper(); + JsonNode root = mapper.readTree(responseBody); + + // Navigate: containerSettings -> containers -> [0] -> path + JsonNode containerSettings = root.get("containerSettings"); + if (containerSettings == null) { + System.out.println(" Warning: No containerSettings in configuration response"); + return null; + } + + JsonNode containers = containerSettings.get("containers"); + if (containers == null || !containers.isArray() || containers.isEmpty()) { + System.out.println(" Warning: No containers in configuration response"); + return null; + } + + JsonNode firstContainer = containers.get(0); + JsonNode path = firstContainer.get("path"); + if (path == null || path.isNull()) { + System.out.println(" Warning: Container path is null"); + return null; + } + + String containerUrl = path.asText(); + System.out.println(" Found container: " + containerUrl.split("\\?")[0] + "?..."); + return containerUrl; + } +} diff --git a/samples/src/main/java/ingestv2/QueuedIngestV2.java b/samples/src/main/java/ingestv2/QueuedIngestV2.java index 317b34d08..bf68de0d8 100644 --- a/samples/src/main/java/ingestv2/QueuedIngestV2.java +++ b/samples/src/main/java/ingestv2/QueuedIngestV2.java @@ -5,19 +5,34 @@ import com.azure.core.credential.TokenCredential; import com.azure.identity.AzureCliCredentialBuilder; +import com.azure.identity.ChainedTokenCredential; import com.azure.identity.ClientSecretCredentialBuilder; import com.microsoft.azure.kusto.data.StringUtils; import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder; import com.microsoft.azure.kusto.ingest.v2.client.IngestionOperation; import 
com.microsoft.azure.kusto.ingest.v2.client.QueuedIngestClient; +import com.microsoft.azure.kusto.ingest.v2.common.DefaultConfigurationCache; +import com.microsoft.azure.kusto.ingest.v2.common.SimpleRetryPolicy; +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails; import com.microsoft.azure.kusto.ingest.v2.common.models.ExtendedIngestResponse; import com.microsoft.azure.kusto.ingest.v2.common.models.IngestRequestPropertiesBuilder; +import com.microsoft.azure.kusto.ingest.v2.models.BlobStatus; +import com.microsoft.azure.kusto.ingest.v2.models.Format; +import com.microsoft.azure.kusto.ingest.v2.models.IngestRequestProperties; +import com.microsoft.azure.kusto.ingest.v2.models.Status; +import com.microsoft.azure.kusto.ingest.v2.models.StatusResponse; +import com.microsoft.azure.kusto.ingest.v2.source.BlobSource; import com.microsoft.azure.kusto.ingest.v2.common.models.mapping.IngestionMapping; import com.microsoft.azure.kusto.ingest.v2.models.*; import com.microsoft.azure.kusto.ingest.v2.source.CompressionType; import com.microsoft.azure.kusto.ingest.v2.source.FileSource; -import com.microsoft.azure.kusto.ingest.v2.source.IngestionSource; +import com.microsoft.azure.kusto.ingest.v2.source.LocalSource; import com.microsoft.azure.kusto.ingest.v2.source.StreamSource; +import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader; +import com.microsoft.azure.kusto.ingest.v2.uploader.ManagedUploader; +import com.microsoft.azure.kusto.ingest.v2.uploader.UploadMethod; +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache; + import org.jetbrains.annotations.NotNull; import java.io.ByteArrayInputStream; @@ -114,94 +129,98 @@ public static void main(String[] args) { /** * Demonstrates ingestion from various stream sources including: - In-memory string data as CSV * - Compressed file stream (CSV) - JSON file stream with mapping + * + *
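+ * <p>The two StreamSource constructor shapes used below, side by side (values illustrative):
+ * <pre>
+ * new StreamSource(in, Format.csv);                                                 // defaults
+ * new StreamSource(in, Format.csv, CompressionType.GZIP, UUID.randomUUID(), false); // full control
+ * </pre>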

Shows both source configuration with defaults) and source configuration with full control) approaches. */ static List> ingestFromStream() throws Exception { System.out.println("\n=== Queued Ingestion from Streams ==="); List> futures = new ArrayList<>(); - // Example 1: Ingest from in-memory CSV string - String csvData = "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; - InputStream csvInputStream = new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); + // Example 1: Ingest from in-memory CSV string (only 2 required parameters) + // sourceCompression defaults to CompressionType.NONE, sourceId auto-generated, baseName null, leaveOpen false + String csvData = + "0,00000000-0000-0000-0001-020304050607,0,0,0,0,0,0,0,0,0,0,2014-01-01T01:01:01.0000000Z,Zero,\"Zero\",0,00:00:00,,null"; + InputStream csvInputStream = + new ByteArrayInputStream(StandardCharsets.UTF_8.encode(csvData).array()); - StreamSource csvStreamSource = new StreamSource( - csvInputStream, - Format.csv, - CompressionType.NONE, - UUID.randomUUID(), - false); + StreamSource csvStreamSource = new StreamSource(csvInputStream, Format.csv); IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() .withEnableTracking(true) .build(); System.out.println("Queueing CSV data from string..."); - CompletableFuture csvFuture = queuedIngestClient - .ingestAsync(database, table, csvStreamSource, csvProperties) - .thenCompose( - response -> { - System.out.println( - "CSV ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation(response, "CSV Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); + CompletableFuture csvFuture = + queuedIngestClient + .ingestAsync(database, table, csvStreamSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "CSV ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(csvInputStream)); futures.add(csvFuture); - // Example 2: Ingest from compressed CSV file + // Example 2: Ingest from compressed CSV file (all 6 parameters needed) + // Explicitly specify compression, sourceId, baseName, and leaveOpen String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; InputStream compressedCsvStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.csv.gz")); - StreamSource compressedStreamSource = new StreamSource( - compressedCsvStream, - Format.csv, - CompressionType.GZIP, - UUID.randomUUID(), - false); + StreamSource compressedStreamSource = + new StreamSource( + compressedCsvStream, + Format.csv, + CompressionType.GZIP, + UUID.randomUUID(), + false); System.out.println("Queueing compressed CSV file..."); - CompletableFuture compressedFuture = queuedIngestClient - .ingestAsync(database, table, compressedStreamSource, csvProperties) - .thenCompose( - response -> { - System.out.println( - "Compressed CSV ingestion queued. 
Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation( - response, "Compressed CSV Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); + CompletableFuture compressedFuture = + queuedIngestClient + .ingestAsync(database, table, compressedStreamSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "Compressed CSV ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation( + response, "Compressed CSV Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(compressedCsvStream)); futures.add(compressedFuture); - // Example 3: Ingest JSON with mapping - InputStream jsonStream = new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); + // Example 3: Ingest JSON with mapping - with defaults + // Uses defaults: sourceCompression=NONE, auto-generated sourceId, leaveOpen=false + InputStream jsonStream = + new ByteArrayInputStream(readResourceBytes(resourcesDirectory, "dataset.json")); + + StreamSource jsonStreamSource = new StreamSource(jsonStream, Format.json); - StreamSource jsonStreamSource = new StreamSource( - jsonStream, - Format.json, - CompressionType.NONE, - UUID.randomUUID(), - false); IngestionMapping mapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() - .withIngestionMapping(mapping) - .withEnableTracking(true) - .build(); + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mapping) + .withEnableTracking(true) + .build(); System.out.println("Queueing JSON file with mapping..."); - CompletableFuture jsonFuture = queuedIngestClient - .ingestAsync(database, table, jsonStreamSource, jsonProperties) - .thenCompose( - response -> { - System.out.println( - "JSON ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation(response, "JSON Stream"); - }) - .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); + CompletableFuture jsonFuture = + queuedIngestClient + .ingestAsync(database, table, jsonStreamSource, jsonProperties) + .thenCompose( + response -> { + System.out.println( + "JSON ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "JSON Stream"); + }) + .whenComplete((unused, throwable) -> closeQuietly(jsonStream)); futures.add(jsonFuture); return futures; @@ -210,6 +229,8 @@ static List> ingestFromStream() throws Exception { /** * Demonstrates ingestion from file sources including: - CSV file - Compressed JSON file with * mapping + * + * Shows both source configuration with defaults and source configuration with all params approaches. 
*/ static List> ingestFromFile() { System.out.println("\n=== Queued Ingestion from Files ==="); @@ -218,99 +239,243 @@ static List> ingestFromFile() { String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - // Example 1: Ingest CSV file - FileSource csvFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE); + // Example 1: Ingest CSV file - with defaults + // compressionType auto-detected from filename (.csv = NONE), sourceId auto-generated, baseName auto-extracted + FileSource csvFileSource = new FileSource(Paths.get(resourcesDirectory + "dataset.csv"), Format.csv); - IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() - .withEnableTracking(true) - .build(); + IngestRequestProperties csvProperties = + IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); System.out.println("Queueing CSV file..."); - CompletableFuture csvFuture = queuedIngestClient - .ingestAsync(database, table, csvFileSource, csvProperties) - .thenCompose( - response -> { - System.out.println( - "CSV file ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation(response, "CSV File"); - }); + CompletableFuture csvFuture = + queuedIngestClient + .ingestAsync(database, table, csvFileSource, csvProperties) + .thenCompose( + response -> { + System.out.println( + "CSV file ingestion queued. Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation(response, "CSV File"); + }); futures.add(csvFuture); - // Example 2: Ingest compressed JSON file with mapping - FileSource jsonFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.jsonz.gz"), - Format.json, - UUID.randomUUID(), - CompressionType.GZIP); + // Example 2: Ingest compressed JSON file with mapping - with all parameters specified + // Explicitly specify sourceId, compression (auto-detected from .gz), and baseName for full control + FileSource jsonFileSource = + new FileSource( + Paths.get(resourcesDirectory + "dataset.jsonz.gz"), + Format.json, + UUID.randomUUID(), + CompressionType.GZIP); + IngestionMapping mapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); - IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() - .withIngestionMapping(mapping) - .withEnableTracking(true) - .build(); + IngestRequestProperties jsonProperties = + IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mapping) + .withEnableTracking(true) + .build(); System.out.println("Queueing compressed JSON file with mapping..."); - CompletableFuture jsonFuture = queuedIngestClient - .ingestAsync(database, table, jsonFileSource, jsonProperties) - .thenCompose( - response -> { - System.out.println( - "Compressed JSON file ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - return trackIngestionOperation( - response, "Compressed JSON File"); - }); + CompletableFuture jsonFuture = + queuedIngestClient + .ingestAsync(database, table, jsonFileSource, jsonProperties) + .thenCompose( + response -> { + System.out.println( + "Compressed JSON file ingestion queued. 
Operation ID: " + + response.getIngestResponse() + .getIngestionOperationId()); + return trackIngestionOperation( + response, "Compressed JSON File"); + }); futures.add(jsonFuture); return futures; } /** - * Demonstrates batch ingestion from multiple sources in a single operation. This is more - * efficient than ingesting sources one by one when you have multiple files. + * Demonstrates batch ingestion from multiple blob sources in a single operation. + * + *

IMPORTANT: Multi-source ingestion only accepts BlobSource. For local sources
+ * (FileSource, StreamSource), you must either:
+ * <ol>
+ * <li>Ingest them one by one using the single-source ingestAsync method (see the sketch after this list), or</li>
+ * <li>First upload them to blob storage using uploadManyAsync, then pass the resulting BlobSource list to ingestAsync</li>
+ * </ol>
+ *
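+ * <p>Minimal sketch of option 1 (assumes localSources and properties are in scope):
+ * <pre>
+ * for (LocalSource src : localSources) {
+ *     queuedIngestClient.ingestAsync(database, table, src, properties).join();
+ * }
+ * </pre>
+ *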

This example uses public blob URLs from the Kusto sample files to demonstrate + * multi-blob batch ingestion. All blobs must have the same format. */ - static @NotNull CompletableFuture ingestMultipleSources() { - System.out.println("\n=== Queued Ingestion from Multiple Sources (Batch) ==="); + static CompletableFuture ingestMultipleSources() { + System.out.println("\n=== Queued Ingestion from Multiple Blob Sources (Batch) ==="); - String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; + // Multi-source API only accepts BlobSource - not FileSource or StreamSource. + // If you have local files, you must upload them to blob storage first. + // Here we use public sample blob URLs from Kusto sample files to demonstrate the pattern. - // Create multiple file sources - FileSource source1 = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, + // IMPORTANT: All sources in a batch must have the same format! + // BlobSource constructor requires: blobPath, format, sourceId, compressionType, baseName + + // Using multiple JSON files from Kusto public sample files + // All files are JSON format - this is required for batch ingestion + BlobSource blob1 = new BlobSource( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/simple.json", + Format.json, UUID.randomUUID(), CompressionType.NONE); - FileSource source2 = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv.gz"), - Format.csv, + BlobSource blob2 = new BlobSource( + "https://kustosamplefiles.blob.core.windows.net/jsonsamplefiles/array.json", + Format.json, UUID.randomUUID(), - CompressionType.GZIP); + CompressionType.NONE); - List sources = Arrays.asList(source1, source2); + // Create list with all blob sources - all must have identical format + List blobSources = Arrays.asList(blob1, blob2); + IngestionMapping mapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); + IngestRequestProperties properties = + IngestRequestPropertiesBuilder.create() + .withIngestionMapping(mapping) + .withEnableTracking(true) + .build(); - IngestRequestProperties properties = IngestRequestPropertiesBuilder.create() - .withEnableTracking(true) - .build(); + System.out.println("Queueing " + blobSources.size() + " blob sources in batch..."); + for (int i = 0; i < blobSources.size(); i++) { + System.out.println(" Blob " + (i + 1) + ": " + blobSources.get(i).getName()); + } - System.out.println("Queueing multiple sources in batch..."); return queuedIngestClient - .ingestAsync(database, table, sources, properties) - .thenCompose( - response -> { - System.out.println( - "Batch ingestion queued. Operation ID: " - + response.getIngestResponse() - .getIngestionOperationId()); - System.out.println("Number of sources in batch: " + sources.size()); - return trackIngestionOperation(response, "Batch Ingestion"); - }); + .ingestAsync(database, table, blobSources, properties) + .thenCompose(response -> { + System.out.println( + "Batch ingestion queued. Operation ID: " + + response.getIngestResponse().getIngestionOperationId()); + System.out.println("Number of sources in batch: " + blobSources.size()); + return trackIngestionOperation(response, "Batch Blob Ingestion"); + }); + } + + /** + * Demonstrates ingesting multiple local files by uploading them to blob storage first, + * then ingesting them as a batch using the multi-source ingestAsync API. + * + *

Pattern:
+ * <ol>
+ * <li>Create a ManagedUploader with proper configuration</li>
+ * <li>Create a list of LocalSource (FileSource) objects</li>
+ * <li>Call uploader.uploadManyAsync(localSources) to upload all files to blob storage</li>
+ * <li>Convert successful upload results to a BlobSource list</li>
+ * <li>Call queuedIngestClient.ingestAsync(blobSources, properties) to ingest as a batch</li>
+ * </ol>
+ *

This approach allows batch ingestion of local files by first uploading them + * to blob storage, which is required because the multi-source API only accepts BlobSource. + */ + static CompletableFuture ingestMultipleLocalFilesViaBlobUpload( + String engineEndpoint, ChainedTokenCredential credential) { + System.out.println("\n=== Queued Ingestion: Upload Local Files to Blob, Then Ingest ==="); + + // Step 1: Create configuration cache (needed for ManagedUploader) + String dmUrl = engineEndpoint.replace(".kusto.", ".ingest-"); + + ConfigurationCache configCache = + DefaultConfigurationCache.create( + dmUrl, + credential, + new ClientDetails("QueuedIngestV2Sample", "1.0", "ingest-v2-sample")); + + // Step 2: Create ManagedUploader for batch uploading local files to blob storage + ManagedUploader uploader = + ManagedUploader.builder() + .withConfigurationCache(configCache) + .withRetryPolicy(new SimpleRetryPolicy()) + .withMaxConcurrency(10) + .withMaxDataSize(4L * 1024 * 1024 * 1024) // 4GB max size + .withUploadMethod(UploadMethod.STORAGE) + .withTokenCredential(credential) + .build(); + + System.out.println("ManagedUploader created for batch upload"); + + // Step 3: Prepare list of local files to upload (all same format - CSV) + String resourcesDirectory = System.getProperty("user.dir") + "/src/main/resources/"; + + // IMPORTANT: All files must have the same format for batch ingestion! + FileSource file1 = new FileSource(Paths.get(resourcesDirectory + "dataset.csv"), Format.csv); + FileSource file2 = new FileSource(Paths.get(resourcesDirectory + "dataset.csv.gz"), Format.csv); + + List localSources = Arrays.asList(file1, file2); + + System.out.println("Prepared " + localSources.size() + " local files for upload:"); + for (LocalSource source : localSources) { + System.out.println(" - " + source.getName() + " (format: " + source.getFormat() + ")"); + } + + IngestRequestProperties properties = + IngestRequestPropertiesBuilder.create() + .withEnableTracking(true) + .build(); + + // Step 4: Upload all local files to blob storage using uploadManyAsync + // Note: The Kotlin suspend function uploadManyAsync is exposed to Java as uploadManyAsync + // (via @JvmName annotation) and returns CompletableFuture + System.out.println("Uploading " + localSources.size() + " files to blob storage..."); + + return uploader.uploadManyAsync(localSources) + .thenCompose(uploadResults -> { + // Step 5: Process upload results + System.out.println("Upload completed:"); + System.out.println(" Successes: " + uploadResults.getSuccesses().size()); + System.out.println(" Failures: " + uploadResults.getFailures().size()); + + // Log any failures + for (var failure : uploadResults.getFailures()) { + System.err.println(" Upload failed for " + failure.getSourceName() + + ": " + failure.getErrorMessage()); + } + + // Step 6: Convert successful uploads to BlobSource list + List blobSources = new ArrayList<>(); + for (var success : uploadResults.getSuccesses()) { + System.out.println(" Uploaded: " + success.getSourceName() + + " -> " + success.getBlobUrl().split("\\?")[0]); // Hide SAS token in log + + // Create BlobSource from upload result + // Match format from original FileSource (CSV in this case) + BlobSource blobSource = new BlobSource( + success.getBlobUrl(), + Format.csv, // All our files are CSV format + UUID.randomUUID(), + CompressionType.GZIP // Uploader auto-compresses to GZIP + ); + blobSources.add(blobSource); + } + + if (blobSources.isEmpty()) { + return CompletableFuture.failedFuture( + new RuntimeException("All 
uploads failed - nothing to ingest")); + } + + // Step 7: Ingest all blobs as a batch + System.out.println("Ingesting " + blobSources.size() + " blobs as a batch..."); + return queuedIngestClient.ingestAsync(database, table, blobSources, properties) + .thenCompose(response -> { + System.out.println( + "Batch ingestion queued. Operation ID: " + + response.getIngestResponse().getIngestionOperationId()); + System.out.println("Number of sources in batch: " + blobSources.size()); + return trackIngestionOperation(response, "Local Files Via Blob Upload"); + }); + }) + .whenComplete((unused, throwable) -> { + // Clean up uploader + uploader.close(); + System.out.println("ManagedUploader closed"); + }); } /** @@ -369,9 +534,7 @@ private static CompletableFuture trackIngestionOperation( }); } - /** - * Prints detailed status information from a StatusResponse - */ + /** Prints detailed status information from a StatusResponse */ private static void printStatusResponse(StatusResponse statusResponse) { if (statusResponse == null) { System.out.println(" Status: null"); diff --git a/samples/src/main/java/ingestv2/StreamingIngestV2.java b/samples/src/main/java/ingestv2/StreamingIngestV2.java index 83269d715..7c3b7149b 100644 --- a/samples/src/main/java/ingestv2/StreamingIngestV2.java +++ b/samples/src/main/java/ingestv2/StreamingIngestV2.java @@ -84,6 +84,9 @@ public static void main(String[] args) { /** * Demonstrates ingestion from various stream sources including: - In-memory string data as CSV * - Compressed file stream (CSV) - JSON file stream with mapping + * + *

NOTE: This example shows both source creation with defaults and source creation with full control + * StreamSource defaults: compression=NONE, sourceId=auto-generated, baseName=null, leaveOpen=false */ static void ingestFromStream() throws Exception { System.out.println("\n=== Ingesting from Streams ==="); @@ -127,15 +130,14 @@ static void ingestFromStream() throws Exception { compressedCsvStream.close(); // Example 3: Ingest JSON with mapping + // Demonstrating minimal parameters for quick prototyping FileInputStream jsonStream = new FileInputStream(resourcesDirectory + "dataset.json"); StreamSource jsonStreamSource = new StreamSource( jsonStream, - Format.json, - CompressionType.NONE, - UUID.randomUUID(), - false); + Format.json); + IngestionMapping mapping = new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON); IngestRequestProperties jsonProperties = IngestRequestPropertiesBuilder.create() .withIngestionMapping(new IngestionMapping(mappingName, IngestionMapping.IngestionMappingType.JSON)) .withEnableTracking(true) @@ -152,18 +154,20 @@ static void ingestFromStream() throws Exception { /** * Demonstrates ingestion from file sources including: - CSV file - Compressed JSON file with * mapping + * + *
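+ * <p>FileSource constructor shapes (paths illustrative; the full-control form matches
+ * the code this example replaces):
+ * <pre>
+ * new FileSource(Paths.get(dir + "dataset.csv"), Format.csv);                                          // defaults
+ * new FileSource(Paths.get(dir + "dataset.csv"), Format.csv, UUID.randomUUID(), CompressionType.NONE); // full control
+ * </pre>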

NOTE: This example shows both source creation with defaults and source creation with full control. + * FileSource defaults: sourceId=auto-generated, compression=auto-detected from extension, baseName=from-filename */ static void ingestFromFile() throws Exception { System.out.println("\n=== Ingesting from Files ==="); String resourcesDirectory = System.getProperty("user.dir") + "/samples/src/main/resources/"; - // Example 1: Ingest CSV file - FileSource csvFileSource = new FileSource( - Paths.get(resourcesDirectory + "dataset.csv"), - Format.csv, - UUID.randomUUID(), - CompressionType.NONE); + // Example 1: Ingest CSV file using with defaults + // Only providing required parameters: path and format + // Defaults: sourceId=auto-generated, compression=auto-detected (NONE for .csv), baseName="dataset.csv" + FileSource csvFileSource = + new FileSource(Paths.get(resourcesDirectory + "dataset.csv"), Format.csv); IngestRequestProperties csvProperties = IngestRequestPropertiesBuilder.create() .withEnableTracking(true) From 4c3d8f64460e2cd9866107bd6393c49785667b83 Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Tue, 20 Jan 2026 17:24:41 +0530 Subject: [PATCH 47/50] addressed review comments (#460) --- .../ingest/v2/uploader/ContainerUploaderBase.kt | 7 ++++--- .../kusto/ingest/v2/uploader/ICustomUploader.kt | 7 ------- .../azure/kusto/quickstart/SampleApp.java | 17 +++++++---------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt index e673ea11b..c7536f76b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -276,8 +276,9 @@ abstract class ContainerUploaderBase( effectiveCompressionType: CompressionType = local.compressionType, ): BlobSource { // Select container using incrementing counter for round-robin distribution + // Note: Math.floorMod handles negative values correctly if overflow occurs var containerIndex = - containerIndexCounter.getAndIncrement() % containers.size + Math.floorMod(containerIndexCounter.getAndIncrement(), containers.size) logger.debug( "Starting upload with {} containers, round-robin index: {}", @@ -389,9 +390,9 @@ abstract class ContainerUploaderBase( ) // TODO check and validate failure scenarios // Use semaphore for true streaming parallelism - // This allows up to maxConcurrency concurrent uploads, starting new ones as soon as slots + // This allows up to effectiveMaxConcurrency concurrent uploads, starting new ones as soon as slots // are available - val semaphore = Semaphore(maxConcurrency) + val semaphore = Semaphore(effectiveMaxConcurrency) // Launch all uploads concurrently, but semaphore limits actual concurrent execution val results = diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt index f2bd6bc9a..ea82e90f7 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ICustomUploader.kt @@ -45,13 +45,6 @@ interface ICustomUploader : Closeable { ): CompletableFuture } -/** - * Extension 
function to convert [ICustomUploader] to [IUploader]. - * - * Kotlin users can use this as: `myCustomUploader.asUploader()` - */ -fun ICustomUploader.asUploader(): IUploader = CustomUploaderAdapter(this) - /** * Static helper methods for [ICustomUploader]. * diff --git a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java index 8dd330750..1d4f81c0b 100644 --- a/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java +++ b/quickstart/src/main/java/com/microsoft/azure/kusto/quickstart/SampleApp.java @@ -1,9 +1,8 @@ package com.microsoft.azure.kusto.quickstart; import com.azure.core.tracing.opentelemetry.OpenTelemetryTracer; +import com.azure.core.credential.TokenCredential; import com.azure.identity.AzureCliCredentialBuilder; -import com.azure.identity.ChainedTokenCredential; -import com.azure.identity.ChainedTokenCredentialBuilder; import com.azure.identity.ClientSecretCredentialBuilder; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.ObjectMapper; @@ -735,7 +734,7 @@ private static void runIngestV2Sample(@NotNull ConfigJson config) { } System.out.println("Running ingest-v2 quickstart sample..."); - ChainedTokenCredential credential = buildIngestV2Credential(ingestV2Config); + TokenCredential credential = buildIngestV2Credential(ingestV2Config); try (QueuedIngestClient queuedIngestClient = QueuedIngestClientBuilder.create(clusterPath) .withAuthentication(credential) @@ -757,25 +756,23 @@ private static void runIngestV2Sample(@NotNull ConfigJson config) { } } - private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2QuickstartConfig config) { + private static TokenCredential buildIngestV2Credential(@NotNull IngestV2QuickstartConfig config) { AuthenticationModeOptions mode = config.getAuthModeOverride(); if (mode == null) { mode = AuthenticationModeOptions.USER_PROMPT; } - ChainedTokenCredentialBuilder builder = new ChainedTokenCredentialBuilder(); if (mode == AuthenticationModeOptions.APP_KEY) { if (StringUtils.isBlank(config.getAppId()) || StringUtils.isBlank(config.getAppKey()) || StringUtils.isBlank(config.getTenantId())) { Utils.errorHandler("AppKey authentication requires 'APP_ID', 'APP_KEY', and 'APP_TENANT' environment variables or ingestV2 overrides."); } - builder.addFirst(new ClientSecretCredentialBuilder() + return new ClientSecretCredentialBuilder() .clientId(config.getAppId()) .clientSecret(config.getAppKey()) .tenantId(config.getTenantId()) - .build()); + .build(); } else { - builder.addFirst(new AzureCliCredentialBuilder().build()); + return new AzureCliCredentialBuilder().build(); } - return builder.build(); } private static @NotNull List> ingestV2FromStreams(ConfigJson config, IngestV2QuickstartConfig ingestV2Config, @@ -835,7 +832,7 @@ private static ChainedTokenCredential buildIngestV2Credential(@NotNull IngestV2Q @NotNull QueuedIngestClient queuedIngestClient) { System.out.println("\n=== Queued batch ingestion: Upload local files to blob, then ingest (ingest-v2) ==="); String clusterPath = ingestV2Config.getClusterPath(); - ChainedTokenCredential credential = buildIngestV2Credential(ingestV2Config); + TokenCredential credential = buildIngestV2Credential(ingestV2Config); ConfigurationCache configCache = DefaultConfigurationCache.create( clusterPath, From 5f296db56580650f6719ad791be2d400400c408c Mon Sep 17 00:00:00 2001 From: Ramachandran A G 
<106139410+ag-ramachandran@users.noreply.github.com> Date: Wed, 21 Jan 2026 10:33:14 +0530 Subject: [PATCH 48/50] Users/ramacg/refactor managed uploader (#455) * * Fix method signature for uploads * * Minor edit to ConfigurationCache to determine refresh interval logic * * Additional tests for preferred upload combinations * * Reformat tests * Add tests for duration --- .../kusto/ingest/v2/KustoBaseApiClient.kt | 3 +- .../v2/auth/endpoints/FastSuffixMatcher.kt | 35 +-- .../auth/endpoints/KustoTrustedEndpoints.kt | 69 ++--- .../endpoints/WellKnownKustoEndpointsData.kt | 48 ++-- .../ingest/v2/common/ConfigurationCache.kt | 111 +++++++- .../v2/uploader/ContainerUploaderBase.kt | 19 +- .../ingest/v2/uploader/ManagedUploader.kt | 1 - .../v2/TrustedEndpointValidationTest.kt | 76 ++++-- .../ingest/v2/common/TimeSpanParsingTest.kt | 250 ++++++++++++++++++ .../ingest/v2/uploader/ManagedUploaderTest.kt | 97 +++++++ .../src/test/resources/config-response.json | 21 ++ 11 files changed, 619 insertions(+), 111 deletions(-) create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/TimeSpanParsingTest.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderTest.kt create mode 100644 ingest-v2/src/test/resources/config-response.json diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index a96f3c94b..4559d5dc5 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -41,7 +41,8 @@ open class KustoBaseApiClient( init { // Validate endpoint is trusted unless security checks are skipped // Note: dmUrl might be empty/null in some test scenarios (e.g., mocked clients) - // The null check is required for Java interop - Java callers can pass null despite Kotlin's non-null type + // The null check is required for Java interop - Java callers can pass null despite Kotlin's + // non-null type if (!skipSecurityChecks && dmUrl != null && dmUrl.isNotBlank()) { KustoTrustedEndpoints.validateTrustedEndpoint(dmUrl) } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt index efeb67864..8decc416a 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/FastSuffixMatcher.kt @@ -4,38 +4,37 @@ package com.microsoft.azure.kusto.ingest.v2.auth.endpoints /** * Represents a matching rule for endpoint validation. + * * @param suffix The suffix or hostname to match - * @param exact If true, the candidate must exactly match the suffix. If false, candidate must end with the suffix. + * @param exact If true, the candidate must exactly match the suffix. If false, + * candidate must end with the suffix. */ -data class MatchRule( - val suffix: String, - val exact: Boolean, -) { +data class MatchRule(val suffix: String, val exact: Boolean) { val suffixLength: Int get() = suffix.length } /** * Result of a match operation. 
+ * * @param isMatch Whether the candidate matched * @param matchedRule The rule that matched, or null if no match */ -data class MatchResult( - val isMatch: Boolean, - val matchedRule: MatchRule?, -) +data class MatchResult(val isMatch: Boolean, val matchedRule: MatchRule?) /** - * A fast suffix matcher that efficiently matches hostnames against a set of rules. - * Uses a map indexed by suffix tail for O(1) lookup. + * A fast suffix matcher that efficiently matches hostnames against a set of + * rules. Uses a map indexed by suffix tail for O(1) lookup. */ -class FastSuffixMatcher private constructor( +class FastSuffixMatcher +private constructor( private val suffixLength: Int, private val rules: Map>, ) { companion object { /** * Creates a new matcher with the provided matching rules. + * * @param rules One or more matching rules to apply when match is called * @return FastSuffixMatcher */ @@ -50,7 +49,9 @@ class FastSuffixMatcher private constructor( val processedRules = mutableMapOf>() for (rule in rules) { val suffix = rule.suffix.takeLast(minRuleLength).lowercase() - processedRules.getOrPut(suffix) { mutableListOf() }.add(rule.copy()) + processedRules + .getOrPut(suffix) { mutableListOf() } + .add(rule.copy()) } return FastSuffixMatcher(minRuleLength, processedRules) @@ -59,6 +60,7 @@ class FastSuffixMatcher private constructor( /** * Creates a new matcher with the provided matching rules, extending an * existing matcher. + * * @param existing An existing matcher whose rules are to be baseline * @param rules One or more matching rules to apply when match is called * @return FastSuffixMatcher @@ -75,14 +77,14 @@ class FastSuffixMatcher private constructor( return existing } - val combinedRules = - rules + existing.rules.values.flatten() + val combinedRules = rules + existing.rules.values.flatten() return create(combinedRules) } } /** * Checks if a candidate string matches any of the rules. + * * @param candidate A string to match to the list of match rules * @return true if at least one of the rules matched */ @@ -90,6 +92,7 @@ class FastSuffixMatcher private constructor( /** * Matches an input string to the list of match rules. + * * @param candidate A string to match * @return MatchResult with match status and the matched rule if any */ @@ -113,4 +116,4 @@ class FastSuffixMatcher private constructor( return MatchResult(false, null) } -} \ No newline at end of file +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt index 33efe6911..4908092ca 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/KustoTrustedEndpoints.kt @@ -8,17 +8,17 @@ import java.net.URI import java.net.URISyntaxException /** - * A helper class to determine which DNS names are "well-known/trusted" - * Kusto endpoints. Untrusted endpoints might require additional configuration - * before they can be used, for security reasons. + * A helper class to determine which DNS names are "well-known/trusted" Kusto + * endpoints. Untrusted endpoints might require additional configuration before + * they can be used, for security reasons. 
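+ *
+ * Example (illustrative cluster name):
+ * ```
+ * KustoTrustedEndpoints.validateTrustedEndpoint("https://ingest-mycluster.westus.kusto.windows.net")
+ * ```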
*/ object KustoTrustedEndpoints { - private val logger = LoggerFactory.getLogger(KustoTrustedEndpoints::class.java) + private val logger = + LoggerFactory.getLogger(KustoTrustedEndpoints::class.java) /** - * Global flag to enable/disable endpoint validation. - * When false, untrusted endpoints will only log a warning instead of - * throwing an exception. + * Global flag to enable/disable endpoint validation. When false, untrusted + * endpoints will only log a warning instead of throwing an exception. */ @JvmField @Volatile @@ -26,11 +26,9 @@ object KustoTrustedEndpoints { private val matchers: MutableMap = mutableMapOf() - @Volatile - private var additionalMatcher: FastSuffixMatcher? = null + @Volatile private var additionalMatcher: FastSuffixMatcher? = null - @Volatile - private var overrideMatcher: ((String) -> Boolean)? = null + @Volatile private var overrideMatcher: ((String) -> Boolean)? = null // Default login endpoint for public cloud private const val DEFAULT_PUBLIC_LOGIN_ENDPOINT = @@ -44,7 +42,8 @@ object KustoTrustedEndpoints { try { val endpointsData = WellKnownKustoEndpointsData.getInstance() - endpointsData.allowedEndpointsByLogin.forEach { (loginEndpoint, allowedEndpoints) -> + endpointsData.allowedEndpointsByLogin.forEach { + (loginEndpoint, allowedEndpoints) -> val rules = mutableListOf() // Add suffix rules (exact = false) @@ -58,7 +57,8 @@ object KustoTrustedEndpoints { } if (rules.isNotEmpty()) { - matchers[loginEndpoint.lowercase()] = FastSuffixMatcher.create(rules) + matchers[loginEndpoint.lowercase()] = + FastSuffixMatcher.create(rules) } } @@ -74,6 +74,7 @@ object KustoTrustedEndpoints { /** * Sets an override policy for endpoint validation. + * * @param matcher Rules that determine if a hostname is a valid/trusted * Kusto endpoint (replaces existing rules) */ @@ -83,13 +84,11 @@ object KustoTrustedEndpoints { /** * Adds additional trusted hosts to the matcher. + * * @param rules A set of rules * @param replace If true, nullifies the last added rules */ - fun addTrustedHosts( - rules: List?, - replace: Boolean, - ) { + fun addTrustedHosts(rules: List?, replace: Boolean) { if (rules.isNullOrEmpty()) { if (replace) { additionalMatcher = null @@ -98,14 +97,20 @@ object KustoTrustedEndpoints { } additionalMatcher = - FastSuffixMatcher.create(if (replace) null else additionalMatcher, rules) + FastSuffixMatcher.create( + if (replace) null else additionalMatcher, + rules, + ) } /** * Validates that the endpoint is trusted. + * * @param uri Kusto endpoint URI string - * @param loginEndpoint The login endpoint to check against (optional, defaults to public cloud) - * @throws KustoClientInvalidConnectionStringException if endpoint is not trusted + * @param loginEndpoint The login endpoint to check against (optional, + * defaults to public cloud) + * @throws KustoClientInvalidConnectionStringException if endpoint is not + * trusted */ fun validateTrustedEndpoint( uri: String, @@ -114,29 +119,34 @@ object KustoTrustedEndpoints { try { validateTrustedEndpoint(URI(uri), loginEndpoint) } catch (ex: URISyntaxException) { - throw KustoClientInvalidConnectionStringException(uri, ex.message ?: "Invalid URI", ex) + throw KustoClientInvalidConnectionStringException( + uri, + ex.message ?: "Invalid URI", + ex, + ) } } /** * Validates that the endpoint is trusted. 
+ * * @param uri Kusto endpoint URI * @param loginEndpoint The login endpoint to check against - * @throws KustoClientInvalidConnectionStringException if endpoint is not trusted + * @throws KustoClientInvalidConnectionStringException if endpoint is not + * trusted */ - fun validateTrustedEndpoint( - uri: URI, - loginEndpoint: String, - ) { + fun validateTrustedEndpoint(uri: URI, loginEndpoint: String) { val host = uri.host ?: uri.toString() validateHostnameIsTrusted(host, loginEndpoint) } /** * Validates that a hostname is trusted. + * * @param hostname The hostname to validate * @param loginEndpoint The login endpoint to check against - * @throws KustoClientInvalidConnectionStringException if hostname is not trusted + * @throws KustoClientInvalidConnectionStringException if hostname is not + * trusted */ private fun validateHostnameIsTrusted( hostname: String, @@ -184,9 +194,7 @@ object KustoTrustedEndpoints { ) } - /** - * Checks if the hostname is a local/loopback address. - */ + /** Checks if the hostname is a local/loopback address. */ private fun isLocalAddress(hostname: String): Boolean { val lowerHost = hostname.lowercase() return lowerHost == "localhost" || @@ -198,6 +206,7 @@ object KustoTrustedEndpoints { /** * Checks if a hostname is trusted without throwing an exception. + * * @param hostname The hostname to check * @param loginEndpoint The login endpoint to check against * @return true if the hostname is trusted diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt index 978c7608a..21cfec8d3 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/auth/endpoints/WellKnownKustoEndpointsData.kt @@ -3,13 +3,11 @@ package com.microsoft.azure.kusto.ingest.v2.auth.endpoints import kotlinx.serialization.SerialName -import kotlinx.serialization.Serializable as KSerializable import kotlinx.serialization.json.Json import org.slf4j.LoggerFactory +import kotlinx.serialization.Serializable as KSerializable -/** - * Data class representing the structure of WellKnownKustoEndpoints.json - */ +/** Data class representing the structure of WellKnownKustoEndpoints.json */ @KSerializable data class AllowedEndpoints( @SerialName("AllowedKustoSuffixes") @@ -20,39 +18,49 @@ data class AllowedEndpoints( @KSerializable data class WellKnownKustoEndpointsData( - @SerialName("_Comments") - val comments: List = emptyList(), + @SerialName("_Comments") val comments: List = emptyList(), @SerialName("AllowedEndpointsByLogin") val allowedEndpointsByLogin: Map = emptyMap(), ) { companion object { - private val logger = LoggerFactory.getLogger(WellKnownKustoEndpointsData::class.java) - - @Volatile - private var instance: WellKnownKustoEndpointsData? = null - + private val logger = + LoggerFactory.getLogger(WellKnownKustoEndpointsData::class.java) + + @Volatile private var instance: WellKnownKustoEndpointsData? 
= null + private val json = Json { ignoreUnknownKeys = true isLenient = true } fun getInstance(): WellKnownKustoEndpointsData { - return instance ?: synchronized(this) { - instance ?: readInstance().also { instance = it } - } + return instance + ?: synchronized(this) { + instance ?: readInstance().also { instance = it } + } } private fun readInstance(): WellKnownKustoEndpointsData { return try { - val resourceStream = WellKnownKustoEndpointsData::class.java - .getResourceAsStream("/WellKnownKustoEndpoints.json") - ?: throw RuntimeException("WellKnownKustoEndpoints.json not found in classpath") - - val content = resourceStream.bufferedReader().use { it.readText() } + val resourceStream = + WellKnownKustoEndpointsData::class + .java + .getResourceAsStream( + "/WellKnownKustoEndpoints.json", + ) + ?: throw RuntimeException( + "WellKnownKustoEndpoints.json not found in classpath", + ) + + val content = + resourceStream.bufferedReader().use { it.readText() } json.decodeFromString(content) } catch (ex: Exception) { logger.error("Failed to read WellKnownKustoEndpoints.json", ex) - throw RuntimeException("Failed to read WellKnownKustoEndpoints.json", ex) + throw RuntimeException( + "Failed to read WellKnownKustoEndpoints.json", + ex, + ) } } } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index d9530f546..04e60041b 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -11,6 +11,7 @@ import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import java.lang.AutoCloseable import java.time.Duration import java.util.concurrent.atomic.AtomicReference +import kotlin.math.min /** * Interface for caching configuration data. @@ -150,25 +151,101 @@ class DefaultConfigurationCache( } /** - * Holds both the configuration and its refresh timestamp atomically. This - * prevents race conditions between checking expiration and updating. + * Holds the configuration, its refresh timestamp, and the effective refresh + * interval atomically. This prevents race conditions between checking + * expiration and updating, and ensures we use the correct refresh interval + * from when the config was fetched. */ private data class CachedData( val configuration: ConfigurationResponse, val timestamp: Long, + val refreshInterval: Long, ) private val cache = AtomicReference(null) + /** + * Parses a .NET TimeSpan format string to a Java Duration. + * + * Supports formats: + * - HH:mm:ss (e.g., "01:00:00" = 1 hour) + * - d.HH:mm:ss (e.g., "1.02:30:00" = 1 day, 2 hours, 30 minutes) + * - HH:mm:ss.fffffff (with fractional seconds) + * + * @param timeSpan The TimeSpan string to parse + * @return The parsed Duration, or null if parsing fails + */ + private fun parseTimeSpanToDuration(timeSpan: String): Duration? { + return try { + // Split by '.' 
to handle days ("d.HH:mm:ss"); the prefix counts as days only when it precedes the first ':'
+            val hasDays = timeSpan.substringBefore(':').contains('.')
+            val timePart = if (hasDays) timeSpan.substringAfter('.') else timeSpan
+            val days = if (hasDays) timeSpan.substringBefore('.').toLongOrNull() ?: 0L else 0L
+
+            // Split time part by ':' to get hours, minutes, seconds
+            val timeParts = timePart.split(':')
+            if (timeParts.size < 3) return null
+
+            val hours = timeParts[0].toLongOrNull() ?: return null
+            val minutes = timeParts[1].toLongOrNull() ?: return null
+
+            // Handle fractional seconds (e.g., "30.1234567")
+            val secondsPart = timeParts[2]
+            val secondsValue = secondsPart.toDoubleOrNull() ?: return null
+
+            // Build duration
+            var duration =
+                Duration.ofDays(days)
+                    .plusHours(hours)
+                    .plusMinutes(minutes)
+                    .plusSeconds(secondsValue.toLong())
+
+            // Add fractional seconds if present
+            val fractionalSeconds = (secondsValue - secondsValue.toLong())
+            if (fractionalSeconds > 0) {
+                duration =
+                    duration.plusMillis((fractionalSeconds * 1000).toLong())
+            }
+
+            duration
+        } catch (_: Exception) {
+            null
+        }
+    }
+
+    /**
+     * Helper function to calculate effective refresh interval from a
+     * configuration response. If the configuration specifies a refresh
+     * interval, use the minimum of that and the default. Otherwise, use the
+     * default refresh interval.
+     */
+    private fun calculateEffectiveRefreshInterval(
+        config: ConfigurationResponse?,
+    ): Long {
+        val configRefreshInterval = config?.containerSettings?.refreshInterval
+        return if (configRefreshInterval?.isNotEmpty() == true) {
+            val parsedDuration = parseTimeSpanToDuration(configRefreshInterval)
+            if (parsedDuration != null) {
+                min(this.refreshInterval.toMillis(), parsedDuration.toMillis())
+            } else {
+                // If parsing fails, fall back to the default refresh interval
+                this.refreshInterval.toMillis()
+            }
+        } else {
+            this.refreshInterval.toMillis()
+        }
+    }
+
     override suspend fun getConfiguration(): ConfigurationResponse {
         val currentTime = System.currentTimeMillis()
-        val cached = cache.get()
+        val cachedData = cache.get()

-        // Check if we need to refresh
+        // Check if we need to refresh based on the effective refresh interval
+        // stored with the cached data
         val needsRefresh =
-            cached == null ||
-                (currentTime - cached.timestamp) >=
-                refreshInterval.toMillis()
+            cachedData == null ||
+                (currentTime - cachedData.timestamp) >=
+                cachedData.refreshInterval

         if (needsRefresh) {
             // Attempt to refresh - only one thread will succeed
@@ -176,19 +253,27 @@ class DefaultConfigurationCache(
                 runCatching { provider() }
                     .getOrElse {
                         // If fetch fails, return cached if available, otherwise rethrow
-                        cached?.configuration ?: throw it
+                        cachedData?.configuration ?: throw it
                     }

+            // Calculate effective refresh interval from the NEW configuration
+            val newEffectiveRefreshInterval =
+                calculateEffectiveRefreshInterval(newConfig)
+
             // Atomically update if still needed (prevents thundering herd)
             cache.updateAndGet { current ->
-                val currentTimestamp = current?.timestamp ?: 0
-                // Only update if current is null or still stale
+                // Only update if current is null or still stale based on its
+                // stored effective interval
                 if (
                     current == null ||
-                        (currentTime - current.timestamp) >=
-                        refreshInterval.toMillis()
+                        (currentTime - current.timestamp) >=
+                        current.refreshInterval
                 ) {
-                    CachedData(newConfig, currentTime)
+                    CachedData(
+                        newConfig,
+                        currentTime,
+                        newEffectiveRefreshInterval,
+                    )
                 } else {
                     // Another thread already refreshed
                     current
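
For reference, the parsing and clamping behavior above can be exercised in isolation. This is a minimal sketch: `parseTimeSpan` mirrors the private `parseTimeSpanToDuration` helper for illustration only and is not an API introduced by this patch.

```kotlin
import java.time.Duration

// Illustrative re-implementation of the .NET TimeSpan parsing described above.
fun parseTimeSpan(timeSpan: String): Duration? {
    // A leading "d." prefix counts as days only when it precedes the first ':'.
    val hasDays = timeSpan.substringBefore(':').contains('.')
    val timePart = if (hasDays) timeSpan.substringAfter('.') else timeSpan
    val days = if (hasDays) timeSpan.substringBefore('.').toLongOrNull() ?: 0L else 0L
    val parts = timePart.split(':')
    if (parts.size < 3) return null
    val hours = parts[0].toLongOrNull() ?: return null
    val minutes = parts[1].toLongOrNull() ?: return null
    val seconds = parts[2].toDoubleOrNull() ?: return null
    return Duration.ofDays(days)
        .plusHours(hours)
        .plusMinutes(minutes)
        .plusMillis((seconds * 1000).toLong())
}

fun main() {
    println(parseTimeSpan("01:00:00"))   // PT1H
    println(parseTimeSpan("1.02:30:00")) // PT26H30M
    println(parseTimeSpan("00:00:30.5")) // PT30.5S
    // The cache clamps to the smaller of the server value and the local default:
    val default = Duration.ofHours(2)
    println(minOf(default, parseTimeSpan("00:30:00") ?: default)) // PT30M
}
```

diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt 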
b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt index c7536f76b..cb6bb4bbd 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -54,7 +54,7 @@ abstract class ContainerUploaderBase( private val retryPolicy: IngestRetryPolicy, private val maxConcurrency: Int, private val maxDataSize: Long, - private val configurationCache: ConfigurationCache, + protected val configurationCache: ConfigurationCache, private val uploadMethod: UploadMethod, private val tokenCredential: TokenCredential?, ) : IUploader { @@ -112,7 +112,7 @@ abstract class ContainerUploaderBase( } // Get containers from configuration - val containers = selectContainers(configurationCache, uploadMethod) + val containers = selectContainers(uploadMethod) if (containers.isEmpty()) { logger.error("No containers available for upload") @@ -278,7 +278,10 @@ abstract class ContainerUploaderBase( // Select container using incrementing counter for round-robin distribution // Note: Math.floorMod handles negative values correctly if overflow occurs var containerIndex = - Math.floorMod(containerIndexCounter.getAndIncrement(), containers.size) + Math.floorMod( + containerIndexCounter.getAndIncrement(), + containers.size, + ) logger.debug( "Starting upload with {} containers, round-robin index: {}", @@ -390,7 +393,8 @@ abstract class ContainerUploaderBase( ) // TODO check and validate failure scenarios // Use semaphore for true streaming parallelism - // This allows up to effectiveMaxConcurrency concurrent uploads, starting new ones as soon as slots + // This allows up to effectiveMaxConcurrency concurrent uploads, starting new ones as soon + // as slots // are available val semaphore = Semaphore(effectiveMaxConcurrency) @@ -755,17 +759,14 @@ abstract class ContainerUploaderBase( } /** - * Selects the appropriate containers for upload based on the provided - * configuration cache and upload method. + * Selects the appropriate containers for upload based on the uploader's + * configuration cache and the specified upload method. * - * @param configurationCache The configuration cache to use for selecting - * containers. * @param uploadMethod The upload method to consider when selecting * containers. * @return A list of selected container information. 
*/ abstract suspend fun selectContainers( - configurationCache: ConfigurationCache, uploadMethod: UploadMethod, ): List } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt index a2084e200..df38b52ed 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploader.kt @@ -40,7 +40,6 @@ internal constructor( } override suspend fun selectContainers( - configurationCache: ConfigurationCache, uploadMethod: UploadMethod, ): List { // This method is delegated to and this calls getConfiguration again to ensure fresh data is diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt index e2b1c489b..aa5db6c94 100644 --- a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/TrustedEndpointValidationTest.kt @@ -34,19 +34,27 @@ import kotlin.test.assertTrue * - KustoTrustedEndpoints object for validation logic * - KustoClientInvalidConnectionStringException for untrusted endpoints * - * Note: This test class uses SAME_THREAD execution mode to prevent race conditions - * when modifying the global enableWellKnownKustoEndpointsValidation flag. + * Note: This test class uses SAME_THREAD execution mode to prevent race + * conditions when modifying the global enableWellKnownKustoEndpointsValidation + * flag. */ @Execution(ExecutionMode.SAME_THREAD) class TrustedEndpointValidationTest { // Mock token credential for testing - private val mockTokenCredential = TokenCredential { _: TokenRequestContext -> - Mono.just(AccessToken("mock-token", OffsetDateTime.now().plusHours(1))) - } + private val mockTokenCredential = + TokenCredential { _: TokenRequestContext -> + Mono.just( + AccessToken( + "mock-token", + OffsetDateTime.now().plusHours(1), + ), + ) + } // Example of an adhoc/untrusted endpoint - private val untrustedEndpoint = "https://my-random-adhoc-cluster.example.com" + private val untrustedEndpoint = + "https://my-random-adhoc-cluster.example.com" // Example of a trusted Kusto endpoint (public cloud) private val trustedEndpoint = "https://mycluster.kusto.windows.net" @@ -74,7 +82,9 @@ class TrustedEndpointValidationTest { // ============================================================================ @Test - @DisplayName("StreamingIngestClient: Untrusted endpoint throws exception without skipSecurityChecks") + @DisplayName( + "StreamingIngestClient: Untrusted endpoint throws exception without skipSecurityChecks", + ) fun `streaming client - untrusted endpoint throws without skip security checks`() { val exception = assertThrows { @@ -86,13 +96,16 @@ class TrustedEndpointValidationTest { assertTrue( exception.message?.contains("not trusted") == true || - exception.message?.contains("kustotrustedendpoints") == true, + exception.message?.contains("kustotrustedendpoints") == + true, "Exception should indicate endpoint is not trusted. 
Actual: ${exception.message}", ) } @Test - @DisplayName("QueuedIngestClient: Untrusted endpoint throws exception without skipSecurityChecks") + @DisplayName( + "QueuedIngestClient: Untrusted endpoint throws exception without skipSecurityChecks", + ) fun `queued client - untrusted endpoint throws without skip security checks`() { val exception = assertThrows { @@ -104,7 +117,8 @@ class TrustedEndpointValidationTest { assertTrue( exception.message?.contains("not trusted") == true || - exception.message?.contains("kustotrustedendpoints") == true, + exception.message?.contains("kustotrustedendpoints") == + true, "Exception should indicate endpoint is not trusted. Actual: ${exception.message}", ) } @@ -114,7 +128,9 @@ class TrustedEndpointValidationTest { // ============================================================================ @Test - @DisplayName("StreamingIngestClient: Untrusted endpoint works with skipSecurityChecks") + @DisplayName( + "StreamingIngestClient: Untrusted endpoint works with skipSecurityChecks", + ) fun `streaming client - untrusted endpoint works with skip security checks`() { assertDoesNotThrow { StreamingIngestClientBuilder.create(untrustedEndpoint) @@ -125,7 +141,9 @@ class TrustedEndpointValidationTest { } @Test - @DisplayName("QueuedIngestClient: Untrusted endpoint works with skipSecurityChecks") + @DisplayName( + "QueuedIngestClient: Untrusted endpoint works with skipSecurityChecks", + ) fun `queued client - untrusted endpoint works with skip security checks`() { assertDoesNotThrow { QueuedIngestClientBuilder.create(untrustedEndpoint) @@ -140,7 +158,9 @@ class TrustedEndpointValidationTest { // ============================================================================ @Test - @DisplayName("StreamingIngestClient: Trusted Kusto endpoint works without skipSecurityChecks") + @DisplayName( + "StreamingIngestClient: Trusted Kusto endpoint works without skipSecurityChecks", + ) fun `streaming client - trusted endpoint works without skip security checks`() { assertDoesNotThrow { StreamingIngestClientBuilder.create(trustedEndpoint) @@ -150,7 +170,9 @@ class TrustedEndpointValidationTest { } @Test - @DisplayName("QueuedIngestClient: Trusted Kusto endpoint works without skipSecurityChecks") + @DisplayName( + "QueuedIngestClient: Trusted Kusto endpoint works without skipSecurityChecks", + ) fun `queued client - trusted endpoint works without skip security checks`() { assertDoesNotThrow { QueuedIngestClientBuilder.create(trustedEndpoint) @@ -221,7 +243,9 @@ class TrustedEndpointValidationTest { ) localhostEndpoints.forEach { endpoint -> - assertDoesNotThrow("Localhost endpoint $endpoint should be trusted") { + assertDoesNotThrow( + "Localhost endpoint $endpoint should be trusted", + ) { StreamingIngestClientBuilder.create(endpoint) .withAuthentication(mockTokenCredential) .build() @@ -241,7 +265,9 @@ class TrustedEndpointValidationTest { "Public cloud endpoint should be trusted", ) assertTrue( - KustoTrustedEndpoints.isTrusted("mycluster.kusto.fabric.microsoft.com"), + KustoTrustedEndpoints.isTrusted( + "mycluster.kusto.fabric.microsoft.com", + ), "Fabric endpoint should be trusted", ) assertTrue( @@ -255,18 +281,26 @@ class TrustedEndpointValidationTest { } @Test - @DisplayName("KustoTrustedEndpoints.validateTrustedEndpoint throws for untrusted") + @DisplayName( + "KustoTrustedEndpoints.validateTrustedEndpoint throws for untrusted", + ) fun `validateTrustedEndpoint throws for untrusted endpoints`() { assertThrows { - 
KustoTrustedEndpoints.validateTrustedEndpoint("https://evil.example.com") + KustoTrustedEndpoints.validateTrustedEndpoint( + "https://evil.example.com", + ) } } @Test - @DisplayName("KustoTrustedEndpoints.validateTrustedEndpoint passes for trusted") + @DisplayName( + "KustoTrustedEndpoints.validateTrustedEndpoint passes for trusted", + ) fun `validateTrustedEndpoint passes for trusted endpoints`() { assertDoesNotThrow { - KustoTrustedEndpoints.validateTrustedEndpoint("https://mycluster.kusto.windows.net") + KustoTrustedEndpoints.validateTrustedEndpoint( + "https://mycluster.kusto.windows.net", + ) } } -} \ No newline at end of file +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/TimeSpanParsingTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/TimeSpanParsingTest.kt new file mode 100644 index 000000000..5e00d68bc --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/TimeSpanParsingTest.kt @@ -0,0 +1,250 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common + +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import com.microsoft.azure.kusto.ingest.v2.models.ContainerSettings +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Test +import java.time.Duration +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class TimeSpanParsingTest { + + private fun createClientDetails() = + ClientDetails( + applicationForTracing = "test", + userNameForTracing = "testUser", + clientVersionForTracing = "1.0", + ) + + @Test + fun `parseTimeSpan handles standard HH_mm_ss format`() { + runBlocking { + // Given a configuration with "01:00:00" (1 hour) + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + lakeFolders = emptyList(), + refreshInterval = "01:00:00", + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + val cache = + DefaultConfigurationCache( + refreshInterval = Duration.ofHours(2), + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then it should parse "01:00:00" as 1 hour + assertNotNull(result) + assertEquals("01:00:00", result.containerSettings?.refreshInterval) + } + } + + @Test + fun `parseTimeSpan handles days format d_HH_mm_ss`() { + runBlocking { + // Given a configuration with "1.02:30:00" (1 day, 2 hours, 30 minutes) + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + lakeFolders = emptyList(), + refreshInterval = "1.02:30:00", + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + val cache = + DefaultConfigurationCache( + refreshInterval = Duration.ofDays(2), + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then it should parse correctly + assertNotNull(result) + assertEquals( + "1.02:30:00", + result.containerSettings?.refreshInterval, + ) + } + } + + @Test + fun `parseTimeSpan handles fractional seconds`() { + runBlocking { + // Given a configuration with "00:00:30.5" (30.5 seconds) + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + 
lakeFolders = emptyList(), + refreshInterval = "00:00:30.5", + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + val cache = + DefaultConfigurationCache( + refreshInterval = Duration.ofMinutes(1), + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then it should parse correctly + assertNotNull(result) + assertEquals( + "00:00:30.5", + result.containerSettings?.refreshInterval, + ) + } + } + + @Test + fun `parseTimeSpan uses default when format is invalid`() { + runBlocking { + // Given a configuration with invalid format + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + lakeFolders = emptyList(), + refreshInterval = "invalid", + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + val defaultRefresh = Duration.ofHours(3) + val cache = + DefaultConfigurationCache( + refreshInterval = defaultRefresh, + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then it should fall back to default and not throw + assertNotNull(result) + } + } + + @Test + fun `parseTimeSpan uses minimum of config and default`() { + runBlocking { + // Given a configuration with "00:30:00" (30 minutes) + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + lakeFolders = emptyList(), + refreshInterval = "00:30:00", + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + // And default is 2 hours + val cache = + DefaultConfigurationCache( + refreshInterval = Duration.ofHours(2), + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then the effective refresh should be the minimum (30 minutes from config) + assertNotNull(result) + } + } + + @Test + fun `parseTimeSpan handles empty string`() { + runBlocking { + // Given a configuration with empty refreshInterval + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + lakeFolders = emptyList(), + refreshInterval = "", + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + val cache = + DefaultConfigurationCache( + refreshInterval = Duration.ofHours(1), + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then it should use default and not throw + assertNotNull(result) + } + } + + @Test + fun `parseTimeSpan handles null refreshInterval`() { + runBlocking { + // Given a configuration with null refreshInterval + val config = + ConfigurationResponse( + containerSettings = + ContainerSettings( + containers = emptyList(), + lakeFolders = emptyList(), + refreshInterval = null, + preferredUploadMethod = null, + ), + ingestionSettings = null, + ) + + val cache = + DefaultConfigurationCache( + refreshInterval = Duration.ofHours(1), + clientDetails = createClientDetails(), + configurationProvider = { config }, + ) + + // When getting configuration + val result = cache.getConfiguration() + + // Then it should use default and not throw + assertNotNull(result) + } + } +} diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderTest.kt 
b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderTest.kt new file mode 100644 index 000000000..d201de9e5 --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ManagedUploaderTest.kt @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.uploader + +import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache +import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.runBlocking +import kotlinx.coroutines.withContext +import kotlinx.serialization.json.Json +import kotlinx.serialization.modules.SerializersModule +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNotNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths +import java.time.Duration +import java.time.OffsetDateTime + +class ManagedUploaderTest { + + @ParameterizedTest(name = "PreferredUploadMethod={0}") + @CsvSource("DEFAULT", "STORAGE", "LAKE") + fun selectContainers(preferredUploadMethod: String): Unit = runBlocking { + val uploadMethod = UploadMethod.valueOf(preferredUploadMethod) + val configurationCache = TestConfigurationCache() + val managedUploader = + ManagedUploaderBuilder.create() + .withConfigurationCache(configurationCache) + .build() + val selectedContainers = managedUploader.selectContainers(uploadMethod) + assertNotNull(selectedContainers) + assertTrue(selectedContainers.isNotEmpty()) + selectedContainers.forEach { + assertNotNull(it.containerInfo.path) + // When the server configuration prefers Lake and the user does not specify (DEFAULT), + // ManagedUploader should honor the server preference and use Lake. If the user + // explicitly + // specifies a method (e.g., STORAGE), that explicit choice is respected. 
+ if (uploadMethod != UploadMethod.STORAGE) { + assertTrue( + it.containerInfo.path?.contains("alakefolder") ?: false, + ) + assertFalse( + it.containerInfo.path?.contains("somecontainer") + ?: false, + ) + } else { + // User mentioned storage here, use that + assertFalse( + it.containerInfo.path?.contains("alakefolder") ?: false, + ) + assertTrue( + it.containerInfo.path?.contains("somecontainer") + ?: false, + ) + } + } + } + + private class TestConfigurationCache : ConfigurationCache { + private val json = Json { + ignoreUnknownKeys = true + serializersModule = SerializersModule { + contextual(OffsetDateTime::class, OffsetDateTimeSerializer) + } + } + override val refreshInterval: Duration + get() = Duration.ofHours(1) + + override suspend fun getConfiguration(): ConfigurationResponse { + val resourcesDirectory = "src/test/resources/" + val fileName = "config-response.json" + val configContent = + withContext(Dispatchers.IO) { + Files.readString( + Paths.get(resourcesDirectory + fileName), + StandardCharsets.UTF_8, + ) + } + val configurationResponse = + json.decodeFromString(configContent) + + assertNotNull(configurationResponse) + assertNotNull(configurationResponse.containerSettings) + return configurationResponse + } + + override fun close() { + // No resources to clean up in this test implementation + } + } +} diff --git a/ingest-v2/src/test/resources/config-response.json b/ingest-v2/src/test/resources/config-response.json new file mode 100644 index 000000000..d58a019c0 --- /dev/null +++ b/ingest-v2/src/test/resources/config-response.json @@ -0,0 +1,21 @@ +{ + "containerSettings": { + "containers": [ + { + "path": "https://somecontainer.z11.blob.storage.azure.net/trdwvweg9nfnngghb1eey-20260108-ingestdata-e5c334ee145d4b4-0?sv=keys" + } + ], + "lakeFolders": [ + { + "path": "https://alakefolder.onelake.fabric.microsoft.com/17a97d10-a17f-4d72-8f38-858aac992978/bb9c26d4-4f99-44b5-9614-3ebb037f3510/Ingestions/20260108-lakedata" + } + ], + "refreshInterval": "01:00:00", + "preferredUploadMethod": "Lake" + }, + "ingestionSettings": { + "maxBlobsPerBatch": 20, + "maxDataSize": 6442450944, + "preferredIngestionMethod": "Rest" + } +} \ No newline at end of file From b1ec31150daadd65e1b2401d130c6333d05b608a Mon Sep 17 00:00:00 2001 From: Ramachandran A G <106139410+ag-ramachandran@users.noreply.github.com> Date: Wed, 21 Jan 2026 10:46:43 +0530 Subject: [PATCH 49/50] * Fix JDK version in POM (#461) --- ingest-v2/pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ingest-v2/pom.xml b/ingest-v2/pom.xml index 767e62081..a2bdcd66f 100644 --- a/ingest-v2/pom.xml +++ b/ingest-v2/pom.xml @@ -183,6 +183,10 @@ + + ${java.version} + ${java.version} + From f61924c749d56d0390d5d3c174ccfc3d79956094 Mon Sep 17 00:00:00 2001 From: Tanmaya Panda <108695755+tanmaya-panda1@users.noreply.github.com> Date: Wed, 21 Jan 2026 14:46:55 +0530 Subject: [PATCH 50/50] added private link and removed tuple/Pair (#462) --- .../kusto/ingest/v2/ConfigurationClient.kt | 5 + .../kusto/ingest/v2/KustoBaseApiClient.kt | 7 +- .../v2/builders/BaseIngestClientBuilder.kt | 5 +- .../ManagedStreamingIngestClientBuilder.kt | 3 + .../v2/builders/QueuedIngestClientBuilder.kt | 3 + .../ingest/v2/common/ConfigurationCache.kt | 5 + .../kusto/ingest/v2/common/models/S2SToken.kt | 30 ++ .../v2/uploader/ContainerUploaderBase.kt | 34 ++- .../ingest/v2/common/FabricPrivateLinkTest.kt | 272 ++++++++++++++++++ 9 files changed, 349 insertions(+), 15 deletions(-) create mode 100644 
ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/S2SToken.kt create mode 100644 ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/FabricPrivateLinkTest.kt diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt index 98b2f322e..1ca62fa7f 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/ConfigurationClient.kt @@ -5,6 +5,7 @@ package com.microsoft.azure.kusto.ingest.v2 import com.azure.core.credential.TokenCredential import com.microsoft.azure.kusto.ingest.v2.common.exceptions.IngestException import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.S2SToken import com.microsoft.azure.kusto.ingest.v2.infrastructure.HttpResponse import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import io.ktor.http.HttpStatusCode @@ -16,12 +17,16 @@ class ConfigurationClient( override val tokenCredential: TokenCredential, override val skipSecurityChecks: Boolean = false, override val clientDetails: ClientDetails, + override val s2sTokenProvider: (suspend () -> S2SToken)? = null, + override val s2sFabricPrivateLinkAccessContext: String? = null, ) : KustoBaseApiClient( dmUrl, tokenCredential, skipSecurityChecks, clientDetails, + s2sTokenProvider = s2sTokenProvider, + s2sFabricPrivateLinkAccessContext = s2sFabricPrivateLinkAccessContext, ) { private val logger = LoggerFactory.getLogger(ConfigurationClient::class.java) diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt index 4559d5dc5..2ad0546a9 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/KustoBaseApiClient.kt @@ -7,6 +7,7 @@ import com.azure.core.credential.TokenRequestContext import com.microsoft.azure.kusto.ingest.v2.apis.DefaultApi import com.microsoft.azure.kusto.ingest.v2.auth.endpoints.KustoTrustedEndpoints import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.S2SToken import com.microsoft.azure.kusto.ingest.v2.common.serialization.OffsetDateTimeSerializer import io.ktor.client.HttpClientConfig import io.ktor.client.plugins.DefaultRequest @@ -33,7 +34,7 @@ open class KustoBaseApiClient( open val skipSecurityChecks: Boolean = false, open val clientDetails: ClientDetails, open val clientRequestIdPrefix: String = "KIC.execute", - open val s2sTokenProvider: (suspend () -> Pair)? = null, + open val s2sTokenProvider: (suspend () -> S2SToken)? = null, open val s2sFabricPrivateLinkAccessContext: String? 
= null, ) { private val logger = LoggerFactory.getLogger(KustoBaseApiClient::class.java) @@ -134,10 +135,10 @@ open class KustoBaseApiClient( onRequest { request, _ -> try { // Get S2S token - val (token, scheme) = provider() + val s2sToken = provider() request.headers.append( "x-ms-s2s-actor-authorization", - "$scheme $token", + s2sToken.toHeaderValue(), ) // Add Fabric Private Link access context header diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt index 053fb3956..e086c6f47 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/BaseIngestClientBuilder.kt @@ -8,6 +8,7 @@ import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_CONCURRENCY import com.microsoft.azure.kusto.ingest.v2.UPLOAD_CONTAINER_MAX_DATA_SIZE_BYTES import com.microsoft.azure.kusto.ingest.v2.common.ConfigurationCache import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.S2SToken import com.microsoft.azure.kusto.ingest.v2.uploader.IUploader import com.microsoft.azure.kusto.ingest.v2.uploader.ManagedUploader @@ -17,7 +18,7 @@ abstract class BaseIngestClientBuilder> { protected var clientDetails: ClientDetails? = null // Fabric Private Link support - protected var s2sTokenProvider: (suspend () -> Pair)? = null + protected var s2sTokenProvider: (suspend () -> S2SToken)? = null protected var s2sFabricPrivateLinkAccessContext: String? = null // Added properties for ingestion endpoint and authentication @@ -60,7 +61,7 @@ abstract class BaseIngestClientBuilder> { * @return This builder instance for method chaining */ fun withFabricPrivateLink( - s2sTokenProvider: suspend () -> Pair, + s2sTokenProvider: suspend () -> S2SToken, s2sFabricPrivateLinkAccessContext: String, ): T { require(s2sFabricPrivateLinkAccessContext.isNotBlank()) { diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt index cf8b882a6..ffe8f32e6 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/ManagedStreamingIngestClientBuilder.kt @@ -55,6 +55,9 @@ private constructor(private val dmUrl: String) : tokenCredential = this.tokenCredential, skipSecurityChecks = this.skipSecurityChecks, clientDetails = effectiveClientDetails, + s2sTokenProvider = this.s2sTokenProvider, + s2sFabricPrivateLinkAccessContext = + this.s2sFabricPrivateLinkAccessContext, ) val effectiveUploader = diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt index 04834ec70..313951905 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/builders/QueuedIngestClientBuilder.kt @@ -71,6 +71,9 @@ class QueuedIngestClientBuilder private constructor(private val dmUrl: String) : tokenCredential = this.tokenCredential, 
skipSecurityChecks = this.skipSecurityChecks, clientDetails = effectiveClientDetails, + s2sTokenProvider = this.s2sTokenProvider, + s2sFabricPrivateLinkAccessContext = + this.s2sFabricPrivateLinkAccessContext, ) val apiClient = createApiClient( diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt index 04e60041b..d581edc2c 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/ConfigurationCache.kt @@ -7,6 +7,7 @@ import com.microsoft.azure.kusto.ingest.v2.CONFIG_CACHE_DEFAULT_REFRESH_INTERVAL import com.microsoft.azure.kusto.ingest.v2.CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS import com.microsoft.azure.kusto.ingest.v2.ConfigurationClient import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.S2SToken import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse import java.lang.AutoCloseable import java.time.Duration @@ -69,6 +70,8 @@ class DefaultConfigurationCache( CONFIG_CACHE_DEFAULT_SKIP_SECURITY_CHECKS, val clientDetails: ClientDetails, val configurationProvider: (suspend () -> ConfigurationResponse)? = null, + val s2sTokenProvider: (suspend () -> S2SToken)? = null, + val s2sFabricPrivateLinkAccessContext: String? = null, ) : ConfigurationCache { companion object { /** @@ -146,6 +149,8 @@ class DefaultConfigurationCache( tokenCredential!!, skipSecurityChecks!!, clientDetails, + s2sTokenProvider, + s2sFabricPrivateLinkAccessContext, ) .getConfigurationDetails() } diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/S2SToken.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/S2SToken.kt new file mode 100644 index 000000000..5d22f087c --- /dev/null +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/common/models/S2SToken.kt @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +package com.microsoft.azure.kusto.ingest.v2.common.models + +/** + * Represents an S2S (Service-to-Service) authentication token used for + * Fabric Private Link authentication. + * + * @property scheme The authentication scheme (e.g., "Bearer") + * @property token The authentication token value + */ +data class S2SToken( + val scheme: String, + val token: String, +) { + /** + * Formats the token as an HTTP Authorization header value. + * @return The formatted header value in the format "{scheme} {token}" + */ + fun toHeaderValue(): String = "$scheme $token" + + companion object { + /** + * Creates an S2SToken with the Bearer scheme. 
+ * @param token The token value + * @return An S2SToken with scheme "Bearer" + */ + fun bearer(token: String): S2SToken = S2SToken("Bearer", token) + } +} diff --git a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt index cb6bb4bbd..71dc91af8 100644 --- a/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt +++ b/ingest-v2/src/main/kotlin/com/microsoft/azure/kusto/ingest/v2/uploader/ContainerUploaderBase.kt @@ -123,7 +123,7 @@ abstract class ContainerUploaderBase( } // Compress stream if needed (for non-binary, non-compressed formats) - val (uploadStream, effectiveCompressionType, compressionJob) = + val preparedStream = if (local.shouldCompress) { logger.debug( "Auto-compressing stream for {} (format: {}, original compression: {})", @@ -137,13 +137,17 @@ abstract class ContainerUploaderBase( name, availableSize, ) - Triple( - compressResult.stream, - CompressionType.GZIP, - compressResult.compressionJob, + PreparedUploadStream( + stream = compressResult.stream, + compressionType = CompressionType.GZIP, + compressionJob = compressResult.compressionJob, ) } else { - Triple(originalStream, local.compressionType, null) + PreparedUploadStream( + stream = originalStream, + compressionType = local.compressionType, + compressionJob = null, + ) } // Upload with retry policy and container cycling @@ -151,13 +155,13 @@ abstract class ContainerUploaderBase( uploadWithRetries( local = local, name = name, - stream = uploadStream, + stream = preparedStream.stream, containers = containers, - effectiveCompressionType = effectiveCompressionType, + effectiveCompressionType = preparedStream.compressionType, ) .also { // Ensure compression job completes successfully - compressionJob?.await() + preparedStream.compressionJob?.await() logger.debug( "Compression job completed successfully for {}", name, @@ -165,7 +169,7 @@ abstract class ContainerUploaderBase( } } catch (e: Exception) { // Cancel compression job if upload fails - compressionJob?.cancel() + preparedStream.compressionJob?.cancel() throw e } } @@ -262,6 +266,16 @@ abstract class ContainerUploaderBase( val compressionJob: kotlinx.coroutines.Deferred, ) + /** + * Helper class to hold prepared upload stream with its compression type + * and optional compression job. + */ + private data class PreparedUploadStream( + val stream: InputStream, + val compressionType: CompressionType, + val compressionJob: kotlinx.coroutines.Deferred?, + ) + /** * Uploads a stream with retry logic and container cycling. Uses an * incrementing counter (mod container count) for round-robin container diff --git a/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/FabricPrivateLinkTest.kt b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/FabricPrivateLinkTest.kt new file mode 100644 index 000000000..d90aa667d --- /dev/null +++ b/ingest-v2/src/test/kotlin/com/microsoft/azure/kusto/ingest/v2/common/FabricPrivateLinkTest.kt @@ -0,0 +1,272 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+package com.microsoft.azure.kusto.ingest.v2.common + +import com.azure.core.credential.TokenCredential +import com.microsoft.azure.kusto.ingest.v2.ConfigurationClient +import com.microsoft.azure.kusto.ingest.v2.builders.ManagedStreamingIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.builders.StreamingIngestClientBuilder +import com.microsoft.azure.kusto.ingest.v2.common.models.ClientDetails +import com.microsoft.azure.kusto.ingest.v2.common.models.S2SToken +import com.microsoft.azure.kusto.ingest.v2.models.ConfigurationResponse +import io.mockk.mockk +import kotlinx.coroutines.runBlocking +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertNull + +/** + * Tests for Fabric Private Link (S2S authentication) support in the + * configuration cache and client builders. + */ +class FabricPrivateLinkTest { + + private val validDmUrl = "https://ingest-test.kusto.windows.net" + private val mockTokenCredential: TokenCredential = mockk(relaxed = true) + private val testAccessContext = "test-fabric-access-context" + private val testS2SScheme = "Bearer" + private val testS2SToken = "s2s-test-token" + + private val mockS2sTokenProvider: suspend () -> S2SToken = { + S2SToken(testS2SScheme, testS2SToken) + } + + // ==================== DefaultConfigurationCache Tests ==================== + + @Test + fun `DefaultConfigurationCache should accept S2S parameters`() { + val cache = + DefaultConfigurationCache( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + s2sTokenProvider = mockS2sTokenProvider, + s2sFabricPrivateLinkAccessContext = testAccessContext, + ) + assertNotNull(cache) + } + + @Test + fun `DefaultConfigurationCache should work without S2S parameters`() { + val cache = + DefaultConfigurationCache( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + ) + assertNotNull(cache) + } + + @Test + fun `DefaultConfigurationCache should accept only S2S token provider`() { + val cache = + DefaultConfigurationCache( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + s2sTokenProvider = mockS2sTokenProvider, + ) + assertNotNull(cache) + } + + @Test + fun `DefaultConfigurationCache should accept only access context`() { + val cache = + DefaultConfigurationCache( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + s2sFabricPrivateLinkAccessContext = testAccessContext, + ) + assertNotNull(cache) + } + + @Test + fun `DefaultConfigurationCache with custom provider should work with S2S parameters`() { + val mockConfigResponse: ConfigurationResponse = mockk(relaxed = true) + val customProvider: suspend () -> ConfigurationResponse = { + mockConfigResponse + } + + val cache = + DefaultConfigurationCache( + clientDetails = ClientDetails.createDefault(), + configurationProvider = customProvider, + s2sTokenProvider = mockS2sTokenProvider, + s2sFabricPrivateLinkAccessContext = testAccessContext, + ) + + runBlocking { + val config = cache.getConfiguration() + assertNotNull(config) + } + } + + // ==================== ConfigurationClient Tests ==================== + + @Test + fun `ConfigurationClient 
should accept S2S parameters`() { + val client = + ConfigurationClient( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + s2sTokenProvider = mockS2sTokenProvider, + s2sFabricPrivateLinkAccessContext = testAccessContext, + ) + assertNotNull(client) + assertEquals(mockS2sTokenProvider, client.s2sTokenProvider) + assertEquals(testAccessContext, client.s2sFabricPrivateLinkAccessContext) + } + + @Test + fun `ConfigurationClient should work without S2S parameters`() { + val client = + ConfigurationClient( + dmUrl = validDmUrl, + tokenCredential = mockTokenCredential, + skipSecurityChecks = true, + clientDetails = ClientDetails.createDefault(), + ) + assertNotNull(client) + assertNull(client.s2sTokenProvider) + assertNull(client.s2sFabricPrivateLinkAccessContext) + } + + // ==================== QueuedIngestClientBuilder Tests ==================== + + @Test + fun `QueuedIngestClientBuilder should accept withFabricPrivateLink`() { + val builder = + QueuedIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + + assertNotNull(builder) + } + + @Test + fun `QueuedIngestClientBuilder build with FabricPrivateLink should succeed`() { + val client = + QueuedIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + .build() + + assertNotNull(client) + } + + @Test + fun `QueuedIngestClientBuilder should chain withFabricPrivateLink correctly`() { + val builder = QueuedIngestClientBuilder.create(validDmUrl) + val result = + builder + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + .withMaxConcurrency(5) + .skipSecurityChecks() + + assertEquals(builder, result) + } + + // ==================== StreamingIngestClientBuilder Tests ==================== + + @Test + fun `StreamingIngestClientBuilder should accept withFabricPrivateLink`() { + val builder = + StreamingIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + + assertNotNull(builder) + } + + @Test + fun `StreamingIngestClientBuilder build with FabricPrivateLink should succeed`() { + val client = + StreamingIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + .build() + + assertNotNull(client) + } + + // ==================== ManagedStreamingIngestClientBuilder Tests ==================== + + @Test + fun `ManagedStreamingIngestClientBuilder should accept withFabricPrivateLink`() { + val builder = + ManagedStreamingIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + + assertNotNull(builder) + } + + @Test + fun `ManagedStreamingIngestClientBuilder build with FabricPrivateLink should succeed`() { + val client = + ManagedStreamingIngestClientBuilder.create(validDmUrl) + .withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + .build() + + assertNotNull(client) + } + + @Test + fun `ManagedStreamingIngestClientBuilder should chain withFabricPrivateLink correctly`() { + val builder = ManagedStreamingIngestClientBuilder.create(validDmUrl) + val result = + builder + 
.withAuthentication(mockTokenCredential) + .withFabricPrivateLink(mockS2sTokenProvider, testAccessContext) + .skipSecurityChecks() + + assertEquals(builder, result) + } + + // ==================== Integration-like Tests ==================== + + @Test + fun `S2S token provider should be callable and return expected values`() { + runBlocking { + val s2sToken = mockS2sTokenProvider() + assertEquals(testS2SScheme, s2sToken.scheme) + assertEquals(testS2SToken, s2sToken.token) + assertEquals("$testS2SScheme $testS2SToken", s2sToken.toHeaderValue()) + } + } + + @Test + fun `DefaultConfigurationCache with custom provider respects S2S context`() { + var providerCalled = false + val mockConfigResponse: ConfigurationResponse = mockk(relaxed = true) + val customProvider: suspend () -> ConfigurationResponse = { + providerCalled = true + mockConfigResponse + } + + val cache = + DefaultConfigurationCache( + clientDetails = ClientDetails.createDefault(), + configurationProvider = customProvider, + s2sTokenProvider = mockS2sTokenProvider, + s2sFabricPrivateLinkAccessContext = testAccessContext, + ) + + runBlocking { + cache.getConfiguration() + } + + assertEquals(true, providerCalled) + } +}
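
The Fabric Private Link wiring introduced in this last patch is consumed through the builders. A minimal usage sketch, assuming a hosting service that can mint S2S actor tokens; `fetchActorToken`, the cluster URL, and the access-context string are illustrative placeholders, not values from this patch:

```kotlin
import com.azure.core.credential.TokenCredential
import com.microsoft.azure.kusto.ingest.v2.builders.QueuedIngestClientBuilder
import com.microsoft.azure.kusto.ingest.v2.common.models.S2SToken

// Placeholder for however the hosting service obtains its S2S actor token.
suspend fun fetchActorToken(): String = TODO("mint an S2S actor token")

fun buildPrivateLinkClient(credential: TokenCredential) =
    QueuedIngestClientBuilder.create("https://ingest-mycluster.kusto.windows.net")
        .withAuthentication(credential)
        // Each request then carries "x-ms-s2s-actor-authorization: Bearer <token>"
        // plus the Fabric Private Link access-context header.
        .withFabricPrivateLink(
            s2sTokenProvider = { S2SToken.bearer(fetchActorToken()) },
            s2sFabricPrivateLinkAccessContext = "my-fabric-access-context",
        )
        .build()
```

Replacing the `Pair<String, String>` with the `S2SToken` data class keeps the scheme and token value named at the call site and centralizes header formatting in `toHeaderValue()`.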
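
Separately, the container cycling refactored in PATCH 48 relies on an atomic counter reduced with `Math.floorMod`. A self-contained sketch of that pattern; `RoundRobinSelector` is an illustrative stand-in, not a class from this patch:

```kotlin
import java.util.concurrent.atomic.AtomicLong

// Illustrative stand-in for the round-robin selection in ContainerUploaderBase.
class RoundRobinSelector<T>(private val items: List<T>) {
    init {
        require(items.isNotEmpty()) { "at least one container is required" }
    }

    private val counter = AtomicLong(0)

    fun next(): T {
        // floorMod keeps the index in [0, size) even after the counter
        // overflows into negative values.
        val index =
            Math.floorMod(counter.getAndIncrement(), items.size.toLong()).toInt()
        return items[index]
    }
}

fun main() {
    val selector =
        RoundRobinSelector(listOf("containerA", "containerB", "containerC"))
    repeat(5) { println(selector.next()) } // A, B, C, A, B
}
```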