Skip to content

[ERROR]: Dependency error #13

@netogerbi

Description

@netogerbi

# Error trying to install dependencies

output log (error):

Collecting drainage==0.1.4
  Using cached drainage-0.1.4.tar.gz (20.6 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'error'
  ERROR: Command errored out with exit status 1:
   command: /usr/bin/python3 /usr/lib/python3/dist-packages/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-n6x23duk/normal --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- puccinialin
       cwd: None
  Complete output (2 lines):
  ERROR: Could not find a version that satisfies the requirement puccinialin (from versions: none)
  ERROR: No matching distribution found for puccinialin
  ----------------------------------------
ERROR: Command errored out with exit status 1: /usr/bin/python3 /usr/lib/python3/dist-packages/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-n6x23duk/normal --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- puccinialin Check the logs for full command output.

requirements.txt:

py4j==0.10.9.7
pyspark
typeid-python
uuid6
kafka-python
pyyaml
deepmerge
drainage==0.1.4
puccinialin==0.1.8

Dockerfile:

# Local Spark image: apache/spark base + Java 17 + extra runtime jars
# (Iceberg, Kafka, AWS, Nessie) pulled from Maven Central at build time.
FROM --platform=$BUILDPLATFORM apache/spark:3.5.3-python3

USER root

# Install Java 17 to support TypeId UDF (requires Java 17+)
RUN apt-get update && \
    apt-get install -y openjdk-17-jdk rsync && \
    rm -rf /var/lib/apt/lists/*

# Detect architecture and set JAVA_HOME to Java 17
RUN ARCH=$(dpkg --print-architecture) && \
    ln -sf /usr/lib/jvm/java-17-openjdk-${ARCH} /usr/lib/jvm/java-17-openjdk

# Set JAVA_HOME to Java 17 and ensure it takes precedence in PATH
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk
ENV PATH=${JAVA_HOME}/bin:${PATH}

# All downloads below use curl -fL: -f (--fail) makes curl exit non-zero on an
# HTTP 404/5xx so the build aborts, instead of silently saving the server's
# HTML error page as a corrupt .jar; -L follows redirects as before.

# Iceberg dependencies
RUN curl -fL -o /opt/spark/jars/iceberg-spark-runtime-3.5_2.12-1.10.0.jar \
      https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.10.0/iceberg-spark-runtime-3.5_2.12-1.10.0.jar && \
    curl -fL -o /opt/spark/jars/iceberg-aws-bundle-1.10.0.jar \
      https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.10.0/iceberg-aws-bundle-1.10.0.jar

# Spark extensions
RUN curl -fL -o /opt/spark/jars/spark-hadoop-cloud_2.12-3.5.3.jar \
      https://repo1.maven.org/maven2/org/apache/spark/spark-hadoop-cloud_2.12/3.5.3/spark-hadoop-cloud_2.12-3.5.3.jar && \
    curl -fL -o /opt/spark/jars/spark-measure_2.12-0.24.jar \
      https://repo1.maven.org/maven2/ch/cern/sparkmeasure/spark-measure_2.12/0.24/spark-measure_2.12-0.24.jar

# Kafka dependencies - assembly with all deps
RUN curl -fL -o /opt/spark/jars/spark-sql-kafka-0-10_2.12-3.5.3.jar \
      https://repo1.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.12/3.5.3/spark-sql-kafka-0-10_2.12-3.5.3.jar && \
    curl -fL -o /opt/spark/jars/spark-token-provider-kafka-0-10_2.12-3.5.3.jar \
      https://repo1.maven.org/maven2/org/apache/spark/spark-token-provider-kafka-0-10_2.12/3.5.3/spark-token-provider-kafka-0-10_2.12-3.5.3.jar && \
    curl -fL -o /opt/spark/jars/kafka-clients-3.4.1.jar \
      https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/3.4.1/kafka-clients-3.4.1.jar && \
    curl -fL -o /opt/spark/jars/commons-pool2-2.11.1.jar \
      https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.11.1/commons-pool2-2.11.1.jar

# AWS dependencies
RUN curl -fL -o /opt/spark/jars/hadoop-aws-3.3.4.jar \
      https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar && \
    curl -fL -o /opt/spark/jars/aws-java-sdk-bundle-1.12.262.jar \
      https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar

# Nessie
RUN curl -fL -o /opt/spark/jars/nessie-spark-extensions-3.5_2.12-0.80.0.jar \
      https://repo1.maven.org/maven2/org/projectnessie/nessie-integrations/nessie-spark-extensions-3.5_2.12/0.80.0/nessie-spark-extensions-3.5_2.12-0.80.0.jar

# Custom JARs
COPY ./jars $SPARK_HOME/jars

# Make the jars world-readable so the non-root runtime user can load them.
RUN ls -al $SPARK_HOME/jars && chmod 644 $SPARK_HOME/jars/*.jar

# Drop back to the base image's unprivileged spark user (UID 185).
USER 185

WORKDIR /opt/spark/work-dir

docker-compose.yaml (snippet):

  # Standalone Spark master service for local development, built from
  # Dockerfile.local in this directory.
  spark-master:
    build:
      context: .
      dockerfile: Dockerfile.local
    image: local-spark:3.5.3
    container_name: local-spark-master
    # Extra environment (credentials etc.) is read from the local .env file.
    env_file:
      - .env
    environment:
      # Keep start-master.sh in the foreground instead of daemonizing,
      # so the shell in `command` below retains control of the process.
      - SPARK_NO_DAEMONIZE=true
      - PYTHONPATH=/opt/application
    working_dir: /opt/application
    ports:
      - "7077:7077"  # Spark master RPC port
      - "8080:8080"  # Spark master web UI (also used by the healthcheck)
      - "4040:4040"  # Spark application UI
    volumes:
      # Application code and requirements are bind-mounted from the host
      # so edits are picked up without rebuilding the image.
      - ../runners:/opt/application/runners
      - ../scripts:/opt/application/scripts
      - ../requirements.txt:/opt/application/requirements.txt
      # Mount target comes from MIGRATIONS_FOLDER — presumably set in .env; verify.
      - ./scripts/migrations:${MIGRATIONS_FOLDER}
      - ./conf/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
      # Named volume for the Spark event logs (history server data).
      - spark-events:/tmp/spark-events/event-logs
    # Consider the container healthy once the master web UI answers on :8080.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080"]
      interval: 10s
      timeout: 15s
      retries: 5
      start_period: 40s
    networks:
      - spark-network
    # NOTE(review): runs as root so the pip install below can write to
    # system site-packages — packages are installed at every container start.
    user: root
    # Install Python deps, launch the master, then tail the log file to keep
    # the container's PID 1 in the foreground.
    command: >
      /bin/bash -c "
      pip install -r requirements.txt &&
      /opt/spark/sbin/start-master.sh &&
      tail -f /opt/spark/logs/spark-*.out
      "

Metadata

Metadata

Assignees

Labels

No labels
No labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions