# Error trying to install dependencies
output log (error):
Collecting drainage==0.1.4
Using cached drainage-0.1.4.tar.gz (20.6 MB)
Installing build dependencies: started
Installing build dependencies: finished with status 'done'
Getting requirements to build wheel: started
Getting requirements to build wheel: finished with status 'done'
Installing backend dependencies: started
Installing backend dependencies: finished with status 'error'
ERROR: Command errored out with exit status 1:
command: /usr/bin/python3 /usr/lib/python3/dist-packages/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-n6x23duk/normal --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- puccinialin
cwd: None
Complete output (2 lines):
ERROR: Could not find a version that satisfies the requirement puccinialin (from versions: none)
ERROR: No matching distribution found for puccinialin
----------------------------------------
ERROR: Command errored out with exit status 1: /usr/bin/python3 /usr/lib/python3/dist-packages/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-n6x23duk/normal --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- puccinialin Check the logs for full command output.
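For context on the failure mode: pip is resolving drainage's build dependencies in an isolated environment, and "(from versions: none)" usually means either that no puccinialin release matches the container's interpreter/platform or that the index was not reachable. A minimal diagnostic sketch, assuming the apache/spark:3.5.3-python3 base image from the Dockerfile below (pip index versions is an experimental pip subcommand and may not exist on older pip):

# Hedged diagnostic: probe puccinialin resolution inside the same base image.
docker run --rm apache/spark:3.5.3-python3 /bin/bash -c '
  python3 --version && pip --version
  pip index versions puccinialin                     # experimental; pip >= 21.2
  pip download --no-deps puccinialin -d /tmp/probe   # plain fetch as a fallback
'

If the probe fails the same way, the mismatch is between the base image's Python/platform and the puccinialin releases on PyPI, rather than anything in this project's configuration.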
requirements.txt:
py4j==0.10.9.7
pyspark
typeid-python
uuid6
kafka-python
pyyaml
deepmerge
drainage==0.1.4
puccinialin==0.1.8
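Note that the puccinialin==0.1.8 pin in requirements.txt does not help here: pip's build isolation resolves drainage's build dependencies in a fresh environment that ignores already-installed packages. One workaround sketch, not a confirmed fix, is to pre-install the build dependency and then disable isolation for drainage (with --no-build-isolation, every build requirement, the build backend included, must already be installed):

# Workaround sketch: satisfy the build dependency manually, then let
# drainage build against the preinstalled copy instead of an isolated env.
pip install puccinialin==0.1.8
pip install --no-build-isolation drainage==0.1.4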
Dockerfile:
FROM --platform=$BUILDPLATFORM apache/spark:3.5.3-python3

USER root

# Install Java 17 to support TypeId UDF (requires Java 17+)
RUN apt-get update && \
    apt-get install -y openjdk-17-jdk rsync && \
    rm -rf /var/lib/apt/lists/*

# Detect architecture and set JAVA_HOME to Java 17
RUN ARCH=$(dpkg --print-architecture) && \
    ln -sf /usr/lib/jvm/java-17-openjdk-${ARCH} /usr/lib/jvm/java-17-openjdk

# Set JAVA_HOME to Java 17 and ensure it takes precedence in PATH
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk
ENV PATH=${JAVA_HOME}/bin:${PATH}

# Iceberg dependencies
RUN curl -L -o /opt/spark/jars/iceberg-spark-runtime-3.5_2.12-1.10.0.jar \
        https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.10.0/iceberg-spark-runtime-3.5_2.12-1.10.0.jar && \
    curl -L -o /opt/spark/jars/iceberg-aws-bundle-1.10.0.jar \
        https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.10.0/iceberg-aws-bundle-1.10.0.jar

# Spark extensions
RUN curl -L -o /opt/spark/jars/spark-hadoop-cloud_2.12-3.5.3.jar \
        https://repo1.maven.org/maven2/org/apache/spark/spark-hadoop-cloud_2.12/3.5.3/spark-hadoop-cloud_2.12-3.5.3.jar && \
    curl -L -o /opt/spark/jars/spark-measure_2.12-0.24.jar \
        https://repo1.maven.org/maven2/ch/cern/sparkmeasure/spark-measure_2.12/0.24/spark-measure_2.12-0.24.jar

# Kafka dependencies - assembly with all deps
RUN curl -L -o /opt/spark/jars/spark-sql-kafka-0-10_2.12-3.5.3.jar \
        https://repo1.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.12/3.5.3/spark-sql-kafka-0-10_2.12-3.5.3.jar && \
    curl -L -o /opt/spark/jars/spark-token-provider-kafka-0-10_2.12-3.5.3.jar \
        https://repo1.maven.org/maven2/org/apache/spark/spark-token-provider-kafka-0-10_2.12/3.5.3/spark-token-provider-kafka-0-10_2.12-3.5.3.jar && \
    curl -L -o /opt/spark/jars/kafka-clients-3.4.1.jar \
        https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/3.4.1/kafka-clients-3.4.1.jar && \
    curl -L -o /opt/spark/jars/commons-pool2-2.11.1.jar \
        https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.11.1/commons-pool2-2.11.1.jar

# AWS dependencies
RUN curl -L -o /opt/spark/jars/hadoop-aws-3.3.4.jar \
        https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar && \
    curl -L -o /opt/spark/jars/aws-java-sdk-bundle-1.12.262.jar \
        https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar

# Nessie
RUN curl -L -o /opt/spark/jars/nessie-spark-extensions-3.5_2.12-0.80.0.jar \
        https://repo1.maven.org/maven2/org/projectnessie/nessie-integrations/nessie-spark-extensions-3.5_2.12/0.80.0/nessie-spark-extensions-3.5_2.12-0.80.0.jar

# Custom JARs
COPY ./jars $SPARK_HOME/jars
RUN ls -al $SPARK_HOME/jars && chmod 644 $SPARK_HOME/jars/*.jar

USER 185
WORKDIR /opt/spark/work-dir
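One way to surface this failure at build time instead of at container startup would be to install the Python requirements inside the image. A minimal sketch, assuming requirements.txt sits in the build context; this fragment is hypothetical and would go before the USER 185 line above:

# Hypothetical addition: install Python dependencies during the image build
# so `docker build` fails fast on unresolvable packages.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --upgrade pip && \
    pip install -r /tmp/requirements.txt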
docker-compose.yaml (snippet):
spark-master:
  build:
    context: .
    dockerfile: Dockerfile.local
  image: local-spark:3.5.3
  container_name: local-spark-master
  env_file:
    - .env
  environment:
    - SPARK_NO_DAEMONIZE=true
    - PYTHONPATH=/opt/application
  working_dir: /opt/application
  ports:
    - "7077:7077"
    - "8080:8080"
    - "4040:4040"
  volumes:
    - ../runners:/opt/application/runners
    - ../scripts:/opt/application/scripts
    - ../requirements.txt:/opt/application/requirements.txt
    - ./scripts/migrations:${MIGRATIONS_FOLDER}
    - ./conf/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
    - spark-events:/tmp/spark-events/event-logs
  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:8080"]
    interval: 10s
    timeout: 15s
    retries: 5
    start_period: 40s
  networks:
    - spark-network
  user: root
  command: >
    /bin/bash -c "
    pip install -r requirements.txt &&
    /opt/spark/sbin/start-master.sh &&
    tail -f /opt/spark/logs/spark-*.out
    "