# Reference (not a command): NYC TLC trip record data page.
# NOTE(review): presumably the input dataset for the rides ETL jobs — confirm.
#   https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page

# Install the Python dependencies listed in requirements.txt.
pip install -r requirements.txt

# Confirm the Spark CLI tools are reachable: print the PySpark version and
# the spark-submit usage text.
pyspark --version
spark-submit --help

# Reference (not a command): Jobs page of the Spark application UI.
# 4040 is Spark's default application-UI port; the page is typically only
# reachable while an application is running.
#   http://localhost:4040/jobs/
# Build the two images from the Dockerfiles in the current directory
# (build context is ".").
docker build -t owshq-spark:3.5 -f Dockerfile.spark .
docker build -t owshq-spark-history-server:3.5 -f Dockerfile.history .

# Prerequisite (not a command): the .env file.
# NOTE(review): presumably holds the settings docker-compose reads at
# startup — check it exists and is filled in before bringing the stack up.
#   .env

# Start the compose services in detached mode, then list running containers.
docker-compose up -d
docker ps

# Inspect the logs of each container to verify it started cleanly.
docker logs spark-master
docker logs spark-worker-1
docker logs spark-worker-2
docker logs spark-history-server
# Submit the basic rides ETL job from inside the spark-master container.
# The driver runs in client mode against the master at spark-master:7077.
# -it allocates an interactive TTY, so run this from a terminal.
docker exec -it spark-master /opt/bitnami/spark/bin/spark-submit \
  --master spark://spark-master:7077 \
  --deploy-mode client \
  /opt/bitnami/spark/jobs/etl-rides-fhvhv-basic.py

# Submit the second rides ETL job (etl-rides-fhvhv.py) with the same
# master and deploy-mode settings.
docker exec -it spark-master /opt/bitnami/spark/bin/spark-submit \
  --master spark://spark-master:7077 \
  --deploy-mode client \
  /opt/bitnami/spark/jobs/etl-rides-fhvhv.py
# References (not commands): web UIs to open in a browser.
# NOTE(review): 8080 and 18080 are Spark's default ports for the standalone
# master UI and the history server UI respectively — confirm they match the
# port mappings in the compose file.
#   http://localhost:8080/
#   http://localhost:18080/

# Stop and remove the containers started by docker-compose.
docker-compose down