Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Add ability to run Fetch Migration on Docker Solution #487

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ services:
- migrations
ports:
- "29200:9200"
depends_on: # Fetch immediately starts running once the container is up, so making sure it's up before one of the clusters prevents any unintentional migrations from happening.
- fetch-migration

opensearchanalytics:
image: 'opensearchproject/opensearch:latest'
Expand Down Expand Up @@ -130,9 +132,17 @@ services:
- migrations
volumes:
- sharedReplayerOutput:/shared-replayer-output
- /var/run/docker.sock:/var/run/docker.sock
environment:
- MIGRATION_KAFKA_BROKER_ENDPOINTS=kafka:9092

# Fetch Migration service, run from the "migrations/fetch-migration" image.
fetch-migration:
image: migrations/fetch-migration
volumes:
# Fetch reads its pipeline definition from "/code/input.yaml" inside the
# container, so the file must either be mounted here or already exist at
# that path in the image.
- ./pipeline_yaml.yml:/code/input.yaml
networks:
- migrations

volumes:
zookeeper_data:
driver: local
Expand All @@ -145,4 +155,4 @@ volumes:

networks:
migrations:
driver: bridge
driver: bridge
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND noninteractive

# Install the OS-level tooling and the Python packages in a single layer.
# The pip3 line must be the LAST line of this instruction: a trailing "\" here
# would splice the following RUN line into the pip3 arguments, because Docker
# drops comment lines that sit inside a continued instruction.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3.9 python3-pip python3-dev openjdk-11-jre-headless wget gcc libc-dev git curl vim jq unzip less && \
    pip3 install urllib3==1.25.11 opensearch-benchmark==1.1.0 awscurl tqdm
# TODO upon the next release of opensearch-benchmark the awscli package should be installed by pip3, with the expected boto3 version upgrade resolving the current conflicts between opensearch-benchmark and awscli
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install && rm -rf aws awscliv2.zip
RUN mkdir /root/kafka-tools
Expand All @@ -14,16 +14,22 @@ COPY humanReadableLogs.py /root/
COPY catIndices.sh /root/
COPY showFetchMigrationCommand.sh /root/
COPY setupIntegTests.sh /root/
COPY startDockerFetchMigration.sh /root/
COPY msk-iam-auth.properties /root/kafka-tools/aws
COPY kafkaCmdRef.md /root/kafka-tools
RUN chmod ug+x /root/runTestBenchmarks.sh
RUN chmod ug+x /root/humanReadableLogs.py
RUN chmod ug+x /root/catIndices.sh
RUN chmod ug+x /root/showFetchMigrationCommand.sh
RUN chmod ug+x /root/startDockerFetchMigration.sh

WORKDIR /root/kafka-tools
# Get kafka distribution and unpack to 'kafka'
RUN wget -qO- https://archive.apache.org/dist/kafka/3.6.0/kafka_2.13-3.6.0.tgz | tar --transform 's!^[^/]*!kafka!' -xvz
RUN wget -O kafka/libs/msk-iam-auth.jar https://github.com/aws/aws-msk-iam-auth/releases/download/v1.1.9/aws-msk-iam-auth-1.1.9-all.jar

# Install Docker CLI
# NOTE(review): docker.io ships the engine as well as the CLI; only the CLI is
# presumably needed here (to talk to a mounted Docker socket) - confirm.
# The package index is refreshed in the same layer so the install never relies
# on a stale, cached "apt-get update" layer, and the lists are removed
# afterwards to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends docker.io && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /root

CMD tail -f /dev/null
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Starts every existing container whose name contains "fetch-migration".
#
# Requires the docker CLI to be able to reach a Docker daemon.
# Exit status: that of "docker start" when at least one container matched,
# 1 when no matching container exists.

# Let Docker do the matching: "--filter name=" only looks at container names
# (grep over the full table could also hit the image/command columns), and
# "-q" prints bare IDs, so no header line or awk post-processing is involved.
mapfile -t container_ids < <(docker ps -aq --filter "name=fetch-migration")

if [ "${#container_ids[@]}" -gt 0 ]; then
    # Pass the IDs as separate, properly quoted arguments.
    docker start "${container_ids[@]}"
else
    # Report the failure on stderr and signal it to callers.
    echo "No container found with 'fetch-migration' in its name" >&2
    exit 1
fi
40 changes: 40 additions & 0 deletions TrafficCapture/dockerSolution/src/main/docker/pipeline_yaml.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Name of the Data Prepper pipeline
historical-data-migration:
  # Source cluster configuration
  source:
    opensearch:
      # CDK code will replace this value, so DO NOT CHANGE this
      # unless the file is being used outside of the CDK
      hosts:
        - https://capture-proxy-es:19200
      indices:
        # Indices to exclude - exclude system indices by default.
        # The pattern is "a literal dot followed by anything", i.e. every
        # index whose name starts with ".".
        exclude:
          - index_name_regex: '\..*'
      # Example configuration on how to disable authentication (default: false)
      # disable_authentication: true
      # NOTE(review): admin/admin look like stock demo credentials - keep them
      # to local Docker use only.
      username: "admin"
      password: "admin"
      connection:
        # NOTE(review): presumably skips TLS certificate verification;
        # acceptable for local/dev clusters only - confirm.
        insecure: true
  # Target cluster configuration
  sink:
    - opensearch:
        # Note - CDK code will replace this value with the target cluster endpoint
        hosts:
          - https://opensearchtarget:9200
        # Derive index name from record metadata
        index: '${getMetadata("opensearch-index")}'
        # Use the same document ID as the source cluster document
        document_id: '${getMetadata("opensearch-document_id")}'
        # Example configuration for basic auth
        # disable_authentication: true
        username: "admin"
        password: "admin"
        connection:
          # NOTE(review): same TLS caveat as for the source cluster above
          # in this file - confirm before using outside local/dev setups.
          insecure: true
  # Additional pipeline options/optimizations
  # For maximum throughput, match workers to number of vCPUs (default: 1)
  workers: 1
  # delay is how often the worker threads should process data (default: 3000 ms)
  delay: 0
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ script_abs_path=$(readlink -f "$0")
script_dir_abs_path=$(dirname "$script_abs_path")
cd $script_dir_abs_path || exit

cd ../../../TrafficCapture || exit
cd ../../../FetchMigration || exit
docker build -t migrations/fetch-migration .

cd ../TrafficCapture || exit
./gradlew :dockerSolution:buildDockerImages -x test
Loading