diff --git a/.github/workflows/test-code-code2parquet.yml b/.github/workflows/test-code-code2parquet.yml index 996610e53..f8f1654e7 100644 --- a/.github/workflows/test-code-code2parquet.yml +++ b/.github/workflows/test-code-code2parquet.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/code/code2parquet run: | if [ -e "transforms/code/code2parquet/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/code/code2parquet/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/code/code2parquet DOCKER=docker test-image else echo "transforms/code/code2parquet/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-code_quality.yml b/.github/workflows/test-code-code_quality.yml index e855962ab..d53c81c61 100644 --- a/.github/workflows/test-code-code_quality.yml +++ b/.github/workflows/test-code-code_quality.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/code/code_quality run: | if [ -e "transforms/code/code_quality/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/code/code_quality/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/code/code_quality DOCKER=docker test-image else echo "transforms/code/code_quality/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-header_cleanser.yml b/.github/workflows/test-code-header_cleanser.yml index 74b713cda..1834f4983 100644 --- a/.github/workflows/test-code-header_cleanser.yml +++ b/.github/workflows/test-code-header_cleanser.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/code/header_cleanser run: | if [ -e "transforms/code/header_cleanser/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/code/header_cleanser/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/code/header_cleanser DOCKER=docker test-image else echo "transforms/code/header_cleanser/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-malware.yml b/.github/workflows/test-code-malware.yml index cef5746a2..debc779d1 100644 --- a/.github/workflows/test-code-malware.yml +++ b/.github/workflows/test-code-malware.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/code/malware run: | if [ -e "transforms/code/malware/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/code/malware/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/code/malware DOCKER=docker test-image else echo "transforms/code/malware/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-proglang_select.yml b/.github/workflows/test-code-proglang_select.yml index 86ea55f55..36bf6a869 100644 --- a/.github/workflows/test-code-proglang_select.yml +++ b/.github/workflows/test-code-proglang_select.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/code/proglang_select run: | if [ -e "transforms/code/proglang_select/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/code/proglang_select/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/code/proglang_select DOCKER=docker test-image else echo "transforms/code/proglang_select/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-repo_level_ordering.yml b/.github/workflows/test-code-repo_level_ordering.yml index 8451b174b..fe0ee23bb 100644 --- a/.github/workflows/test-code-repo_level_ordering.yml +++ b/.github/workflows/test-code-repo_level_ordering.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/code/repo_level_ordering run: | if [ -e "transforms/code/repo_level_ordering/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/code/repo_level_ordering/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/code/repo_level_ordering DOCKER=docker test-image else echo "transforms/code/repo_level_ordering/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-language-doc_chunk.yml b/.github/workflows/test-language-doc_chunk.yml index 98341903b..fa3ea58ca 100644 --- a/.github/workflows/test-language-doc_chunk.yml +++ b/.github/workflows/test-language-doc_chunk.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/language/doc_chunk run: | if [ -e "transforms/language/doc_chunk/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/language/doc_chunk/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/language/doc_chunk DOCKER=docker test-image else echo "transforms/language/doc_chunk/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-language-doc_quality.yml b/.github/workflows/test-language-doc_quality.yml index 540c2490c..dde61e1fa 100644 --- a/.github/workflows/test-language-doc_quality.yml +++ b/.github/workflows/test-language-doc_quality.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/language/doc_quality run: | if [ -e "transforms/language/doc_quality/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/language/doc_quality/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/language/doc_quality DOCKER=docker test-image else echo "transforms/language/doc_quality/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-language-lang_id.yml b/.github/workflows/test-language-lang_id.yml index 1c310270a..3b39358c9 100644 --- a/.github/workflows/test-language-lang_id.yml +++ b/.github/workflows/test-language-lang_id.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/language/lang_id run: | if [ -e "transforms/language/lang_id/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/language/lang_id/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/language/lang_id DOCKER=docker test-image else echo "transforms/language/lang_id/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-language-pdf2parquet.yml b/.github/workflows/test-language-pdf2parquet.yml index 85e6e15e4..bb523c57e 100644 --- a/.github/workflows/test-language-pdf2parquet.yml +++ b/.github/workflows/test-language-pdf2parquet.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/language/pdf2parquet run: | if [ -e "transforms/language/pdf2parquet/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/language/pdf2parquet/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/language/pdf2parquet DOCKER=docker test-image else echo "transforms/language/pdf2parquet/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-language-pii_redactor.yml b/.github/workflows/test-language-pii_redactor.yml index c162a3322..9656a2f24 100644 --- a/.github/workflows/test-language-pii_redactor.yml +++ b/.github/workflows/test-language-pii_redactor.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/language/pii_redactor run: | if [ -e "transforms/language/pii_redactor/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/language/pii_redactor/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/language/pii_redactor DOCKER=docker test-image else echo "transforms/language/pii_redactor/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-language-text_encoder.yml b/.github/workflows/test-language-text_encoder.yml index cc4cdf0f5..f7622f8e0 100644 --- a/.github/workflows/test-language-text_encoder.yml +++ b/.github/workflows/test-language-text_encoder.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/language/text_encoder run: | if [ -e "transforms/language/text_encoder/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/language/text_encoder/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/language/text_encoder DOCKER=docker test-image else echo "transforms/language/text_encoder/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-transform.template b/.github/workflows/test-transform.template index bf8a56534..e0966717e 100644 --- a/.github/workflows/test-transform.template +++ b/.github/workflows/test-transform.template @@ -102,7 +102,9 @@ jobs: - name: Test transform image in @TARGET_TRANSFORM_DIR@ run: | if [ -e "@TARGET_TRANSFORM_DIR@/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "@TARGET_TRANSFORM_DIR@/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C @TARGET_TRANSFORM_DIR@ DOCKER=docker test-image else echo "@TARGET_TRANSFORM_DIR@/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-doc_id.yml b/.github/workflows/test-universal-doc_id.yml index 056dae834..66d0283ca 100644 --- a/.github/workflows/test-universal-doc_id.yml +++ b/.github/workflows/test-universal-doc_id.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/doc_id run: | if [ -e "transforms/universal/doc_id/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/doc_id/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/doc_id DOCKER=docker test-image else echo "transforms/universal/doc_id/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-ededup.yml b/.github/workflows/test-universal-ededup.yml index 9a9e3d174..225c27cc3 100644 --- a/.github/workflows/test-universal-ededup.yml +++ b/.github/workflows/test-universal-ededup.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/ededup run: | if [ -e "transforms/universal/ededup/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/ededup/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/ededup DOCKER=docker test-image else echo "transforms/universal/ededup/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-fdedup.yml b/.github/workflows/test-universal-fdedup.yml index 4814c7c7b..356736fca 100644 --- a/.github/workflows/test-universal-fdedup.yml +++ b/.github/workflows/test-universal-fdedup.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/fdedup run: | if [ -e "transforms/universal/fdedup/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/fdedup/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/fdedup DOCKER=docker test-image else echo "transforms/universal/fdedup/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-filter.yml b/.github/workflows/test-universal-filter.yml index 4ce46c874..44858feff 100644 --- a/.github/workflows/test-universal-filter.yml +++ b/.github/workflows/test-universal-filter.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/filter run: | if [ -e "transforms/universal/filter/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/filter/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/filter DOCKER=docker test-image else echo "transforms/universal/filter/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-html2parquet.yml b/.github/workflows/test-universal-html2parquet.yml index 46608c3d0..7d3f83a61 100644 --- a/.github/workflows/test-universal-html2parquet.yml +++ b/.github/workflows/test-universal-html2parquet.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/html2parquet run: | if [ -e "transforms/universal/html2parquet/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/html2parquet/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/html2parquet DOCKER=docker test-image else echo "transforms/universal/html2parquet/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-noop.yml b/.github/workflows/test-universal-noop.yml index 44aa72c76..cd72703d1 100644 --- a/.github/workflows/test-universal-noop.yml +++ b/.github/workflows/test-universal-noop.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/noop run: | if [ -e "transforms/universal/noop/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/noop/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/noop DOCKER=docker test-image else echo "transforms/universal/noop/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-profiler.yml b/.github/workflows/test-universal-profiler.yml index 0b34a5bcf..50cd8cd26 100644 --- a/.github/workflows/test-universal-profiler.yml +++ b/.github/workflows/test-universal-profiler.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/profiler run: | if [ -e "transforms/universal/profiler/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/profiler/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/profiler DOCKER=docker test-image else echo "transforms/universal/profiler/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-resize.yml b/.github/workflows/test-universal-resize.yml index 9c8f65c6f..99e14b1b8 100644 --- a/.github/workflows/test-universal-resize.yml +++ b/.github/workflows/test-universal-resize.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/resize run: | if [ -e "transforms/universal/resize/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/resize/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/resize DOCKER=docker test-image else echo "transforms/universal/resize/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-universal-tokenization.yml b/.github/workflows/test-universal-tokenization.yml index 7e78fa6e2..e7a620882 100644 --- a/.github/workflows/test-universal-tokenization.yml +++ b/.github/workflows/test-universal-tokenization.yml @@ -102,7 +102,9 @@ jobs: - name: Test transform image in transforms/universal/tokenization run: | if [ -e "transforms/universal/tokenization/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + if [ -d "transforms/universal/tokenization/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi make -C transforms/universal/tokenization DOCKER=docker test-image else echo "transforms/universal/tokenization/Makefile not found - testing disabled for this transform." diff --git a/transforms/code/header_cleanser/python/src/header_cleanser_transform_python.py b/transforms/code/header_cleanser/python/src/header_cleanser_transform_python.py index 9d4938b65..21c07d4ad 100644 --- a/transforms/code/header_cleanser/python/src/header_cleanser_transform_python.py +++ b/transforms/code/header_cleanser/python/src/header_cleanser_transform_python.py @@ -27,5 +27,5 @@ def __init__(self): if __name__ == "__main__": launcher = PythonTransformLauncher(HeaderCleanserPythonTransformConfiguration()) - logger.info("Launching license copyright header removal") + logger.info("Launching license copyright header removal.") launcher.launch()