Skip to content

Commit

Permalink
try different dependencies in attempt to resolve conflicts
Browse files Browse the repository at this point in the history
Signed-off-by: Maroun Touma <touma@us.ibm.com>
  • Loading branch information
touma-I committed Sep 9, 2024
1 parent 65f4ac4 commit 703ebe0
Show file tree
Hide file tree
Showing 12 changed files with 81 additions and 53 deletions.
2 changes: 1 addition & 1 deletion data-processing-lib/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit"
version = "0.2.1.dev2"
version = "0.2.1.dev3"
requires-python = ">=3.10"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Library"
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_ray"
version = "0.2.1.dev2"
version = "0.2.1.dev3"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10"
description = "Data Preparation Toolkit Library for Ray"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev2",
"data-prep-toolkit>=0.2.1.dev3",
"ray[default]==2.24.0",
# These two are to fix security issues identified by quay.io
"fastapi>=0.110.2",
Expand Down
6 changes: 3 additions & 3 deletions examples/notebooks/rag/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Data prep kit
data-prep-toolkit-transforms==0.2.1.dev1
data-prep-toolkit-transforms-ray==0.2.1.dev1
#data-prep-toolkit-transforms==0.2.1.dev1
#data-prep-toolkit-transforms-ray==0.2.1.dev1



Expand Down Expand Up @@ -53,4 +53,4 @@ ipython
ipywidgets
IProgress
chardet==5.2.0
charset-normalizer==3.3.2
charset-normalizer==3.3.2
3 changes: 2 additions & 1 deletion transforms/packaging/.make.packaging
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ setup: .transforms.setup venv


src:
mkdir src
for T in $(TRANSFORMS_NAMES); do \
echo copy src from $$T ; \
cp -R $(REPOROOT)/transforms/$$T/$(PACKAGING_RUN_TIME)/src/ src/ ; \
cp -R $(REPOROOT)/transforms/$$T/$(PACKAGING_RUN_TIME)/src/* src ; \
rm -fr *.egg-info ; \
rm -fr dist ; \
rm -fr build ; \
Expand Down
4 changes: 4 additions & 0 deletions transforms/packaging/python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ TRANSFORMS_NAMES = code/code_quality \
universal/tokenization \
universal/doc_id

# language/doc_chunk and language/pdf2parquet have conflicting dependencies that need to be resolved:
# doc_chunk depends on docling>=1.8.2,<2.0.0
# pdf2parquet depends on docling==1.7.0


test-src:: .transforms.setup venv
# source venv/bin/activate && cd ../../../data-processing-lib/python && $(PYTHON) -m pip install .
Expand Down
6 changes: 3 additions & 3 deletions transforms/packaging/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_transforms"
version = "0.2.1.dev2"
version = "0.2.1.dev3"
requires-python = ">=3.10,<3.12"
keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Transforms"
Expand All @@ -16,10 +16,10 @@ requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}
dependencies = {file = ["requirements.transforms.python.txt"]}

[options]
package_dir = ["src", "test"]
package_dir = ["src"]

[options.packages.find]
where = ["src/"]
Expand Down
32 changes: 32 additions & 0 deletions transforms/packaging/python/requirements.transforms.python.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Dependencies of the data_prep_toolkit_transforms package.
# Loaded by pyproject.toml through [tool.setuptools.dynamic] dependencies.
# Each requirement line must be a valid PEP 508 specifier: no trailing commas.
data-prep-toolkit>=0.2.1.dev3
bs4==0.0.2
# docling-parse constraints seen so far:
#   docling 1.9.0 depends on docling-parse<2.0.0 and >=1.1.3
#   docling 1.8.5 depends on docling-parse<2.0.0 and >=1.1.3
#   pdf2parquet depends on docling-parse==1.0.0
docling-parse>=1.0.0
# language/doc_chunk and pdf2parquet have conflicting dependencies that need to be resolved:
#   doc_chunk depends on docling>=1.8.2,<2.0.0
#   pdf2parquet depends on docling==1.7.0
#docling==1.7.0
docling>=1.8.2,<2.0.0
llama-index-core>=0.11.1,<0.12.0
docling-core>=1.1.2,<2.0.0
quackling==0.1.1
# quackling will pull:
#   docling>=1.8.2,<2.0.0
#   llama-index-core<0.12.0,>=0.11.1
#   docling-core<2.0.0,>=1.1.2
filetype>=1.2.0,<2.0.0
duckdb==0.10.1
fasttext==0.9.2
huggingface-hub>=0.21.4,<1.0.0
langcodes==3.3.0
mmh3==4.1.0
numpy==1.26.4
pandas
parameterized
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'
sentence-transformers==3.0.1
transformers==4.38.2
xxhash==3.4.1

24 changes: 0 additions & 24 deletions transforms/packaging/python/requirements.txt

This file was deleted.

7 changes: 6 additions & 1 deletion transforms/packaging/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ TRANSFORMS_NAMES = code/proglang_select \
code/code_quality \
code/repo_level_ordering \
code/code2parquet \
language/doc_quality \
language/doc_chunk \
language/doc_quality \
language/lang_id \
language/text_encoder \
language/pdf2parquet \
Expand All @@ -36,6 +36,11 @@ TRANSFORMS_NAMES = code/proglang_select \
universal/filter \
universal/resize

# language/doc_chunk and language/pdf2parquet have conflicting dependencies that need to be resolved:
# doc_chunk depends on docling>=1.8.2,<2.0.0
# pdf2parquet depends on docling==1.7.0


test-src:: .transforms.setup venv
# source venv/bin/activate && cd ../../../data-processing-lib/python && $(PYTHON) -m pip install .
# source venv/bin/activate && cd ../../../data-processing-lib/ray && $(PYTHON) -m pip install .
Expand Down
6 changes: 3 additions & 3 deletions transforms/packaging/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_transforms_ray"
version = "0.2.1.dev2"
version = "0.2.1.dev3"
requires-python = ">=3.10,<3.12"
keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Transforms using Ray"
Expand All @@ -16,11 +16,11 @@ requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}
dependencies = {file = ["requirements.transforms.ray.txt"]}


[options]
package_dir = ["src","test"]
package_dir = ["src"]

[options.packages.find]
where = ["src/"]
Expand Down
25 changes: 25 additions & 0 deletions transforms/packaging/ray/requirements.transforms.ray.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Dependencies of the data_prep_toolkit_transforms_ray package.
# Loaded by pyproject.toml through [tool.setuptools.dynamic] dependencies.
data-prep-toolkit-ray>=0.2.1.dev3
data_prep_toolkit_transforms>=0.2.1.dev3
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'
parameterized
tqdm==4.66.3
mmh3==4.1.0
xxhash==3.4.1
# scipy is left as a lower bound because of this conflict:
#   ray fdedup depends on scipy==1.12.0
#   docling 1.7.0 depends on scipy<2.0.0 and >=1.14.1
scipy>=1.12.0
networkx==3.3
colorlog==6.8.2
func-timeout==4.3.5
pandas==2.2.2
emerge-viz==2.0.0

# Note:
# when installing data-processing-library-ray, pip reports the following:
# ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
# deepsearch-toolkit 1.0.0 requires platformdirs<4.0.0,>=3.5.1, but you have platformdirs 4.3.2 which is incompatible.



15 changes: 0 additions & 15 deletions transforms/packaging/ray/requirements.txt

This file was deleted.

0 comments on commit 703ebe0

Please sign in to comment.