Create nlp_primitives[complete] install option and remove tensorflow …

…from core requirements (#24) * create nlp_primitives[complete] install option * update README.md * add serialization test * fix typo in README.md * update test without tensorflow to use fixture * update circleci machine size * update circleci machine size * update requirements * add feature serialization test * update test to use tmpdir * update requirements * update README.md
alteryx · Aug 10, 2020 · 0868ced · 0868ced
1 parent 8dca05b
commit 0868ced
Show file tree

Hide file tree

Showing 10 changed files with 107 additions and 11 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -8,6 +8,7 @@ executors:
         default: "python:3.7"
     docker:
       - image: circleci/<< parameters.image_tag >>
+    resource_class: large
 
 commands:
   installation:
@@ -19,7 +20,7 @@ commands:
             virtualenv env -q
             source env/bin/activate
             pip config --site set global.progress_bar off
-            pip install .
+            pip install ".[complete]"
 
 jobs:
   lint_tests:

diff --git a/Makefile b/Makefile
@@ -14,3 +14,9 @@ lint-tests:
 
 unit-tests:
 	pytest --cache-clear --show-capture=stderr -vv
+
+.PHONY: installdeps
+installdeps:
+	pip install --upgrade pip
+	pip install -e .
+	pip install -r test-requirements.txt
diff --git a/README.md b/README.md
@@ -7,8 +7,16 @@ nlp_primitives is a Python library with Natural Language Processing Primitives,
 nlp_primitives allows you to make use of text data in your machine learning pipeline in the same pipeline as the rest of your data.
 
 ### Install
+There are two options for installing nlp_primitives. Both of the options will also install Featuretools, if it is not already installed.
+
+The first option is to install a version of nlp_primitives that does not include Tensorflow. With this option, primitives that depend on Tensorflow cannot be used. Currently, the only primitive that cannot be used with this install option is ``UniversalSentenceEncoder``. To install nlp_primitives without Tensorflow, run:
+```shell
+pip install nlp_primitives
+```
+
+The second option is to install the complete version of nlp_primitives, which will also install Tensorflow and allow use of all primitives. To install the complete version of nlp_primitives, run:
 ```shell
-pip install 'featuretools[nlp_primitives]'
+pip install "nlp_primitives[complete]"
 ```
 
 ### Demos

diff --git a/complete-requirements.txt b/complete-requirements.txt
@@ -0,0 +1,2 @@
+tensorflow>=1.14.0
+tensorflow_hub>=0.4.0
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -1,6 +1,10 @@
 =========
 Changelog
 =========
+**Future Release**
+    * Remove tensorflow and tensorflow_hub from the core requirements; they
+      can instead be installed with ``pip install "nlp_primitives[complete]"``
+
 **v0.3.1**
     * Fix installation error related to scipy version
 

diff --git a/nlp_primitives/tests/test_universal_sentence_encoder.py b/nlp_primitives/tests/test_universal_sentence_encoder.py
@@ -1,4 +1,14 @@
+import sys
+
+import featuretools as ft
 import pandas as pd
+import pytest
+from featuretools.primitives.utils import (
+    PrimitivesDeserializer,
+    serialize_primitive
+)
+
+from nlp_primitives import UniversalSentenceEncoder
 
 
 def test_regular(universal_sentence_encoder):
@@ -13,3 +23,64 @@ def test_regular(universal_sentence_encoder):
     a = a.mean().round(7).astype('str')
     b = pd.Series(['-0.0007475', '0.0032088', '0.0018552', '0.0008256', '0.0028342'])
     assert a.equals(b)
+
+
+@pytest.fixture()
+def mock_remove_tensorflow():
+    # Simulate tensorflow being missing
+    tf_mod = sys.modules['tensorflow']
+    sys.modules['tensorflow'] = None
+    yield
+    sys.modules['tensorflow'] = tf_mod
+
+
+def test_without_tensorflow(universal_sentence_encoder, mock_remove_tensorflow):
+    err_message = "In order to use the UniversalSentenceEncoder primitive install 'nlp_primitives[complete]'"
+    with pytest.raises(ImportError) as error:
+        UniversalSentenceEncoder()
+    assert error.value.args[0] == err_message
+
+
+def test_primitive_serialization(universal_sentence_encoder):
+    sentences = pd.Series([
+        "",
+        "I like to eat pizza",
+        "The roller coaster was built in 1885.",
+        "When will humans go to mars?",
+        "Mitochondria is the powerhouse of the cell",
+    ])
+    serialized_primitive = serialize_primitive(universal_sentence_encoder)
+    deserializer = PrimitivesDeserializer()
+    deserialized_primitive = deserializer.deserialize_primitive(serialized_primitive)
+
+    a = pd.DataFrame(deserialized_primitive(sentences))
+    a = a.mean().round(7).astype('str')
+    b = pd.Series(['-0.0007475', '0.0032088', '0.0018552', '0.0008256', '0.0028342'])
+    assert a.equals(b)
+
+
+def test_feature_serialization(universal_sentence_encoder, tmpdir):
+    sentences = pd.Series([
+        "",
+        "I like to eat pizza",
+        "The roller coaster was built in 1885.",
+        "When will humans go to mars?",
+        "Mitochondria is the powerhouse of the cell",
+    ])
+
+    es = ft.EntitySet("es")
+    df = pd.DataFrame({"id": [0, 1, 2, 3, 4], "sentences": sentences})
+    es.entity_from_dataframe(dataframe=df,
+                             entity_id="entity",
+                             index="id",
+                             variable_types={"sentences": ft.variable_types.Text})
+    fm, features = ft.dfs(entityset=es,
+                          target_entity="entity",
+                          trans_primitives=[universal_sentence_encoder])
+
+    filename = str(tmpdir.join("features.txt"))
+    ft.save_features(features, filename)
+    loaded_features = ft.load_features(filename)
+    fm_serialized = ft.calculate_feature_matrix(loaded_features, entityset=es)
+
+    pd.testing.assert_frame_equal(fm, fm_serialized)
diff --git a/nlp_primitives/universal_sentence_encoder.py b/nlp_primitives/universal_sentence_encoder.py
@@ -1,6 +1,5 @@
-import tensorflow as tf
-import tensorflow_hub as hub
 from featuretools.primitives import TransformPrimitive
+from featuretools.utils.gen_utils import import_or_raise
 from featuretools.variable_types import Numeric, Text
 
 
@@ -27,17 +26,20 @@ class UniversalSentenceEncoder(TransformPrimitive):
     return_type = Numeric
 
     def __init__(self):
-        tf.compat.v1.disable_eager_execution()
+        message = "In order to use the UniversalSentenceEncoder primitive install 'nlp_primitives[complete]'"
+        self.tf = import_or_raise("tensorflow", message)
+        hub = import_or_raise("tensorflow_hub", message)
+        self.tf.compat.v1.disable_eager_execution()
         self.module_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
         self.embed = hub.Module(self.module_url)
         self.number_output_features = 512
         self.n = 512
 
     def get_function(self):
         def universal_sentence_encoder(col):
-            with tf.compat.v1.Session() as session:
-                session.run([tf.compat.v1.global_variables_initializer(),
-                             tf.compat.v1.tables_initializer()])
+            with self.tf.compat.v1.Session() as session:
+                session.run([self.tf.compat.v1.global_variables_initializer(),
+                             self.tf.compat.v1.tables_initializer()])
                 embeddings = session.run(self.embed(col.tolist()))
             return embeddings.transpose()
         return universal_sentence_encoder
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,5 @@
 numpy>=1.13.3
 pandas>=0.23.0
-tensorflow>=1.14.0
 featuretools>=0.7.0
 nltk>=3.4.5
-tensorflow_hub>=0.4.0
 scikit-learn>=0.20.0
diff --git a/setup.py b/setup.py
@@ -6,6 +6,9 @@
 with open(path.join(this_directory, 'README.md')) as f:
     long_description = f.read()
 
+extras_require = {
+    'complete': open('complete-requirements.txt').readlines()
+}
 
 setup(
     name='nlp_primitives',
@@ -15,11 +18,11 @@
     license='BSD 3-clause',
     url='http://www.featurelabs.com/',
     install_requires=open('requirements.txt').readlines(),
-    tests_require=open('test-requirements.txt').readlines(),
     packages=find_packages(),
     long_description=long_description,
     long_description_content_type='text/markdown',
     python_requires='>=3.6',
+    extras_require=extras_require,
     entry_points={
         'featuretools_plugin': [
             'nlp_primitives = nlp_primitives',

diff --git a/test-requirements.txt b/test-requirements.txt
@@ -2,3 +2,4 @@ flake8>=3.7.0
 autopep8>=1.4.3
 isort>=5.0.5
 pytest>=4.4.1
+-r complete-requirements.txt