Skip to content

Commit

Permalink
Create nlp_primitives[complete] install option and remove tensorflow …
Browse files Browse the repository at this point in the history
…from core requirements (#24)

* create nlp_primitives[complete] install option

* update README.md

* add serialization test

* fix typo in README.md

* update test without tensorflow to use fixture

* update circleci machine size

* update circleci machine size

* update requirements

* add feature serialization test

* update test to use tmpdir

* update requirements

* update README.md
  • Loading branch information
thehomebrewnerd authored Aug 10, 2020
1 parent 8dca05b commit 0868ced
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 11 deletions.
3 changes: 2 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ executors:
default: "python:3.7"
docker:
- image: circleci/<< parameters.image_tag >>
resource_class: large

commands:
installation:
Expand All @@ -19,7 +20,7 @@ commands:
virtualenv env -q
source env/bin/activate
pip config --site set global.progress_bar off
pip install .
pip install ".[complete]"
jobs:
lint_tests:
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ lint-tests:

unit-tests:
pytest --cache-clear --show-capture=stderr -vv

.PHONY: installdeps
installdeps:
pip install --upgrade pip
pip install -e .
pip install -r test-requirements.txt
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@ nlp_primitives is a Python library with Natural Language Processing Primitives,
nlp_primitives allows you to make use of text data in your machine learning pipeline in the same pipeline as the rest of your data.

### Install
There are two options for installing nlp_primitives. Both of the options will also install Featuretools, if it is not already installed.

The first option is to install a version of nlp_primitives that does not include Tensorflow. With this option, primitives that depend on Tensorflow cannot be used. Currently, the only primitive that cannot be used with this install option is ``UniversalSentenceEncoder``. To install nlp_primitives without Tensorflow, run:
```shell
pip install nlp_primitives
```

The second option is to install the complete version of nlp_primitives, which will also install Tensorflow and allow use of all primitives. To install the complete version of nlp_primitives:
```shell
pip install 'featuretools[nlp_primitives]'
pip install "nlp_primitives[complete]"
```

### Demos
Expand Down
2 changes: 2 additions & 0 deletions complete-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
tensorflow>=1.14.0
tensorflow_hub>=0.4.0
4 changes: 4 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
=========
Changelog
=========
**Future Release**
* Remove tensorflow and tensorflow_hub from the core requirements; they
can now be installed with ``pip install "nlp_primitives[complete]"``

**v0.3.1**
* Fix installation error related to scipy version

Expand Down
71 changes: 71 additions & 0 deletions nlp_primitives/tests/test_universal_sentence_encoder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
import sys

import featuretools as ft
import pandas as pd
import pytest
from featuretools.primitives.utils import (
PrimitivesDeserializer,
serialize_primitive
)

from nlp_primitives import UniversalSentenceEncoder


def test_regular(universal_sentence_encoder):
Expand All @@ -13,3 +23,64 @@ def test_regular(universal_sentence_encoder):
a = a.mean().round(7).astype('str')
b = pd.Series(['-0.0007475', '0.0032088', '0.0018552', '0.0008256', '0.0028342'])
assert a.equals(b)


@pytest.fixture()
def mock_remove_tensorflow():
    """Simulate tensorflow being missing for the duration of one test.

    While the fixture is active, ``sys.modules['tensorflow']`` is ``None``,
    which makes any ``import tensorflow`` raise ``ImportError``. On teardown
    the previous state of ``sys.modules`` is restored exactly: the original
    module object is put back, or the placeholder entry is removed if
    tensorflow had not been imported before the fixture ran.
    """
    # Sentinel distinguishes "not imported yet" from a real entry, so the
    # fixture no longer raises KeyError when tensorflow was never imported.
    not_loaded = object()
    previous = sys.modules.get('tensorflow', not_loaded)

    sys.modules['tensorflow'] = None
    try:
        yield
    finally:
        if previous is not_loaded:
            # Nothing to restore; just drop the blocking placeholder.
            sys.modules.pop('tensorflow', None)
        else:
            sys.modules['tensorflow'] = previous


def test_without_tensorflow(universal_sentence_encoder, mock_remove_tensorflow):
    """Constructing the primitive without tensorflow must raise a helpful ImportError.

    ``mock_remove_tensorflow`` hides tensorflow from the import machinery, so
    instantiating ``UniversalSentenceEncoder`` is expected to fail with the
    install hint below as the first exception argument.
    """
    # NOTE(review): ``universal_sentence_encoder`` is not used in the body;
    # presumably it is requested so tensorflow is already imported before
    # ``mock_remove_tensorflow`` runs -- confirm against conftest.
    expected_hint = "In order to use the UniversalSentenceEncoder primitive install 'nlp_primitives[complete]'"

    with pytest.raises(ImportError) as excinfo:
        UniversalSentenceEncoder()

    raised = excinfo.value
    assert raised.args[0] == expected_hint


def test_primitive_serialization(universal_sentence_encoder):
    """A serialized-then-deserialized primitive must produce the reference output.

    The primitive is passed through ``serialize_primitive`` and
    ``PrimitivesDeserializer.deserialize_primitive``; the restored primitive is
    then applied to five sentences and the per-column means of its output
    (rounded to 7 decimals, compared as strings) are checked against fixed
    reference values.
    """
    corpus = pd.Series([
        "",
        "I like to eat pizza",
        "The roller coaster was built in 1885.",
        "When will humans go to mars?",
        "Mitochondria is the powerhouse of the cell",
    ])
    reference_means = ['-0.0007475', '0.0032088', '0.0018552', '0.0008256', '0.0028342']

    # Round-trip the primitive through its serialized form.
    payload = serialize_primitive(universal_sentence_encoder)
    restored_primitive = PrimitivesDeserializer().deserialize_primitive(payload)

    embeddings = pd.DataFrame(restored_primitive(corpus))
    observed = embeddings.mean().round(7).astype('str')
    expected = pd.Series(reference_means)
    assert observed.equals(expected)


def test_feature_serialization(universal_sentence_encoder, tmpdir):
    """Features using the primitive must survive a save/load round trip.

    Builds a one-entity EntitySet with a text column, runs DFS with the
    primitive as the only transform primitive, saves the resulting feature
    definitions to a file under ``tmpdir``, reloads them, and checks that the
    feature matrix recomputed from the reloaded features equals the original.
    """
    text_column = pd.Series([
        "",
        "I like to eat pizza",
        "The roller coaster was built in 1885.",
        "When will humans go to mars?",
        "Mitochondria is the powerhouse of the cell",
    ])

    entityset = ft.EntitySet("es")
    frame = pd.DataFrame({"id": [0, 1, 2, 3, 4], "sentences": text_column})
    entityset.entity_from_dataframe(
        dataframe=frame,
        entity_id="entity",
        index="id",
        variable_types={"sentences": ft.variable_types.Text},
    )

    original_matrix, feature_defs = ft.dfs(
        entityset=entityset,
        target_entity="entity",
        trans_primitives=[universal_sentence_encoder],
    )

    # Persist the feature definitions, then rebuild the matrix from the copy
    # read back from disk.
    features_path = str(tmpdir.join("features.txt"))
    ft.save_features(feature_defs, features_path)
    reloaded_defs = ft.load_features(features_path)
    reloaded_matrix = ft.calculate_feature_matrix(reloaded_defs, entityset=entityset)

    pd.testing.assert_frame_equal(original_matrix, reloaded_matrix)
14 changes: 8 additions & 6 deletions nlp_primitives/universal_sentence_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import tensorflow as tf
import tensorflow_hub as hub
from featuretools.primitives import TransformPrimitive
from featuretools.utils.gen_utils import import_or_raise
from featuretools.variable_types import Numeric, Text


Expand All @@ -27,17 +26,20 @@ class UniversalSentenceEncoder(TransformPrimitive):
return_type = Numeric

def __init__(self):
tf.compat.v1.disable_eager_execution()
message = "In order to use the UniversalSentenceEncoder primitive install 'nlp_primitives[complete]'"
self.tf = import_or_raise("tensorflow", message)
hub = import_or_raise("tensorflow_hub", message)
self.tf.compat.v1.disable_eager_execution()
self.module_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
self.embed = hub.Module(self.module_url)
self.number_output_features = 512
self.n = 512

def get_function(self):
def universal_sentence_encoder(col):
with tf.compat.v1.Session() as session:
session.run([tf.compat.v1.global_variables_initializer(),
tf.compat.v1.tables_initializer()])
with self.tf.compat.v1.Session() as session:
session.run([self.tf.compat.v1.global_variables_initializer(),
self.tf.compat.v1.tables_initializer()])
embeddings = session.run(self.embed(col.tolist()))
return embeddings.transpose()
return universal_sentence_encoder
2 changes: 0 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
numpy>=1.13.3
pandas>=0.23.0
tensorflow>=1.14.0
featuretools>=0.7.0
nltk>=3.4.5
tensorflow_hub>=0.4.0
scikit-learn>=0.20.0
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
with open(path.join(this_directory, 'README.md')) as f:
long_description = f.read()

# Optional dependency groups, installable as ``nlp_primitives[<group>]``.
# The 'complete' group is read from complete-requirements.txt; the file is
# closed deterministically and each requirement is stripped of its trailing
# newline (blank lines are skipped).
with open('complete-requirements.txt') as complete_requirements_file:
    extras_require = {
        'complete': [
            line.strip()
            for line in complete_requirements_file
            if line.strip()
        ],
    }

setup(
name='nlp_primitives',
Expand All @@ -15,11 +18,11 @@
license='BSD 3-clause',
url='http://www.featurelabs.com/',
install_requires=open('requirements.txt').readlines(),
tests_require=open('test-requirements.txt').readlines(),
packages=find_packages(),
long_description=long_description,
long_description_content_type='text/markdown',
python_requires='>=3.6',
extras_require=extras_require,
entry_points={
'featuretools_plugin': [
'nlp_primitives = nlp_primitives',
Expand Down
1 change: 1 addition & 0 deletions test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ flake8>=3.7.0
autopep8>=1.4.3
isort>=5.0.5
pytest>=4.4.1
-r complete-requirements.txt

0 comments on commit 0868ced

Please sign in to comment.