From 71f63d2a35502f3498bafed31bc92e0e2dcd109a Mon Sep 17 00:00:00 2001 From: James Wexler Date: Tue, 4 Sep 2018 09:46:16 -0400 Subject: [PATCH] add reference to Facets Overview spark project --- facets_overview/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/facets_overview/README.md b/facets_overview/README.md index 0b4432a..2f9c521 100644 --- a/facets_overview/README.md +++ b/facets_overview/README.md @@ -39,6 +39,11 @@ import pandas as pd df = pd.DataFrame({'num' : [1, 2, 3, 4], 'str' : ['a', 'a', 'b', None]}) proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames([{'name': 'test', 'table': df}]) ``` + +## Large Datasets + +The Python code in this repository for generating feature stats only works on datasets that are small enough to fit into memory on your local machine. For distributed generation of feature stats for large datasets, check out the independently developed [Facets Overview Spark project](https://github.com/gopro/facets-overview-spark). + # Visualization A proto can easily be visualized in a Jupyter notebook using the installed nbextension.