Skip to content

Commit

Permalink
Updated NYCTaxi-E2E notebook
Browse files Browse the repository at this point in the history
Additions to NYCTaxi-E2E notebook addressing issues on rapidsai-community#214
  • Loading branch information
vilmara authored Oct 29, 2019
1 parent 02567e4 commit 08848d2
Showing 1 changed file with 73 additions and 3 deletions.
76 changes: 73 additions & 3 deletions intermediate_notebooks/E2E/taxi/NYCTaxi-E2E.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,51 @@
"client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# RAPIDS Memory Manager (RMM) Functionality"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### RAPIDS Memory Manager (RMM) functionality\n",
"import rmm\n",
"from rmm import rmm_config as rmm_cfg\n",
"\n",
"def initialize_rmm_pool():\n",
"    \"\"\"Initialize RMM with the pool allocator enabled.\n",
"\n",
"    Sets ``rmm_cfg.use_pool_allocator`` to True, then calls\n",
"    ``rmm.initialize()`` and returns that call's result unchanged.\n",
"    \"\"\"\n",
"    rmm_cfg.use_pool_allocator = True\n",
"    init_result = rmm.initialize()\n",
"    return init_result\n",
"\n",
"def initialize_rmm_no_pool():\n",
"    \"\"\"Initialize RMM with the pool allocator disabled.\n",
"\n",
"    Sets ``rmm_cfg.use_pool_allocator`` to False, then calls\n",
"    ``rmm.initialize()`` and returns that call's result unchanged.\n",
"    \"\"\"\n",
"    rmm_cfg.use_pool_allocator = False\n",
"    init_result = rmm.initialize()\n",
"    return init_result\n",
"\n",
"def finalize_rmm():\n",
"    \"\"\"Call ``rmm.finalize()`` and return its result unchanged.\"\"\"\n",
"    finalize_result = rmm.finalize()\n",
"    return finalize_result\n",
"\n",
"def run_dask_task(func, **kwargs):\n",
"    \"\"\"Call ``func`` with the given keyword arguments and return its result.\"\"\"\n",
"    return func(**kwargs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Initialize the GPU memory pool: finalize RMM via client.run, then re-initialize it with the pool allocator enabled\n",
"\n",
"client.run(finalize_rmm)\n",
"client.run(initialize_rmm_pool) "
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -309,8 +354,9 @@
" outcols=dict(day_of_week=np.float32),\n",
" kwargs=dict())\n",
" \n",
" # XGBoost currently cannot consume boolean columns from cuDF because they are specialized to bitsets (following Arrow), so cast to int32\n",
" df['is_weekend'] = (df['day_of_week']<2).astype(np.int32)\n",
" \n",
" df = df.drop('day_of_week')\n",
" return df"
]
},
Expand Down Expand Up @@ -392,7 +438,6 @@
" 'silent': True,\n",
" 'verbose_eval': True,\n",
" 'tree_method':'gpu_hist',\n",
" 'n_gpus': 1\n",
"}\n",
"\n",
"trained_model = dxgb_gpu.train(client, params, X_train, Y_train, num_boost_round=100)"
Expand Down Expand Up @@ -423,12 +468,21 @@
" return df.partitions[nonempty]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pick a Test Set"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"X_test = taxi_df.query('day >= 25').persist()\n",
"X_test = drop_empty_partitions(X_test)\n",
"\n",
Expand All @@ -438,6 +492,9 @@
"# Drop the fare amount from X_test\n",
"X_test = X_test[X_test.columns.difference(['fare_amount'])]\n",
"\n",
"# This won't return until all data is in GPU memory\n",
"done = wait([X_test, Y_test])\n",
"\n",
"# display test set size\n",
"len(X_test)"
]
Expand All @@ -448,6 +505,8 @@
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"# generate predictions on the test set\n",
"Y_test['prediction'] = dxgb_gpu.predict(client, trained_model, X_test)"
]
Expand Down Expand Up @@ -543,6 +602,17 @@
"math.sqrt(Y_test.squared_error.mean().compute())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Finalize the GPU memory pool\n",
"\n",
"client.run(finalize_rmm)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -577,7 +647,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 08848d2

Please sign in to comment.