Skip to content

Commit

Permalink
Add read buffer memory sanity check
Browse files Browse the repository at this point in the history
ref #150
  • Loading branch information
ihnorton committed May 17, 2019
1 parent b099a9d commit 80c246a
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 0 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ numpy>=1.7.2
setuptools>=18.0.1
setuptools-scm>=1.5.4
wheel>=0.30.0
psutil
1 change: 1 addition & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ setuptools==40.8.0
setuptools-scm==1.5.4
wheel==0.30.0
tox==3.0.0
psutil
69 changes: 69 additions & 0 deletions tiledb/tests/test_libtiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,75 @@ def test_io(self):
self.assertEqual(io.readall(), b"")


class MemoryTest(DiskTestCase):
    """Sanity check that repeated reads do not steadily grow process memory.

    The test writes a 10,000,000-element float32 array (~40 MB), reads it
    back 100 times without keeping the results, and checks that the resident
    set size (RSS) of the process stays within 10% of the value measured
    after the first read.

    See https://github.com/TileDB-Inc/TileDB-Py/issues/150
    """

    def setUp(self):
        """Skip on non-Linux platforms, then run the base-class setup."""
        import sys

        # Check the platform first so a skipped test does not leave behind
        # whatever fixtures the base class creates.
        if not sys.platform.startswith("linux"):
            self.skipTest("Only run MemoryTest on linux for now")

        # NOTE(review): DiskTestCase is defined elsewhere; `self.path` is used
        # below, so the parent setUp presumably has to run -- confirm.
        super(MemoryTest, self).setUp()

    @staticmethod
    def _to_mb(num_bytes):
        """Convert a byte count to megabytes (10**6 bytes), 2 decimals."""
        # Divide by a float so the result is not truncated under Python 2.
        return round(num_bytes / 1e6, 2)

    @staticmethod
    def use_many_buffers(path):
        """Create a dense array at `path`, then read it fully 100 times.

        :param path: location at which the test array is created
        :return: RSS of the current process, in bytes, measured after the
            first (warm-up) read and before the 100 repeated reads
        """
        import os
        import psutil

        # https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        process = psutil.Process(os.getpid())

        x = np.ones(10000000, dtype=np.float32)
        ctx = tiledb.Ctx()
        d1 = tiledb.Dim(
            'test_domain', domain=(0, x.shape[0] - 1), tile=10000,
            dtype="uint32")
        domain = tiledb.Domain(d1)
        v = tiledb.Attr('test_value', dtype="float32")

        schema = tiledb.ArraySchema(
            domain=domain, attrs=(v,),
            cell_order="row-major", tile_order="row-major")

        tiledb.DenseArray.create(path, schema)

        with tiledb.DenseArray(path, mode="w", ctx=ctx) as A:
            A[:] = {'test_value': x}

        with tiledb.DenseArray(path, mode='r') as data:
            # Warm-up read, so the baseline is taken after one full read.
            data[:]
            initial = process.memory_info().rss
            print("  initial RSS: {}".format(MemoryTest._to_mb(initial)))
            for i in range(100):
                # read but don't store: this memory should be freed
                data[:]

                if i % 10 == 0:
                    print('    read iter {}, RSS (MB): {}'.format(
                        i, MemoryTest._to_mb(process.memory_info().rss)))

        return initial

    def test_memory_cleanup(self):
        """Check RSS after 100 reads is within 10% of the initial RSS."""
        import gc
        import os
        import psutil

        # run function which reads 100x from a 40MB test array
        # TODO: RSS is too loose to do this end-to-end, so should use instrumentation.
        print("Starting TileDB-Py memory test:")
        initial = self.use_many_buffers(self.path('test_memory_cleanup'))

        process = psutil.Process(os.getpid())
        final = process.memory_info().rss
        print("  final RSS: {}".format(self._to_mb(final)))

        gc.collect()

        # Reported for information only; the assertion below uses the value
        # measured before the forced collection.
        final_gc = process.memory_info().rss
        print("  final RSS after forced GC: {}".format(self._to_mb(final_gc)))

        self.assertLess(
            abs(final - initial), .1 * initial,
            "RSS changed by more than 10%: initial={} final={}".format(
                initial, final))


#if __name__ == '__main__':
# # run a single example for in-process debugging
# # better to use `pytest --gdb` if available
Expand Down

0 comments on commit 80c246a

Please sign in to comment.