[Feat] Copy-free save and load for cuckoo hashtable #243

Open · wants to merge 7 commits into base: master

Changes from all commits
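In brief: the new ops stream table contents directly to and from a file through a fixed-size buffer, rather than first exporting every key and value into in-memory tensors (the copy the title refers to avoiding). A minimal sketch of the buffered-save idea, with hypothetical names throughout (the PR's real logic lives in utils/filebuffer.h and the table's save_to_file/load_from_file methods):

// Sketch only: stream (key, value) pairs to disk in fixed-size chunks,
// keeping peak extra memory at O(buffer_size) instead of O(table size).
#include <cstdio>
#include <utility>
#include <vector>

template <class K, class V>
bool SaveBuffered(const std::vector<std::pair<K, V>>& entries,
                  const char* filepath, size_t buffer_size) {
  std::FILE* fp = std::fopen(filepath, "wb");
  if (fp == nullptr) return false;
  std::vector<K> keys;
  std::vector<V> values;
  keys.reserve(buffer_size);
  values.reserve(buffer_size);
  for (const auto& kv : entries) {
    keys.push_back(kv.first);
    values.push_back(kv.second);
    if (keys.size() == buffer_size) {
      // Flush a full chunk: keys block, then the matching values block.
      std::fwrite(keys.data(), sizeof(K), keys.size(), fp);
      std::fwrite(values.data(), sizeof(V), values.size(), fp);
      keys.clear();
      values.clear();
    }
  }
  if (!keys.empty()) {  // flush the final partial chunk
    std::fwrite(keys.data(), sizeof(K), keys.size(), fp);
    std::fwrite(values.data(), sizeof(V), values.size(), fp);
  }
  return std::fclose(fp) == 0;
}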
2 changes: 2 additions & 0 deletions .github/workflows/ci_test.yml
@@ -91,6 +91,7 @@ jobs:
run: |
# Run on all notebooks to prevent upstream change.
echo "Check formatting with nbfmt:"
python3 -m pip install --upgrade protobuf==3.20.0
python3 -m tensorflow_docs.tools.nbfmt --test \
$(find docs/tutorials/ -type f -name *.ipynb)
nblint:
@@ -105,6 +106,7 @@ jobs:
run: |
# Run on all notebooks to prevent upstream change.
echo "Lint check with nblint:"
python3 -m pip install --upgrade protobuf==3.20.0
python3 -m tensorflow_docs.tools.nblint \
--arg=repo:tensorflow/recommenders-addons \
--exclude_lint=tensorflow::button_colab \
2 changes: 2 additions & 0 deletions .github/workflows/make_wheel_Windows_x86.sh
@@ -10,6 +10,8 @@ fi
python -m pip install --default-timeout=1000 wheel setuptools tensorflow==$TF_VERSION horovod==$HOROVOD_VERSION
bash ./tools/testing/build_and_run_tests.sh

python -m pip install --upgrade protobuf==3.20.0

python configure.py

bazel.exe build --no-cache \
1 change: 1 addition & 0 deletions .github/workflows/make_wheel_macOS_arm64.sh
@@ -11,6 +11,7 @@ export TF_NEED_CUDA=0

python --version

python -m pip install --upgrade protobuf==3.20.0
python configure.py

bazel build \
2 changes: 2 additions & 0 deletions .github/workflows/make_wheel_macOS_x86.sh
@@ -9,6 +9,8 @@ python --version
brew install open-mpi

python -m pip install --default-timeout=1000 delocate==0.9.1 wheel setuptools tensorflow==$TF_VERSION
python -m pip install --upgrade protobuf==3.20.0

bash tools/docker/install/install_horovod.sh $HOROVOD_VERSION --only-cpu

bash tools/testing/build_and_run_tests.sh
1 change: 1 addition & 0 deletions .github/workflows/release.yml
@@ -34,6 +34,7 @@ jobs:
pip install --default-timeout=1000 -r tools/install_deps/pytest.txt -r tools/install_deps/tensorflow-cpu.txt -r requirements.txt
sudo apt install -y redis > /dev/null 2> /dev/null
bash tools/install_deps/install_bazelisk.sh ./
python -m pip install --upgrade protobuf==3.20.0
python configure.py
bazel test -c opt -k --test_timeout 300,450,1200,3600 --test_output=errors //tensorflow_recommenders_addons/...
release-wheel:
6 changes: 4 additions & 2 deletions tensorflow_recommenders_addons/dynamic_embedding/core/BUILD
@@ -14,8 +14,9 @@ custom_op_library(
"kernels/cuckoo_hashtable_op.h",
"kernels/cuckoo_hashtable_op.cc",
"ops/cuckoo_hashtable_ops.cc",
"utils/utils.h",
"utils/filebuffer.h",
"utils/types.h",
"utils/utils.h",
] + glob(["kernels/lookup_impl/lookup_table_op_cpu*"]),
cuda_deps = if_cuda_for_tf_serving(
["//tensorflow_recommenders_addons/dynamic_embedding/core/lib/nvhash:nvhashtable"],
@@ -26,8 +27,9 @@ custom_op_library(
"kernels/cuckoo_hashtable_op.h",
"kernels/cuckoo_hashtable_op_gpu.h",
"kernels/cuckoo_hashtable_op_gpu.cu.cc",
"utils/utils.h",
"utils/filebuffer.h",
"utils/types.h",
"utils/utils.h",
] + glob(["kernels/lookup_impl/lookup_table_op_gpu*"])),
deps = ["//tensorflow_recommenders_addons/dynamic_embedding/core/lib/cuckoo:cuckoohash"],
)
tensorflow_recommenders_addons/dynamic_embedding/core/kernels/cuckoo_hashtable_op.cc
@@ -304,6 +304,18 @@ class CuckooHashTableOfTensors final : public LookupInterface {
return table_->export_values(ctx, value_dim);
}

Status SaveToFile(OpKernelContext* ctx, const string filepath,
const size_t buffer_size) {
int64 value_dim = value_shape_.dim_size(0);
[Review comment from Member]
int64_t

[Reply from Member Author]
tensorflow::int64 is returned from dim_size and also used in table_->save()
return table_->save_to_file(ctx, value_dim, filepath, buffer_size);
}

Status LoadFromFile(OpKernelContext* ctx, const string filepath,
const size_t buffer_size) {
int64 value_dim = value_shape_.dim_size(0);
[Review comment from @rhdong (Member), May 26, 2022]
int64_t
return table_->load_from_file(ctx, value_dim, filepath, buffer_size);
}

DataType key_dtype() const override { return DataTypeToEnum<K>::v(); }

DataType value_dtype() const override { return DataTypeToEnum<V>::v(); }
@@ -607,6 +619,36 @@ class HashTableExportOp : public HashTableOpKernel {
}
};

// Op that exports all keys and values to a file.
template <class K, class V>
class HashTableExportToFileOp : public HashTableOpKernel {
public:
explicit HashTableExportToFileOp(OpKernelConstruction* ctx)
: HashTableOpKernel(ctx) {
int64 signed_buffer_size = 0;
// GetAttr returns a Status; surface a failure instead of silently ignoring it.
OP_REQUIRES_OK(ctx, ctx->GetAttr("buffer_size", &signed_buffer_size));
buffer_size_ = static_cast<size_t>(signed_buffer_size);
}

void Compute(OpKernelContext* ctx) override {
LookupInterface* table;
OP_REQUIRES_OK(ctx, GetTable(ctx, &table));
core::ScopedUnref unref_me(table);

const Tensor& ftensor = ctx->input(1);
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(ftensor.shape()),
errors::InvalidArgument("filepath must be scalar."));
string filepath = string(ftensor.scalar<tstring>()().data());

lookup::CuckooHashTableOfTensors<K, V>* table_cuckoo =
(lookup::CuckooHashTableOfTensors<K, V>*)table;
OP_REQUIRES_OK(ctx, table_cuckoo->SaveToFile(ctx, filepath, buffer_size_));
}

private:
size_t buffer_size_;
};
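The kernel above assumes an op with the table handle as input 0, a scalar string filepath as input 1, and an integer buffer_size attribute. A hedged sketch of what the matching registration in ops/cuckoo_hashtable_ops.cc (not shown in this diff) plausibly looks like; the exact input types, attr spec, and shape function are assumptions:

REGISTER_OP(PREFIX_OP_NAME(CuckooHashTableExportToFile))
    .Input("table_handle: resource")
    .Input("filepath: string")
    .Attr("key_dtype: type")
    .Attr("value_dtype: type")
    .Attr("buffer_size: int >= 1")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      shape_inference::ShapeHandle unused;
      // Enforce the scalar-filepath requirement the kernel checks again.
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      return Status::OK();
    });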

// Clear the table and insert data.
class HashTableImportOp : public HashTableOpKernel {
public:
@@ -637,6 +679,37 @@ class HashTableImportOp : public HashTableOpKernel {
}
};

// Op that imports all keys and values from a file.
template <class K, class V>
class HashTableImportFromFileOp : public HashTableOpKernel {
public:
explicit HashTableImportFromFileOp(OpKernelConstruction* ctx)
: HashTableOpKernel(ctx) {
int64 signed_buffer_size = 0;
// GetAttr returns a Status; surface a failure instead of silently ignoring it.
OP_REQUIRES_OK(ctx, ctx->GetAttr("buffer_size", &signed_buffer_size));
buffer_size_ = static_cast<size_t>(signed_buffer_size);
}

void Compute(OpKernelContext* ctx) override {
LookupInterface* table;
OP_REQUIRES_OK(ctx, GetTable(ctx, &table));
core::ScopedUnref unref_me(table);

const Tensor& ftensor = ctx->input(1);
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(ftensor.shape()),
errors::InvalidArgument("filepath must be scalar."));
string filepath = string(ftensor.scalar<tstring>()().data());

lookup::CuckooHashTableOfTensors<K, V>* table_cuckoo =
(lookup::CuckooHashTableOfTensors<K, V>*)table;
OP_REQUIRES_OK(ctx,
table_cuckoo->LoadFromFile(ctx, filepath, buffer_size_));
}

private:
size_t buffer_size_;
};
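Mirroring the save sketch near the top of this page, a minimal illustration of the buffered load that the import kernel delegates to (hypothetical names; the real work is table_->load_from_file via utils/filebuffer.h, and real values are value_dim-sized rows rather than single scalars):

// Sketch only: read back the chunked key/value layout written by the
// save sketch, inserting as we go so memory stays at O(buffer_size).
#include <cstdio>
#include <vector>

template <class K, class V, class Table>
bool LoadBuffered(Table* table, const char* filepath, size_t buffer_size) {
  std::FILE* fp = std::fopen(filepath, "rb");
  if (fp == nullptr) return false;
  std::vector<K> keys(buffer_size);
  std::vector<V> values(buffer_size);
  size_t n = 0;
  while ((n = std::fread(keys.data(), sizeof(K), buffer_size, fp)) > 0) {
    if (std::fread(values.data(), sizeof(V), n, fp) != n) break;  // truncated file
    for (size_t i = 0; i < n; ++i) {
      table->insert_or_assign(keys[i], values[i]);  // clobber existing keys
    }
  }
  std::fclose(fp);
  return true;
}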

REGISTER_KERNEL_BUILDER(
Name(PREFIX_OP_NAME(CuckooHashTableFind)).Device(DEVICE_CPU),
HashTableFindOp);
@@ -679,7 +752,17 @@ REGISTER_KERNEL_BUILDER(
.Device(DEVICE_CPU) \
.TypeConstraint<key_dtype>("Tin") \
.TypeConstraint<value_dtype>("Tout"), \
HashTableFindWithExistsOp<key_dtype, value_dtype>);
HashTableFindWithExistsOp<key_dtype, value_dtype>); \
REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(CuckooHashTableExportToFile)) \
.Device(DEVICE_CPU) \
.TypeConstraint<key_dtype>("key_dtype") \
.TypeConstraint<value_dtype>("value_dtype"), \
HashTableExportToFileOp<key_dtype, value_dtype>); \
REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(CuckooHashTableImportFromFile)) \
.Device(DEVICE_CPU) \
.TypeConstraint<key_dtype>("key_dtype") \
.TypeConstraint<value_dtype>("value_dtype"), \
HashTableImportFromFileOp<key_dtype, value_dtype>);

REGISTER_KERNEL(int32, double);
REGISTER_KERNEL(int32, float);
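For concreteness, each REGISTER_KERNEL(key_dtype, value_dtype) invocation above expands into one REGISTER_KERNEL_BUILDER call per op in the macro; for (int32, float), the new export registration becomes:

REGISTER_KERNEL_BUILDER(Name(PREFIX_OP_NAME(CuckooHashTableExportToFile))
                            .Device(DEVICE_CPU)
                            .TypeConstraint<int32>("key_dtype")
                            .TypeConstraint<float>("value_dtype"),
                        HashTableExportToFileOp<int32, float>);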