diff --git a/doc/api.rst b/doc/api.rst index 12df53a4..7f0e688e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1,3 +1,4 @@ +.. _API: .. currentmodule:: rbc ============= diff --git a/doc/conf.py b/doc/conf.py index f7c3e917..c6a27744 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -53,8 +53,12 @@ 'sphinx.ext.napoleon', 'sphinx.ext.autosectionlabel', 'numbadoc', + 'sphinx_design', + 'myst_parser', ] +myst_enable_extensions = ["colon_fence"] + # autosummary configuration autosummary_generate = True autodoc_typehints = "description" diff --git a/doc/developer.rst b/doc/developer.rst index 4832d56b..4ae7ebbe 100644 --- a/doc/developer.rst +++ b/doc/developer.rst @@ -41,7 +41,7 @@ Then, to create an environment with a few of the most common dependencies: To activate the environment for the current shell session: -.. code-block:: console +.. code-block:: bash $ conda activate rbc @@ -52,7 +52,7 @@ To activate the environment for the current shell session: Once the environment is activated, you have a dedicated Python with the required dependencies: -.. code-block:: python +.. code-block:: bash $ python >>> import llvmlite diff --git a/doc/getting_started.rst b/doc/getting_started.rst new file mode 100644 index 00000000..e69de29b diff --git a/doc/howto.md b/doc/howto.md new file mode 100644 index 00000000..ed833f57 --- /dev/null +++ b/doc/howto.md @@ -0,0 +1,51 @@ + +# RBC how-tos + +These documents are intended as short recipes for common tasks using RBC. It is +based on [NumPy how-tos](https://numpy.org/devdocs/user/howtos_index.html) and +[diataxis how to guide](https://diataxis.fr/how-to-guides/). + +How-tos are supposed to be short documents describing a specific feature or +property of the RBC project. For full content, check the tutorials page. 
+ +```{toctree} +--- +maxdepth: 1 +caption: Basics +--- + +howtos/connect +howtos/udf +howtos/udtf +howtos/raise_exception +howtos/template +``` + +----- + + +```{toctree} +--- +maxdepth: 1 +caption: Datatypes +--- + +howtos/array +howtos/column +howtos/text +howtos/geo +howtos/manager +howtos/string_dict_proxy +``` + +----- + +```{toctree} +--- +maxdepth: 1 +caption: Advanced +--- + +howtos/devices +howtos/external_functions +``` diff --git a/doc/howtos/array.md b/doc/howtos/array.md new file mode 100644 index 00000000..8dbd7a3a --- /dev/null +++ b/doc/howtos/array.md @@ -0,0 +1,82 @@ +# Array type + +Arrays provide a convenient way to represent a sequence of values of the same +type. + +## Basics + +### Building an array + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_array`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.array.new.begin +:end-before: magictoken.udf.array.new.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_array`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.array.new.sql.begin +:end-before: magictoken.udf.array.new.sql.end +:dedent: 4 +:linenos: +``` + +::: + + +### Computing the length of an array + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_array`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.array.length.begin +:end-before: magictoken.udf.array.length.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_array`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.array.length.sql.begin +:end-before: magictoken.udf.array.length.sql.end +:dedent: 4 +:linenos: +``` + +::: + +### Using the 
[array_api](https://data-apis.org/array-api/2022.12/) + +RBC partially implements the Array API standard. For a list of supported +functions, check the [API](API) page. + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_array`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.array.array_api.begin +:end-before: magictoken.udf.array.array_api.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_array`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.array.array_api.sql.begin +:end-before: magictoken.udf.array.array_api.sql.end +:dedent: 4 +:linenos: +``` + +::: \ No newline at end of file diff --git a/doc/howtos/column.md b/doc/howtos/column.md new file mode 100644 index 00000000..2abfff54 --- /dev/null +++ b/doc/howtos/column.md @@ -0,0 +1,28 @@ +# Column Type + +The Column type provides the structure and organization for storing and +retrieving columnar data within HeavyDB. 
+ +## Basic usage + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_column_power`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.column.basic.begin +:end-before: magictoken.udtf.column.basic.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_column_power`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.column.basic.sql.begin +:end-before: magictoken.udtf.column.basic.sql.end +:dedent: 4 +:linenos: +``` + +::: diff --git a/doc/howtos/connect.md b/doc/howtos/connect.md new file mode 100644 index 00000000..10346d50 --- /dev/null +++ b/doc/howtos/connect.md @@ -0,0 +1,12 @@ + +(heavydb-connect)= +# Connect to the HeavyDB server + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_connect`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.connect.begin +:end-before: magictoken.connect.end +:dedent: 4 +:linenos: +``` \ No newline at end of file diff --git a/doc/howtos/devices.md b/doc/howtos/devices.md new file mode 100644 index 00000000..9f5e87c6 --- /dev/null +++ b/doc/howtos/devices.md @@ -0,0 +1,25 @@ +# Restricting a function to run only on a specific device (CPU/GPU) + +Assuming you already have a [connection](heavydb-connect) to the HeavyDB server: + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_devices`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.devices.begin +:end-before: magictoken.devices.end +:dedent: 4 +:linenos: +``` + +By default, both devices are used if available. Otherwise, only the CPU is used. 
+ +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_devices`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.devices.sql.begin +:end-before: magictoken.devices.sql.end +:dedent: 4 +:linenos: +``` +::: \ No newline at end of file diff --git a/doc/howtos/external_functions.md b/doc/howtos/external_functions.md new file mode 100644 index 00000000..5a9034df --- /dev/null +++ b/doc/howtos/external_functions.md @@ -0,0 +1,52 @@ + +# Defining and using external functions + +## cmath `abs` + +The `external` keyword provides a way of calling C functions defined in other +libraries within Python code. + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_external_functions`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.external_functions.abs.begin +:end-before: magictoken.external_functions.abs.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_external_functions`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.external_functions.abs.sql.begin +:end-before: magictoken.external_functions.abs.sql.end +:dedent: 4 +:linenos: +``` +::: + +RBC already exposes a small set of C functions from the C stdlib. Check the API +reference page for more details. 
+ +## Using `printf` + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_external_functions`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.external_functions.printf.begin +:end-before: magictoken.external_functions.printf.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_external_functions`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.external_functions.printf.sql.begin +:end-before: magictoken.external_functions.printf.sql.end +:dedent: 4 +:linenos: +``` +::: \ No newline at end of file diff --git a/doc/howtos/geo.md b/doc/howtos/geo.md new file mode 100644 index 00000000..97aa264a --- /dev/null +++ b/doc/howtos/geo.md @@ -0,0 +1,58 @@ +# Geometry Types + +HeavyDB offers an extensive range of geometry types, encompassing +`(Multi)Point`, `(Multi)LineString`, and `(Multi)Polygon`. + +## Basics + +### `GeoPoint` + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_geopoint`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.geopoint.basic.begin +:end-before: magictoken.udtf.geopoint.basic.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_geopoint`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.geopoint.basic.sql.begin +:end-before: magictoken.udtf.geopoint.basic.sql.end +:dedent: 4 +:linenos: +``` + +::: + + +### `GeoMultiPoint` + +`MultiPoint` works a bit differently from `Point`: multipoints are created by +calling the `.from_coords` method. 
+ +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_geomultipoint`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.mp.basic.begin +:end-before: magictoken.udtf.mp.basic.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_geomultipoint`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.mp.basic.sql.begin +:end-before: magictoken.udtf.mp.basic.sql.end +:dedent: 4 +:linenos: +``` + +::: \ No newline at end of file diff --git a/doc/howtos/manager.md b/doc/howtos/manager.md new file mode 100644 index 00000000..6b7bbfe9 --- /dev/null +++ b/doc/howtos/manager.md @@ -0,0 +1,70 @@ +# Table/Row Function Manager + +The function managers in HeavyDB provide a convenient mechanism for handling the +state of user-defined functions. They can perform various tasks such as +allocating memory for output buffers, retrieving dictionary-encoded strings via +the [dictionary proxy](string-dict-proxy), and raising exceptions. 
+ +## Table Function Manager + +### Basic usage + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_tablefunctionmanager`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.mgr.basic.begin +:end-before: magictoken.udtf.mgr.basic.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_tablefunctionmanager`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.mgr.basic.sql.begin +:end-before: magictoken.udtf.mgr.basic.sql.end +:dedent: 4 +:linenos: +``` +::: + +### Retrieving the dictionary string proxy + +When the Column has type `TextEncodingDict`, users can access the dictionary +string proxy through the column's `string_dict_proxy` attribute: + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udtf_string_proxy`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.proxy.begin +:end-before: magictoken.udtf.proxy.end +:dedent: 4 +:linenos: +``` + +For additional information and references, please refer to the [API](API) page +and the dedicated [how-to](string-dict-proxy) page on the string dictionary +proxy in HeavyDB. 
+ +## Row Function Manager + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_rowfunctionmanager`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.mgr.basic.begin +:end-before: magictoken.udf.mgr.basic.end +:dedent: 8 +:linenos: +``` + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_rowfunctionmanager`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.mgr.basic.sql.begin +:end-before: magictoken.udf.mgr.basic.sql.end +:dedent: 8 +:linenos: +``` +::: diff --git a/doc/howtos/raise_exception.md b/doc/howtos/raise_exception.md new file mode 100644 index 00000000..eaf09c29 --- /dev/null +++ b/doc/howtos/raise_exception.md @@ -0,0 +1,33 @@ + +# Raising exceptions + +Exceptions in HeavyDB are quite different from the ones used in Python. In RBC +code, you signal to the database an exception happened by calling a specific +method (`error_message`) in the runner manager. + +## In a UDF: + +It is currently not possible to raise an exception in a UDF. The server must +implement support for it first before RBC can support it. 
+ +## In a UDTF + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_raise_exception`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.raise_exception.begin +:end-before: magictoken.raise_exception.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_raise_exception`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.raise_exception.sql.begin +:end-before: magictoken.raise_exception.sql.end +:dedent: 4 +:linenos: +``` +::: \ No newline at end of file diff --git a/doc/howtos/string_dict_proxy.md b/doc/howtos/string_dict_proxy.md new file mode 100644 index 00000000..6cc06a1a --- /dev/null +++ b/doc/howtos/string_dict_proxy.md @@ -0,0 +1,32 @@ +(string-dict-proxy)= +# String Dictionary Proxy + +The string dictionary proxy provides a convenient way for retrieving encoded +strings within the database. It works by maintaining a lookup table that maps +string values to unique integer identifiers. 
+ +## Retrieving the dictionary proxy + +### From `RowFunctionManager` + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf_dict_proxy`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.proxy.begin +:end-before: magictoken.udf.proxy.end +:dedent: 8 +:linenos: +``` + +### From `TableFunctionManager` + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udtf_string_proxy`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.proxy.begin +:end-before: magictoken.udtf.proxy.end +:dedent: 4 +:linenos: +``` + +Check the [API reference page](API) for a full list of table methods. diff --git a/doc/howtos/template.md b/doc/howtos/template.md new file mode 100644 index 00000000..bbae6061 --- /dev/null +++ b/doc/howtos/template.md @@ -0,0 +1,32 @@ + +# Using templates + +Templates are generic types used in the decorator `@heavydb`. Templating allows +the target function to accept different data types for its arguments. + +Assuming you already have a [connection](heavydb-connect) to the HeavyDB server: + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_templates`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.templates.begin +:end-before: magictoken.templates.end +:dedent: 4 +:linenos: +``` + +In the case above, the template arguments `T` and `Z` are specified within the +decorator, indicating the valid data types that can be used for the `add` +function. 
+ + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_templates`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.templates.sql.begin +:end-before: magictoken.templates.sql.end +:dedent: 4 +:linenos: +``` +::: \ No newline at end of file diff --git a/doc/howtos/text.md b/doc/howtos/text.md new file mode 100644 index 00000000..c4795dda --- /dev/null +++ b/doc/howtos/text.md @@ -0,0 +1,54 @@ +# Text types + +HeavyDB supports two text encoding options: `TEXT ENCODING NONE` and +`TEXT ENCODING DICT`. + +`TEXT ENCODING NONE` stores textual data without compression, while +`TEXT ENCODING DICT` uses dictionary-based encoding to reduce storage +requirements by replacing common words or phrases with shorter codes. +The choice depends on data characteristics and the trade-off between storage +space and encoding/decoding overhead. + +## Defining a UDF with Text types: + +(example-1)= +### Encoding dict +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf_text_copy`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.text.dict.begin +:end-before: magictoken.udf.text.dict.end +:dedent: 8 +:linenos: +``` + +### Encoding none +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf_text_duplicate`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.text.none.begin +:end-before: magictoken.udf.text.none.end +:dedent: 4 +:linenos: +``` + +### Converting a Text Encoding None to a string + +Text encoding none objects feature a handy `to_string()` method for converting +the object into a Python `str`. 
+ +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf_text_capitalize`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.text.capitalize.begin +:end-before: magictoken.udf.text.capitalize.end +:dedent: 4 +:linenos: +``` + +Check the Numba [readthedocs page](https://numba.readthedocs.io/en/stable/reference/pysupported.html#str) +for a list of supported string methods. + +### Converting a Text Encoding Dict to a string + +See the [first example](example-1) on this page. \ No newline at end of file diff --git a/doc/howtos/udf.md b/doc/howtos/udf.md new file mode 100644 index 00000000..f974fe5e --- /dev/null +++ b/doc/howtos/udf.md @@ -0,0 +1,55 @@ +# User Defined Functions (UDFs) + +## Basics + +UDFs are functions that operate at the row level. That is, they receive as input a +single row at a time. + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.begin +:end-before: magictoken.udf.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.sql.begin +:end-before: magictoken.udf.sql.end +:dedent: 4 +:linenos: +``` + +::: + + +## Multiple signatures + +A single UDF can be registered with multiple signatures: + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udf.multiple_signatures.begin +:end-before: magictoken.udf.multiple_signatures.end +:dedent: 4 +:linenos: +``` + +:::{dropdown} Example SQL Query + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udf`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: 
magictoken.udf.multiple_signatures.sql.begin +:end-before: magictoken.udf.multiple_signatures.sql.end +:dedent: 4 +:linenos: +``` + +::: \ No newline at end of file diff --git a/doc/howtos/udtf.md b/doc/howtos/udtf.md new file mode 100644 index 00000000..bd1f41a7 --- /dev/null +++ b/doc/howtos/udtf.md @@ -0,0 +1,25 @@ +# User Defined Table Functions (UDTFs) + +UDTFs are functions that operate on a column-level basis. In simpler terms, +UDTFs take a set of columns as input and return a set of columns as output. + +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udtf`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.begin +:end-before: magictoken.udtf.end +:dedent: 4 +:linenos: +``` + + +:::{dropdown} Example SQL Query +```{literalinclude} ../../rbc/tests/heavydb/test_howtos.py +:language: python +:caption: from ``test_udtf`` of ``rbc/tests/heavydb/test_howtos.py`` +:start-after: magictoken.udtf.sql.begin +:end-before: magictoken.udtf.sql.end +:dedent: 4 +:linenos: +``` +::: \ No newline at end of file diff --git a/doc/index.rst b/doc/index.rst index 7e906e3c..162714c7 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -15,12 +15,25 @@ A Python package implementing a remote backend compiler using numba and llvmlite tools. .. toctree:: + :caption: Getting started :maxdepth: 1 - :caption: Contents: design + installation + +.. toctree:: + :caption: Fundamentals and usage + :maxdepth: 1 + + howto + tutorials + +.. toctree:: + :maxdepth: 1 + :caption: Reference Manual: + api - releases + envvars developer nrt - envvars + releases diff --git a/doc/installation.md b/doc/installation.md new file mode 100644 index 00000000..2c8b8edc --- /dev/null +++ b/doc/installation.md @@ -0,0 +1,24 @@ + +# Installation + +RBC is available on both conda-forge and PyPI. 
+ +### conda/mamba + +You can use either [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) +or [mamba](https://github.com/mamba-org/mamba) to install RBC. We suggest using +mamba as it uses a faster solver. + +```bash +conda create --name rbc_env +conda activate rbc_env +conda install -c conda-forge rbc +``` + +### Pip + +On PyPI, the RBC library is called `rbc-project`. + +```bash +pip install rbc-project +``` diff --git a/doc/tutorials.md b/doc/tutorials.md new file mode 100644 index 00000000..c3fa9f94 --- /dev/null +++ b/doc/tutorials.md @@ -0,0 +1,39 @@ + +# RBC tutorials + +Unlike how-to guides, tutorials are longer and cover a broader set of +features available in the RBC project. These tutorials are usually jupyter +notebooks available in the `notebooks` repo and rendered here. + + +```{toctree} +--- +maxdepth: 1 +caption: Basics +--- + +tutorials/intro +``` + +------------ + +```{toctree} +--- +maxdepth: 1 +caption: Advanced +--- + +tutorials/ibis +tutorials/external_fns +``` + +------------ + +```{toctree} +--- +maxdepth: 1 +caption: Case Study +--- + +tutorials/black_scholes +``` \ No newline at end of file diff --git a/doc/tutorials/black_scholes.md b/doc/tutorials/black_scholes.md new file mode 100644 index 00000000..d33fda34 --- /dev/null +++ b/doc/tutorials/black_scholes.md @@ -0,0 +1,11 @@ + +# Case Study - Black Scholes model + +
+
+ + + +
+
diff --git a/doc/tutorials/external_fns.md b/doc/tutorials/external_fns.md new file mode 100644 index 00000000..d600c8e4 --- /dev/null +++ b/doc/tutorials/external_fns.md @@ -0,0 +1,11 @@ + +# Defining and calling external functions + +
+
+ + + +
+
diff --git a/doc/tutorials/ibis.md b/doc/tutorials/ibis.md new file mode 100644 index 00000000..6a89e8e0 --- /dev/null +++ b/doc/tutorials/ibis.md @@ -0,0 +1,11 @@ + +# Using RBC with Ibis framework + +
+
+ + + +
+
diff --git a/doc/tutorials/intro.md b/doc/tutorials/intro.md new file mode 100644 index 00000000..831d64f6 --- /dev/null +++ b/doc/tutorials/intro.md @@ -0,0 +1,11 @@ + +# RBC Basics + +
+
+ + + +
+
diff --git a/rbc/heavydb/geomultipoint.py b/rbc/heavydb/geomultipoint.py index c0bcc0d9..1846e5a6 100644 --- a/rbc/heavydb/geomultipoint.py +++ b/rbc/heavydb/geomultipoint.py @@ -16,13 +16,17 @@ class GeoMultiPointNumbaType(GeoNestedArrayNumbaType): - def __init__(self): - super().__init__(name="GeoMultiPointNumbaType") + def __init__(self, name): + super().__init__(name) class HeavyDBGeoMultiPointType(HeavyDBGeoNestedArray): """Typesystem type class for HeavyDB buffer structures.""" + @property + def numba_type(self): + return GeoMultiPointNumbaType + @property def type_name(self): return "GeoMultiPoint" diff --git a/rbc/heavydb/point2d.py b/rbc/heavydb/point2d.py index 67b8f139..29c99b32 100644 --- a/rbc/heavydb/point2d.py +++ b/rbc/heavydb/point2d.py @@ -64,18 +64,18 @@ def tonumba(self, bool_is_int8=None): @extending.type_callable(Point2D) def type_heavydb_point2d(context): def typer(x, y): - if isinstance(x, nb_types.Float) and isinstance(y, nb_types.Float): + if isinstance(x, nb_types.Number) and isinstance(y, nb_types.Number): return typesystem.Type.fromobject('Point2D').tonumba() return typer -@extending.lower_builtin(Point2D, nb_types.Float, nb_types.Float) +@extending.lower_builtin(Point2D, nb_types.Number, nb_types.Number) def heavydb_point2d_ctor(context, builder, sig, args): [x, y] = args typ = sig.return_type point = cgutils.create_struct_proxy(typ)(context, builder) - point.x = x - point.y = y + point.x = context.cast(builder, x, sig.args[0], nb_types.double) + point.y = context.cast(builder, y, sig.args[1], nb_types.double) return point._getvalue() diff --git a/rbc/tests/heavydb/test_howtos.py b/rbc/tests/heavydb/test_howtos.py new file mode 100644 index 00000000..ef385440 --- /dev/null +++ b/rbc/tests/heavydb/test_howtos.py @@ -0,0 +1,466 @@ +# tests for how-tos page +# ensure that examples in the docs page are correct +import pytest +from rbc.errors import HeavyDBServerError +from rbc.tests import heavydb_fixture +from rbc.heavydb import 
RemoteHeavyDB + + +@pytest.fixture(scope='module') +def heavydb(): + for o in heavydb_fixture(globals()): + yield o + + +def test_connect(heavydb): + # magictoken.connect.begin + from rbc.heavydb import RemoteHeavyDB + heavy = RemoteHeavyDB(user='admin', password='HyperInteractive', + host='127.0.0.1', port=6274) + # magictoken.connect.end + assert heavy.version is not None + + +def test_external_functions(heavydb): + heavydb.unregister() + # magictoken.external_functions.abs.begin + from rbc.external import external + cmath_abs = external('int64 abs(int64)') + + @heavydb('int64(int64)') + def apply_abs(x): + return cmath_abs(x) + # magictoken.external_functions.abs.end + + # magictoken.external_functions.abs.sql.begin + assert apply_abs(-3).execute() == 3 + # magictoken.external_functions.abs.sql.end + + # magictoken.external_functions.printf.begin + from rbc.externals.stdio import printf + + @heavydb('int64(int64)') + def power_2(x): + # This message will show in the heavydb logs + printf("input number: %d\n", x) + return x * x + # magictoken.external_functions.printf.end + + # magictoken.external_functions.printf.sql.begin + assert power_2(3).execute() == 9 + # magictoken.external_functions.printf.sql.end + + +def test_raise_exception(heavydb): + heavydb.unregister() + + # magictoken.raise_exception.begin + @heavydb('int32(TableFunctionManager, Column, OutputColumn)') + def udtf_copy(mgr, inp, out): + size = len(inp) + if size > 4: + # error message must be known at compile-time + return mgr.error_message('TableFunctionManager error_message!') + + mgr.set_output_row_size(size) + for i in range(size): + out[i] = inp[i] + return size + # magictoken.raise_exception.end + + # magictoken.raise_exception.sql.begin + query = ''' + SELECT * FROM TABLE(udtf_copy( + cursor(SELECT * FROM TABLE(generate_series(1, 5, 1))) + )); + ''' + + with pytest.raises(HeavyDBServerError) as exc: + heavydb.sql_execute(query) + exc_msg = ('Error executing table function: 
TableFunctionManager ' + 'error_message!') + assert exc.match(exc_msg) + # magictoken.raise_exception.sql.end + + +def test_devices(heavydb): + heavydb.unregister() + + # magictoken.devices.begin + @heavydb('int32(int32, int32)', devices=['CPU', 'GPU']) + def add(a, b): + return a + b + # magictoken.devices.end + + # magictoken.devices.sql.begin + _, result = heavydb.sql_execute('SELECT add(-3, 3);') + assert list(result) == [(0,)] + # magictoken.devices.sql.end + + +def test_templates(heavydb): + heavydb.unregister() + + # magictoken.templates.begin + @heavydb('Z(T, Z)', T=['int32', 'float'], Z=['int64', 'double']) + def add(a, b): + return a + b + # magictoken.templates.end + heavydb.register() + + # magictoken.templates.sql.begin + if heavydb.version[:2] >= (6, 2): + assert heavydb.function_names(runtime_only=True) == ['add'] + assert len(heavydb.function_details('add')) == 4 + assert add(2, 3).execute() == 5 + # magictoken.templates.sql.end + + +def test_udf(heavydb): + heavydb.unregister() + + # magictoken.udf.begin + @heavydb('int64(int64)') + def incr(a): + return a + 1 + # magictoken.udf.end + + # magictoken.udf.sql.begin + _, result = heavydb.sql_execute('SELECT incr(3)') + assert list(result) == [(4,)] + # magictoken.udf.sql.end + + heavydb.unregister() + + # magictoken.udf.multiple_signatures.begin + @heavydb('int64(int64, int64)', 'float64(float64, float64)') + def multiply(a, b): + return a * b + # magictoken.udf.multiple_signatures.end + + # magictoken.udf.multiple_signatures.sql.begin + _, result = heavydb.sql_execute('SELECT multiply(3.0, 2.0)') + assert list(result) == [(6.0,)] + # magictoken.udf.multiple_signatures.sql.end + + +def test_udtf(heavydb): + heavydb.unregister() + + # magictoken.udtf.begin + @heavydb('int32(TableFunctionManager, Column, OutputColumn)') + def my_copy(mgr, inp, out): + size = len(inp) + mgr.set_output_row_size(size) + for i in range(size): + out[i] = inp[i] + return size + # magictoken.udtf.end + + # 
magictoken.udtf.sql.begin
+    query = '''
+        SELECT * FROM TABLE(my_copy(
+            cursor(SELECT * FROM TABLE(generate_series(1, 5, 1)))
+        ));
+    '''
+    _, result = heavydb.sql_execute(query)
+    assert list(result) == [(1,), (2,), (3,), (4,), (5,)]
+    # magictoken.udtf.sql.end
+
+
+def test_udf_text_copy(heavydb):
+    """Register ``text_copy`` and check the rows of ``select text_copy(t1)``.
+
+    The UDF reads a string through ``RowFunctionManager.get_string`` and
+    returns it via ``get_or_add_transient``. Skipped on servers older than
+    6.3 instead of silently passing without running any assertion.
+    """
+    heavydb.unregister()
+    if heavydb.version[:2] >= (6, 3):
+        # magictoken.udf.text.dict.begin
+        # Requires HeavyDB server v6.3 or newer
+        @heavydb('TextEncodingDict(RowFunctionManager, TextEncodingDict)')
+        def text_copy(mgr, t):
+            db_id: int = mgr.get_dict_db_id('text_copy', 0)
+            dict_id: int = mgr.get_dict_id('text_copy', 0)
+            s: str = mgr.get_string(db_id, dict_id, t)
+            return mgr.get_or_add_transient(
+                mgr.TRANSIENT_DICT_DB_ID,
+                mgr.TRANSIENT_DICT_ID,
+                s)
+        # magictoken.udf.text.dict.end
+
+        table_name = heavydb.table_name + 'text'
+        query = f"select text_copy(t1) from {table_name}"
+        _, r = heavydb.sql_execute(query)
+        assert list(r) == [('fun',), ('bar',), ('foo',), ('barr',), ('foooo',)]
+    else:
+        # Report the unmet requirement rather than passing vacuously.
+        pytest.skip('Requires HeavyDB 6.3 or newer')
+
+
+def test_udf_dict_proxy(heavydb):
+    """Register ``test_string_proxy`` (UDF) and check its query result.
+
+    The UDF obtains a ``StringDictionaryProxy`` from the row function
+    manager and reads the string through ``proxy.get_string``. Skipped on
+    servers older than 6.3 instead of silently passing.
+    """
+    heavydb.unregister()
+    if heavydb.version[:2] >= (6, 3):
+        # magictoken.udf.proxy.begin
+        # Requires HeavyDB server v6.3 or newer
+        from rbc.heavydb import StringDictionaryProxy
+
+        @heavydb('TextEncodingDict(RowFunctionManager, TextEncodingDict)')
+        def test_string_proxy(mgr, t):
+            db_id: int = mgr.get_dict_db_id('test_string_proxy', 0)
+            dict_id: int = mgr.get_dict_id('test_string_proxy', 0)
+            proxy: StringDictionaryProxy = mgr.get_string_dictionary_proxy(db_id, dict_id)
+            s: str = proxy.get_string(t)
+            return mgr.get_or_add_transient(
+                mgr.TRANSIENT_DICT_DB_ID,
+                mgr.TRANSIENT_DICT_ID,
+                s)
+        # magictoken.udf.proxy.end
+
+        table_name = heavydb.table_name + 'text'
+        query = f"select test_string_proxy(t1) from {table_name}"
+        _, r = heavydb.sql_execute(query)
+        assert list(r) == [('fun',), ('bar',), ('foo',), ('barr',), ('foooo',)]
+    else:
+        # Report the unmet requirement rather than passing vacuously.
+        pytest.skip('Requires HeavyDB 6.3 or newer')
+
+
+def test_udtf_string_proxy(heavydb):
+    """Register ``test_string_proxy`` (UDTF) and check the title-cased rows.
+
+    Each input string is read with ``inp.string_dict_proxy.get_string`` and
+    its ``title()`` form is stored through the output column's proxy.
+    """
+    heavydb.unregister()
+
+    if heavydb.version[:2] < (6, 2):
+        # Message now states the same version the guard actually enforces.
+        pytest.skip('Test requires HeavyDB 6.2 or newer')
+
+    # NOTE(review): ``T`` is declared below but does not appear in the
+    # signature string; the ``<...>`` type parameters look like they were
+    # lost in this copy of the patch -- confirm against the repository.
+    # magictoken.udtf.proxy.begin
+    @heavydb('int32(TableFunctionManager, Column, OutputColumn | input_id=args<0>)',
+             T=['TextEncodingDict'])
+    def test_string_proxy(mgr, inp, out):
+        size = len(inp)
+        mgr.set_output_row_size(size)
+        for i in range(size):
+            s = inp.string_dict_proxy.get_string(inp[i])
+            string_id = out.string_dict_proxy.get_or_add_transient(s.title())
+            out[i] = string_id
+        return size
+    # magictoken.udtf.proxy.end
+
+    table_name = heavydb.table_name + 'text'
+    query = f'''
+        SELECT * FROM TABLE(test_string_proxy(
+            cursor(SELECT t1 from {table_name})
+        ))
+    '''
+    _, r = heavydb.sql_execute(query)
+    assert list(r) == [('Fun',), ('Bar',), ('Foo',), ('Barr',), ('Foooo',)]
+
+
+def test_udf_text_duplicate(heavydb):
+    """Register ``text_duplicate`` and check that each string comes back doubled."""
+    heavydb.unregister()
+    # magictoken.udf.text.none.begin
+    from rbc.heavydb import TextEncodingNone
+
+    @heavydb('TextEncodingNone(TextEncodingNone)')
+    def text_duplicate(t):
+        s: str = t.to_string()
+        return TextEncodingNone(s + s)
+    # magictoken.udf.text.none.end
+
+    table_name = heavydb.table_name + 'text'
+    query = f"select text_duplicate(n) from {table_name}"
+    _, r = heavydb.sql_execute(query)
+    assert list(r) == [('funfun',), ('barbar',), ('foofoo',), ('barrbarr',),
+                       ('foooofoooo',)]
+
+
+def test_udf_text_capitalize(heavydb):
+    """Register ``text_capitalize`` and check the ``str.capitalize()`` results."""
+    heavydb.unregister()
+    # magictoken.udf.text.capitalize.begin
+    from rbc.heavydb import TextEncodingNone
+
+    @heavydb('TextEncodingNone(TextEncodingNone)')
+    def text_capitalize(t):
+        s: str = t.to_string()
+        return TextEncodingNone(s.capitalize())
+    # magictoken.udf.text.capitalize.end
+
+    table_name = heavydb.table_name + 'text'
+    query = f"select text_capitalize(n) from {table_name}"
+    _, r = heavydb.sql_execute(query)
+    assert list(r) == [('Fun',), ('Bar',), ('Foo',), ('Barr',),
+                       ('Foooo',)]
+
+
+def test_array(heavydb):
+    """Run the Array how-to snippets: construction, ``len()`` and ``array_api.ones``."""
+    heavydb.unregister()
+    table_name = heavydb.table_name + 'array'
+
+    # magictoken.udf.array.new.begin
+    from rbc.heavydb import Array
+
+    @heavydb('Array(int32)')
+    def arr_new(size):
+        arr = Array(size, dtype='int64')
+        for i in range(size):
+            arr[i] = 1
+        return arr
+    # magictoken.udf.array.new.end
+
+    # magictoken.udf.array.new.sql.begin
+    _, r = heavydb.sql_execute('SELECT arr_new(5);')
+    assert list(r) == [([1, 1, 1, 1, 1],)]
+    # magictoken.udf.array.new.sql.end
+
+    # magictoken.udf.array.length.begin
+    @heavydb('int64(Array)')
+    def my_length(arr):
+        return len(arr)
+    # magictoken.udf.array.length.end
+
+    # magictoken.udf.array.length.sql.begin
+    _, r = heavydb.sql_execute(f'SELECT my_length(i4) from {table_name}')
+    assert list(r) == [(0,), (1,), (2,), (3,), (4,)]
+    # magictoken.udf.array.length.sql.end
+
+    # magictoken.udf.array.array_api.begin
+    from rbc.stdlib import array_api
+
+    @heavydb('Array(int32)')
+    def arr_new_ones(sz):
+        return array_api.ones(sz, dtype='int64')
+    # magictoken.udf.array.array_api.end
+
+    # magictoken.udf.array.array_api.sql.begin
+    _, r = heavydb.sql_execute('SELECT arr_new_ones(5);')
+    assert list(r) == [([1, 1, 1, 1, 1],)]
+    # magictoken.udf.array.array_api.sql.end
+
+
+def test_tablefunctionmanager(heavydb):
+    """Register ``table_copy`` and check it echoes ``generate_series(0, 4, 1)``."""
+    heavydb.unregister()
+
+    # magictoken.udtf.mgr.basic.begin
+    @heavydb('int32(TableFunctionManager, Column, OutputColumn)')
+    def table_copy(mgr, inp, out):
+        size = len(inp)
+        mgr.set_output_row_size(size)
+        for i in range(size):
+            out[i] = inp[i]
+        return size
+    # magictoken.udtf.mgr.basic.end
+
+    # magictoken.udtf.mgr.basic.sql.begin
+    query = '''
+        SELECT * FROM TABLE(table_copy(
+            cursor(SELECT * FROM TABLE(generate_series(0, 4, 1)))
+        ))
+    '''
+    _, r = heavydb.sql_execute(query)
+    assert list(r) == [(0,), (1,), (2,), (3,), (4,)]
+    # magictoken.udtf.mgr.basic.sql.end
+
+
+def test_rowfunctionmanager(heavydb):
+    """Register ``concat`` and check every result carries the ``'test: '`` prefix.
+
+    Skipped on servers older than 6.4 instead of silently passing without
+    running any assertion.
+    """
+    heavydb.unregister()
+    table = heavydb.table_name + 'text'
+
+    if heavydb.version[:2] >= (6, 4):
+        # magictoken.udf.mgr.basic.begin
+        @heavydb('TextEncodingDict(RowFunctionManager, TextEncodingDict)')
+        def concat(mgr, text):
+            db_id: int = mgr.get_dict_db_id('concat', 0)
+            dict_id: int = mgr.get_dict_id('concat', 0)
+            s: str = mgr.get_string(db_id, dict_id, text)
+            s_concat = 'test: ' + s
+            return mgr.get_or_add_transient(mgr.TRANSIENT_DICT_DB_ID,
+                                            mgr.TRANSIENT_DICT_ID,
+                                            s_concat)
+        # magictoken.udf.mgr.basic.end
+
+        # magictoken.udf.mgr.basic.sql.begin
+        query = f'''
+            SELECT concat(t1) FROM {table}
+        '''
+        _, r = heavydb.sql_execute(query)
+        assert list(r) == [('test: fun',), ('test: bar',), ('test: foo',),
+                           ('test: barr',), ('test: foooo',)]
+        # magictoken.udf.mgr.basic.sql.end
+    else:
+        # Report the unmet requirement rather than passing vacuously.
+        pytest.skip('Requires HeavyDB 6.4 or newer')
+
+
+def test_column_power(heavydb):
+    """Register ``udtf_power`` and check the cubes of ``generate_series(1, 5, 1)``."""
+    heavydb.unregister()
+
+    # magictoken.udtf.column.basic.begin
+    import numpy as np
+
+    @heavydb('int32(TableFunctionManager, Column, T, OutputColumn)',
+             T=['int64'])
+    def udtf_power(mgr, inp, exp, out):
+        size = len(inp)
+        mgr.set_output_row_size(size)
+        for i in range(size):
+            out[i] = np.power(inp[i], exp)
+        return size
+    # magictoken.udtf.column.basic.end
+
+    # magictoken.udtf.column.basic.sql.begin
+    query = '''
+        SELECT * FROM TABLE(udtf_power(
+            cursor(SELECT * FROM TABLE(generate_series(1, 5, 1))),
+            3
+        ))
+    '''
+    _, r = heavydb.sql_execute(query)
+    assert list(r) == [(1,), (8,), (27,), (64,), (125,)]
+    # magictoken.udtf.column.basic.sql.end
+
+
+def test_geopoint(heavydb: RemoteHeavyDB):
+    """Register ``generate_geo`` and check the ``POINT (i i)`` rows it produces."""
+    if heavydb.version[:2] < (7, 0):
+        pytest.skip('Requires HeavyDB 7.0 or newer')
+
+    heavydb.unregister()
+
+    # magictoken.udtf.geopoint.basic.begin
+    from rbc.heavydb import Point2D
+
+    @heavydb('int32(TableFunctionManager, int64 size, OutputColumn)',
+             Z=['GeoPoint'])
+    def generate_geo(mgr, size, out):
+        mgr.set_output_row_size(size)
+        for i in range(size):
+            out[i] = Point2D(i, i)
+        return size
+    # magictoken.udtf.geopoint.basic.end
+
+    # magictoken.udtf.geopoint.basic.sql.begin
+    query = '''
+        SELECT * FROM TABLE(
+            generate_geo(5)
+        );
+    '''
+    _, r = heavydb.sql_execute(query)
+    r = list(r)
+    assert r == [('POINT (0 0)',), ('POINT (1 1)',), ('POINT (2 2)',),
+                 ('POINT (3 3)',), ('POINT (4 4)',)]
+    # magictoken.udtf.geopoint.basic.sql.end
+
+
+def test_geomultipoint(heavydb: RemoteHeavyDB):
+    """Register ``generate_geo`` and check the ``MULTIPOINT`` rows it produces."""
+    if heavydb.version[:2] < (7, 0):
+        pytest.skip('Requires HeavyDB 7.0 or newer')
+
+    heavydb.unregister()
+
+    # magictoken.udtf.mp.basic.begin
+    @heavydb('int32(TableFunctionManager, int64 size, OutputColumn)',
+             Z=['GeoMultiPoint'])
+    def generate_geo(mgr, size, out):
+        mgr.set_output_item_values_total_number(0, size * 4)
+        mgr.set_output_row_size(size)
+        for i in range(size):
+            coords = [i + 1.0, i + 2.0, i + 3.0, i + 4.0]
+            out[i].from_coords(coords)
+        return size
+    # magictoken.udtf.mp.basic.end
+
+    # magictoken.udtf.mp.basic.sql.begin
+    query = '''
+        SELECT * FROM TABLE(
+            generate_geo(3)
+        );
+    '''
+    _, r = heavydb.sql_execute(query)
+    r = list(r)
+    assert r == [('MULTIPOINT (1 2,3 4)',), ('MULTIPOINT (2 3,4 5)',),
+                 ('MULTIPOINT (3 4,5 6)',)]
+    # magictoken.udtf.mp.basic.sql.end
diff --git a/test_requirements.txt b/test_requirements.txt
index 6049decf..85a2a9d1 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -5,6 +5,8 @@ thriftpy2
 netifaces
 clang==11.1.0
 furo
+myst-parser
+sphinx-design
 packaging
 setuptools
 urllib3<2