pointable-ai · scottwey · Oct 26, 2023 · Oct 25, 2023 · Oct 25, 2023 · Oct 26, 2023
diff --git a/python/README.md b/python/README.md
@@ -15,7 +15,7 @@ client = Client(api_key="YOUR_API_KEY_HERE")
 
 documents = [
   {
-    "embedding": [0.1, 0.2, 0.3, 0.4, 0.5],
+    "embeddings": [0.1, 0.2, 0.3, 0.4, 0.5],
     "metadata": {
       "label1": "0",
       "label2": "1",
@@ -31,11 +31,13 @@ client.insert(documents=documents, collection_name="COLLECTION_NAME")
 ## Contributing
 
 Make sure you have installed dev requirements
+
 ```
 pip install -r dev-requirements.txt
 ```
 
 Unit tests should be passing. You can run them via
+
 ```
 pytest ./tests
 ```
diff --git a/python/starpoint/db.py b/python/starpoint/db.py
@@ -8,6 +8,7 @@
 import validators
 
 from starpoint import reader, writer, _utils
+from starpoint.embedding import Embedding
 
 LOGGER = logging.getLogger(__name__)
 
@@ -56,42 +57,6 @@ def delete(
             collection_name=collection_name,
         )
 
-    def column_delete(
-        self,
-        embeddings: List[List[float]],
-        document_metadatas: List[Dict[Any, Any]],
-        collection_id: Optional[str] = None,
-        collection_name: Optional[str] = None,
-    ) -> Dict[Any, Any]:
-        """Deletes documents from an existing collection by embedding and document metadata arrays.
-        The arrays are zipped together and updates the document in the order of the two arrays.
-        `column_delete()` method from [`Writer`](#writer-objects).
-
-        Args:
-            embeddings: A list of embeddings.
-                Order of the embeddings should match the document_metadatas.
-            document_metadatas: A list of metadata to be associated with embeddings.
-                Order of these metadatas should match the embeddings.
-            collection_id: The collection's id where the documents will be deleted.
-                This or the `collection_name` needs to be provided.
-            collection_name: The collection's name where the documents will be deleted.
-                This or the `collection_id` needs to be provided.
-
-        Returns:
-            dict: delete response json
-
-        Raises:
-            ValueError: If neither collection id and collection name are provided.
-            ValueError: If both collection id and collection name are provided.
-            requests.exceptions.SSLError: Failure likely due to network issues.
-        """
-        return self.writer.column_delete(
-            embeddings=embeddings,
-            document_metadatas=document_metadatas,
-            collection_id=collection_id,
-            collection_name=collection_name,
-        )
-
     def insert(
         self,
         documents: List[Dict[Any, Any]],
@@ -123,7 +88,7 @@ def insert(
 
     def column_insert(
         self,
-        embeddings: List[List[float]],
+        embeddings: List[Embedding],
         document_metadatas: List[Dict[Any, Any]],
         collection_id: Optional[str] = None,
         collection_name: Optional[str] = None,
@@ -162,7 +127,7 @@ def query(
         sql: Optional[str] = None,
         collection_id: Optional[str] = None,
         collection_name: Optional[str] = None,
-        query_embedding: Optional[List[float]] = None,
+        query_embedding: Optional[List[float] | Embedding] = None,
         params: Optional[List[Any]] = None,
         text_search_query: Optional[List[str]] = None,
         text_search_weight: Optional[float] = None,
@@ -188,11 +153,18 @@ def query(
             ValueError: If both collection id and collection name are provided.
             requests.exceptions.SSLError: Failure likely due to network issues.
         """
+
+        # A plain list of floats is wrapped in an Embedding (constructor takes `values` / `dimensionality`).
+        if isinstance(query_embedding, list):
+            query_embedding = Embedding(
+                values=query_embedding,
+                dimensionality=len(query_embedding))
+
         return self.reader.query(
             sql=sql,
             collection_id=collection_id,
             collection_name=collection_name,
-            query_embedding=query_embedding,
+            query_embeddings=query_embedding,
             params=params,
             text_search_query=text_search_query,
             text_search_weight=text_search_weight,
@@ -259,7 +231,7 @@ def update(
 
     def column_update(
         self,
-        embeddings: List[List[float]],
+        embeddings: List[Embedding],
         document_metadatas: List[Dict[Any, Any]],
         collection_id: Optional[str] = None,
         collection_name: Optional[str] = None,

diff --git a/python/starpoint/embedding.py b/python/starpoint/embedding.py
@@ -30,6 +30,15 @@
 )
 
 
+class Embedding(object):  # An embedding vector bundled with its dimensionality.
+    values: List[float]  # the vector's components, stored as passed in
+    dimensionality: int  # len(values) unless given explicitly; not validated against values
+
+    def __init__(self, values: List[float], dimensionality: Optional[int] = None):
+        self.values = values
+        self.dimensionality = len(values) if dimensionality is None else dimensionality
+
+
 class EmbeddingModel(Enum):
     MINILM = "MINI_LM"
 

diff --git a/python/starpoint/openai.py b/python/starpoint/openai.py
@@ -114,7 +114,7 @@ def build_and_insert_embeddings(
         # Return the embedding response no matter what issues/bugs we might run into in the sdk
         try:
             sorted_embedding_data = sorted(embedding_data, key=lambda x: x["index"])
-            embeddings = map(lambda x: x.get("embedding"), sorted_embedding_data)
+            embeddings = map(lambda x: x.get("embeddings"), sorted_embedding_data)
             starpoint_response = self.starpoint.column_insert(
                 embeddings=embeddings,
                 document_metadatas=document_metadatas,

diff --git a/python/starpoint/pandas.py b/python/starpoint/pandas.py
@@ -8,7 +8,7 @@
 
 LOGGER = logging.getLogger(__name__)
 
-EMBEDDING_COLUMN_NAME = "embedding"
+EMBEDDING_COLUMN_NAME = "embeddings"
 
 TOO_FEW_COLUMN_ERROR = """Not enough columns in dataframe provided. Please make sure to provide a
 column for at least embeddings. For examples of what this should look like visit:
@@ -29,10 +29,10 @@ def _check_column_length(dataframe: pd.DataFrame):
 def _get_aggregate_column_values_from_dataframe(
     dataframe: pd.DataFrame, exclude_column_names: List[str]
 ) -> List[Dict]:
-    """Gets a dataframe of everything except for the "embedding" column then produce
+    """Gets a dataframe of everything except for the "embeddings" column then produce
     a list of row-wise dicts that will be loaded as the metadata. For example:
 
-    df = DataFrame([[1,2,3], [4,5,6]], columns=["embedding","b","c"]
+    df = DataFrame([[1,2,3], [4,5,6]], columns=["embeddings","b","c"])
     metadata_column_values will be [{'b': 2, 'c': 3}, {'b': 5, 'c': 6}]
     """
     if not all((True if name in dataframe else False for name in exclude_column_names)):
@@ -112,27 +112,3 @@ def update_by_dataframe(
             collection_name=collection_name,
         )
 
-    def delete_by_dataframe(
-        self,
-        dataframe: pd.DataFrame,
-        collection_id: Optional[str] = None,
-        collection_name: Optional[str] = None,
-        embedding_column_name: str = EMBEDDING_COLUMN_NAME,
-    ) -> Dict[Any, Any]:
-        _check_column_length(dataframe)
-        embedding_column_values = _get_column_value_from_dataframe(
-            dataframe,
-            embedding_column_name,
-        )
-
-        metadata_column_values = _get_aggregate_column_values_from_dataframe(
-            dataframe,
-            [embedding_column_name],
-        )
-
-        self.starpoint.column_delete(
-            embeddings=embedding_column_values,
-            document_metadatas=metadata_column_values,
-            collection_id=collection_id,
-            collection_name=collection_name,
-        )
diff --git a/python/starpoint/reader.py b/python/starpoint/reader.py
@@ -11,6 +11,7 @@
     _validate_host,
 )
 
+from starpoint.embedding import Embedding
 
 LOGGER = logging.getLogger(__name__)
 
@@ -48,7 +49,7 @@ def query(
         sql: Optional[str] = None,
         collection_id: Optional[str] = None,
         collection_name: Optional[str] = None,
-        query_embedding: Optional[List[float]] = None,
+        query_embeddings: Optional[Embedding] = None,
         params: Optional[List[Any]] = None,
         text_search_query: Optional[List[str]] = None,
         text_search_weight: Optional[float] = None,
@@ -91,7 +92,7 @@ def query(
         request_data = dict(
             collection_id=collection_id,
             collection_name=collection_name,
-            query_embedding=query_embedding,
+            query_embeddings=query_embeddings,
             sql=sql,
             params=params,
             text_search_query=text_search_query,

diff --git a/python/starpoint/writer.py b/python/starpoint/writer.py
@@ -10,6 +10,8 @@
     _validate_host,
 )
 
+from starpoint.embedding import Embedding
+
 LOGGER = logging.getLogger(__name__)
 
 # Host
@@ -100,51 +102,6 @@ def delete(
             return {}
         return response.json()
 
-    def column_delete(
-        self,
-        embeddings: List[List[float]],
-        document_metadatas: List[Dict[Any, Any]],
-        collection_id: Optional[str] = None,
-        collection_name: Optional[str] = None,
-    ) -> Dict[Any, Any]:
-        """Deletes documents from an existing collection by embedding and document metadata arrays.
-        The arrays are zipped together and updates the document in the order of the two arrays.
-
-        Args:
-            embeddings: A list of embeddings.
-                Order of the embeddings should match the document_metadatas.
-            document_metadatas: A list of metadata to be associated with embeddings.
-                Order of these metadatas should match the embeddings.
-            collection_id: The collection's id where the documents will be deleted.
-                This or the `collection_name` needs to be provided.
-            collection_name: The collection's name where the documents will be deleted.
-                This or the `collection_id` needs to be provided.
-
-        Returns:
-            dict: delete response json
-
-        Raises:
-            ValueError: If neither collection id and collection name are provided.
-            ValueError: If both collection id and collection name are provided.
-            requests.exceptions.SSLError: Failure likely due to network issues.
-        """
-        if len(embeddings) != len(document_metadatas):
-            LOGGER.warning(EMBEDDING_METADATA_LENGTH_MISMATCH_WARNING)
-
-        documents = [
-            {
-                "embedding": embedding,
-                "metadata": document_metadata,
-            }
-            for embedding, document_metadata in zip(embeddings, document_metadatas)
-        ]
-
-        return self.delete(
-            documents=documents,
-            collection_id=collection_id,
-            collection_name=collection_name,
-        )
-
     def insert(
         self,
         documents: List[Dict[Any, Any]],
@@ -214,7 +171,7 @@ def insert(
 
     def column_insert(
         self,
-        embeddings: List[List[float]],
+        embeddings: List[Embedding],
         document_metadatas: List[Dict[Any, Any]],
         collection_id: Optional[str] = None,
         collection_name: Optional[str] = None,
@@ -245,7 +202,7 @@ def column_insert(
 
         documents = [
             {
-                "embedding": embedding,
+                "embeddings": embedding,
                 "metadata": document_metadata,
             }
             for embedding, document_metadata in zip(embeddings, document_metadatas)
@@ -325,7 +282,7 @@ def update(
 
     def column_update(
         self,
-        embeddings: List[List[float]],
+        embeddings: List[Embedding],
         document_metadatas: List[Dict[Any, Any]],
         collection_id: Optional[str] = None,
         collection_name: Optional[str] = None,
@@ -356,7 +313,7 @@ def column_update(
 
         documents = [
             {
-                "embedding": embedding,
+                "embeddings": embedding,
                 "metadata": document_metadata,
             }
             for embedding, document_metadata in zip(embeddings, document_metadatas)

diff --git a/python/tests/test_db.py b/python/tests/test_db.py
@@ -1,6 +1,7 @@
 from tempfile import NamedTemporaryFile
 from uuid import uuid4
 from unittest.mock import MagicMock, patch
+from starpoint.embedding import Embedding
 
 import pytest
 from _pytest.monkeypatch import MonkeyPatch
@@ -30,17 +31,6 @@ def test_client_delete(mock_writer: MagicMock, mock_reader: MagicMock):
     mock_writer().delete.assert_called_once()
 
 
-@patch("starpoint.reader.Reader")
-@patch("starpoint.writer.Writer")
-def test_client_column_delete(mock_writer: MagicMock, mock_reader: MagicMock):
-    client = db.Client(api_key=uuid4())
-
-    client.column_delete(embeddings=[1.1], document_metadatas={"mock": "value"})
-
-    mock_reader.assert_called_once()  # Only called during init
-    mock_writer().column_delete.assert_called_once()
-
-
 @patch("starpoint.reader.Reader")
 @patch("starpoint.writer.Writer")
 def test_client_insert(mock_writer: MagicMock, mock_reader: MagicMock):
@@ -57,7 +47,7 @@ def test_client_insert(mock_writer: MagicMock, mock_reader: MagicMock):
 def test_client_column_insert(mock_writer: MagicMock, mock_reader: MagicMock):
     client = db.Client(api_key=uuid4())
 
-    client.column_insert(embeddings=[1.1], document_metadatas={"mock": "value"})
+    client.column_insert(embeddings=[Embedding([1.1])], document_metadatas=[{"mock": "value"}])
 
     mock_reader.assert_called_once()  # Only called during init
     mock_writer().column_insert.assert_called_once()
@@ -101,7 +91,7 @@ def test_client_update(mock_writer: MagicMock, mock_reader: MagicMock):
 def test_client_column_update(mock_writer: MagicMock, mock_reader: MagicMock):
     client = db.Client(api_key=uuid4())
 
-    client.column_update(embeddings=[1.1], document_metadatas={"mock": "value"})
+    client.column_update(embeddings=[Embedding([1.1])], document_metadatas=[{"mock": "value"}])
 
     mock_reader.assert_called_once()  # Only called during init
     mock_writer().column_update.assert_called_once()
diff --git a/python/tests/test_openai.py b/python/tests/test_openai.py
@@ -80,7 +80,7 @@ def test_client_build_and_insert_embeddings_input_string_success(
     expected_embedding_response = {
         "data": [
             {
-                "embedding": mock_embedding,
+                "embeddings": mock_embedding,
                 "index": 0,
             }
         ]
@@ -128,11 +128,11 @@ def test_client_build_and_insert_embeddings_input_list_success(
     expected_embedding_response = {
         "data": [
             {
-                "embedding": 0.77,
+                "embeddings": 0.77,
                 "index": 0,
             },
             {
-                "embedding": 0.88,
+                "embeddings": 0.88,
                 "index": 1,
             },
         ]
@@ -224,7 +224,7 @@ def test_client_build_and_insert_embeddings_exception_during_write(
     expected_embedding_response = {
         "data": [
             {
-                "embedding": 0.77,
+                "embeddings": 0.77,
                 "index": 0,
             }
         ]