From 984c1c88a509462ff2feada43058e919d2770a90 Mon Sep 17 00:00:00 2001
From: Kristian Aune <kraune@verizonmedia.com>
Date: Wed, 9 Aug 2023 11:37:04 +0200
Subject: [PATCH] Generalize, improve flow

---
 docs/sphinx/source/application-packages.ipynb | 161 +++++++++---------
 1 file changed, 82 insertions(+), 79 deletions(-)

diff --git a/docs/sphinx/source/application-packages.ipynb b/docs/sphinx/source/application-packages.ipynb
index 14df3e28..d0427d55 100644
--- a/docs/sphinx/source/application-packages.ipynb
+++ b/docs/sphinx/source/application-packages.ipynb
@@ -11,40 +11,23 @@
     "\n",
     "Vespa is configured using an [application package](https://docs.vespa.ai/en/application-packages.html).\n",
     "Pyvespa provides an API to generate a deployable application package.\n",
+    "An application package has at a minimum a [schema](https://docs.vespa.ai/en/schemas.html)\n",
+    "and [services.xml](https://docs.vespa.ai/en/reference/services.html).\n",
     "\n",
-    "**Note:** Pyvespa does not support all Vespa features.\n",
-    "See the end of this notebook for how to export files to modify the schema and deploy.\n",
+    "> **_NOTE: pyvespa generally does not support all indexing options in Vespa - it is made for easy experimentation._**\n",
+    "  **_To configure an unsupported indexing option (or any other unsupported option),_**\n",
+    "  **_export the application package as shown below, modify the schema or other files_**\n",
+    "  **_and deploy the application package from the directory, or as a zipped file._**\n",
+    "  **_Find more details at the end of this notebook._**\n",
     "\n",
     "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vespa-engine/pyvespa/blob/master/docs/sphinx/source/application-packages.ipynb)\n",
     "\n",
-    "An application package has at a minimum a [schema](https://docs.vespa.ai/en/schemas.html)\n",
-    "and [services.xml](https://docs.vespa.ai/en/reference/services.html).\n",
-    "Example - create an empty application package:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "7e3477a6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from vespa.package import ApplicationPackage\n",
-    "\n",
-    "app_package = ApplicationPackage(name=\"myschema\", create_query_profile_by_default=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ddd40e65",
-   "metadata": {},
-   "source": [
-    "In this notebook, the application package is exported to disk for inspection - example:"
+    "By exporting to disk, one can see the generated files:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 50,
    "id": "956abe16",
    "metadata": {},
    "outputs": [
@@ -52,17 +35,20 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/var/folders/9_/z105jyln7jz8h2vwsrjb7kxh0000gp/T/tmpj_gsm432/services.xml\n",
-      "/var/folders/9_/z105jyln7jz8h2vwsrjb7kxh0000gp/T/tmpj_gsm432/schemas/myschema.sd\n"
+      "/var/folders/9_/z105jyln7jz8h2vwsrjb7kxh0000gp/T/tmp6geo2dpg/services.xml\n",
+      "/var/folders/9_/z105jyln7jz8h2vwsrjb7kxh0000gp/T/tmp6geo2dpg/schemas/myschema.sd\n"
      ]
     }
    ],
    "source": [
     "import os, tempfile\n",
     "from pathlib import Path\n",
+    "from vespa.package import ApplicationPackage\n",
+    "\n",
+    "app_name = \"myschema\"\n",
+    "app_package = ApplicationPackage(name=app_name, create_query_profile_by_default=False)\n",
     "\n",
     "temp_dir = tempfile.TemporaryDirectory()\n",
-    "os.environ[\"TMP_APP_DIR\"] = temp_dir.name\n",
     "app_package.to_files(temp_dir.name)\n",
     "\n",
     "for p in Path(temp_dir.name).rglob('*'):\n",
@@ -70,18 +56,6 @@
     "        print(p)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "ecc580fb",
-   "metadata": {},
-   "source": [
-    "> **_NOTE: pyvespa generally does not support all indexing options in Vespa - it is made for easy experimentation._**\n",
-    "  **_To configure setting an unsupported indexing option (or any other unsupported option),_**\n",
-    "  **_export the application package like above, modify the schema or other files_**\n",
-    "  **_and deploy the application package from the directory, or as a zipped file._**\n",
-    "  **_Find more details at the end of this notebook._**"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "7b01cd09",
@@ -89,12 +63,12 @@
    "source": [
     "## Schema\n",
     "\n",
-    "Use a schema to create fields, fieldsets and a ranking function. Export the empty schema (an empty schema is created, with the same name as the application package):"
+    "A schema is created with the same name as the application package:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 51,
    "id": "923edec8",
    "metadata": {},
    "outputs": [
@@ -110,7 +84,10 @@
     }
    ],
    "source": [
-    "!cat $TMP_APP_DIR/schemas/myschema.sd"
+    "os.environ[\"TMP_APP_DIR\"] = temp_dir.name\n",
+    "os.environ[\"APP_NAME\"] = app_name\n",
+    "\n",
+    "!cat $TMP_APP_DIR/schemas/$APP_NAME.sd"
    ]
   },
   {
@@ -118,12 +95,14 @@
    "id": "5a1cbaf2",
    "metadata": {},
    "source": [
-    "Add fields, a fieldset and a ranking function:"
+    "Configure the schema with [fields](https://docs.vespa.ai/en/schemas.html#field),\n",
+    "[fieldsets](https://docs.vespa.ai/en/schemas.html#fieldset)\n",
+    "and a [ranking function](https://docs.vespa.ai/en/ranking.html):"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 52,
    "id": "c83c1945",
    "metadata": {},
    "outputs": [],
@@ -155,7 +134,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 53,
    "id": "4fcd3de2",
    "metadata": {},
    "outputs": [
@@ -193,19 +172,8 @@
    ],
    "source": [
     "app_package.to_files(temp_dir.name)\n",
-    "!cat $TMP_APP_DIR/schemas/myschema.sd"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cfd73872",
-   "metadata": {},
-   "source": [
-    "Note how the indexing settings are written to the schema. At this point, review the Vespa documentation:\n",
     "\n",
-    "* [field](https://docs.vespa.ai/en/schemas.html#field)\n",
-    "* [fieldset](https://docs.vespa.ai/en/schemas.html#fieldset)\n",
-    "* [rank-profile](https://docs.vespa.ai/en/ranking.html#rank-profiles)"
+    "!cat $TMP_APP_DIR/schemas/$APP_NAME.sd"
    ]
   },
   {
@@ -215,14 +183,14 @@
    "source": [
     "## Services\n",
     "\n",
-    "In `services.xml` you will find a container and content cluster -\n",
+    "`services.xml` configures container and content clusters -\n",
     "see the [Vespa Overview](https://docs.vespa.ai/en/overview.html).\n",
-    "This is a file you will normally not change or need to know much about - dump the default file:"
+    "This is a file you will normally not change or need to know much about:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 54,
    "id": "4abae84e",
    "metadata": {},
    "outputs": [
@@ -260,12 +228,52 @@
    "source": [
     "Observe:\n",
     "\n",
-    "* A content cluster (this is where the index is stored) called `myschema_content` is created.\n",
+    "* A _content cluster_ (this is where the index is stored) called `myschema_content` is created.\n",
     "  This is information not normally needed, unless using\n",
     "  [delete_all_docs](https://pyvespa.readthedocs.io/en/latest/reference-api.html#vespa.application.Vespa.delete_all_docs)\n",
     "  to quickly remove all documents from a schema"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "cc878191",
+   "metadata": {},
+   "source": [
+    "## Deploy\n",
+    "\n",
+    "After completing the code for the fields and ranking, deploy the application into a Docker container -\n",
+    "the container is started by pyvespa:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "419534c6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Waiting for configuration server, 0/300 seconds...\n",
+      "Waiting for configuration server, 5/300 seconds...\n",
+      "Waiting for application status, 0/300 seconds...\n",
+      "Waiting for application status, 5/300 seconds...\n",
+      "Waiting for application status, 10/300 seconds...\n",
+      "Waiting for application status, 15/300 seconds...\n",
+      "Waiting for application status, 20/300 seconds...\n",
+      "Waiting for application status, 25/300 seconds...\n",
+      "Finished deployment.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from vespa.deployment import VespaDocker\n",
+    "\n",
+    "vespa_container = VespaDocker()\n",
+    "vespa_connection = vespa_container.deploy(application_package=app_package)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "8f5c589d",
@@ -273,12 +281,14 @@
    "source": [
     "## Deploy from modified files\n",
     "\n",
+    "To add configuration to the schema that is not supported by the pyvespa code,\n",
+    "export the files, modify, then deploy by using `deploy_from_disk`.\n",
     "This example adds custom configuration to the `services.xml` file above and deploys it:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 56,
    "id": "7995befa",
    "metadata": {},
    "outputs": [],
@@ -287,14 +297,14 @@
     "cat << EOF > $TMP_APP_DIR/services.xml\n",
     "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
     "<services version=\"1.0\">\n",
-    "    <container id=\"myschema_container\" version=\"1.0\">\n",
+    "    <container id=\"${APP_NAME}_container\" version=\"1.0\">\n",
     "        <search></search>\n",
     "        <document-api></document-api>\n",
     "    </container>\n",
-    "    <content id=\"myschema_content\" version=\"1.0\">\n",
+    "    <content id=\"${APP_NAME}_content\" version=\"1.0\">\n",
     "        <redundancy reply-after=\"1\">1</redundancy>\n",
     "        <documents>\n",
-    "            <document type=\"myschema\" mode=\"index\"></document>\n",
+    "            <document type=\"${APP_NAME}\" mode=\"index\"></document>\n",
     "        </documents>\n",
     "        <nodes>\n",
     "            <node distribution-key=\"0\" hostalias=\"node1\"></node>\n",
@@ -321,7 +331,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 57,
    "id": "9794e561",
    "metadata": {},
    "outputs": [
@@ -333,19 +343,12 @@
       "Waiting for configuration server, 5/300 seconds...\n",
       "Waiting for application status, 0/300 seconds...\n",
       "Waiting for application status, 5/300 seconds...\n",
-      "Waiting for application status, 10/300 seconds...\n",
-      "Waiting for application status, 15/300 seconds...\n",
-      "Waiting for application status, 20/300 seconds...\n",
-      "Waiting for application status, 25/300 seconds...\n",
       "Finished deployment.\n"
      ]
     }
    ],
    "source": [
-    "from vespa.deployment import VespaDocker\n",
-    "\n",
-    "vespa_container = VespaDocker()\n",
-    "vespa_connection = vespa_container.deploy_from_disk(application_name=\"myapp\", application_root=temp_dir.name)"
+    "vespa_connection = vespa_container.deploy_from_disk(application_name=app_name, application_root=temp_dir.name)"
    ]
   },
   {
@@ -358,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 58,
    "id": "346f3cce",
    "metadata": {},
    "outputs": [
@@ -366,7 +369,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/var/folders/9_/z105jyln7jz8h2vwsrjb7kxh0000gp/T/tmpj_gsm432/zip/application.zip\r\n"
+      "/var/folders/9_/z105jyln7jz8h2vwsrjb7kxh0000gp/T/tmp6geo2dpg/zip/application.zip\r\n"
      ]
     }
    ],
@@ -389,7 +392,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 59,
    "id": "84ce16e8",
    "metadata": {},
    "outputs": [],