From d679248c7d420b76720f3e5650eaf4b1d8f796cd Mon Sep 17 00:00:00 2001
From: Peter M E Frandsen <petermfrandsen@gmail.com>
Date: Sun, 2 Apr 2023 22:14:51 +0200
Subject: [PATCH] feat(clean-install): add feature that enables a clean install
 of documentation by deleting all children of the specified parent

---
 README.md                     |  11 +++
 md2cf/__main__.py             |  35 +++++++
 md2cf/api.py                  |  94 ++++++++++++++++++
 tests/unit/test_confluence.py | 176 ++++++++++++++++++++++++++++++++++
 4 files changed, 316 insertions(+)
diff --git a/README.md b/README.md
index 03a6638..a3f57fb 100644
--- a/README.md
+++ b/README.md
@@ -284,6 +284,17 @@ folderA/
 ```
 </details>
 
+## Clean Confluence before publishing pages
+
+`md2cf` can delete pages in Confluence.  
+By using the `--clean-install` flag, `md2cf` will permanently delete pages in Confluence.  
+The pages deleted are all subpages of the parent specified with the `--parent-id` or `--parent-title` flag.  
+If no parent is specified then the space `homepage` will be used.
+
+> :warning: Use with caution!  
+> :warning: When `--clean-install` is passed to `md2cf`, the pages are purged before any upload is attempted, and the purge also runs when there are no pages to upload.
+
+
 ## Terminal output format
 
 By default, `md2cf` produces rich output with animated progress bars that are meant for human consumption. If the output is redirected to a file, the progress bars will not be displayed and only the final result will be written to the file. Error messages are always printed to standard error.
diff --git a/md2cf/__main__.py b/md2cf/__main__.py
index 591e83d..3499ee5 100644
--- a/md2cf/__main__.py
+++ b/md2cf/__main__.py
@@ -249,6 +249,11 @@ def get_parser():
         help="markdown files or directories to upload to Confluence. Empty for stdin",
         nargs="*",
     )
+    parser.add_argument(
+        "--clean-install",
+        action="store_true",
+        help="Deletes all pages with the parent specified. If no parent then parent will be set to the space homepage",
+    )
 
     return parser
 
@@ -308,6 +313,11 @@ def main():
         )
         sys.exit(1)
 
+    if args.parent_title is not None and args.parent_id is not None:
+        # Conflicting parent selectors are fatal, so exit after reporting them
+        error_console.log(":x: Parent Title and Parent page ID cannot both be specified on the command line ")
+        sys.exit(1)
+
     pages_to_upload = collect_pages_to_upload(args)
 
     page_title_counts = Counter([page.title for page in pages_to_upload])
@@ -347,6 +357,10 @@ def main():
                 )
                 sys.exit(1)
 
+    if args.clean_install:
+        # Runs even when no files were found, i.e. len(pages_to_upload) == 0
+        perform_clean_install(confluence, args)
+
     preface_markup = ""
     if args.preface_markdown:
         preface_markup = md2cf.document.parse_page([args.preface_markdown]).body
@@ -722,6 +736,27 @@ def collect_pages_to_upload(args):
 
     return pages_to_upload
 
+def perform_clean_install(confluence, args):
+    """Purge all descendants of the configured parent, or only list them when args.dry_run is set."""
+    # A parent title that matches no page yields None; treat that as nothing to purge
+    pages_to_purge = get_page_descendants(confluence, args) or []
+    console.log("Number of pages set to be purged before deployment: ", len(pages_to_purge))
+    if args.dry_run:
+        console.log("Pages to be purged: ", ", ".join([page['title'] for page in pages_to_purge]))
+        return
+    for page_to_purge in pages_to_purge:
+        confluence.purge_page(page_to_purge)
+        console.log("-", page_to_purge.title, "- has been purged")
+
+def get_page_descendants(confluence, args):
+    """Fetch the descendants of the parent title, the parent id, or else the space homepage."""
+    lookup = {"space_key": args.space}
+    if args.parent_title is not None:
+        # A title, when given, wins over an id
+        lookup["title"] = args.parent_title
+    elif args.parent_id is not None:
+        lookup["page_id"] = args.parent_id
+    return confluence.get_content_descendant(**lookup)
 
 if __name__ == "__main__":
     main()
diff --git a/md2cf/api.py b/md2cf/api.py
index 832b0df..98089e9 100644
--- a/md2cf/api.py
+++ b/md2cf/api.py
@@ -75,6 +75,9 @@ def _post(self, path, **kwargs):
     def _put(self, path, **kwargs):
         return self._request("PUT", path, **kwargs)
 
+    def _delete(self, path, **kwargs):  # calls self.api.request directly, unlike _put which uses self._request
+        return self.api.request("DELETE", urljoin(self.host, path), **kwargs)
+
     def get_page(
         self,
         title=None,
@@ -184,6 +187,7 @@ def update_page(
         update_message=None,
         labels=None,
         minor_edit=False,
+        status=None
     ):
         update_structure = {
             "version": {
@@ -208,6 +212,13 @@ def update_page(
                 "labels": [{"name": label, "prefix": "global"} for label in labels]
             }
 
+        if status is not None:
+            # Reject anything that is not one of the statuses named in the error below
+            if status not in ('current', 'trashed', 'deleted', 'historical', 'draft'):
+                raise ValueError(
+                    "Status has to be either current, trashed, deleted, historical or draft")
+            update_structure["status"] = status
+
         return self._put(f"content/{page.id}", json=update_structure)
 
     def get_attachment(self, confluence_page, name):
@@ -256,3 +267,86 @@ def get_space(self, space, additional_expansions=None):
         if additional_expansions is not None:
             params = {"expand": ",".join(additional_expansions)}
         return self._get(f"space/{space}", params=params)
+
+    def get_content_descendant(
+        self,
+        title=None,
+        space_key=None,
+        page_id=None,
+        content_type="page",
+        additional_expansions=None,
+    ):
+        """
+        Gets all descendants of a page, following paginated results
+
+        The parent is page_id, else the first page matching title, else the space homepage.
+
+        Args:
+            title (str): the title of the parent page
+            space_key (str): the Confluence space of the parent page
+            page_id (str or int): the ID of the parent page
+            content_type (str): Content type. Default value: page.
+              Valid values: page, blogpost.
+            additional_expansions (list of str): Additional expansions that should be
+              made when calling the api
+
+        Returns:
+            The list of descendants, or None if no page matches the title
+
+        Raises:
+            ValueError: if title, page_id and space_key are all None
+
+        """
+        if page_id is not None:
+            params = None
+            if additional_expansions is not None:
+                params = {"expand": ",".join(additional_expansions)}
+            response = self._get(
+                f"content/{page_id}/descendant/{content_type}", params=params)
+            results = response.results
+            # Keep following the "next" link for as long as the API provides one
+            while hasattr(response, '_links') and hasattr(response._links, 'next'):
+                response = self._get(response._links.next.replace(
+                    '/rest/api/', ''), params=params)
+                results.extend(response.results)
+            return results
+        if title is not None:
+            params = {"title": title, "type": content_type}
+            if space_key is not None:
+                params["spaceKey"] = space_key
+            matches = self._get("content", params=params).results
+            if not matches:
+                return None
+            parent_id = matches[0].id
+        elif space_key is not None:
+            parent_id = self._get(
+                f"space/{space_key}")._expandable.homepage.replace('/rest/api/content/', '')
+        else:
+            raise ValueError(
+                "At least one of title or page_id or space_key must not be None")
+        # content_type and expansions apply to the resolved parent as well
+        return self.get_content_descendant(
+            page_id=parent_id, content_type=content_type,
+            additional_expansions=additional_expansions)
+
+    def purge_page(
+        self,
+        page=None
+    ):
+        """
+        Purges a page: marks it as trashed with an empty body, then sends a DELETE for it
+
+        Args:
+            page (page): the page to be purged; only its id is read
+
+        Returns:
+            The response from the API for the DELETE request
+
+        Raises:
+            ValueError: if page is None
+
+        """
+        if page is None:
+            raise ValueError("Page cannot be None")
+        self.update_page(page=self.get_page(page_id=page.id), body="", status="trashed")
+        return self._delete(f"content/{page.id}", params={"status": "trashed"})
diff --git a/tests/unit/test_confluence.py b/tests/unit/test_confluence.py
index 7f320b6..8bb77cd 100644
--- a/tests/unit/test_confluence.py
+++ b/tests/unit/test_confluence.py
@@ -309,6 +309,60 @@ def test_update_page(confluence, requests_mock):
 
     assert page == updated_page
 
+def test_update_page_with_status(confluence, requests_mock):
+    """update_page sends the requested status as part of the PUT payload."""
+    page_id = 12345
+    title = "This is a title"
+    version = 1
+    status = "trashed"
+    new_body = "<p>This is my new body</p>"
+
+    existing_page = bunchify(
+        {"id": page_id, "title": title, "version": {"number": version}}
+    )
+
+    expected_payload = {
+        "version": {"number": version + 1, "minorEdit": False},
+        "title": title,
+        "type": "page",
+        "body": {"storage": {"value": new_body, "representation": "storage"}},
+        "status": status,
+    }
+
+    def payload_matches(request):
+        # Match only requests whose JSON body equals the expected payload
+        return request.json() == expected_payload
+
+    api_response = {"test": 1}
+    requests_mock.put(
+        TEST_HOST + f"content/{page_id}",
+        complete_qs=True,
+        json=api_response,
+        additional_matcher=payload_matches,
+    )
+
+    result = confluence.update_page(existing_page, body=new_body, status=status)
+
+    assert result == api_response
+
+def test_update_page_with_wrong_status(confluence, requests_mock):
+    """update_page rejects a status outside the accepted set with ValueError."""
+    page_id = 12345
+    title = "This is a title"
+    version = 1
+    invalid_status = "I do not exist"
+    new_body = "<p>This is my new body</p>"
+
+    existing_page = bunchify(
+        {
+            "id": page_id,
+            "title": title,
+            "version": {"number": version},
+        }
+    )
+
+    with pytest.raises(ValueError):
+        confluence.update_page(existing_page, body=new_body, status=invalid_status)
 
 def test_update_page_with_message(confluence, requests_mock):
     test_page_id = 12345
@@ -386,3 +440,125 @@ def test_update_attachment(mocker, confluence, requests_mock):
     response = confluence.update_attachment(test_page, test_fp, test_attachment)
 
     assert response == test_response
+
+def test_get_content_descendant_with_page_id(confluence, requests_mock):
+    """A page id is queried directly and the "results" list is returned."""
+    page_id = 12345
+    api_response = {"results": [{"some_stuff": 1}]}
+    descendant_url = TEST_HOST + f"content/{page_id}/descendant/page"
+    requests_mock.get(descendant_url, json=api_response)
+
+    descendants = confluence.get_content_descendant(page_id=page_id)
+    assert descendants == bunchify(api_response["results"])
+
+def test_get_content_descendant_with_page_id_and_next_link(confluence, requests_mock):
+    """Results from the page behind the "next" link are appended to the first page."""
+    page_id = 12345
+    first_url = TEST_HOST + f"content/{page_id}/descendant/page"
+    next_url = TEST_HOST + "/next-api-call"
+
+    first_response = {
+        "results": [{"id": 1}],
+        "_links": {"next": next_url},
+    }
+    # No "_links" entry here, so this is the last page
+    second_response = {"results": [{"id": 2}]}
+
+    requests_mock.get(first_url, json=first_response)
+    requests_mock.get(next_url, json=second_response)
+
+    descendants = confluence.get_content_descendant(page_id=page_id)
+
+    # Both pages of results, in request order
+    assert descendants == bunchify([{"id": 1}, {"id": 2}])
+
+def test_get_content_descendant_with_title(confluence, requests_mock):
+    """A title is first resolved to a page id through a content search."""
+    title = "test title"
+    page_id = 12345
+    search_response = {"results": [{"id": page_id}]}
+    descendant_response = {"results": [{"some_stuff": 1}]}
+
+    requests_mock.get(
+        TEST_HOST + f"content?title={title}&type=page",
+        complete_qs=True,
+        json=search_response,
+    )
+    descendant_url = TEST_HOST + f"content/{page_id}/descendant/page"
+    requests_mock.get(descendant_url, json=descendant_response)
+    descendants = confluence.get_content_descendant(title=title)
+    assert descendants == bunchify(descendant_response["results"])
+
+def test_get_content_descendant_without_page_id_or_title(confluence, requests_mock):
+    """With only a space key, the descendants of the space homepage are returned."""
+    space_key = "ABC"
+    homepage_id = 54321
+    # The homepage id is read from the path under "_expandable.homepage"
+    space_response = {
+        "_expandable": {"homepage": f"/rest/api/content/{homepage_id}"},
+    }
+    descendant_response = {"results": [{"some_stuff": 1}]}
+
+    requests_mock.get(
+        TEST_HOST + f"space/{space_key}", complete_qs=True, json=space_response
+    )
+    descendant_url = TEST_HOST + f"content/{homepage_id}/descendant/page"
+    requests_mock.get(descendant_url, json=descendant_response)
+
+    descendants = confluence.get_content_descendant(space_key=space_key)
+
+    assert descendants == bunchify(descendant_response["results"])
+
+def test_get_content_descendant_without_any_parameters(confluence):
+    with pytest.raises(ValueError):  # no title, page_id or space_key to resolve a parent from
+        confluence.get_content_descendant()
+
+def test_purge_page(confluence, requests_mock):
+    """purge_page trashes the page via PUT and then deletes the trashed page."""
+    page_id = 12345
+    title = "This is a title"
+    version = 1
+    page_url = TEST_HOST + f"content/{page_id}"
+
+    # What get_page returns for the page id; the PUT below expects version + 1
+    fetched_page = {
+        "id": page_id,
+        "title": title,
+        "version": {"number": version},
+    }
+
+    # The page as handed to purge_page: only its id is used
+    page_to_purge = bunchify(
+        {
+            "id": page_id,
+            "title": title,
+            "_expandable": {"body": ""},
+        }
+    )
+
+    trash_payload = {
+        "version": {
+            "number": version + 1,
+            "minorEdit": False,
+        },
+        "title": title,
+        "type": "page",
+        "body": {"storage": {"value": "", "representation": "storage"}},
+        "status": "trashed",
+    }
+
+    requests_mock.get(page_url, json=fetched_page)
+    requests_mock.put(
+        page_url,
+        json=trash_payload,
+        additional_matcher=lambda request: request.json() == trash_payload,
+    )
+    # The final DELETE has to carry status=trashed, and nothing else, in its query
+    requests_mock.delete(page_url + "?status=trashed", complete_qs=True)
+
+    response = confluence.purge_page(page_to_purge)
+    assert response.status_code == 200
+
+def test_purge_page_without_any_parameters(confluence):
+    with pytest.raises(ValueError):  # page defaults to None, which purge_page rejects
+        confluence.purge_page()