iamjackg · petermefrandsen · Apr 2, 2023 · iamjackg · Apr 9, 2023 · iamjackg
diff --git a/README.md b/README.md
@@ -284,6 +284,18 @@ folderA/
 ```
 </details>
 
+## Clean confluence before publishing pages
+
+`md2cf` can delete pages in Confluence.  
+By using the `--clean-install` flag, `md2cf` will permenantly delete pages in Confluence that does not have the same title as the one(s) provided either through folder or a specific file.  
+The pages targeeted for deletion are all subpages of the parent specified using the `--parent-id` or `--parent-title` flags.  
+If no parent is specified then the space `homepage` will be used.
+
+> :warning: Use with caution!  
+> :warning: When `clean-install` is passed to `md2cf` the pages will be purged before attempting to upload pages, and it will also run if no pages are to be uploaded
+> :warning: If a single file is provided alongside this flag, then only one page will remain 
+
+
 ## Terminal output format
 
 By default, `md2cf` produces rich output with animated progress bars that are meant for human consumption. If the output is redirected to a file, the progress bars will not be displayed and only the final result will be written to the file. Error messages are always printed to standard error.

diff --git a/md2cf/__main__.py b/md2cf/__main__.py
@@ -249,6 +249,11 @@ def get_parser():
         help="markdown files or directories to upload to Confluence. Empty for stdin",
         nargs="*",
     )
+    parser.add_argument(
+        "--clean-install",
+        action="store_true",
+        help="Deletes all pages in confluence with the specified parent that is not the file or in the folder specified. If no parent then parent will be set to the space homepage",
+    )
 
     return parser
 
@@ -308,6 +313,11 @@ def main():
         )
         sys.exit(1)
 
+    if args.parent_title is not None and args.parent_id is not None:
 parent_group = page_group.add_mutually_exclusive_group() 
 parent_group.add_argument( 
     "-a", 
     "--parent-title", 
     help="title of the parent page under which the new page will be uploaded", 
 ) 
 parent_group.add_argument( 
     "-A", 
     "--parent-id", 
     help="ID of the parent page under which the new page will be uploaded", 
 ) 
 parent_group.add_argument( 
     "--top-level", 
     action="store_true", 
     help="upload the page tree starting from the top level (no top level parent)", 
 ) 
 page_group.add_argument( 
     "-t", 
     "--title", 
     help="a title for the page. Determined from the document if missing", 
 ) 
 parent_group = page_group.add_mutually_exclusive_group() 
 parent_group.add_argument( 
     "-a", 
     "--parent-title", 
     help="title of the parent page under which the new page will be uploaded", 
 ) 
 parent_group.add_argument( 
     "-A", 
     "--parent-id", 
     help="ID of the parent page under which the new page will be uploaded", 
 ) 
 parent_group.add_argument( 
     "--top-level", 
     action="store_true", 
     help="upload the page tree starting from the top level (no top level parent)", 
 ) 
  
 page_group.add_argument( 
     "-t", 
     "--title", 
     help="a title for the page. Determined from the document if missing", 
 ) 
+        error_console.log(
+            ":x: Parent Title and Parent page ID cannot both be specified on the command line "
+        )
+
     pages_to_upload = collect_pages_to_upload(args)
 
     page_title_counts = Counter([page.title for page in pages_to_upload])
@@ -347,6 +357,10 @@ def main():
                 )
                 sys.exit(1)
 
+    if args.clean_install:
+        # This runs even though that there are no files found, i.e. len(pages_to_upload) == 0
+       perform_clean_install(confluence, pages_to_upload,  args)
+
     preface_markup = ""
     if args.preface_markdown:
         preface_markup = md2cf.document.parse_page([args.preface_markdown]).body
@@ -722,6 +736,29 @@ def collect_pages_to_upload(args):
 
     return pages_to_upload
 
+def perform_clean_install(confluence, pages_to_upload, args):
+    page_descendants = get_page_descendants(confluence, args)
+
+    pages_to_purge = [page for page in page_descendants if page['title'] not in [p.title for p in pages_to_upload]]
 pre_process_page(page, args, postface_markup, preface_markup, space_info) 
 pre_process_page(page, args, postface_markup, preface_markup, space_info) 
+
+    if not args.dry_run:
+        console.log("Number of pages set to be purged before deployment: ", len(pages_to_purge))
+        for page_to_purge in pages_to_purge:
+            confluence.purge_page(page_to_purge)
+            console.log("-", page_to_purge.title, "- has been purged")
+    else:
+        console.log("Number of pages set to be purged before deployment: ", len(pages_to_purge))
+        console.log("Pages to be purged: ", ", ".join([page['title'] for page in pages_to_purge]))
+
+def get_page_descendants(confluence, args):
+    if args.parent_title is not None:
+        return confluence.get_content_descendant(
+            title=args.parent_title, space_key=args.space)
+    elif args.parent_id is not None:
+        return confluence.get_content_descendant(
+            page_id=args.parent_id, space_key=args.space)
+    else:
+        return confluence.get_content_descendant(space_key=args.space)
 
 if __name__ == "__main__":
     main()
diff --git a/md2cf/api.py b/md2cf/api.py
@@ -75,6 +75,9 @@ def _post(self, path, **kwargs):
     def _put(self, path, **kwargs):
         return self._request("PUT", path, **kwargs)
 
+    def _delete(self, path, **kwargs):
+        return self.api.request("DELETE", urljoin(self.host, path), **kwargs)
-        return self.api.request("DELETE", urljoin(self.host, path), **kwargs)
+        return self._request("DELETE", path, **kwargs)
-        return self.api.request("DELETE", urljoin(self.host, path), **kwargs)
+        return self._request("DELETE", path, **kwargs)
+
     def get_page(
         self,
         title=None,
@@ -184,6 +187,7 @@ def update_page(
         update_message=None,
         labels=None,
         minor_edit=False,
+        status=None
     ):
         update_structure = {
             "version": {
@@ -208,6 +212,13 @@ def update_page(
                 "labels": [{"name": label, "prefix": "global"} for label in labels]
             }
 
+        if status is not None:
+            if ['current', 'trashed', 'deleted', 'historical', 'draft'].count(status) > 0:
-            if ['current', 'trashed', 'deleted', 'historical', 'draft'].count(status) > 0:
+            if status in ['current', 'trashed', 'deleted', 'historical', 'draft']:
-            if ['current', 'trashed', 'deleted', 'historical', 'draft'].count(status) > 0:
+            if status in ['current', 'trashed', 'deleted', 'historical', 'draft']:
+                update_structure["status"] = status
+            else:
+                raise ValueError(
+                    "Status has to be either current, trashed, deleted, historical or draft")
+
         return self._put(f"content/{page.id}", json=update_structure)
 
     def get_attachment(self, confluence_page, name):
@@ -256,3 +267,86 @@ def get_space(self, space, additional_expansions=None):
         if additional_expansions is not None:
             params = {"expand": ",".join(additional_expansions)}
         return self._get(f"space/{space}", params=params)
+
+    def get_content_descendant(
+        self,
+        title=None,
+        space_key=None,
+        page_id=None,
+        content_type="page",
+        additional_expansions=None,
+    ):
+        """
+        Gets content descendant
+
+        Args:
+            title (str): the title for the page
+            space_key (str): the Confluence space for the page
+            content_type (str): Content type. Default value: page.
+              Valid values: page, blogpost.
+            page_id (str or int): the ID of the page
+            additional_expansions (list of str): Additional expansions that should be
+              made when calling the api
+
+        Returns:
+            The response from the API
+
+        """
+        params = None
+        if additional_expansions is not None:
+            params = {"expand": ",".join(additional_expansions)}
+
+        if page_id is not None:
+            response = self._get(
+                f"content/{page_id}/descendant/{content_type}", params=params)
+            results = response.results
+            while (hasattr(response, '_links') and hasattr(response._links, 'next')):
+                response = self._get(response._links.next.replace(
+                    '/rest/api/', ''), params=params)
+                results.extend(response.results)
+            return results
+        elif title is not None:
+            params = {"title": title, "type": content_type}
+            if space_key is not None:
+                params["spaceKey"] = space_key
+            response = self._get("content", params=params)
+            try:
+                # A search by title/space doesn't return full page objects,
+                # and since we don't support expansion in this implementation
+                # just yet, we just retrieve the "full" page data using the page
+                # ID for the first search result
+                return self.get_content_descendant(
+                    page_id=response.results[0].id,
+                    additional_expansions=additional_expansions
+                )
+            except IndexError:
+                return None
+        elif space_key is not None:
+            space_homepage_id = self._get(
+                f"space/{space_key}")._expandable.homepage.replace('/rest/api/content/', '')
+            return self.get_content_descendant(page_id=space_homepage_id)
+        else:
+            raise ValueError(
+                "At least one of title or page_id or space_key must not be None")
+
+    def purge_page(
+        self,
+        page=None
+    ):
+        """
+        Delete page in a space
+
+        Args:
+            page (page): the page to be purged
+
+        Returns:
+            The response from the API
+
+        """
+        if page is not None:
+            params = {"status": "trashed"}
+            page_from_get = self.get_page(page_id=page.id)
+            self.update_page(page=page_from_get, body="", status="trashed")
+            return self._delete(f"content/{page.id}", params=params)
+        else:
+            raise ValueError("Page cannot be None")
-    def purge_page(
-        self,
-        page=None
-    ):
-        """
-        Delete page in a space
-
-        Args:
-            page (page): the page to be purged
-
-        Returns:
-            The response from the API
-
-        """
-        if page is not None:
-            params = {"status": "trashed"}
-            page_from_get = self.get_page(page_id=page.id)
-            self.update_page(page=page_from_get, body="", status="trashed")
-            return self._delete(f"content/{page.id}", params=params)
-        else:
-            raise ValueError("Page cannot be None")
+    def purge_page(
+        self,
+        page
+    ):
+        """
+        Delete page in a space
+
+        Args:
+            page (page): the page to be purged
+
+        Returns:
+            The response from the API
+
+        """
+        params = {"status": "trashed"}
+        page_from_get = self.get_page(page_id=page.id)
+        self.update_page(page=page_from_get, body="", status="trashed")
+        return self._delete(f"content/{page.id}", params=params)
-    def purge_page(
-        self,
-        page=None
-    ):
-        """
-        Delete page in a space
-
-        Args:
-            page (page): the page to be purged
-
-        Returns:
-            The response from the API
-
-        """
-        if page is not None:
-            params = {"status": "trashed"}
-            page_from_get = self.get_page(page_id=page.id)
-            self.update_page(page=page_from_get, body="", status="trashed")
-            return self._delete(f"content/{page.id}", params=params)
-        else:
-            raise ValueError("Page cannot be None")
+    def purge_page(
+        self,
+        page
+    ):
+        """
+        Delete page in a space
+
+        Args:
+            page (page): the page to be purged
+
+        Returns:
+            The response from the API
+
+        """
+        params = {"status": "trashed"}
+        page_from_get = self.get_page(page_id=page.id)
+        self.update_page(page=page_from_get, body="", status="trashed")
+        return self._delete(f"content/{page.id}", params=params)
diff --git a/tests/unit/test_confluence.py b/tests/unit/test_confluence.py
@@ -309,6 +309,60 @@ def test_update_page(confluence, requests_mock):
 
     assert page == updated_page
 
+def test_update_page_with_status(confluence, requests_mock):
+    test_page_id = 12345
+    test_page_title = "This is a title"
+    test_page_version = 1
+    test_page_status = "trashed"
+
+    test_page_object = bunchify(
+        {
+            "id": test_page_id,
+            "title": test_page_title,
+            "version": {"number": test_page_version},
+        }
+    )
+
+    test_new_body = "<p>This is my new body</p>"
+
+    update_structure = {
+        "version": {"number": test_page_version + 1, "minorEdit": False},
+        "title": test_page_title,
+        "type": "page",
+        "body": {"storage": {"value": test_new_body, "representation": "storage"}},
+        "status": test_page_status
+    }
+
+    updated_page = {"test": 1}
+    requests_mock.put(
+        TEST_HOST + f"content/{test_page_id}",
+        complete_qs=True,
+        json=updated_page,
+        additional_matcher=lambda x: x.json() == update_structure,
+    )
+
+    page = confluence.update_page(test_page_object, body=test_new_body, status=test_page_status)
+
+    assert page == updated_page
+
+def test_update_page_with_wrong_status(confluence, requests_mock):
+    test_page_id = 12345
+    test_page_title = "This is a title"
+    test_page_version = 1
+    test_page_status = "I do not exist"
+
+    test_page_object = bunchify(
+        {
+            "id": test_page_id,
+            "title": test_page_title,
+            "version": {"number": test_page_version},
+        }
+    )
+
+    test_new_body = "<p>This is my new body</p>"
+
+    with pytest.raises(ValueError):
+        confluence.update_page(test_page_object, body=test_new_body, status=test_page_status)
 
 def test_update_page_with_message(confluence, requests_mock):
     test_page_id = 12345
@@ -386,3 +440,125 @@ def test_update_attachment(mocker, confluence, requests_mock):
     response = confluence.update_attachment(test_page, test_fp, test_attachment)
 
     assert response == test_response
+
+def test_get_content_descendant_with_page_id(confluence, requests_mock):
+    test_page_id = 12345
+    test_content_type = "page"
+    test_return_value = { "results": [{"some_stuff": 1}] }
+
+    requests_mock.get(TEST_HOST + f"content/{test_page_id}/descendant/{test_content_type}", json=test_return_value)
+    page = confluence.get_content_descendant(page_id=test_page_id)
+
+    assert page == bunchify(test_return_value["results"])
+
+def test_get_content_descendant_with_page_id_and_next_link(confluence, requests_mock):
+    test_page_id = 12345
+    test_content_type = "page"
+    test_return_value2_next_url = TEST_HOST + "/next-api-call"
+    test_return_value1 = { 
+        "results": [{"id": 1}],
+        "_links": {
+            "next": test_return_value2_next_url
+        }    
+    }
+    test_return_value2 = {
+        "results": [{"id": 2}],
+    }
+    expected_result = [{ "id": 1 }, { "id": 2 }]
+
+    requests_mock.get(TEST_HOST + f"content/{test_page_id}/descendant/{test_content_type}", json=test_return_value1)
+    requests_mock.get(f"{test_return_value2_next_url}", json=test_return_value2)
+    page = confluence.get_content_descendant(page_id=test_page_id)
+
+    assert page == bunchify(expected_result)
+
+def test_get_content_descendant_with_title(confluence, requests_mock):
+    test_page_title = "test title"
+    test_page_id = 12345
+    test_content_type = "page"
+    test_return_value = { "results": [{"some_stuff": 1}] }
+    test_get_page_from_title_return_value = {"results": [{"id": test_page_id}]}
+    requests_mock.get(
+        TEST_HOST + f"content?title={test_page_title}&type=page",
+        complete_qs=True,
+        json=test_get_page_from_title_return_value,
+    )
+    requests_mock.get(TEST_HOST + f"content/{test_page_id}/descendant/{test_content_type}", json=test_return_value)
+
+    page = confluence.get_content_descendant(title=test_page_title)
+
+    assert page == bunchify(test_return_value["results"])
+
+def test_get_content_descendant_without_page_id_or_title(confluence, requests_mock):
+    test_space_key = "ABC"
+    test_homepage_id = 54321
+    test_space_return_value = { "_expandable": { "homepage": f"/rest/api/content/{test_homepage_id}"}}
+
+    test_content_type = "page"
+    test_return_value = { "results": [{"some_stuff": 1}] }
+
+    requests_mock.get(
+        TEST_HOST + f"space/{test_space_key}",
+        complete_qs=True,
+        json=test_space_return_value,
+    )
+
+    requests_mock.get(TEST_HOST + f"content/{test_homepage_id}/descendant/{test_content_type}", json=test_return_value)
+
+    page = confluence.get_content_descendant(space_key=test_space_key)
+
+    assert page == bunchify(test_return_value["results"])
+
+def test_get_content_descendant_without_any_parameters(confluence):
+    with pytest.raises(ValueError):
+        confluence.get_content_descendant()
+
+def test_purge_page(confluence, requests_mock):
+    test_page_id = 12345
+    test_page_title = "This is a title"
+    test_page_version = 1
+    test_page_body = ""
+    test_page_status = "trashed"
+    test_get_page_return_value = bunchify(
+        {
+            "id": test_page_id,
+            "title": test_page_title,
+            "version": {"number": test_page_version},
+        }
+    )
+
+    test_get_descendant_page_object = bunchify(
+        {
+            "id": test_page_id,
+            "title": test_page_title,
+            "_expandable": { "body": test_page_body }
+        }
+    )
+
+    update_structure = {
+        "version": {
+            "number": test_page_version + 1,
+            "minorEdit": False,
+        },
+        "title": test_page_title,
+        "type": "page",
+        "body": {"storage": {"value": test_page_body, "representation": "storage"}},
+        "status": test_page_status
+    }
+
+
+    requests_mock.get(TEST_HOST + f"content/{test_page_id}", json=test_get_page_return_value)
+    requests_mock.put(
+        TEST_HOST + f"content/{test_page_id}",
+        json=update_structure,
+        additional_matcher=lambda x: x.json() == update_structure,
+    )
+
+    requests_mock.delete(TEST_HOST + f"content/{test_page_id}")
+    response = confluence.purge_page(test_get_descendant_page_object)
+
+    assert response.status_code == 200
+
+def test_purge_page_without_any_parameters(confluence):
+    with pytest.raises(ValueError):
+        confluence.purge_page()