feat: Store published name and description in studio search index (#3…

…5678)
openedx · Oct 21, 2024 · fb25a5d · fb25a5d
1 parent a49110b
commit fb25a5d
Show file tree

Hide file tree

Showing 4 changed files with 222 additions and 20 deletions.
diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
@@ -356,6 +356,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
             Fields.tags + "." + Fields.tags_level3,
             Fields.collections + "." + Fields.collections_display_name,
             Fields.collections + "." + Fields.collections_key,
+            Fields.published + "." + Fields.display_name,
+            Fields.published + "." + Fields.published_description,
         ])
         # Mark which attributes can be used for sorting search results:
         client.index(temp_index_name).update_sortable_attributes([

diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
@@ -11,11 +11,13 @@
 from opaque_keys.edx.keys import LearningContextKey, UsageKey
 from openedx_learning.api import authoring as authoring_api
 from opaque_keys.edx.locator import LibraryLocatorV2
+from rest_framework.exceptions import NotFound
 
 from openedx.core.djangoapps.content.search.models import SearchAccess
 from openedx.core.djangoapps.content_libraries import api as lib_api
 from openedx.core.djangoapps.content_tagging import api as tagging_api
 from openedx.core.djangoapps.xblock import api as xblock_api
+from openedx.core.djangoapps.xblock.data import LatestVersion
 from openedx_learning.api.authoring_models import Collection
 
 log = logging.getLogger(__name__)
@@ -74,6 +76,11 @@ class Fields:
     # Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
     num_children = "num_children"
 
+    # Published data (dictionary) of this object
+    published = "published"
+    published_display_name = "display_name"
+    published_description = "description"
+
     # Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
     # search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
     # command (changing those settings on an large active index is not recommended).
@@ -121,6 +128,48 @@ def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
     }
 
 
+def _get_content_from_block(block) -> dict:
+    """
+    Get the content from block
+    """
+    try:
+        content_data = block.index_dictionary()
+        # Will be something like:
+        # {
+        #     'content': {'display_name': '...', 'capa_content': '...'},
+        #     'content_type': 'CAPA',
+        #     'problem_types': ['multiplechoiceresponse']
+        # }
+        # Which we need to flatten:
+        if "content_type" in content_data:
+            del content_data["content_type"]  # Redundant with our standard Fields.block_type field.
+        if "content" in content_data and isinstance(content_data["content"], dict):
+            content = content_data["content"]
+            if "display_name" in content:
+                del content["display_name"]
+            del content_data["content"]
+            content_data.update(content)
+
+        return content_data
+
+    except Exception as err:  # pylint: disable=broad-except
+        log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
+
+
+def _get_description_from_block_content(block_type, block_content) -> str:
+    """
+    Generate description from block content
+    """
+    result = None
+
+    if block_type == 'html' and 'html_content' in block_content:
+        result = block_content['html_content']
+    elif block_type == 'problem' and 'capa_content' in block_content:
+        result = block_content['capa_content']
+
+    return result
+
+
 def _fields_from_block(block) -> dict:
     """
     Given an XBlock instance, call its index_dictionary() method to load any
@@ -130,16 +179,17 @@ def _fields_from_block(block) -> dict:
     class implementation returns only:
         {"content": {"display_name": "..."}, "content_type": "..."}
     """
+    block_type = block.scope_ids.block_type
     block_data = {
         Fields.usage_key: str(block.usage_key),
         Fields.block_id: str(block.usage_key.block_id),
         Fields.display_name: xblock_api.get_block_display_name(block),
-        Fields.block_type: block.scope_ids.block_type,
+        Fields.block_type: block_type,
         # This is called context_key so it's the same for courses and libraries
         Fields.context_key: str(block.usage_key.context_key),  # same as lib_key
         Fields.org: str(block.usage_key.context_key.org),
         Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
-        Fields.breadcrumbs: []
+        Fields.breadcrumbs: [],
     }
     # Get the breadcrumbs (course, section, subsection, etc.):
     if block.usage_key.context_key.is_course:  # Getting parent is not yet implemented in Learning Core (for libraries).
@@ -160,25 +210,14 @@ class implementation returns only:
                 parent_data,
             )
     try:
-        content_data = block.index_dictionary()
-        # Will be something like:
-        # {
-        #     'content': {'display_name': '...', 'capa_content': '...'},
-        #     'content_type': 'CAPA',
-        #     'problem_types': ['multiplechoiceresponse']
-        # }
-        # Which we need to flatten:
-        if "content_type" in content_data:
-            del content_data["content_type"]  # Redundant with our standard Fields.block_type field.
-        if "content" in content_data and isinstance(content_data["content"], dict):
-            content = content_data["content"]
-            if "display_name" in content:
-                del content["display_name"]
-            del content_data["content"]
-            content_data.update(content)
-        # Now we have something like:
-        # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] }
+        content_data = _get_content_from_block(block)
         block_data[Fields.content] = content_data
+
+        # Generate description from the content
+        description = _get_description_from_block_content(block_type, content_data)
+        if description:
+            block_data[Fields.description] = description
+
     except Exception as err:  # pylint: disable=broad-except
         log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
     return block_data
@@ -295,6 +334,32 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) ->
     return result
 
 
+def _published_data_from_block(block_published) -> dict:
+    """
+    Given an library block get the published data.
+    """
+    result = {
+        Fields.published: {
+            Fields.published_display_name: xblock_api.get_block_display_name(block_published),
+        }
+    }
+
+    try:
+        content_data = _get_content_from_block(block_published)
+
+        description = _get_description_from_block_content(
+            block_published.scope_ids.block_type,
+            content_data,
+        )
+
+        if description:
+            result[Fields.published][Fields.published_description] = description
+    except Exception as err:  # pylint: disable=broad-except
+        log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}")
+
+    return result
+
+
 def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
     """
     Generate a dictionary document suitable for ingestion into a search engine
@@ -307,6 +372,12 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
     library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
     block = xblock_api.load_block(xblock_metadata.usage_key, user=None)
 
+    try:
+        block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED)
+    except NotFound:
+        # Never published
+        block_published = None
+
     doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
     doc.update({
         Fields.type: DocType.library_block,
@@ -318,6 +389,9 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
 
     doc.update(_fields_from_block(block))
 
+    if block_published:
+        doc.update(_published_data_from_block(block_published))
+
     # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here:
     doc[Fields.breadcrumbs] = [{"display_name": library_name}]
 

diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py
@@ -154,6 +154,7 @@ def test_problem_block(self):
             "org": "edX",
             "access_id": self.toy_course_access_id,
             "display_name": "Test Problem",
+            "description": "What is a test?",
             "breadcrumbs": [
                 {
                     'display_name': 'Toy Course',
@@ -202,6 +203,8 @@ def test_html_block(self):
             "org": "edX",
             "access_id": self.toy_course_access_id,
             "display_name": "Text",
+            "description": "This is a link to another page and some Chinese 四節比分和七年前 Some "
+                         "more Chinese 四節比分和七年前 ",
             "breadcrumbs": [
                 {
                     'display_name': 'Toy Course',
@@ -297,6 +300,128 @@ def test_html_library_block(self):
             },
         }
 
+    def test_html_published_library_block(self):
+        library_api.publish_changes(self.library.key)
+
+        doc = searchable_doc_for_library_block(self.library_block)
+        doc.update(searchable_doc_tags(self.library_block.usage_key))
+        doc.update(searchable_doc_collections(self.library_block.usage_key))
+
+        assert doc == {
+            "id": "lbedx2012_fallhtmltext2-4bb47d67",
+            "type": "library_block",
+            "block_type": "html",
+            "usage_key": "lb:edX:2012_Fall:html:text2",
+            "block_id": "text2",
+            "context_key": "lib:edX:2012_Fall",
+            "org": "edX",
+            "access_id": self.library_access_id,
+            "display_name": "Text",
+            "breadcrumbs": [
+                {
+                    "display_name": "some content_library",
+                },
+            ],
+            "last_published": None,
+            "created": 1680674828.0,
+            "modified": 1680674828.0,
+            "content": {
+                "html_content": "",
+            },
+            "collections": {
+                "key": ["TOY_COLLECTION"],
+                "display_name": ["Toy Collection"],
+            },
+            "tags": {
+                "taxonomy": ["Difficulty"],
+                "level0": ["Difficulty > Normal"],
+            },
+            'published': {'display_name': 'Text'},
+        }
+
+        # Update library block to create a draft
+        olx_str = '<html display_name="Text 2"><![CDATA[<p>This is a Test</p>]]></html>'
+        library_api.set_library_block_olx(self.library_block.usage_key, olx_str)
+
+        doc = searchable_doc_for_library_block(self.library_block)
+        doc.update(searchable_doc_tags(self.library_block.usage_key))
+        doc.update(searchable_doc_collections(self.library_block.usage_key))
+
+        assert doc == {
+            "id": "lbedx2012_fallhtmltext2-4bb47d67",
+            "type": "library_block",
+            "block_type": "html",
+            "usage_key": "lb:edX:2012_Fall:html:text2",
+            "block_id": "text2",
+            "context_key": "lib:edX:2012_Fall",
+            "org": "edX",
+            "access_id": self.library_access_id,
+            "display_name": "Text 2",
+            "description": "This is a Test",
+            "breadcrumbs": [
+                {
+                    "display_name": "some content_library",
+                },
+            ],
+            "last_published": None,
+            "created": 1680674828.0,
+            "modified": 1680674828.0,
+            "content": {
+                "html_content": "This is a Test",
+            },
+            "collections": {
+                "key": ["TOY_COLLECTION"],
+                "display_name": ["Toy Collection"],
+            },
+            "tags": {
+                "taxonomy": ["Difficulty"],
+                "level0": ["Difficulty > Normal"],
+            },
+            "published": {"display_name": "Text"},
+        }
+
+        # Publish new changes
+        library_api.publish_changes(self.library.key)
+        doc = searchable_doc_for_library_block(self.library_block)
+        doc.update(searchable_doc_tags(self.library_block.usage_key))
+        doc.update(searchable_doc_collections(self.library_block.usage_key))
+
+        assert doc == {
+            "id": "lbedx2012_fallhtmltext2-4bb47d67",
+            "type": "library_block",
+            "block_type": "html",
+            "usage_key": "lb:edX:2012_Fall:html:text2",
+            "block_id": "text2",
+            "context_key": "lib:edX:2012_Fall",
+            "org": "edX",
+            "access_id": self.library_access_id,
+            "display_name": "Text 2",
+            "description": "This is a Test",
+            "breadcrumbs": [
+                {
+                    "display_name": "some content_library",
+                },
+            ],
+            "last_published": None,
+            "created": 1680674828.0,
+            "modified": 1680674828.0,
+            "content": {
+                "html_content": "This is a Test",
+            },
+            "collections": {
+                "key": ["TOY_COLLECTION"],
+                "display_name": ["Toy Collection"],
+            },
+            "tags": {
+                "taxonomy": ["Difficulty"],
+                "level0": ["Difficulty > Normal"],
+            },
+            "published": {
+                "display_name": "Text 2",
+                "description": "This is a Test",
+            },
+        }
+
     def test_collection_with_library(self):
         doc = searchable_doc_for_collection(self.library.key, self.collection.key)
         doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))

diff --git a/openedx/core/djangoapps/content/search/tests/test_handlers.py b/openedx/core/djangoapps/content/search/tests/test_handlers.py
@@ -176,6 +176,7 @@ def test_create_delete_library_block(self, meilisearch_client):
         with freeze_time(published_date):
             library_api.publish_changes(library.key)
         doc_problem["last_published"] = published_date.timestamp()
+        doc_problem["published"] = {"display_name": "Blank Problem"}
         meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])
 
         # Delete the Library Block