Skip to content

Commit

Permalink
feat: Store published name and description in studio search index (#3…
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisChV authored Oct 21, 2024
1 parent a49110b commit fb25a5d
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 20 deletions.
2 changes: 2 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level3,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.published + "." + Fields.display_name,
Fields.published + "." + Fields.published_description,
])
# Mark which attributes can be used for sorting search results:
client.index(temp_index_name).update_sortable_attributes([
Expand Down
114 changes: 94 additions & 20 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api
from opaque_keys.edx.locator import LibraryLocatorV2
from rest_framework.exceptions import NotFound

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
from openedx.core.djangoapps.xblock.data import LatestVersion
from openedx_learning.api.authoring_models import Collection

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,6 +76,11 @@ class Fields:
# Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
num_children = "num_children"

# Published data (dictionary) of this object
published = "published"
published_display_name = "display_name"
published_description = "description"

# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
# command (changing those settings on an large active index is not recommended).
Expand Down Expand Up @@ -121,6 +128,48 @@ def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
}


def _get_content_from_block(block) -> dict:
"""
Get the content from block
"""
try:
content_data = block.index_dictionary()
# Will be something like:
# {
# 'content': {'display_name': '...', 'capa_content': '...'},
# 'content_type': 'CAPA',
# 'problem_types': ['multiplechoiceresponse']
# }
# Which we need to flatten:
if "content_type" in content_data:
del content_data["content_type"] # Redundant with our standard Fields.block_type field.
if "content" in content_data and isinstance(content_data["content"], dict):
content = content_data["content"]
if "display_name" in content:
del content["display_name"]
del content_data["content"]
content_data.update(content)

return content_data

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")


def _get_description_from_block_content(block_type, block_content) -> str:
"""
Generate description from block content
"""
result = None

if block_type == 'html' and 'html_content' in block_content:
result = block_content['html_content']
elif block_type == 'problem' and 'capa_content' in block_content:
result = block_content['capa_content']

return result


def _fields_from_block(block) -> dict:
"""
Given an XBlock instance, call its index_dictionary() method to load any
Expand All @@ -130,16 +179,17 @@ def _fields_from_block(block) -> dict:
class implementation returns only:
{"content": {"display_name": "..."}, "content_type": "..."}
"""
block_type = block.scope_ids.block_type
block_data = {
Fields.usage_key: str(block.usage_key),
Fields.block_id: str(block.usage_key.block_id),
Fields.display_name: xblock_api.get_block_display_name(block),
Fields.block_type: block.scope_ids.block_type,
Fields.block_type: block_type,
# This is called context_key so it's the same for courses and libraries
Fields.context_key: str(block.usage_key.context_key), # same as lib_key
Fields.org: str(block.usage_key.context_key.org),
Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
Fields.breadcrumbs: []
Fields.breadcrumbs: [],
}
# Get the breadcrumbs (course, section, subsection, etc.):
if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries).
Expand All @@ -160,25 +210,14 @@ class implementation returns only:
parent_data,
)
try:
content_data = block.index_dictionary()
# Will be something like:
# {
# 'content': {'display_name': '...', 'capa_content': '...'},
# 'content_type': 'CAPA',
# 'problem_types': ['multiplechoiceresponse']
# }
# Which we need to flatten:
if "content_type" in content_data:
del content_data["content_type"] # Redundant with our standard Fields.block_type field.
if "content" in content_data and isinstance(content_data["content"], dict):
content = content_data["content"]
if "display_name" in content:
del content["display_name"]
del content_data["content"]
content_data.update(content)
# Now we have something like:
# { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] }
content_data = _get_content_from_block(block)
block_data[Fields.content] = content_data

# Generate description from the content
description = _get_description_from_block_content(block_type, content_data)
if description:
block_data[Fields.description] = description

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
return block_data
Expand Down Expand Up @@ -295,6 +334,32 @@ def _collections_for_content_object(object_id: UsageKey | LearningContextKey) ->
return result


def _published_data_from_block(block_published) -> dict:
"""
Given an library block get the published data.
"""
result = {
Fields.published: {
Fields.published_display_name: xblock_api.get_block_display_name(block_published),
}
}

try:
content_data = _get_content_from_block(block_published)

description = _get_description_from_block_content(
block_published.scope_ids.block_type,
content_data,
)

if description:
result[Fields.published][Fields.published_description] = description
except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block_published.usage_key}: {err}")

return result


def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand All @@ -307,6 +372,12 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

try:
block_published = xblock_api.load_block(xblock_metadata.usage_key, user=None, version=LatestVersion.PUBLISHED)
except NotFound:
# Never published
block_published = None

doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
doc.update({
Fields.type: DocType.library_block,
Expand All @@ -318,6 +389,9 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad

doc.update(_fields_from_block(block))

if block_published:
doc.update(_published_data_from_block(block_published))

# Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here:
doc[Fields.breadcrumbs] = [{"display_name": library_name}]

Expand Down
125 changes: 125 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def test_problem_block(self):
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Test Problem",
"description": "What is a test?",
"breadcrumbs": [
{
'display_name': 'Toy Course',
Expand Down Expand Up @@ -202,6 +203,8 @@ def test_html_block(self):
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Text",
"description": "This is a link to another page and some Chinese 四節比分和七年前 Some "
"more Chinese 四節比分和七年前 ",
"breadcrumbs": [
{
'display_name': 'Toy Course',
Expand Down Expand Up @@ -297,6 +300,128 @@ def test_html_library_block(self):
},
}

def test_html_published_library_block(self):
library_api.publish_changes(self.library.key)

doc = searchable_doc_for_library_block(self.library_block)
doc.update(searchable_doc_tags(self.library_block.usage_key))
doc.update(searchable_doc_collections(self.library_block.usage_key))

assert doc == {
"id": "lbedx2012_fallhtmltext2-4bb47d67",
"type": "library_block",
"block_type": "html",
"usage_key": "lb:edX:2012_Fall:html:text2",
"block_id": "text2",
"context_key": "lib:edX:2012_Fall",
"org": "edX",
"access_id": self.library_access_id,
"display_name": "Text",
"breadcrumbs": [
{
"display_name": "some content_library",
},
],
"last_published": None,
"created": 1680674828.0,
"modified": 1680674828.0,
"content": {
"html_content": "",
},
"collections": {
"key": ["TOY_COLLECTION"],
"display_name": ["Toy Collection"],
},
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"],
},
'published': {'display_name': 'Text'},
}

# Update library block to create a draft
olx_str = '<html display_name="Text 2"><![CDATA[<p>This is a Test</p>]]></html>'
library_api.set_library_block_olx(self.library_block.usage_key, olx_str)

doc = searchable_doc_for_library_block(self.library_block)
doc.update(searchable_doc_tags(self.library_block.usage_key))
doc.update(searchable_doc_collections(self.library_block.usage_key))

assert doc == {
"id": "lbedx2012_fallhtmltext2-4bb47d67",
"type": "library_block",
"block_type": "html",
"usage_key": "lb:edX:2012_Fall:html:text2",
"block_id": "text2",
"context_key": "lib:edX:2012_Fall",
"org": "edX",
"access_id": self.library_access_id,
"display_name": "Text 2",
"description": "This is a Test",
"breadcrumbs": [
{
"display_name": "some content_library",
},
],
"last_published": None,
"created": 1680674828.0,
"modified": 1680674828.0,
"content": {
"html_content": "This is a Test",
},
"collections": {
"key": ["TOY_COLLECTION"],
"display_name": ["Toy Collection"],
},
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"],
},
"published": {"display_name": "Text"},
}

# Publish new changes
library_api.publish_changes(self.library.key)
doc = searchable_doc_for_library_block(self.library_block)
doc.update(searchable_doc_tags(self.library_block.usage_key))
doc.update(searchable_doc_collections(self.library_block.usage_key))

assert doc == {
"id": "lbedx2012_fallhtmltext2-4bb47d67",
"type": "library_block",
"block_type": "html",
"usage_key": "lb:edX:2012_Fall:html:text2",
"block_id": "text2",
"context_key": "lib:edX:2012_Fall",
"org": "edX",
"access_id": self.library_access_id,
"display_name": "Text 2",
"description": "This is a Test",
"breadcrumbs": [
{
"display_name": "some content_library",
},
],
"last_published": None,
"created": 1680674828.0,
"modified": 1680674828.0,
"content": {
"html_content": "This is a Test",
},
"collections": {
"key": ["TOY_COLLECTION"],
"display_name": ["Toy Collection"],
},
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"],
},
"published": {
"display_name": "Text 2",
"description": "This is a Test",
},
}

def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.library.key, self.collection.key)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def test_create_delete_library_block(self, meilisearch_client):
with freeze_time(published_date):
library_api.publish_changes(library.key)
doc_problem["last_published"] = published_date.timestamp()
doc_problem["published"] = {"display_name": "Blank Problem"}
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])

# Delete the Library Block
Expand Down

0 comments on commit fb25a5d

Please sign in to comment.