Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log and skip failed encodes when indexing #4205

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion app/lib/meadow/data/indexer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ defmodule Meadow.Data.Indexer do
Repo.transaction(
fn ->
stream
|> Stream.map(&SearchDocument.encode(&1, version))
|> Stream.map(&encode_document(&1, version))
|> Stream.reject(&(&1 == :skip))
|> Bulk.upload(index)

SearchIndex.refresh(index)
Expand All @@ -104,6 +105,20 @@ defmodule Meadow.Data.Indexer do
)
end

# A nil item has nothing to encode; return the :skip marker so the caller's
# `Stream.reject(&(&1 == :skip))` step drops it before the bulk upload.
defp encode_document(nil, _version), do: :skip

# Encodes `item` into a search document for the given index `version`.
#
# Any exception raised by `SearchDocument.encode/2` is rescued, logged at
# error level, and turned into :skip so that a single bad record does not
# abort the whole indexing run.
defp encode_document(item, version) do
  SearchDocument.encode(item, version)
rescue
  exception ->
    # NOTE(review): `with_log_metadata` is defined outside this view; it
    # presumably attaches `module` and `id` as Logger metadata for the
    # duration of the block — confirm against its definition.
    with_log_metadata module: __MODULE__, id: item.id do
      Logger.error(
        "Index encoding failed due to: " <> Exception.format_banner(:error, exception, [])
      )
    end

    :skip
end

def stream(query, preloads) do
from(query)
|> Repo.stream()
Expand Down
23 changes: 23 additions & 0 deletions app/test/meadow/data/indexer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ defmodule Meadow.Data.IndexerTest do
# use Meadow.AuthorityCase
use Meadow.DataCase
use Meadow.IndexCase
alias Ecto.Adapters.SQL
alias Ecto.Adapters.SQL.Sandbox
alias Meadow.Data.{Collections, FileSets, Indexer, Works}
alias Meadow.Data.Schemas.{Collection, FileSet, Work}
alias Meadow.Ingest.{Projects, Sheets}
alias Meadow.{Config, Repo}

import ExUnit.CaptureLog

describe "indexing" do
setup do
{:ok, indexable_data()}
Expand All @@ -21,6 +24,26 @@ defmodule Meadow.Data.IndexerTest do
assert_doc_counts_match(context)
end

# Checks that a file set whose encoding raises is logged and left out of the
# index, while the remaining file sets are still indexed.
test "error_handling", context do
  assert_all_empty()
  %{file_sets: [broken_file_set | _]} = context
  expected_count = length(context.file_sets) - 1

  # Null out core_metadata with raw SQL for one file set; the assertions below
  # expect that record's encoding to fail with a KeyError.
  null_metadata_sql = "UPDATE file_sets SET core_metadata = NULL WHERE id = $1"

  Repo
  |> SQL.query!(null_metadata_sql, [Ecto.UUID.dump!(broken_file_set.id)])

  log_output = capture_log(&Indexer.synchronize_index/0)

  # Every file set except the broken one should have been indexed.
  assert {:ok, file_set_count} = indexed_doc_count(FileSet, 2)
  assert file_set_count == expected_count

  # The failure must be logged with the offending record's id in the metadata.
  assert log_output =~
           "id=#{broken_file_set.id} [error] Index encoding failed due to: ** (KeyError)"
end

test "reindex_all", context do
Indexer.synchronize_index()
assert_doc_counts_match(context)
Expand Down
Loading