Skip to content

Commit

Permalink
Hyrax Cache Migration Script QA (#1118)
Browse files Browse the repository at this point in the history
* log warning in fetch work by fileset id instead of raising an error

* set default field in work utils helper to mitigate errors

* specified logging if an admin set name couldnt be found
  • Loading branch information
davidcam-src authored Sep 3, 2024
1 parent bf2925f commit 4c583d2
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 41 deletions.
27 changes: 20 additions & 7 deletions app/helpers/work_utils_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,30 @@
module WorkUtilsHelper
def self.fetch_work_data_by_fileset_id(fileset_id)
work = ActiveFedora::SolrService.get("file_set_ids_ssim:#{fileset_id}", rows: 1)['response']['docs'].first || {}
raise "No work found for fileset id: #{fileset_id}" if work.blank?
Rails.logger.warn("No work found for fileset id: #{fileset_id}") if work.blank?
# Fetch the admin set related to the work
admin_set_name = work['admin_set_tesim']&.first || 'Unknown'
admin_set = ActiveFedora::SolrService.get("title_tesim:#{admin_set_name}", rows: 1)['response']['docs'].first || {}
admin_set_name = work['admin_set_tesim']&.first
# If the admin set name is not nil, fetch the admin set
# Set the admin set to an empty hash if the solr query returns nil
admin_set = admin_set_name ? ActiveFedora::SolrService.get("title_tesim:#{admin_set_name}", { :rows => 1, 'df' => 'title_tesim'})['response']['docs'].first || {} : {}
Rails.logger.warn(self.generate_warning_message(admin_set_name, fileset_id)) if admin_set.blank?

{
work_id: work['id'] || 'Unknown',
work_type: work.dig('has_model_ssim', 0) || 'Unknown',
title: work['title_tesim']&.first || 'Unknown',
admin_set_id: admin_set['id'] || 'Unknown',
work_id: work['id'],
work_type: work.dig('has_model_ssim', 0),
title: work['title_tesim']&.first,
admin_set_id: admin_set['id'],
admin_set_name: admin_set_name
}
end

private_class_method

def self.generate_warning_message(admin_set_name, fileset_id)
if admin_set_name.blank?
return "Could not find an admin set, the work with fileset id: #{fileset_id} has no admin set name."
else
return "No admin set found with title_tesim: #{admin_set_name}."
end
end
end
7 changes: 3 additions & 4 deletions app/services/tasks/download_stats_migration_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def create_hyc_download_stat(stat, progress_tracker)
work_data = work_data_from_stat(stat)
hyc_download_stat.assign_attributes(
fileset_id: stat[:file_id],
work_id: work_data[:work_id],
admin_set_id: work_data[:admin_set_id],
work_type: work_data[:work_type],
work_id: work_data[:work_id] || 'Unknown',
admin_set_id: work_data[:admin_set_id] || 'Unknown',
work_type: work_data[:work_type] || 'Unknown',
date: stat[:date],
download_count: stat[:downloads],
)
Expand All @@ -108,7 +108,6 @@ def work_data_from_stat(stat)

# Method to write work stats to a CSV file
def write_to_csv(output_path, work_stats, headers = ['file_id', 'date', 'downloads'])
puts "Inspect work_stats: #{work_stats.inspect}"
CSV.open(output_path, 'w', write_headers: true, headers: headers) do |csv|
work_stats.each do |stat|
csv << [stat[:file_id], stat[:date], stat[:downloads]]
Expand Down
2 changes: 1 addition & 1 deletion lib/tasks/dimensions_ingest.rake
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ namespace :dimensions do

# Helper method to compute the last run file path
def self.dimensions_last_run_path
File.join(ENV['DATA_STORAGE'], 'last_dimensions_ingest_run.txt')
File.join(ENV['DATA_STORAGE'], 'hyrax', 'last_dimensions_ingest_run.txt')
end

def self.get_date_range(args)
Expand Down
68 changes: 42 additions & 26 deletions spec/helpers/hyrax/work_utils_helper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,21 @@
require Rails.root.join('app/overrides/controllers/hyrax/downloads_controller_override.rb')

RSpec.describe WorkUtilsHelper, type: :module do
let(:fileset_id) { 'file-set-id' }
let(:fileset_ids) { ['file-set-id', 'file-set-id-2'] }
let(:admin_set_name) { 'Open_Access_Articles_and_Book_Chapters' }
let(:example_admin_set_id) { 'h128zk07m' }
let(:example_work_id) { '1z40m031g' }

let(:mock_record) { [{
let(:mock_records) { [[{
'has_model_ssim' => ['Article'],
'id' => '1z40m031g',
'title_tesim' => ['Key ethical issues discussed at CDC-sponsored international, regional meetings to explore cultural perspectives and contexts on pandemic influenza preparedness and response'],
'admin_set_tesim' => ['Open_Access_Articles_and_Book_Chapters']}
],
[{
'has_model_ssim' => ['Article'],
'id' => '1z40m031g-2',
'title_tesim' => ['Placeholder Title'],
'admin_set_tesim' => []}
]
]
}

Expand All @@ -34,45 +39,56 @@
}

before do
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_id}", rows: 1).and_return('response' => { 'docs' => mock_record })
allow(ActiveFedora::SolrService).to receive(:get).with("title_tesim:#{admin_set_name}", rows: 1).and_return('response' => { 'docs' => mock_admin_set })
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_ids[0]}", rows: 1).and_return('response' => { 'docs' => mock_records[0] })
allow(ActiveFedora::SolrService).to receive(:get).with("title_tesim:#{admin_set_name}", {'df'=>'title_tesim', :rows=>1}).and_return('response' => { 'docs' => mock_admin_set })
end

describe '#fetch_work_data_by_fileset_id' do
it 'fetches the work data correctly' do
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_id)
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_ids[0])
expect(result).to eq(expected_work_data)
end

it 'properly substitutes Unknown for missing values' do
it 'logs appropriate messages for missing values' do
# Mock the solr response to simulate a work with missing values, if it somehow makes it past the initial nil check
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_id}", rows: 1).and_return('response' => { 'docs' => [{ 'placeholder-key' => 'placeholder-value' }] })
allow(ActiveFedora::SolrService).to receive(:get).with('title_tesim:Unknown', rows: 1).and_return('response' => { 'docs' => [] })
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_id)
expect(result[:work_id]).to eq('Unknown')
expect(result[:work_type]).to eq('Unknown')
expect(result[:title]).to eq('Unknown')
expect(result[:admin_set_id]).to eq('Unknown')
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_ids[0]}", rows: 1).and_return('response' => { 'docs' => [] })
allow(Rails.logger).to receive(:warn)
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_ids[0])
expect(Rails.logger).to have_received(:warn).with("No work found for fileset id: #{fileset_ids[0]}")
expect(Rails.logger).to have_received(:warn).with("Could not find an admin set, the work with fileset id: #{fileset_ids[0]} has no admin set name.")
expect(result[:work_id]).to be_nil
expect(result[:work_type]).to be_nil
expect(result[:title]).to be_nil
expect(result[:admin_set_id]).to be_nil
end

context 'when no work is found' do
before do
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_id}", rows: 1).and_return('response' => { 'docs' => [] })
end

it 'raises an error if no work is found' do
expect { WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_id) }.to raise_error(RuntimeError, "No work found for fileset id: #{fileset_id}")
it 'logs a warning if no work is found' do
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_ids[1]}", rows: 1).and_return('response' => { 'docs' => [] })
allow(Rails.logger).to receive(:warn)
WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_ids[1])
expect(Rails.logger).to have_received(:warn).with("No work found for fileset id: #{fileset_ids[1]}")
end
end

context 'when admin set is not found' do
before do
allow(ActiveFedora::SolrService).to receive(:get).with("title_tesim:#{admin_set_name}", rows: 1).and_return('response' => { 'docs' => [] })
it 'logs an appropriate message if the work doesnt have an admin set title' do
# Using the mock record without an admin set title
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_ids[1]}", rows: 1).and_return('response' => { 'docs' => mock_records[1] })
allow(Rails.logger).to receive(:warn)
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_ids[1])
expect(Rails.logger).to have_received(:warn).with("Could not find an admin set, the work with fileset id: #{fileset_ids[1]} has no admin set name.")
expect(result[:admin_set_id]).to be_nil
end

it 'sets the admin_set_id to Unknown if admin set is not found' do
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_id)
expect(result[:admin_set_id]).to eq('Unknown')
it 'logs an appropriate message if the query for an admin set returns nothing' do
# Using the mock record with an admin set title
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{fileset_ids[1]}", rows: 1).and_return('response' => { 'docs' => mock_records[0] })
allow(ActiveFedora::SolrService).to receive(:get).with("title_tesim:#{admin_set_name}", {'df'=>'title_tesim', :rows=>1}).and_return('response' => { 'docs' => [] })
allow(Rails.logger).to receive(:warn)
result = WorkUtilsHelper.fetch_work_data_by_fileset_id(fileset_ids[1])
expect(Rails.logger).to have_received(:warn).with("No admin set found with title_tesim: #{admin_set_name}.")
expect(result[:admin_set_id]).to be_nil
end
end
end
Expand Down
31 changes: 28 additions & 3 deletions spec/services/tasks/download_stats_migration_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
let(:service) { described_class.new }

before do
allow(ActiveFedora::SolrService).to receive(:get).with("title_tesim:#{admin_set_title}", rows: 1).and_return('response' => { 'docs' => [mock_admin_set] })
allow(ActiveFedora::SolrService).to receive(:get).with("title_tesim:#{admin_set_title}", { :rows => 1, 'df' => 'title_tesim'}).and_return('response' => { 'docs' => [mock_admin_set] })
end

after do
Expand Down Expand Up @@ -110,13 +110,13 @@
file_download_stats.flatten.each_with_index do |stat, index|
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{stat.file_id}", rows: 1).and_return('response' => { 'docs' => [mock_works[index]] })
end
service.list_work_stat_info(output_path, nil)
service.migrate_to_new_table(output_path)
end

after { HycDownloadStat.delete_all }

it 'creates new HycDownloadStat works from the CSV file' do
service.list_work_stat_info(output_path, nil)
service.migrate_to_new_table(output_path)
csv_to_hash_array(output_path).each_with_index do |csv_row, index|
work_data = WorkUtilsHelper.fetch_work_data_by_fileset_id(csv_row[:file_id])
csv_row_date = Date.parse(csv_row[:date]).beginning_of_month
Expand All @@ -130,6 +130,27 @@
end
end

it 'retains historic stats for a work even if the work cannot be found in solr' do
file_download_stats.flatten.each_with_index do |stat, index|
allow(ActiveFedora::SolrService).to receive(:get).with("file_set_ids_ssim:#{stat.file_id}", rows: 1).and_return('response' => { 'docs' => [] })
end
service.list_work_stat_info(output_path, nil)
service.migrate_to_new_table(output_path)
csv_to_hash_array(output_path).each_with_index do |csv_row, index|
work_data = WorkUtilsHelper.fetch_work_data_by_fileset_id(csv_row[:file_id])
csv_row_date = Date.parse(csv_row[:date]).beginning_of_month
hyc_download_stat = HycDownloadStat.find_by(fileset_id: csv_row[:file_id], date: csv_row_date)

expect(hyc_download_stat).to be_present
expect(hyc_download_stat.fileset_id).to eq(csv_row[:file_id])
expect(hyc_download_stat.work_id).to eq('Unknown')
expect(hyc_download_stat.admin_set_id).to eq('Unknown')
expect(hyc_download_stat.work_type).to eq('Unknown')
expect(hyc_download_stat.date).to eq(csv_row[:date].to_date)
expect(hyc_download_stat.download_count).to eq(expected_aggregated_download_count[[csv_row[:file_id], csv_row_date]])
end
end

it 'handles and logs errors' do
allow(CSV).to receive(:read).and_raise(StandardError, 'Simulated CSV read failure')
allow(Rails.logger).to receive(:error)
Expand All @@ -138,6 +159,10 @@
end

context 'if a failure occurs during a private function' do
before do
service.list_work_stat_info(output_path, nil)
end

it 'handles and logs errors from create_hyc_download_stat' do
allow(Rails.logger).to receive(:error)
# Simulate a failure during the creation of a HycDownloadStat object for a specific file_id
Expand Down

0 comments on commit 4c583d2

Please sign in to comment.