From 2cb78c3f6860a0fdf8c8b59664a307ffd5c2b769 Mon Sep 17 00:00:00 2001 From: Bruce Bolt Date: Thu, 21 Dec 2023 15:26:05 +0000 Subject: [PATCH 01/34] Update GitHub Actions for migrating to PostgreSQL This removes the mongo configuration and migrates us to PostgreSQL. --- .github/workflows/deploy.yml | 11 ++--------- .github/workflows/rspec.yml | 9 +++++---- .github/workflows/verify-pact.yml | 12 ++++++++---- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 8d429ffe..3037ff14 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -9,6 +9,7 @@ on: description: 'Commit, tag or branch name to deploy' required: true type: string + default: 'main' environment: description: 'Environment to deploy to' required: true @@ -18,14 +19,6 @@ on: - staging - production default: 'integration' - ecrRepositoryName: - description: 'ECR repo name to push image to' - required: true - type: choice - options: - - content-store - - content-store-postgresql-branch - default: 'content-store' release: types: [released] @@ -35,7 +28,7 @@ jobs: name: Build and publish image uses: alphagov/govuk-infrastructure/.github/workflows/build-and-push-image.yml@main with: - ecrRepositoryName: ${{ inputs.ecrRepositoryName || 'content-store' }} + ecrRepositoryName: content-store gitRef: ${{ inputs.gitRef || github.ref_name }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_GOVUK_ECR_ACCESS_KEY_ID }} diff --git a/.github/workflows/rspec.yml b/.github/workflows/rspec.yml index 54c09be9..985be32b 100644 --- a/.github/workflows/rspec.yml +++ b/.github/workflows/rspec.yml @@ -18,10 +18,9 @@ jobs: name: Run RSpec runs-on: ubuntu-latest steps: - - name: Setup MongoDB - uses: alphagov/govuk-infrastructure/.github/actions/setup-mongodb@main - with: - version: 2.6 + - name: Setup Postgres + id: setup-postgres + uses: alphagov/govuk-infrastructure/.github/actions/setup-postgres@main - name: Checkout repository uses: actions/checkout@v3 @@ -44,10 +43,12 @@ jobs: - name: Initialize database env: RAILS_ENV: test + TEST_DATABASE_URL: ${{ steps.setup-postgres.outputs.db-url }} run: bundle exec rails db:setup - name: Run RSpec env: RAILS_ENV: test + TEST_DATABASE_URL: ${{ steps.setup-postgres.outputs.db-url }} GOVUK_CONTENT_SCHEMAS_PATH: vendor/publishing-api/content_schemas run: bundle exec rake spec diff --git a/.github/workflows/verify-pact.yml b/.github/workflows/verify-pact.yml index 5f0d19a9..2cbfb079 100644 --- a/.github/workflows/verify-pact.yml +++ b/.github/workflows/verify-pact.yml @@ -26,10 +26,9 @@ jobs: env: RAILS_ENV: test steps: - - name: Setup MongoDB - uses: alphagov/govuk-infrastructure/.github/actions/setup-mongodb@main - with: - version: 2.6 + - name: Setup Postgres + id: setup-postgres + uses: alphagov/govuk-infrastructure/.github/actions/setup-postgres@main - name: Checkout repository uses: actions/checkout@v3 @@ -44,11 +43,14 @@ jobs: - name: Initialize database run: bundle exec rails db:setup + env: + TEST_DATABASE_URL: ${{ steps.setup-postgres.outputs.db-url }} - name: Verify pact consumer version if: inputs.pact_artifact == '' env: PACT_CONSUMER_VERSION: ${{ inputs.pact_consumer_version }} + TEST_DATABASE_URL: ${{ steps.setup-postgres.outputs.db-url }} run: bundle exec rake pact:verify - name: Download pact artifact @@ -61,3 +63,5 @@ jobs: - name: Verify pact artifact if: inputs.pact_artifact != '' run: bundle exec rake pact:verify:at[tmp/pacts/publishing_api-content_store.json] + env: + TEST_DATABASE_URL: ${{ 
steps.setup-postgres.outputs.db-url }} From 38b468fe848fefe5979e179987dfc8b811a11862 Mon Sep 17 00:00:00 2001 From: Bruce Bolt Date: Thu, 21 Dec 2023 15:37:39 +0000 Subject: [PATCH 02/34] Update Dockerfile for migration to PostgreSQL This removes the MongoDB configuration and adds PostgreSQL. --- Dockerfile | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index b84bd06c..0ca02cd5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,13 +14,8 @@ RUN bootsnap precompile --gemfile . FROM $base_image -# TODO: remove this temporary MongoDB package once we no longer need mongoexport (once the migration to Postgres is done). -ARG mongo_package=mongodb-database-tools-ubuntu2204-x86_64-100.7.2.deb -ARG mongo_package_repo=https://fastdl.mongodb.org/tools/db -WORKDIR /tmp -RUN curl -LSsf "${mongo_package_repo}/${mongo_package}" --output "${mongo_package}" && \ - apt-get install -y --no-install-recommends "./${mongo_package}" && \ - rm -fr /tmp/* +# Need psql for importing from Mongo exports +RUN install_packages postgresql-client ENV GOVUK_APP_NAME=content-store From 7250e2fdc5c36fe80240acbf0a0589823a9bc6cd Mon Sep 17 00:00:00 2001 From: Bruce Bolt Date: Thu, 21 Dec 2023 15:50:00 +0000 Subject: [PATCH 03/34] Remove MongoDB gems These won't be needed after migration to PostgreSQL. --- Gemfile | 5 ++--- Gemfile.lock | 19 ++++++------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/Gemfile b/Gemfile index 28bb5c23..824510f2 100644 --- a/Gemfile +++ b/Gemfile @@ -7,8 +7,7 @@ gem "deepsort" gem "gds-api-adapters" gem "gds-sso" gem "govuk_app_config" -gem "mongo", "2.15.0" # Later releases require Mongo >= 3.6 -gem "mongoid" +gem "pg" gem "plek" gem "rack-cache" gem "uuidtools" @@ -17,7 +16,7 @@ gem "whenever", require: false group :development, :test do gem "ci_reporter_rspec" gem "climate_control" - gem "database_cleaner-mongoid" + gem "database_cleaner-active_record" gem "factory_bot" gem "govuk_schemas" gem "govuk_test" diff --git a/Gemfile.lock b/Gemfile.lock index eaade299..0714c0c7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -73,7 +73,6 @@ GEM bootsnap (1.17.0) msgpack (~> 1.2) brakeman (6.1.0) - bson (4.15.0) builder (3.2.4) byebug (11.1.3) capybara (3.39.2) @@ -97,10 +96,10 @@ GEM crack (0.4.5) rexml crass (1.0.6) - database_cleaner-core (2.0.1) - database_cleaner-mongoid (2.0.1) + database_cleaner-active_record (2.1.0) + activerecord (>= 5.a) database_cleaner-core (~> 2.0.0) - mongoid + database_cleaner-core (2.0.1) date (3.3.3) deepsort (0.5.0) diff-lcs (1.5.0) @@ -194,12 +193,6 @@ GEM mini_mime (1.1.5) mini_portile2 (2.8.5) minitest (5.20.0) - mongo (2.15.0) - bson (>= 4.8.2, < 5.0.0) - mongoid (7.5.4) - activemodel (>= 5.1, < 7.1, != 7.0.0) - mongo (>= 2.10.5, < 3.0.0) - ruby2_keywords (~> 0.0.5) msgpack (1.7.2) multi_xml (0.6.0) net-imap (0.3.7) @@ -457,6 +450,7 @@ GEM ast (~> 2.4.1) racc parslet (2.0.0) + pg (1.5.4) plek (5.0.0) prometheus_exporter (2.0.8) webrick @@ -642,7 +636,7 @@ DEPENDENCIES bootsnap ci_reporter_rspec climate_control - database_cleaner-mongoid + database_cleaner-active_record deepsort factory_bot gds-api-adapters @@ -651,9 +645,8 @@ DEPENDENCIES govuk_schemas govuk_test listen - mongo (= 2.15.0) - mongoid pact + pg plek pry-byebug rack-cache From ac7e15de9d576f85044a085dfb80c1a58848cd93 Mon Sep 17 00:00:00 2001 From: Bruce Bolt Date: Wed, 27 Dec 2023 09:56:27 +0000 Subject: [PATCH 04/34] Add `.byebug_history` to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore 
b/.gitignore index 13e80e2c..b8ca9bb2 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,5 @@ /coverage /spec/reports + +.byebug_history From 1589c84113088af73754fee9671fe884d51f53b0 Mon Sep 17 00:00:00 2001 From: Bruce Bolt Date: Thu, 21 Dec 2023 16:03:59 +0000 Subject: [PATCH 05/34] Update Jenkinsfile for migration to PostgreSQL. --- Jenkinsfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 79b9ce82..18f8c58c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,9 @@ library("govuk") -node("mongodb-2.4") { +node { + govuk.setEnvar("TEST_DATABASE_URL", "postgresql://postgres@127.0.0.1:54313/content_store_test") + govuk.buildProject( extraParameters: [ stringParam( From bd37f081b64e2c56b5cdbc609fcb359f950a57e2 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Mon, 20 Mar 2023 11:23:50 +0000 Subject: [PATCH 06/34] Add basic ActiveRecord configuration for PostgreSQL This adds migrations which construct the basics of a database in PostgreSQL and dumps the schema. Remove mongo_instrumentation and setup in tests. --- .rubocop.yml | 3 - app/controllers/application_controller.rb | 2 +- app/lib/find_by_path.rb | 4 +- app/lib/mongo_instrumentation.rb | 3 - .../controller_runtime.rb | 28 ----- .../monitoring_subscriber.rb | 34 ------ .../mongo_instrumentation/runtime_registry.rb | 9 -- app/models/application_record.rb | 3 + app/models/content_item.rb | 84 +++---------- app/models/publish_intent.rb | 18 +-- app/models/scheduled_publishing_log_entry.rb | 11 +- app/models/user.rb | 16 +-- config/application.rb | 2 +- config/database.yml | 19 +++ config/initializers/mongo_instrumentation.rb | 1 - config/mongoid.yml | 34 ------ .../20230319100101_enable_pg_extensions.rb | 6 + .../20230320150042_add_content_items.rb | 48 ++++++++ .../20230324113335_add_publish_intents.rb | 17 +++ ...18_add_scheduled_publishing_log_entries.rb | 16 +++ db/migrate/20230327101936_add_users.rb | 25 ++++ ...042_add_routes_indexes_to_content_items.rb | 6 + ...957_add_routes_index_to_publish_intents.rb | 5 + db/schema.rb | 110 ++++++++++++++++++ .../integration/mongo_instrumentation_spec.rb | 38 ------ spec/integration/publish_intent_crud_spec.rb | 2 +- spec/lib/find_by_path_spec.rb | 2 +- spec/models/content_item_spec.rb | 14 +-- spec/support/database_cleaner.rb | 14 +++ spec/support/mongoid_indexes.rb | 7 -- spec/support/mongoid_inspection_helpers.rb | 40 ------- 31 files changed, 303 insertions(+), 318 deletions(-) delete mode 100644 app/lib/mongo_instrumentation.rb delete mode 100644 app/lib/mongo_instrumentation/controller_runtime.rb delete mode 100644 app/lib/mongo_instrumentation/monitoring_subscriber.rb delete mode 100644 app/lib/mongo_instrumentation/runtime_registry.rb create mode 100644 app/models/application_record.rb create mode 100644 config/database.yml delete mode 100644 config/initializers/mongo_instrumentation.rb delete mode 100644 config/mongoid.yml create mode 100644 db/migrate/20230319100101_enable_pg_extensions.rb create mode 100644 db/migrate/20230320150042_add_content_items.rb create mode 100644 db/migrate/20230324113335_add_publish_intents.rb create mode 100644 db/migrate/20230327101118_add_scheduled_publishing_log_entries.rb create mode 100644 db/migrate/20230327101936_add_users.rb create mode 100644 db/migrate/20230328131042_add_routes_indexes_to_content_items.rb create mode 100644 db/migrate/20230328141957_add_routes_index_to_publish_intents.rb create mode 100644 db/schema.rb delete mode 100644 spec/integration/mongo_instrumentation_spec.rb 
delete mode 100644 spec/support/mongoid_indexes.rb delete mode 100644 spec/support/mongoid_inspection_helpers.rb diff --git a/.rubocop.yml b/.rubocop.yml index a7fa9c42..945d6fc3 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -17,6 +17,3 @@ inherit_mode: # See https://github.com/alphagov/rubocop-govuk/blob/main/CONTRIBUTING.md # ************************************************************** -# Mongoid doesn't support this -Rails/FindBy: - Enabled: false diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index b39756d8..f3c55552 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -3,7 +3,7 @@ class ApplicationController < ActionController::API class InvalidRequest < RuntimeError; end before_action :authenticate_user! - rescue_from Mongoid::Errors::DocumentNotFound, with: :error_404 + rescue_from ActiveRecord::RecordNotFound, with: :error_404 rescue_from InvalidRequest, with: :error_400 private diff --git a/app/lib/find_by_path.rb b/app/lib/find_by_path.rb index 47785c74..dd918d0e 100644 --- a/app/lib/find_by_path.rb +++ b/app/lib/find_by_path.rb @@ -1,4 +1,4 @@ -# This class is designed to work with a Mongoid model that has base_path, +# This class is designed to work with a model that has base_path, # routes and (optionally) redirect fields (where the routes and redirects # field matches the govuk schema of an array of objects with path and type # fields) @@ -13,7 +13,7 @@ def initialize(model_class) end def find(path) - exact_match = model_class.where(base_path: path).find_first + exact_match = model_class.where(base_path: path).first return exact_match if exact_match matches = find_route_matches(path) diff --git a/app/lib/mongo_instrumentation.rb b/app/lib/mongo_instrumentation.rb deleted file mode 100644 index 8b0a3bd7..00000000 --- a/app/lib/mongo_instrumentation.rb +++ /dev/null @@ -1,3 +0,0 @@ -require "mongo_instrumentation/runtime_registry" -require "mongo_instrumentation/monitoring_subscriber" -require "mongo_instrumentation/controller_runtime" diff --git a/app/lib/mongo_instrumentation/controller_runtime.rb b/app/lib/mongo_instrumentation/controller_runtime.rb deleted file mode 100644 index 322f7fbf..00000000 --- a/app/lib/mongo_instrumentation/controller_runtime.rb +++ /dev/null @@ -1,28 +0,0 @@ -module MongoInstrumentation - # Used to extend ActionController to output additional logging information on - # the duration of Mongo queries. - module ControllerRuntime - extend ActiveSupport::Concern - - protected - - def append_info_to_payload(payload) - super - payload[:db_runtime] = MongoInstrumentation::MonitoringSubscriber.runtime || 0 - MongoInstrumentation::MonitoringSubscriber.reset_runtime - end - - module ClassMethods - def log_process_action(payload) - super.tap do |messages| - runtime = payload[:db_runtime] - messages << sprintf("Mongo: %.1fms", (runtime.to_f * 1000)) - end - end - end - end -end - -ActiveSupport.on_load(:action_controller) do - include MongoInstrumentation::ControllerRuntime -end diff --git a/app/lib/mongo_instrumentation/monitoring_subscriber.rb b/app/lib/mongo_instrumentation/monitoring_subscriber.rb deleted file mode 100644 index 7415c901..00000000 --- a/app/lib/mongo_instrumentation/monitoring_subscriber.rb +++ /dev/null @@ -1,34 +0,0 @@ -module MongoInstrumentation - # Object for consuming events generated by the mongodb driver. - # - # Subscribers must implement started, succeeded and failed, but we do nothing - # for started. 
- # - # This uses `MongoInstrumentation::RuntimeRegistry` to store the cumulative - # duration of mongo queries. - class MonitoringSubscriber - def self.runtime=(value) - MongoInstrumentation::RuntimeRegistry.mongo_runtime = value - end - - def self.runtime - MongoInstrumentation::RuntimeRegistry.mongo_runtime ||= 0 - end - - def self.reset_runtime - self.runtime = 0 - end - - def started(event); end - - def succeeded(event) - self.class.runtime += event.duration - end - - def failed(event) - self.class.runtime += event.duration - end - end -end - -Mongo::Monitoring::Global.subscribe(Mongo::Monitoring::COMMAND, MongoInstrumentation::MonitoringSubscriber.new) diff --git a/app/lib/mongo_instrumentation/runtime_registry.rb b/app/lib/mongo_instrumentation/runtime_registry.rb deleted file mode 100644 index a0f575ef..00000000 --- a/app/lib/mongo_instrumentation/runtime_registry.rb +++ /dev/null @@ -1,9 +0,0 @@ -require "active_support/per_thread_registry" - -module MongoInstrumentation - # This is a namespaced thread locals registry for tracking the duration of - # mongo queries. - class RuntimeRegistry - thread_mattr_accessor :mongo_runtime, instance_accessor: false - end -end diff --git a/app/models/application_record.rb b/app/models/application_record.rb new file mode 100644 index 00000000..10a4cba8 --- /dev/null +++ b/app/models/application_record.rb @@ -0,0 +1,3 @@ +class ApplicationRecord < ActiveRecord::Base + self.abstract_class = true +end diff --git a/app/models/content_item.rb b/app/models/content_item.rb index 6db004b0..2a742a60 100644 --- a/app/models/content_item.rb +++ b/app/models/content_item.rb @@ -1,10 +1,11 @@ -class ContentItem - include Mongoid::Document - include Mongoid::Timestamps +class ContentItem < ApplicationRecord + validates_each :routes, :redirects do |record, attr, value| + record.errors.add attr, "Value of type #{record.class} cannot be written to a field of type Array" unless value.nil? || value.respond_to?(:each) + end def self.revert(previous_item:, item:) - item.remove unless previous_item - previous_item&.upsert + item.destroy! unless previous_item + previous_item&.save! end def self.create_or_replace(base_path, attributes, log_entry) @@ -18,20 +19,14 @@ def self.create_or_replace(base_path, attributes, log_entry) item = ContentItem.new(base_path:) - # This doesn't seem to get set correctly on an upsert so this is to - # maintain it - created_at = previous_item ? previous_item.created_at : Time.zone.now.utc - item.assign_attributes( attributes - .merge(scheduled_publication_details(log_entry)) - .merge(created_at:) - .compact, + .merge(scheduled_publication_details(log_entry)), ) - if item.upsert + if item.save! 
begin - item.register_routes(previous_item:) + item.register_routes rescue StandardError revert(previous_item:, item:) raise @@ -41,11 +36,11 @@ def self.create_or_replace(base_path, attributes, log_entry) end [result, item] - rescue Mongoid::Errors::UnknownAttribute - extra_fields = attributes.keys - fields.keys + rescue ActiveRecord::UnknownAttributeError + extra_fields = attributes.keys - new.attributes.keys item.errors.add(:base, "unrecognised field(s) #{extra_fields.join(', ')} in input") [false, item] - rescue Mongoid::Errors::InvalidValue => e + rescue ActiveModel::ValidationError => e item.errors.add(:base, e.message) [false, item] rescue OutOfOrderTransmissionError => e @@ -65,53 +60,6 @@ def self.find_by_path(path) ::FindByPath.new(self).find(path) end - field :_id, as: :base_path, type: String, overwrite: true - field :content_id, type: String - field :title, type: String - field :description, type: Hash, default: { "value" => nil } - field :document_type, type: String - - # Supertypes are deprecated, but are still sent by the publishing-api. - field :content_purpose_document_supertype, type: String, default: "" - field :content_purpose_subgroup, type: String, default: "" - field :content_purpose_supergroup, type: String, default: "" - field :email_document_supertype, type: String, default: "" - field :government_document_supertype, type: String, default: "" - field :navigation_document_supertype, type: String, default: "" - field :search_user_need_document_supertype, type: String, default: "" - field :user_journey_document_supertype, type: String, default: "" - - field :schema_name, type: String - field :locale, type: String, default: I18n.default_locale.to_s - field :first_published_at, type: ::ActiveSupport::TimeWithZone - field :public_updated_at, type: ::ActiveSupport::TimeWithZone - field :publishing_scheduled_at, type: ::ActiveSupport::TimeWithZone - field :scheduled_publishing_delay_seconds, type: Integer - field :details, type: Hash, default: {} - field :publishing_app, type: String - field :rendering_app, type: String - field :routes, type: Array, default: [] - field :redirects, type: Array, default: [] - field :expanded_links, type: Hash, default: {} - field :access_limited, type: Hash, default: {} - field :auth_bypass_ids, type: Array, default: [] - field :phase, type: String, default: "live" - field :analytics_identifier, type: String - field :payload_version, type: Integer - field :withdrawn_notice, type: Hash, default: {} - field :publishing_request_id, type: String, default: nil - - # The updated_at field isn't set on upsert - https://jira.mongodb.org/browse/MONGOID-3716 - before_upsert :set_updated_at - - # We want to look up content items by whether they match a route and the type - # of route. - index("routes.path" => 1, "routes.type" => 1) - - # We want to look up content items by whether they match a redirect and the - # type of redirect. - index("redirects.path" => 1, "redirects.type" => 1) - # We want to force the JSON representation to use "base_path" instead of # "_id" to prevent "_id" being exposed outside of the model. def as_json(options = nil) @@ -190,8 +138,8 @@ def access_limited? access_limited_user_ids.any? || access_limited_organisation_ids.any? end - def register_routes(previous_item: nil) - return unless should_register_routes?(previous_item:) + def register_routes + return unless should_register_routes? 
tries = Rails.application.config.register_router_retries begin @@ -218,8 +166,8 @@ def route_set private - def should_register_routes?(previous_item: nil) - return previous_item.route_set != route_set if previous_item + def should_register_routes? + return false if schema_name.to_s.start_with?("placeholder") true end diff --git a/app/models/publish_intent.rb b/app/models/publish_intent.rb index ec47d74a..7a78f3ce 100644 --- a/app/models/publish_intent.rb +++ b/app/models/publish_intent.rb @@ -1,18 +1,15 @@ -class PublishIntent - include Mongoid::Document - include Mongoid::Timestamps - +class PublishIntent < ApplicationRecord def self.create_or_update(base_path, attributes) intent = PublishIntent.find_or_initialize_by(base_path:) result = intent.new_record? ? :created : :replaced result = false unless intent.update(attributes) [result, intent] - rescue Mongoid::Errors::UnknownAttribute + rescue ActiveRecord::UnknownAttributeError extra_fields = attributes.keys - fields.keys intent.errors.add(:base, "unrecognised field(s) #{extra_fields.join(', ')} in input") [false, intent] - rescue Mongoid::Errors::InvalidValue => e + rescue ActiveModel::ValidationError => e intent.errors.add(:base, e.message) [false, intent] end @@ -23,15 +20,6 @@ def self.find_by_path(path) PUBLISH_TIME_LEEWAY = 5.minutes - field :_id, as: :base_path, type: String, overwrite: true - field :publish_time, type: ::ActiveSupport::TimeWithZone - field :publishing_app, type: String - field :rendering_app, type: String - field :routes, type: Array, default: [] - - # We want to look up this model by route as well as the base_path - index("routes.path" => 1, "routes.type" => 1) - validates :base_path, absolute_path: true validates :publish_time, presence: true validates :rendering_app, presence: true, format: /\A[a-z0-9-]*\z/ diff --git a/app/models/scheduled_publishing_log_entry.rb b/app/models/scheduled_publishing_log_entry.rb index b983d5a4..f671e911 100644 --- a/app/models/scheduled_publishing_log_entry.rb +++ b/app/models/scheduled_publishing_log_entry.rb @@ -1,13 +1,4 @@ -class ScheduledPublishingLogEntry - include Mongoid::Document - include Mongoid::Timestamps::Created - field :base_path, type: String - field :document_type, type: String - field :scheduled_publication_time, type: ::ActiveSupport::TimeWithZone - field :delay_in_milliseconds - - index(base_path: 1) - +class ScheduledPublishingLogEntry < ApplicationRecord before_save do |document| document.delay_in_milliseconds = set_delay_in_milliseconds end diff --git a/app/models/user.rb b/app/models/user.rb index 528bbe86..34743b96 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -1,17 +1,3 @@ -class User - include Mongoid::Document - include Mongoid::Timestamps +class User < ApplicationRecord include GDS::SSO::User - - field "name", type: String - field "uid", type: String - field "email", type: String - field "permissions", type: Array - field "remotely_signed_out", type: Boolean, default: false - field "organisation_slug", type: String - field "disabled", type: Boolean, default: false - field "organisation_content_id", type: String - - index({ uid: 1 }, { unique: true }) - index disabled: 1 end diff --git a/config/application.rb b/config/application.rb index 7052b4d0..c11a063d 100644 --- a/config/application.rb +++ b/config/application.rb @@ -4,7 +4,7 @@ # Pick the frameworks you want: # require "active_model/railtie" # require "active_job/railtie" -# require "active_record/railtie" +require "active_record/railtie" # require "active_storage/engine" require 
"action_controller/railtie" # require "action_mailer/railtie" diff --git a/config/database.yml b/config/database.yml new file mode 100644 index 00000000..62301a73 --- /dev/null +++ b/config/database.yml @@ -0,0 +1,19 @@ +default: &default + adapter: postgresql + encoding: unicode + pool: 12 + template: template0 + +development: + <<: *default + database: content_store_development + url: <%= ENV["DATABASE_URL"]%> + +test: + <<: *default + database: content_store_test + url: <%= ENV["TEST_DATABASE_URL"] %> + +production: + <<: *default + # Rails reads values from DATABASE_URL env var. diff --git a/config/initializers/mongo_instrumentation.rb b/config/initializers/mongo_instrumentation.rb deleted file mode 100644 index afc10a5f..00000000 --- a/config/initializers/mongo_instrumentation.rb +++ /dev/null @@ -1 +0,0 @@ -require "mongo_instrumentation" diff --git a/config/mongoid.yml b/config/mongoid.yml deleted file mode 100644 index d9dfe832..00000000 --- a/config/mongoid.yml +++ /dev/null @@ -1,34 +0,0 @@ -development: - clients: - default: - # MONGODB_URI includes draft_content_store_development or content_store_development - # depending on whether we're running content store in draft mode or not. - uri: <%= ENV['MONGODB_URI'] || 'mongodb://localhost/content_store_development' %> - options: - write: - w: 1 - -test: - clients: - default: - uri: <%= ENV['TEST_MONGODB_URI'] || 'mongodb://localhost/content_store_test' %> - options: - write: - w: 1 - # In the test environment we lower the retries and retry interval to - # low amounts for fast failures. - max_retries: 1 - retry_interval: 0 - -production: - clients: - default: - uri: <%= ENV['MONGODB_URI'] %> - options: - ssl: <%= ENV['MONGO_SSL'] || 'false' %> - ssl_verify: <%= ENV['MONGO_SSL_VERIFY'] || 'true' %> - server_selection_timeout: 5 - write: - w: <%= ENV['MONGO_WRITE_CONCERN'] || 'majority' %> - read: - mode: :secondary_preferred diff --git a/db/migrate/20230319100101_enable_pg_extensions.rb b/db/migrate/20230319100101_enable_pg_extensions.rb new file mode 100644 index 00000000..f49aa1b8 --- /dev/null +++ b/db/migrate/20230319100101_enable_pg_extensions.rb @@ -0,0 +1,6 @@ +class EnablePgExtensions < ActiveRecord::Migration[7.0] + def change + enable_extension "uuid-ossp" + enable_extension "pgcrypto" + end +end diff --git a/db/migrate/20230320150042_add_content_items.rb b/db/migrate/20230320150042_add_content_items.rb new file mode 100644 index 00000000..46154af3 --- /dev/null +++ b/db/migrate/20230320150042_add_content_items.rb @@ -0,0 +1,48 @@ +class AddContentItems < ActiveRecord::Migration[7.0] + def change + create_table :content_items, id: :uuid do |t| + t.string :base_path, unique: true + t.string :content_id + t.string :title + t.jsonb :description, default: { "value" => nil } + t.string :document_type + + # Supertypes are deprecated, but are still sent by the publishing-api. 
+ t.string :content_purpose_document_supertype, default: "" + t.string :content_purpose_subgroup, default: "" + t.string :content_purpose_supergroup, default: "" + t.string :email_document_supertype, default: "" + t.string :government_document_supertype, default: "" + t.string :navigation_document_supertype, default: "" + t.string :search_user_need_document_supertype, default: "" + t.string :user_journey_document_supertype, default: "" + + t.string :schema_name + t.string :locale, default: I18n.default_locale.to_s + t.datetime :first_published_at + t.datetime :public_updated_at + t.datetime :publishing_scheduled_at + t.integer :scheduled_publishing_delay_seconds + t.jsonb :details, default: {} + t.string :publishing_app + t.string :rendering_app + t.jsonb :routes, default: [] + t.jsonb :redirects, default: [] + t.jsonb :expanded_links, default: {} + t.jsonb :access_limited, default: {} + t.string :auth_bypass_ids, array: true, default: [] + t.string :phase, default: "live" + t.string :analytics_identifier + t.integer :payload_version + t.jsonb :withdrawn_notice, default: {} + t.string :publishing_request_id, null: true, default: nil + + t.timestamps + end + + add_index :content_items, :base_path, unique: true + add_index :content_items, :content_id + add_index :content_items, :created_at + add_index :content_items, :updated_at + end +end diff --git a/db/migrate/20230324113335_add_publish_intents.rb b/db/migrate/20230324113335_add_publish_intents.rb new file mode 100644 index 00000000..e26c019a --- /dev/null +++ b/db/migrate/20230324113335_add_publish_intents.rb @@ -0,0 +1,17 @@ +class AddPublishIntents < ActiveRecord::Migration[7.0] + def change + create_table :publish_intents, id: :uuid do |t| + t.string :base_path, index: { unique: true } + t.datetime :publish_time + t.string :publishing_app + t.string :rendering_app + t.jsonb :routes, default: [] + + t.timestamps + end + + add_index :publish_intents, :publish_time + add_index :publish_intents, :created_at + add_index :publish_intents, :updated_at + end +end diff --git a/db/migrate/20230327101118_add_scheduled_publishing_log_entries.rb b/db/migrate/20230327101118_add_scheduled_publishing_log_entries.rb new file mode 100644 index 00000000..c5479917 --- /dev/null +++ b/db/migrate/20230327101118_add_scheduled_publishing_log_entries.rb @@ -0,0 +1,16 @@ +class AddScheduledPublishingLogEntries < ActiveRecord::Migration[7.0] + def change + create_table :scheduled_publishing_log_entries, id: :uuid do |t| + t.string :base_path + t.string :document_type + t.datetime :scheduled_publication_time + t.bigint :delay_in_milliseconds + + t.timestamps + end + + add_index :scheduled_publishing_log_entries, :base_path, name: "ix_scheduled_pub_log_base_path" + add_index :scheduled_publishing_log_entries, :scheduled_publication_time, name: "ix_scheduled_pub_log_time" + add_index :scheduled_publishing_log_entries, :created_at, name: "ix_scheduled_pub_log_created" + end +end diff --git a/db/migrate/20230327101936_add_users.rb b/db/migrate/20230327101936_add_users.rb new file mode 100644 index 00000000..e5f1fa3c --- /dev/null +++ b/db/migrate/20230327101936_add_users.rb @@ -0,0 +1,25 @@ +class AddUsers < ActiveRecord::Migration[7.0] + def change + create_table :users, id: :uuid do |t| + t.string :name + t.string :uid, unique: true + t.string :email + t.string :permissions, array: true + t.boolean :remotely_signed_out, default: false + t.string :organisation_slug + t.boolean :disabled, default: false + t.string :organisation_content_id + + t.timestamps + end + + 
add_index :users, :uid, unique: true + add_index :users, :email + add_index :users, :name + add_index :users, :organisation_content_id + add_index :users, :organisation_slug + add_index :users, :created_at + add_index :users, :updated_at + add_index :users, :disabled + end +end diff --git a/db/migrate/20230328131042_add_routes_indexes_to_content_items.rb b/db/migrate/20230328131042_add_routes_indexes_to_content_items.rb new file mode 100644 index 00000000..37150297 --- /dev/null +++ b/db/migrate/20230328131042_add_routes_indexes_to_content_items.rb @@ -0,0 +1,6 @@ +class AddRoutesIndexesToContentItems < ActiveRecord::Migration[7.0] + def change + add_index :content_items, :routes, using: :gin + add_index :content_items, :redirects, using: :gin + end +end diff --git a/db/migrate/20230328141957_add_routes_index_to_publish_intents.rb b/db/migrate/20230328141957_add_routes_index_to_publish_intents.rb new file mode 100644 index 00000000..30dc7161 --- /dev/null +++ b/db/migrate/20230328141957_add_routes_index_to_publish_intents.rb @@ -0,0 +1,5 @@ +class AddRoutesIndexToPublishIntents < ActiveRecord::Migration[7.0] + def change + add_index :publish_intents, :routes, using: :gin + end +end diff --git a/db/schema.rb b/db/schema.rb new file mode 100644 index 00000000..12ee1cb8 --- /dev/null +++ b/db/schema.rb @@ -0,0 +1,110 @@ +# This file is auto-generated from the current state of the database. Instead +# of editing this file, please use the migrations feature of Active Record to +# incrementally modify your database, and then regenerate this schema definition. +# +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. +# +# It's strongly recommended that you check this file into your version control system. 
+ +ActiveRecord::Schema[7.0].define(version: 2023_03_28_141957) do + # These are extensions that must be enabled in order to support this database + enable_extension "pgcrypto" + enable_extension "plpgsql" + enable_extension "uuid-ossp" + + create_table "content_items", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| + t.string "base_path" + t.string "content_id" + t.string "title" + t.jsonb "description", default: {"value"=>nil} + t.string "document_type" + t.string "content_purpose_document_supertype", default: "" + t.string "content_purpose_subgroup", default: "" + t.string "content_purpose_supergroup", default: "" + t.string "email_document_supertype", default: "" + t.string "government_document_supertype", default: "" + t.string "navigation_document_supertype", default: "" + t.string "search_user_need_document_supertype", default: "" + t.string "user_journey_document_supertype", default: "" + t.string "schema_name" + t.string "locale", default: "en" + t.datetime "first_published_at" + t.datetime "public_updated_at" + t.datetime "publishing_scheduled_at" + t.integer "scheduled_publishing_delay_seconds" + t.jsonb "details", default: {} + t.string "publishing_app" + t.string "rendering_app" + t.jsonb "routes", default: [] + t.jsonb "redirects", default: [] + t.jsonb "expanded_links", default: {} + t.jsonb "access_limited", default: {} + t.string "auth_bypass_ids", default: [], array: true + t.string "phase", default: "live" + t.string "analytics_identifier" + t.integer "payload_version" + t.jsonb "withdrawn_notice", default: {} + t.string "publishing_request_id" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["base_path"], name: "index_content_items_on_base_path", unique: true + t.index ["content_id"], name: "index_content_items_on_content_id" + t.index ["created_at"], name: "index_content_items_on_created_at" + t.index ["redirects"], name: "index_content_items_on_redirects", using: :gin + t.index ["routes"], name: "index_content_items_on_routes", using: :gin + t.index ["updated_at"], name: "index_content_items_on_updated_at" + end + + create_table "publish_intents", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| + t.string "base_path" + t.datetime "publish_time" + t.string "publishing_app" + t.string "rendering_app" + t.jsonb "routes", default: [] + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["base_path"], name: "index_publish_intents_on_base_path", unique: true + t.index ["created_at"], name: "index_publish_intents_on_created_at" + t.index ["publish_time"], name: "index_publish_intents_on_publish_time" + t.index ["routes"], name: "index_publish_intents_on_routes", using: :gin + t.index ["updated_at"], name: "index_publish_intents_on_updated_at" + end + + create_table "scheduled_publishing_log_entries", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| + t.string "base_path" + t.string "document_type" + t.datetime "scheduled_publication_time" + t.bigint "delay_in_milliseconds" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["base_path"], name: "ix_scheduled_pub_log_base_path" + t.index ["created_at"], name: "ix_scheduled_pub_log_created" + t.index ["scheduled_publication_time"], name: "ix_scheduled_pub_log_time" + end + + create_table "users", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| + t.string "name" + t.string "uid" + t.string "email" + t.string "permissions", 
array: true + t.boolean "remotely_signed_out", default: false + t.string "organisation_slug" + t.boolean "disabled", default: false + t.string "organisation_content_id" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["created_at"], name: "index_users_on_created_at" + t.index ["disabled"], name: "index_users_on_disabled" + t.index ["email"], name: "index_users_on_email" + t.index ["name"], name: "index_users_on_name" + t.index ["organisation_content_id"], name: "index_users_on_organisation_content_id" + t.index ["organisation_slug"], name: "index_users_on_organisation_slug" + t.index ["uid"], name: "index_users_on_uid", unique: true + t.index ["updated_at"], name: "index_users_on_updated_at" + end + +end diff --git a/spec/integration/mongo_instrumentation_spec.rb b/spec/integration/mongo_instrumentation_spec.rb deleted file mode 100644 index 07353be9..00000000 --- a/spec/integration/mongo_instrumentation_spec.rb +++ /dev/null @@ -1,38 +0,0 @@ -require "rails_helper" -require "active_support/log_subscriber/test_helper" - -describe "monitoring mongo query runtimes", type: :request do - let(:logger) { ActiveSupport::LogSubscriber::TestHelper::MockLogger.new } - - before(:each) do - @old_logger = ActionController::Base.logger - @old_notifier = ActiveSupport::Notifications.notifier - - ActionController::Base.logger = logger - end - - after(:each) do - ActionController::Base.logger = @old_logger - ActiveSupport::Notifications.notifier = @old_notifier - end - - context "a request that results in mongo queries" do - let(:content_item) { create(:content_item) } - - before(:each) do - get content_item_path(content_item) - end - - it "includes the mongo runtime info in the log output" do - expect(logger.logged(:info).last).to match(/Views: [\d.]+ms/) - - runtime = logger.logged(:info).last.match(/Mongo: ([\d.]+)ms/)[1].to_f - - expect(runtime).to be > 0 - end - - it "resets the mongo runtime after the request has completed" do - expect(MongoInstrumentation::MonitoringSubscriber.runtime).to eq(0) - end - end -end diff --git a/spec/integration/publish_intent_crud_spec.rb b/spec/integration/publish_intent_crud_spec.rb index 35153b43..c5cdba7d 100644 --- a/spec/integration/publish_intent_crud_spec.rb +++ b/spec/integration/publish_intent_crud_spec.rb @@ -136,7 +136,7 @@ expect(response.status).to eq(422) data = JSON.parse(response.body) - expected_error_message = Mongoid::Errors::InvalidValue.new(Array, String).message + expected_error_message = ActiveModel::ValidationError.new(Array, String).message expect(data["errors"]).to eq("base" => [expected_error_message]) end diff --git a/spec/lib/find_by_path_spec.rb b/spec/lib/find_by_path_spec.rb index da4d7a4b..30ba637a 100644 --- a/spec/lib/find_by_path_spec.rb +++ b/spec/lib/find_by_path_spec.rb @@ -5,7 +5,7 @@ # Using an actual Model as it's a real pain to mock mongoid criterias and # similar Class.new do - include Mongoid::Document + extend ApplicationRecord store_in collection: "find_by_path_examples" field :base_path, type: String diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb index fe78eace..c0d84c3a 100644 --- a/spec/models/content_item_spec.rb +++ b/spec/models/content_item_spec.rb @@ -30,7 +30,7 @@ context "when unknown attributes are provided" do let(:attributes) { { "foo" => "foo", "bar" => "bar" } } - it "handles Mongoid::Errors::UnknownAttribute" do + it "handles ActiveRecord::UnknownAttributeError" do result = item = nil expect { @@ -45,7 +45,7 @@ context "when assigning 
a value of incorrect type" do let(:attributes) { { "routes" => 12 } } - it "handles Mongoid::Errors::InvalidValue" do + it "handles ActiveModel::ValidationError" do result = item = nil expect { @@ -54,7 +54,7 @@ }.to_not raise_error expect(result).to be false - expected_error_message = Mongoid::Errors::InvalidValue.new(Array, 12.class).message + expected_error_message = "Value of type Integer cannot be written to a field of type Array" expect(item.errors[:base]).to include(expected_error_message) end end @@ -77,7 +77,7 @@ context "with current attributes and no previous item" do let(:attributes) { @item.attributes } - it "upserts the item" do + it "saves the item" do result = item = nil expect { result, item = ContentItem.create_or_replace(@item.base_path, attributes, nil) @@ -93,7 +93,7 @@ let(:attributes) { @item.attributes } - it "upserts the item" do + it "saves the item" do result = item = nil expect { result, item = ContentItem.create_or_replace(@item.base_path, attributes, nil) @@ -142,10 +142,10 @@ it_behaves_like "find_by_path", :content_item end - it "should set updated_at on upsert" do + it "should set updated_at on save" do item = build(:content_item) Timecop.freeze do - item.upsert + item.save! item.reload expect(item.updated_at.to_s).to eq(Time.zone.now.to_s) diff --git a/spec/support/database_cleaner.rb b/spec/support/database_cleaner.rb index 633db191..df02554f 100644 --- a/spec/support/database_cleaner.rb +++ b/spec/support/database_cleaner.rb @@ -1,4 +1,18 @@ RSpec.configure do |config| + config.before(:suite) do + DatabaseCleaner.allow_remote_database_url = true + DatabaseCleaner.clean_with :truncation + end + + # Not using around hook due to https://github.com/DatabaseCleaner/database_cleaner/issues/273 + + config.before :each do + DatabaseCleaner.strategy = :transaction + end + config.before :each, js: true do + DatabaseCleaner.strategy = :truncation + end + config.before :each do DatabaseCleaner.start end diff --git a/spec/support/mongoid_indexes.rb b/spec/support/mongoid_indexes.rb deleted file mode 100644 index 77dd51c3..00000000 --- a/spec/support/mongoid_indexes.rb +++ /dev/null @@ -1,7 +0,0 @@ -RSpec.configure do |config| - config.before(:suite) do - silence_warnings do - ::Mongoid::Tasks::Database.create_indexes - end - end -end diff --git a/spec/support/mongoid_inspection_helpers.rb b/spec/support/mongoid_inspection_helpers.rb deleted file mode 100644 index 85b0e625..00000000 --- a/spec/support/mongoid_inspection_helpers.rb +++ /dev/null @@ -1,40 +0,0 @@ -module MongoidInspectionHelpers - # Object for consuming ActiveSupport::Notifications generated by the moped gem - # that drives mongoid. - # - # The only topic that we are currently interested in is "query.moped", which - # is triggered on every MongoDB query. - class MopedQueryCounter < ActiveSupport::LogSubscriber - cattr_accessor :query_count - - def self.reset - self.query_count = 0 - end - - # The event handler for "query.moped" notifications. 
- def query(_event) - self.class.query_count += 1 - end - end - - def mongoid_query_count - MopedQueryCounter.query_count - end - - def reset_mongoid_query_count - MopedQueryCounter.reset - end -end - -RSpec.configure do |config| - config.include(MongoidInspectionHelpers) - - config.before :suite do - MongoidInspectionHelpers::MopedQueryCounter.reset - MongoidInspectionHelpers::MopedQueryCounter.attach_to :moped - end - - config.before :each do - reset_mongoid_query_count - end -end From 81acc74ab8e8edd20929168f3e43703f8106cbb9 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Thu, 30 Mar 2023 13:00:00 +0100 Subject: [PATCH 07/34] Port ContentItem.create_or_replace to ActiveRecord --- app/models/content_item.rb | 25 ++++++++++++------------- spec/models/content_item_spec.rb | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/app/models/content_item.rb b/app/models/content_item.rb index 2a742a60..8ba5752e 100644 --- a/app/models/content_item.rb +++ b/app/models/content_item.rb @@ -1,6 +1,8 @@ class ContentItem < ApplicationRecord validates_each :routes, :redirects do |record, attr, value| - record.errors.add attr, "Value of type #{record.class} cannot be written to a field of type Array" unless value.nil? || value.respond_to?(:each) + # This wording replicates the original Mongoid error message - we don't know if any downstream + # consumers rely on parsing error messages at the moment + record.errors.add attr, "Value of type #{value.class} cannot be written to a field of type Array" unless value.nil? || value.respond_to?(:each) end def self.revert(previous_item:, item:) @@ -9,30 +11,27 @@ def self.revert(previous_item:, item:) end def self.create_or_replace(base_path, attributes, log_entry) - previous_item = ContentItem.where(base_path:).first + item = ContentItem.find_or_initialize_by(base_path:) + previous_item = item.persisted? ? item : nil lock = UpdateLock.new(previous_item) payload_version = attributes["payload_version"] lock.check_availability!(payload_version) result = previous_item ? :replaced : :created - - item = ContentItem.new(base_path:) - item.assign_attributes( attributes .merge(scheduled_publication_details(log_entry)), ) - if item.save! - begin - item.register_routes - rescue StandardError - revert(previous_item:, item:) - raise + begin + transaction do + item.save! 
+ item.register_routes(previous_item:) end - else + rescue StandardError result = false + raise end [result, item] @@ -40,7 +39,7 @@ def self.create_or_replace(base_path, attributes, log_entry) extra_fields = attributes.keys - new.attributes.keys item.errors.add(:base, "unrecognised field(s) #{extra_fields.join(', ')} in input") [false, item] - rescue ActiveModel::ValidationError => e + rescue ActiveRecord::RecordInvalid => e item.errors.add(:base, e.message) [false, item] rescue OutOfOrderTransmissionError => e diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb index c0d84c3a..35bb888c 100644 --- a/spec/models/content_item_spec.rb +++ b/spec/models/content_item_spec.rb @@ -55,7 +55,7 @@ expect(result).to be false expected_error_message = "Value of type Integer cannot be written to a field of type Array" - expect(item.errors[:base]).to include(expected_error_message) + expect(item.errors[:base].find { |e| e.include?(expected_error_message) }).not_to be_nil end end From 64d12101b955eae83ad181d1be984571e767219a Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 4 Apr 2023 12:31:35 +0100 Subject: [PATCH 08/34] Port find_by_path to ActiveRecord --- app/controllers/content_items_controller.rb | 2 +- app/lib/find_by_path.rb | 24 +++++++++++++++------ app/models/publish_intent.rb | 1 - spec/lib/find_by_path_spec.rb | 11 +--------- spec/models/publish_intent_spec.rb | 2 +- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/app/controllers/content_items_controller.rb b/app/controllers/content_items_controller.rb index cf433aa2..652d2a28 100644 --- a/app/controllers/content_items_controller.rb +++ b/app/controllers/content_items_controller.rb @@ -160,7 +160,7 @@ def http_status(item) def find_or_create_scheduled_publishing_log(base_path, document_type, intent) latest_log_entry = ScheduledPublishingLogEntry .where(base_path:) - .order_by(:scheduled_publication_time.desc) + .order(scheduled_publication_time: :desc) .first if new_scheduled_publishing?(intent, document_type, latest_log_entry) diff --git a/app/lib/find_by_path.rb b/app/lib/find_by_path.rb index dd918d0e..e41cb7b4 100644 --- a/app/lib/find_by_path.rb +++ b/app/lib/find_by_path.rb @@ -24,16 +24,28 @@ def find(path) def find_route_matches(path) query = model_class - .or(routes: { "$elemMatch" => { path:, type: "exact" } }) - .or(routes: { "$elemMatch" => { :path.in => potential_prefixes(path), type: "prefix" } }) + .where("routes @> ?", json_path_element(path, "exact")) + # ANY will match any of the given array elements (similar to IN(), but for JSON arrays) + # the ARRAY [?]::jsonb[] is typecasting for PostgreSQL's JSON operators + .or(model_class.where("routes @> ANY (ARRAY [?]::jsonb[])", potential_prefix_json_matches(path))) - if model_class.fields.key?("redirects") + if model_class.attribute_names.include?("redirects") query = query - .or(redirects: { "$elemMatch" => { path:, type: "exact" } }) - .or(redirects: { "$elemMatch" => { :path.in => potential_prefixes(path), type: "prefix" } }) + .or(model_class.where("redirects @> ?", json_path_element(path, "exact"))) + .or(model_class.where("redirects @> ANY (ARRAY [?]::jsonb[])", potential_prefix_json_matches(path))) end + query + end + + # Given a path, will decompose the path into path prefixes, and + # return a JSON array element that can be matched against the + # routes or redirects array in the model_class + def potential_prefix_json_matches(path) + potential_prefixes(path).map { |p| json_path_element(p, "prefix") } + end - query.entries 
+ def json_path_element(path, type) + [{ path:, type: }].to_json end def best_route_match(matches, path) diff --git a/app/models/publish_intent.rb b/app/models/publish_intent.rb index 7a78f3ce..ef264ba9 100644 --- a/app/models/publish_intent.rb +++ b/app/models/publish_intent.rb @@ -28,7 +28,6 @@ def self.find_by_path(path) def as_json(options = nil) super(options).tap do |hash| - hash["base_path"] = hash.delete("_id") hash["errors"] = errors.as_json.stringify_keys if errors.any? end end diff --git a/spec/lib/find_by_path_spec.rb b/spec/lib/find_by_path_spec.rb index 30ba637a..f1622324 100644 --- a/spec/lib/find_by_path_spec.rb +++ b/spec/lib/find_by_path_spec.rb @@ -2,16 +2,7 @@ describe FindByPath do let(:model_class) do - # Using an actual Model as it's a real pain to mock mongoid criterias and - # similar - Class.new do - extend ApplicationRecord - store_in collection: "find_by_path_examples" - - field :base_path, type: String - field :routes, type: Array, default: [] - field :redirects, type: Array, default: [] - + Class.new(ContentItem) do def self.create(base_path: "/base-path", exact_routes: [], prefix_routes: [], redirects: []) routes = if redirects.any? diff --git a/spec/models/publish_intent_spec.rb b/spec/models/publish_intent_spec.rb index 92059a1c..1e60ab78 100644 --- a/spec/models/publish_intent_spec.rb +++ b/spec/models/publish_intent_spec.rb @@ -27,7 +27,7 @@ intent.base_path = "/foo" expect { intent.save! validate: false - }.to raise_error(Mongo::Error::OperationFailure) + }.to raise_error(ActiveRecord::RecordNotUnique) end end From 772e7ccd39cc4e898d59bd692653f767d48f5b1e Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Wed, 5 Apr 2023 07:28:31 +0100 Subject: [PATCH 09/34] Make PublishIntent compatible with ActiveRecord Added a custom ActiveRecord validation to preserve the original Mongoid error message, in case there is anything downstream which relies on detecting that error message (there shouldn't be, but we can't be sure). publish_intents_crud_spec now passes --- app/controllers/publish_intents_controller.rb | 2 +- app/models/publish_intent.rb | 13 ++++++++++--- spec/integration/publish_intent_crud_spec.rb | 6 +++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/app/controllers/publish_intents_controller.rb b/app/controllers/publish_intents_controller.rb index 738fd7c7..8df2a386 100644 --- a/app/controllers/publish_intents_controller.rb +++ b/app/controllers/publish_intents_controller.rb @@ -20,7 +20,7 @@ def update end def destroy - intent = PublishIntent.find_by(base_path: encoded_base_path) + intent = PublishIntent.find_by!(base_path: encoded_base_path) intent.destroy! render json: {} diff --git a/app/models/publish_intent.rb b/app/models/publish_intent.rb index ef264ba9..b2b1085d 100644 --- a/app/models/publish_intent.rb +++ b/app/models/publish_intent.rb @@ -1,15 +1,22 @@ class PublishIntent < ApplicationRecord + validates_each :routes do |record, attr, value| + # This wording replicates the original Mongoid error message - we don't know if any downstream + # consumers rely on parsing error messages at the moment + record.errors.add attr, "Value of type #{value.class} cannot be written to a field of type Array" unless value.nil? || value.respond_to?(:each) + end + def self.create_or_update(base_path, attributes) intent = PublishIntent.find_or_initialize_by(base_path:) result = intent.new_record? ? :created : :replaced - result = false unless intent.update(attributes) + intent.assign_attributes(attributes) + result = false unless intent.save! 
     [result, intent]
   rescue ActiveRecord::UnknownAttributeError
-    extra_fields = attributes.keys - fields.keys
+    extra_fields = attributes.keys - attribute_names
     intent.errors.add(:base, "unrecognised field(s) #{extra_fields.join(', ')} in input")
     [false, intent]
-  rescue ActiveModel::ValidationError => e
+  rescue ActiveRecord::RecordInvalid => e
     intent.errors.add(:base, e.message)
     [false, intent]
   end
diff --git a/spec/integration/publish_intent_crud_spec.rb b/spec/integration/publish_intent_crud_spec.rb
index c5cdba7d..a43958d1 100644
--- a/spec/integration/publish_intent_crud_spec.rb
+++ b/spec/integration/publish_intent_crud_spec.rb
@@ -118,7 +118,7 @@
       expect(response.status).to eq(422)
 
       data = JSON.parse(response.body)
-      expect(data["errors"]).to eq("publish_time" => ["can't be blank"])
+      expect(data["errors"]["publish_time"]).to include("can't be blank")
 
       expect(PublishIntent.where(base_path: "/vat-rates").first).not_to be
     end
@@ -136,8 +136,8 @@
       expect(response.status).to eq(422)
 
       data = JSON.parse(response.body)
-      expected_error_message = ActiveModel::ValidationError.new(Array, String).message
-      expect(data["errors"]).to eq("base" => [expected_error_message])
+      expected_error_message = "Value of type String cannot be written to a field of type Array"
+      expect(data["errors"]["base"].find { |e| e.include?(expected_error_message) }).not_to be_nil
     end
 
     it "returns a 400 with bad json" do

From ebe5467a5ecaa32207a18e5976bb23e535fd5916 Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Wed, 5 Apr 2023 07:40:05 +0100
Subject: [PATCH 10/34] Make content_items_controller 404 when it can't find the content item

---
 app/controllers/content_items_controller.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/controllers/content_items_controller.rb b/app/controllers/content_items_controller.rb
index 652d2a28..c66993f5 100644
--- a/app/controllers/content_items_controller.rb
+++ b/app/controllers/content_items_controller.rb
@@ -50,7 +50,7 @@ def update
   end
 
   def destroy
-    content_item = ContentItem.find_by(base_path: encoded_base_path)
+    content_item = ContentItem.find_by!(base_path: encoded_base_path)
     content_item.delete_routes
     content_item.destroy!

From 5dacee2d710412c6b1c9b2f2e674eb109346aff1 Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Thu, 6 Apr 2023 08:13:00 +0100
Subject: [PATCH 11/34] Update find_specific_term for PostgreSQL

An added bonus: we can now use PostgreSQL's JSONB search abilities to
simplify the previously very verbose `content_items(term)` query.
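For anyone unfamiliar with the technique: `jsonb_to_tsvector` folds every
string value nested anywhere in a JSONB document into a single text-search
vector, which `@@ plainto_tsquery` then matches with English stemming. A
minimal sketch of the new query style (the search term "tax" is purely
illustrative, not part of this change):

    # Match the term against the title, plus every nested string field
    # in the details JSONB column, in one pair of clauses.
    ContentItem.where("title ILIKE(?)", "%tax%")
      .or(ContentItem.where(
        "jsonb_to_tsvector('english', details, '\"string\"') @@ plainto_tsquery('english', ?)",
        "tax",
      ))

This is what lets us drop the long chain of `.or('details.body': term)`
clauses: one tsvector covers all of the nested string fields at once.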
--- app/lib/find_specific_term.rb | 46 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/app/lib/find_specific_term.rb b/app/lib/find_specific_term.rb index f8c98c48..119dc35f 100644 --- a/app/lib/find_specific_term.rb +++ b/app/lib/find_specific_term.rb @@ -25,15 +25,27 @@ def report logger.info CONTENT_ITEM_HEADERS.join(",") + count = 0 term_content_items.each do |content_item| - logger.info content_item_fields(content_item).join(", ") unless exclude_types.include?(content_item.document_type) + unless excluded?(content_item) + logger.info report_line(content_item) + count += 1 + end end - logger.info "Found #{number_of_terms_items} items containing #{term}" + logger.info "Found #{count} items containing #{term}" logger.info "Finished searching" end + def report_line(content_item) + content_item_fields(content_item).join(", ") + end + + def excluded?(content_item) + exclude_types.include?(content_item.document_type) + end + def content_item_fields(content_item) [ content_item.try(:title), @@ -45,32 +57,18 @@ def content_item_fields(content_item) ] end - def content_items(term) - ContentItem.or('title': term) - .or('details.body': term) - .or('description': term) - .or('description.content': term) - .or('details.body.content': term) - .or('details.parts.body': term) - .or('details.parts.body.content': term) - .or('details.nodes.title': term) - .or('details.nodes.options.label': term) - .or('details.nodes.body': term) - .or('details.nodes.body.content': term) - .or('details.email_addresses.email': term) - .or('details.introductory_paragraph': term) - .or('details.introductory_paragraph.content': term) - .or('details.more_information': term) - .or('details.more_information.content': term) - .or('details.more_info_contact_form': term) - .or('details.more_info_email_address': term).entries + def content_items_matching(term) + ContentItem.where("title ILIKE(?)", "%#{term}%") + .or(ContentItem.where(term_vector_jsonb_search_in("details"), term)) + .or(ContentItem.where(term_vector_jsonb_search_in("description"), term)) + .entries end - def number_of_terms_items - term_content_items.count { |content_item| exclude_types.exclude?(content_item.document_type) } + def term_vector_jsonb_search_in(field) + "jsonb_to_tsvector('english', #{field}, '\"string\"') @@ plainto_tsquery('english', ?)" end def term_content_items - @term_content_items ||= content_items(/#{term}/) + @term_content_items ||= content_items_matching(term) end end From 5c9f464062f5ba65db20deb4156a2d532a4fee69 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Thu, 6 Apr 2023 09:51:57 +0100 Subject: [PATCH 12/34] Simplify queries in data hygiene code These can be simplified after migration to PostgreSQL. 
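For reference, the Mongo `$group`/`$match` aggregation pipelines being
removed here map directly onto GROUP BY ... HAVING queries in SQL. The
deduplicator's relation, with roughly the SQL it generates:

    ContentItem
      .select("content_id, locale, count(*) as num_records")
      .where("content_id IS NOT NULL")
      .group("content_id, locale")
      .having("count(*) > 1")
    # SELECT content_id, locale, count(*) AS num_records
    # FROM content_items
    # WHERE content_id IS NOT NULL
    # GROUP BY content_id, locale
    # HAVING count(*) > 1

Each row of the result responds to `content_id`, `locale` and
`num_records`, which is why `fetch_duplicates_arrays` can call
`row.content_id` directly.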
--- .../data_hygiene/content_item_deduplicator.rb | 24 ++++++------------- app/lib/data_hygiene/duplicate_report.rb | 19 +++++---------- .../data_hygiene/publishing_delay_reporter.rb | 2 +- app/lib/publication_delay_report.rb | 6 ++--- 4 files changed, 16 insertions(+), 35 deletions(-) diff --git a/app/lib/data_hygiene/content_item_deduplicator.rb b/app/lib/data_hygiene/content_item_deduplicator.rb index 62e0f750..0bbe23d5 100644 --- a/app/lib/data_hygiene/content_item_deduplicator.rb +++ b/app/lib/data_hygiene/content_item_deduplicator.rb @@ -42,28 +42,18 @@ def process_duplicates # Produce an array of arrays containing duplicate ContentItems def fetch_duplicates_arrays - [].tap do |ary| - duplicate_content_id_aggregation.each do |content_id_count| - ary << ContentItem.where( - content_id: content_id_count["_id"]["content_id"], - locale: content_id_count["_id"]["locale"], - ).to_a - end + duplicate_content_id_aggregation.map do |row| + ContentItem.where(content_id: row.content_id, locale: row.locale).to_a end end # Fetch a count of all content items with content ids / locale duplicates. def duplicate_content_id_aggregation - @duplicate_content_id_aggregation ||= ContentItem.collection.aggregate([ - { - "$group" => { - "_id" => { "content_id" => "$content_id", "locale" => "$locale" }, - "uniqueIds" => { "$addToSet" => "$_id" }, - "count" => { "$sum" => 1 }, - }, - }, - { "$match" => { "_id.content_id" => { "$ne" => nil }, "count" => { "$gt" => 1 } } }, - ]) + @duplicate_content_id_aggregation ||= ContentItem + .select("content_id, locale, count(*) as num_records") + .where("content_id IS NOT NULL") + .group("content_id, locale") + .having("count(*) > 1") end def report diff --git a/app/lib/data_hygiene/duplicate_report.rb b/app/lib/data_hygiene/duplicate_report.rb index 8285261c..bb895638 100644 --- a/app/lib/data_hygiene/duplicate_report.rb +++ b/app/lib/data_hygiene/duplicate_report.rb @@ -41,24 +41,17 @@ def full def fetch_all_duplicate_content_items logger.info "Fetching content items for duplicated content ids..." duplicates = duplicate_content_id_aggregation.flat_map do |content_id_count| - next if content_id_count["_id"].blank? - - ContentItem.where(content_id: content_id_count["_id"]).to_a + ContentItem.where(content_id: content_id_count.content_id).to_a end duplicates.compact end def duplicate_content_id_aggregation - @duplicate_content_id_aggregation ||= ContentItem.collection.aggregate([ - { - "$group" => { - "_id" => "$content_id", "count" => { "$sum" => 1 } - }, - }, - { - "$match" => { "count" => { "$gt" => 1 } }, - }, - ]) + @duplicate_content_id_aggregation ||= ContentItem + .select("content_id, count(*) as num_records") + .where("content_id IS NOT NULL") + .group(:content_id) + .having("count(*) > 1") end def count_repeated_content_ids_in(content_items) diff --git a/app/lib/data_hygiene/publishing_delay_reporter.rb b/app/lib/data_hygiene/publishing_delay_reporter.rb index a21552a4..0db2c329 100644 --- a/app/lib/data_hygiene/publishing_delay_reporter.rb +++ b/app/lib/data_hygiene/publishing_delay_reporter.rb @@ -29,7 +29,7 @@ def initialize(log_entries) end def report(namespace, &filter) - delays = log_entries.select(&filter).map(&:delay_in_milliseconds) + delays = log_entries.to_a.select(&filter).map(&:delay_in_milliseconds) return if delays.empty? 
GovukStatsd.gauge("scheduled_publishing.aggregate.#{namespace}.mean_ms", Stats.mean(delays)) diff --git a/app/lib/publication_delay_report.rb b/app/lib/publication_delay_report.rb index cee952c9..c1654631 100644 --- a/app/lib/publication_delay_report.rb +++ b/app/lib/publication_delay_report.rb @@ -39,10 +39,8 @@ def csv_row(content_item) def delayed_content_items ContentItem - .where( - :scheduled_publishing_delay_seconds.gt => MINIMUM_DELAY_SECONDS, - :publishing_scheduled_at.gt => 7.days.ago, - ) + .where("scheduled_publishing_delay_seconds > ?", MINIMUM_DELAY_SECONDS) + .where("publishing_scheduled_at > ?", 7.days.ago) .order(publishing_scheduled_at: :asc) end end From 9adfa8af92990a577f00cc20c394472ef53ffd97 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Thu, 6 Apr 2023 10:19:48 +0100 Subject: [PATCH 13/34] Enforce stringified keys in content_item.as_json Resolve issue with before/after comparison failing in should_register_routes? --- app/models/content_item.rb | 21 +++++++++++++------ .../submitting_content_item_spec.rb | 4 ++-- .../presenters/content_item_presenter_spec.rb | 4 ++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/app/models/content_item.rb b/app/models/content_item.rb index 8ba5752e..b04c5e96 100644 --- a/app/models/content_item.rb +++ b/app/models/content_item.rb @@ -12,7 +12,11 @@ def self.revert(previous_item:, item:) def self.create_or_replace(base_path, attributes, log_entry) item = ContentItem.find_or_initialize_by(base_path:) - previous_item = item.persisted? ? item : nil + # dup is needed to allow the before/after route_set comparison + # in should_register_routes to work. Without it, this sets + # item & previous_item to be the same instance, meaning that + # assign_attributes assigns the same attributes to both + previous_item = item.persisted? ? item.dup : nil lock = UpdateLock.new(previous_item) payload_version = attributes["payload_version"] @@ -59,12 +63,10 @@ def self.find_by_path(path) ::FindByPath.new(self).find(path) end - # We want to force the JSON representation to use "base_path" instead of - # "_id" to prevent "_id" being exposed outside of the model. + # We want to force the JSON representation to use "base_path" + # and prevent "id" being exposed outside of the model. def as_json(options = nil) - super(options).tap do |hash| - hash["base_path"] = hash.delete("_id") - end + super(options).except("id") end # We store the description in a hash because Publishing API can send through @@ -168,6 +170,13 @@ def route_set def should_register_routes? 
return false if schema_name.to_s.start_with?("placeholder")

+    if previous_item
+      # NOTE: this check fails because of the way create_or_replace works:
+      # by the time the route_set check is done, the new value has already
+      # been assigned to the previous item (because it's the same instance as item)
+      return previous_item.schema_name == "placeholder" ||
+        previous_item.route_set != route_set
+    end

     true
   end

diff --git a/spec/integration/submitting_content_item_spec.rb b/spec/integration/submitting_content_item_spec.rb
index 88d44274..6b245d25 100644
--- a/spec/integration/submitting_content_item_spec.rb
+++ b/spec/integration/submitting_content_item_spec.rb
@@ -363,8 +363,8 @@
     it "includes an error message" do
       data = JSON.parse(response.body)

-      expected_error_message = Mongoid::Errors::InvalidValue.new(Array, @data["routes"].class).message
-      expect(data["errors"]).to eq("base" => [expected_error_message])
+      expected_error_message = "Value of type Integer cannot be written to a field of type Array"
+      expect(data["errors"].find { |_attr, err| err.include?(expected_error_message) }).not_to be_nil
     end
   end

diff --git a/spec/presenters/content_item_presenter_spec.rb b/spec/presenters/content_item_presenter_spec.rb
index e3a95dc4..e128ebce 100644
--- a/spec/presenters/content_item_presenter_spec.rb
+++ b/spec/presenters/content_item_presenter_spec.rb
@@ -66,11 +66,11 @@
   end

   it "inlines the 'text/html' content type in the details" do
-    expect(presenter.as_json["details"]).to eq("body" => "
<p>
content
</p>
") + expect(presenter.as_json["details"]).to eq({ body: "
<p>
content
</p>
" }.as_json) end it "inlines the 'text/html' content type in the links" do - expect(presenter.as_json["links"]["person"].first["details"]).to eq("body" => "
<p>
content
</p>
") + expect(presenter.as_json["links"]["person"].first["details"]).to eq({ body: "
<p>
content
</p>
" }.as_json)
   end
 end

From a69d057d1d7d50ed438d69c3e3dc7817f0766ac2 Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Thu, 6 Apr 2023 15:02:51 +0100
Subject: [PATCH 14/34] Add explicit specs for ContentItem.create_or_replace

ContentItem.create_or_replace had some required behaviour that didn't
seem to be tested for - such as exactly what to do with an existing item
on the given base_path. Made sure that this behaviour is maintained in
this branch, and added explicit specs to test this.
---
 app/models/content_item.rb       | 27 +++++++++++++++++----------
 spec/models/content_item_spec.rb | 24 +++++++++++++++++++++---
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/app/models/content_item.rb b/app/models/content_item.rb
index b04c5e96..83b09c94 100644
--- a/app/models/content_item.rb
+++ b/app/models/content_item.rb
@@ -11,27 +11,36 @@ def self.revert(previous_item:, item:)
   end

   def self.create_or_replace(base_path, attributes, log_entry)
-    item = ContentItem.find_or_initialize_by(base_path:)
-    # dup is needed to allow the before/after route_set comparison
-    # in should_register_routes to work. Without it, this sets
-    # item & previous_item to be the same instance, meaning that
-    # assign_attributes assigns the same attributes to both
-    previous_item = item.persisted? ? item.dup : nil
+    previous_item = ContentItem.where(base_path:).first
+    item_state_before_change = previous_item.dup
+
     lock = UpdateLock.new(previous_item)

     payload_version = attributes["payload_version"]
     lock.check_availability!(payload_version)

     result = previous_item ? :replaced : :created
+
+    # This awkward construction is necessary to maintain the required behaviour -
+    # a content item sent to Content Store is a complete entity (as defined in a schema)
+    # and no remnants of the item it replaces should remain.
+    item = ContentItem.new(base_path:)
     item.assign_attributes(
       attributes
         .merge(scheduled_publication_details(log_entry)),
     )

+    if previous_item
+      previous_item.assign_attributes(
+        item.attributes.except("id", "created_at", "updated_at"),
+      )
+      item = previous_item
+    end
+
     begin
       transaction do
         item.save!
-        item.register_routes(previous_item:)
+        item.register_routes(previous_item: item_state_before_change)
       end
     rescue StandardError
       result = false
@@ -171,12 +180,10 @@ def should_register_routes?
return false if schema_name.to_s.start_with?("placeholder") if previous_item - # NOTE: this check is failing, because the way the find_or_replace works, - # by the time the route_set check is done, the new value has been assigned - # to the previous item (because it's the same instance as item) return previous_item.schema_name == "placeholder" || previous_item.route_set != route_set end + true end diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb index 35bb888c..c0265200 100644 --- a/spec/models/content_item_spec.rb +++ b/spec/models/content_item_spec.rb @@ -21,9 +21,27 @@ expect(item.reload.created_at).to eq(@item.reload.created_at) end - it "does not overwrite default attribute values if called with nil attributes" do - _, item = ContentItem.create_or_replace(@item.base_path, { schema_name: "redirect", redirects: nil }, nil) - expect(item.redirects).to eq([]) + context "when there is already an existing item with the same base_path" do + before do + @item.update!( + base_path: @item.base_path, + title: "existing title", + description: "existing description", + schema_name: "existing_schema", + ) + ContentItem.create_or_replace(@item.base_path, { schema_name: "publication" }, nil) + end + + it "updates the given attributes" do + @item.reload + expect(@item.schema_name).to eq("publication") + end + + it "does not retain values of any attributes which were not given" do + @item.reload + expect(@item.title).to be_nil + expect(@item.description).to eq("value" => nil) + end end describe "exceptions" do From ea2f82b6801e5f301da49eb18cafc5103567be45 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 11 Apr 2023 10:39:20 +0100 Subject: [PATCH 15/34] Add DatabaseCleaner config to the Pact tests --- spec/service_consumers/pact_helper.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spec/service_consumers/pact_helper.rb b/spec/service_consumers/pact_helper.rb index 43e72a4e..510d4d37 100644 --- a/spec/service_consumers/pact_helper.rb +++ b/spec/service_consumers/pact_helper.rb @@ -11,6 +11,15 @@ config.include WebMock::Matchers end +Pact.set_up do + DatabaseCleaner.strategy = :transaction + DatabaseCleaner.start +end + +Pact.tear_down do + DatabaseCleaner.clean +end + def url_encode(str) ERB::Util.url_encode(str) end From 29aa93acf277b9d8805fa06c5ca245b7133081f0 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Fri, 21 Apr 2023 10:07:33 +0100 Subject: [PATCH 16/34] Add _id field to content_items, to store imported mongo ID This will allow us to cross-reference PostgreSQL records to MongoDB records post-migration if needed --- db/migrate/20230420105019_add_mongo__id_to_content_items.rb | 5 +++++ db/schema.rb | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20230420105019_add_mongo__id_to_content_items.rb diff --git a/db/migrate/20230420105019_add_mongo__id_to_content_items.rb b/db/migrate/20230420105019_add_mongo__id_to_content_items.rb new file mode 100644 index 00000000..0c54175c --- /dev/null +++ b/db/migrate/20230420105019_add_mongo__id_to_content_items.rb @@ -0,0 +1,5 @@ +class AddMongoIdToContentItems < ActiveRecord::Migration[7.0] + def change + add_column :content_items, :_id, :string, null: true + end +end diff --git a/db/schema.rb b/db/schema.rb index 12ee1cb8..9e8f5390 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.0].define(version: 2023_03_28_141957) do +ActiveRecord::Schema[7.0].define(version: 2023_04_20_105019) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -51,6 +51,7 @@ t.string "publishing_request_id" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.string "_id" t.index ["base_path"], name: "index_content_items_on_base_path", unique: true t.index ["content_id"], name: "index_content_items_on_content_id" t.index ["created_at"], name: "index_content_items_on_created_at" From 13981e70bf0a981bcc6832853c0353c2f18a0ef9 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Fri, 21 Apr 2023 10:09:14 +0100 Subject: [PATCH 17/34] Add Mongo import task & supporting classes Support import of doubly-nested mongo date fields Add field mappings for ScheduledPublishingLogEntry and PublishIntent Add mongo_id field to user & scheduled_publishing_log_entry Add `rails_timestamp` method to remove conflicts with ActiveRecord behaviour when doing .insert with some-but-not-all values given Add support for batch_size in JsonImporter --- app/lib/json_importer.rb | 68 +++++++++ app/lib/mongo_field_mapper.rb | 110 ++++++++++++++ .../20230425074342_add_mongo_id_to_user.rb | 7 + ...go_id_to_scheduled_publishing_log_entry.rb | 7 + db/schema.rb | 10 +- lib/tasks/import.rake | 8 + spec/lib/mongo_field_mapper_spec.rb | 138 ++++++++++++++++++ 7 files changed, 347 insertions(+), 1 deletion(-) create mode 100644 app/lib/json_importer.rb create mode 100644 app/lib/mongo_field_mapper.rb create mode 100644 db/migrate/20230425074342_add_mongo_id_to_user.rb create mode 100644 db/migrate/20230425074357_add_mongo_id_to_scheduled_publishing_log_entry.rb create mode 100644 lib/tasks/import.rake create mode 100644 spec/lib/mongo_field_mapper_spec.rb diff --git a/app/lib/json_importer.rb b/app/lib/json_importer.rb new file mode 100644 index 00000000..9ad7de33 --- /dev/null +++ b/app/lib/json_importer.rb @@ -0,0 +1,68 @@ +# Designed for importing JSON from MongoDB's mongoexport tool +# In this format, each line is one complete JSON object +# There is no surrounding array delimiter, or separating comma +# e.g. +# {"_id": "abc123", "field": "value1"} +# {"_id": "def456", "field": "value2"} +# and so on + +class JsonImporter + def initialize(model_class:, file:, batch_size: 1) + @model_class = model_class.constantize + @mapper = MongoFieldMapper.new(@model_class) + @file = file + @batch_size = batch_size + end + + def call + line_no = 0 + lines = [] + IO.foreach(@file) do |line| + log line_no, "Processing" + lines << process_line(line) + log line_no, "Completed" + line_no += 1 + if lines.size >= @batch_size + log(" saving batch of #{@batch_size}") + @model_class.insert_all(lines) + log(" saved") + lines = [] + end + end + end + +private + + def process_line(line) + log("parsing...") + obj = JSON.parse(line) + id = id_value(obj) + log(id, " checking existence") + if exists?(id) + log(id, " exists, skipping") + else + @mapper.active_record_attributes(obj) + end + end + + def id_value(obj) + if obj["_id"].is_a?(Hash) + obj["_id"]["$oid"] + else + obj["_id"] + end + end + + def exists?(id) + if @model_class == ContentItem + ContentItem.where(base_path: id).exists? + else + @model_class.where(id:).exists? 
+ end + end + + def log(*args) + line = args.prepend(Time.zone.now.iso8601).join("\t") + Rails.logger.info line + end +end diff --git a/app/lib/mongo_field_mapper.rb b/app/lib/mongo_field_mapper.rb new file mode 100644 index 00000000..647c3b6b --- /dev/null +++ b/app/lib/mongo_field_mapper.rb @@ -0,0 +1,110 @@ +# Maps fields from a source Mongo JSON object +# into the corresponding field in our ActiveRecord models +class MongoFieldMapper + MAPPINGS = { + ContentItem => { + rename: { + "_id" => "base_path", + }, + process: { + "public_updated_at" => ->(key, value) { { key => unpack_datetime(value) } }, + "first_published_at" => ->(key, value) { { key => unpack_datetime(value) } }, + "created_at" => ->(key, value) { rails_timestamp(key, value) }, + "updated_at" => ->(key, value) { rails_timestamp(key, value) }, + "publishing_scheduled_at" => ->(key, value) { { key => unpack_datetime(value) } }, + }, + }, + PublishIntent => { + rename: { + "_id" => "base_path", + }, + process: { + "publish_time" => ->(key, value) { { key => unpack_datetime(value) } }, + "created_at" => ->(key, value) { rails_timestamp(key, value) }, + "updated_at" => ->(key, value) { rails_timestamp(key, value) }, + + }, + }, + ScheduledPublishingLogEntry => { + process: { + "_id" => ->(_key, value) { { "mongo_id" => value["$oid"] } }, + "scheduled_publication_time" => ->(key, value) { { key => unpack_datetime(value) } }, + "created_at" => ->(key, value) { rails_timestamp(key, value) }, + }, + }, + User => { + process: { + "_id" => ->(_key, value) { { "mongo_id" => value["$oid"] } }, + "updated_at" => ->(key, value) { rails_timestamp(key, value) }, + "created_at" => ->(key, value) { rails_timestamp(key, value) }, + }, + }, + }.freeze + + def initialize(model_class) + @model_class = model_class + end + + def active_record_attributes(obj) + return obj.select { |k, _| keep_this_key?(k) } unless MAPPINGS[@model_class] + + attrs = {} + obj.each do |key, value| + mapped_attr = process(key, value) + this_key = mapped_attr.keys.first + attrs[this_key] = mapped_attr.values.first if this_key + end + attrs + end + + # Mongo datetimes can be $date => '...' or $numberLong => '...' + # or even in some cases $date => { $numberLong => (value) } } + def self.unpack_datetime(value) + if value.is_a?(Hash) + # Recurse until you get something that isn't a Hash with one of these keys + unpack_datetime(value["$date"] || value["$numberLong"].to_i / 1000) + elsif !value + nil + elsif value.is_a?(Integer) + # e.g. -473385600000 + Time.zone.at(value).iso8601 + else + begin + # e.g. "2019-06-21T11:52:37+00:00" + Time.zone.parse(value).iso8601 + value + rescue Date::Error + # we also have some content with dates in the form {"$numberLong" => "(value)"} + # in which case you can end up here with a value like "-473385600000" (quoted) + Time.zone.at(value.to_i).iso8601 + end + end + end + + # Return the given key with the unpacked date value if given, + # otherwise return empty hash, to avoid conflicting with + # the not-null constraint on Rails' timestamp keys + def self.rails_timestamp(key, value) + date = unpack_datetime(value) + date ? { key => date } : {} + end + +private + + def process(key, value) + if (proc = MAPPINGS[@model_class][:process][key]) + proc.call(key, value) + else + processed_key = target_key(key) + keep_this_key?(processed_key) ? 
{ processed_key => value } : {} + end + end + + def keep_this_key?(key) + @model_class.attribute_names.include?(key) + end + + def target_key(key) + MAPPINGS[@model_class][:rename].try(:[], key) || key + end +end diff --git a/db/migrate/20230425074342_add_mongo_id_to_user.rb b/db/migrate/20230425074342_add_mongo_id_to_user.rb new file mode 100644 index 00000000..1441d432 --- /dev/null +++ b/db/migrate/20230425074342_add_mongo_id_to_user.rb @@ -0,0 +1,7 @@ +class AddMongoIdToUser < ActiveRecord::Migration[7.0] + def change + add_column :users, :mongo_id, :string, null: true + + add_index :users, :mongo_id + end +end diff --git a/db/migrate/20230425074357_add_mongo_id_to_scheduled_publishing_log_entry.rb b/db/migrate/20230425074357_add_mongo_id_to_scheduled_publishing_log_entry.rb new file mode 100644 index 00000000..19e35fe2 --- /dev/null +++ b/db/migrate/20230425074357_add_mongo_id_to_scheduled_publishing_log_entry.rb @@ -0,0 +1,7 @@ +class AddMongoIdToScheduledPublishingLogEntry < ActiveRecord::Migration[7.0] + def change + add_column :scheduled_publishing_log_entries, :mongo_id, :string, null: true + + add_index :scheduled_publishing_log_entries, :mongo_id + end +end diff --git a/db/schema.rb b/db/schema.rb index 9e8f5390..8e576e60 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2023_04_20_105019) do +ActiveRecord::Schema[7.0].define(version: 2023_04_25_074357) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -60,6 +60,10 @@ t.index ["updated_at"], name: "index_content_items_on_updated_at" end + create_table "content_items_import", id: false, force: :cascade do |t| + t.jsonb "data" + end + create_table "publish_intents", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| t.string "base_path" t.datetime "publish_time" @@ -82,8 +86,10 @@ t.bigint "delay_in_milliseconds" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.string "mongo_id" t.index ["base_path"], name: "ix_scheduled_pub_log_base_path" t.index ["created_at"], name: "ix_scheduled_pub_log_created" + t.index ["mongo_id"], name: "index_scheduled_publishing_log_entries_on_mongo_id" t.index ["scheduled_publication_time"], name: "ix_scheduled_pub_log_time" end @@ -98,9 +104,11 @@ t.string "organisation_content_id" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.string "mongo_id" t.index ["created_at"], name: "index_users_on_created_at" t.index ["disabled"], name: "index_users_on_disabled" t.index ["email"], name: "index_users_on_email" + t.index ["mongo_id"], name: "index_users_on_mongo_id" t.index ["name"], name: "index_users_on_name" t.index ["organisation_content_id"], name: "index_users_on_organisation_content_id" t.index ["organisation_slug"], name: "index_users_on_organisation_slug" diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 00000000..3e620657 --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,8 @@ +namespace :import do + desc " + Import a JSON file which has been generated by mongoexport + " + task json: :environment do |_t, args| + JsonImporter.new(model_class: args.extras[0], file: args.extras[1]).call + end +end diff --git a/spec/lib/mongo_field_mapper_spec.rb b/spec/lib/mongo_field_mapper_spec.rb new file mode 100644 index 00000000..0e64f9a2 --- /dev/null +++ 
b/spec/lib/mongo_field_mapper_spec.rb @@ -0,0 +1,138 @@ +require "rails_helper" + +describe MongoFieldMapper do + let(:model_class) { ContentItem } + subject { described_class.new(model_class) } + + describe "#process" do + let(:result) { subject.send(:process, key, value) } + + context "given a key which should be renamed" do + let(:key) { "_id" } + let(:value) { "/base/path" } + + it "returns a Hash" do + expect(result).to be_a(Hash) + end + + it "returns the key mapped to its target name" do + expect(result.keys).to eq(%w[base_path]) + end + + it "has the given value" do + expect(result.values).to eq([value]) + end + end + + context "given a key which should be processed" do + let(:key) { "public_updated_at" } + let(:value) { { "$date" => "2019-06-21T11:52:37Z" } } + + it "returns a Hash" do + expect(result).to be_a(Hash) + end + + it "returns the key mapped to its target name" do + expect(result.keys).to eq(%w[public_updated_at]) + end + + it "has the expected value after processing " do + expect(result.values).to eq(["2019-06-21T11:52:37Z"]) + end + end + + context "given a key which is not present in the attributes of model_class" do + let(:key) { "arbitrary_key" } + let(:value) { "anything" } + + it "returns an empty Hash" do + expect(result).to eq({}) + end + end + end + + describe "#active_record_attributes" do + context "given an object with fields to be renamed, processed and dropped" do + let(:mongo_object) do + { + "_uid" => "abc123", + "_id" => "/some/base/path", + "public_updated_at" => { "$date" => "2019-06-21T11:52:37Z" }, + "first_published_at" => { "$numberLong" => "-473385600000" }, + "other_field" => "other_value", + "details" => "details value", + } + end + + let(:result) { subject.active_record_attributes(mongo_object) } + + it "returns a Hash" do + expect(result).to be_a(Hash) + end + + it "does not return any keys which are not in the attributes of the model_class" do + expect(result.keys - model_class.attribute_names).to be_empty + end + + it "renames all keys which should be renamed" do + expect(result.keys).not_to include("_id") + expect(result.keys).to include("base_path") + end + + it "preserves the value of renamed keys" do + expect(result["base_path"]).to eq(mongo_object["_id"]) + end + + it "processes any fields which should be processed" do + expect(result["public_updated_at"]).to eq("2019-06-21T11:52:37Z") + expect(result["first_published_at"]).to eq("1955-01-01T00:00:00Z") + end + + it "does not change any fields which are not to be dropped, processed or renamed" do + expect(result["details"]).to eq(mongo_object["details"]) + end + end + end + + describe ".unpack_datetime" do + context "given a Hash of '$date => 'value'" do + context "where value has timezone signifier Z" do + it "returns the value with a Z" do + expect(described_class.unpack_datetime({ "$date" => "2019-06-21T11:52:37Z" })).to eq("2019-06-21T11:52:37Z") + end + end + context "where value has timezone signifier +00:00" do + it "returns the value with a +00:00" do + expect(described_class.unpack_datetime({ "$date" => "2019-06-21T11:52:37+00:00" })).to eq("2019-06-21T11:52:37+00:00") + end + end + end + + context "given a Hash of '$numberLong => value" do + it "returns the value in iso8601 format" do + expect(described_class.unpack_datetime({ "$numberLong" => -473_385_600_000 })).to eq("1955-01-01T00:00:00Z") + end + end + + context "given a Hash of '$numberLong => \"value\"" do + it "returns the value in iso8601 format" do + expect(described_class.unpack_datetime({ "$numberLong" => 
"-473385600000" })).to eq("1955-01-01T00:00:00Z") + end + end + + context "given nil" do + it "returns nil" do + expect(described_class.unpack_datetime(nil)).to be_nil + end + end + + # as seen in + # /government/publications/agreement-regarding-the-status-of-forces-of-parties-to-the-north-atlantic-treaty + # "first_published_at":{"$date":{"$numberLong":"-473385600000"}} + context "given a hash of hashes" do + it "returns the final value" do + expect(described_class.unpack_datetime({ "$date" => { "$numberLong" => "-473385600000" } })).to eq("1955-01-01T00:00:00Z") + end + end + end +end From 06208fb387cef6e3bccc27924fa58b4e6b257726 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Fri, 28 Apr 2023 13:41:58 +0100 Subject: [PATCH 18/34] Add performance:compare_hosts rake task This will allow us to perform side-by-side performance comparisons of the Mongo and Postgres content-stores on the same hardware (e.g. local dev laptop) and prove that the PostgreSQL content-store is at least as performant as the Mongo version. --- app/lib/response_comparator.rb | 58 ++++++++++++++++++++++++++++++++++ lib/tasks/performance.rake | 14 ++++++++ 2 files changed, 72 insertions(+) create mode 100644 app/lib/response_comparator.rb create mode 100644 lib/tasks/performance.rake diff --git a/app/lib/response_comparator.rb b/app/lib/response_comparator.rb new file mode 100644 index 00000000..c15efded --- /dev/null +++ b/app/lib/response_comparator.rb @@ -0,0 +1,58 @@ +class ResponseComparator + def initialize(path_file:, host_1:, host_2:, output_file:) + @path_file = path_file + @host_1 = host_1 + @host_2 = host_2 + @output_file = output_file + end + + def call + File.open(@output_file, "w+") do |out| + out.write("#{headers.join("\t")}\n") + i = 0 + IO.foreach(@path_file) do |line| + path = line.strip + Rails.logger.info "#{i} - #{path}" + result = compare(host_1: @host_1, host_2: @host_2, path:) + out.write("#{result.join("\t")}\n") + i += 1 + end + end + end + +private + + def compare(host_1:, host_2:, path:) + response_1 = hit_url(host_1, path) + response_2 = hit_url(host_2, path) + results(path:, response_1:, response_2:) + end + + def hit_url(host, path) + t = Time.zone.now + response = Net::HTTP.get_response(host, "/api/content#{path}") + body_size = response.body.strip.length + time = Time.zone.now - t + { + status: response.code, + body_size:, + time:, + } + end + + def headers + ["url", "host-1-status", "host-1-body-size", "host-1-response-time", "host-2-status", "host-2-body-size", "host-2-response-time"] + end + + def results(path:, response_1:, response_2:) + [ + path, + response_1[:status], + response_1[:body_size], + response_1[:time], + response_2[:status], + response_2[:body_size], + response_2[:time], + ] + end +end diff --git a/lib/tasks/performance.rake b/lib/tasks/performance.rake new file mode 100644 index 00000000..806a9819 --- /dev/null +++ b/lib/tasks/performance.rake @@ -0,0 +1,14 @@ +namespace :performance do + desc " + Compare response sizes and times from two hosts, + using a given file of request paths, one path per line + " + task :compare_hosts, %i[path_file host_1 host_2 output_file] => :environment do |_t, args| + ResponseComparator.new( + path_file: args[:path_file], + host_1: args[:host_1], + host_2: args[:host_2], + output_file: args[:output_file], + ).call + end +end From 797853a8e2d8334827556d9fbfe3e86170e5c36f Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Fri, 28 Apr 2023 16:14:06 +0100 Subject: [PATCH 19/34] Add optimised indexes on JSONB array routes/redirects columns 
This improves response times to around 30% of previous values --- db/migrate/20230428144838_add_optimised_routes_indexes.rb | 7 +++++++ db/schema.rb | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20230428144838_add_optimised_routes_indexes.rb diff --git a/db/migrate/20230428144838_add_optimised_routes_indexes.rb b/db/migrate/20230428144838_add_optimised_routes_indexes.rb new file mode 100644 index 00000000..a2e160f2 --- /dev/null +++ b/db/migrate/20230428144838_add_optimised_routes_indexes.rb @@ -0,0 +1,7 @@ +class AddOptimisedRoutesIndexes < ActiveRecord::Migration[7.0] + def up + add_index(:content_items, :routes, using: :gin, opclass: :jsonb_path_ops, name: "ix_ci_routes_jsonb_path_ops") + add_index(:content_items, :redirects, using: :gin, opclass: :jsonb_path_ops, name: "ix_ci_redirects_jsonb_path_ops") + add_index(:publish_intents, :routes, using: :gin, opclass: :jsonb_path_ops, name: "ix_pi_routes_jsonb_path_ops") + end +end diff --git a/db/schema.rb b/db/schema.rb index 8e576e60..396b09d7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2023_04_25_074357) do +ActiveRecord::Schema[7.0].define(version: 2023_04_28_144838) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -56,7 +56,9 @@ t.index ["content_id"], name: "index_content_items_on_content_id" t.index ["created_at"], name: "index_content_items_on_created_at" t.index ["redirects"], name: "index_content_items_on_redirects", using: :gin + t.index ["redirects"], name: "ix_ci_redirects_jsonb_path_ops", opclass: :jsonb_path_ops, using: :gin t.index ["routes"], name: "index_content_items_on_routes", using: :gin + t.index ["routes"], name: "ix_ci_routes_jsonb_path_ops", opclass: :jsonb_path_ops, using: :gin t.index ["updated_at"], name: "index_content_items_on_updated_at" end @@ -76,6 +78,7 @@ t.index ["created_at"], name: "index_publish_intents_on_created_at" t.index ["publish_time"], name: "index_publish_intents_on_publish_time" t.index ["routes"], name: "index_publish_intents_on_routes", using: :gin + t.index ["routes"], name: "ix_pi_routes_jsonb_path_ops", opclass: :jsonb_path_ops, using: :gin t.index ["updated_at"], name: "index_publish_intents_on_updated_at" end From 6eb07aaaab2fc8d83e8ed812c0e25a46c0f7659b Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Fri, 16 Jun 2023 09:38:49 +0100 Subject: [PATCH 20/34] Remove mongo export files These will no longer be needed after migration to PostgreSQL. 
--- app/lib/mongo_exporter.rb | 31 ---------------------- spec/lib/mongo_exporter_spec.rb | 46 --------------------------------- 2 files changed, 77 deletions(-) delete mode 100644 app/lib/mongo_exporter.rb delete mode 100644 spec/lib/mongo_exporter_spec.rb diff --git a/app/lib/mongo_exporter.rb b/app/lib/mongo_exporter.rb deleted file mode 100644 index 476e577f..00000000 --- a/app/lib/mongo_exporter.rb +++ /dev/null @@ -1,31 +0,0 @@ -require "open3" - -class MongoExporter - def self.collection_names - Mongoid.default_client.collections.map(&:name).reject { |e| e == "data_migrations" }.sort - end - - def self.export(collection:, path:) - FileUtils.mkdir_p(path) - zipped_file_path = File.join(path, [collection, "json", "gz"].join(".")) - cmd1 = [ - "mongoexport", - "--uri=#{ENV['MONGODB_URI']}", - "--collection=#{collection}", - "--type=json", - ] - cmd2 = ["gzip > #{zipped_file_path}"] - execute_piped(cmd1, cmd2) - end - - def self.export_all(path:) - collection_names.each do |collection| - export(collection:, path:) - end - end - - # Run the given commands as a pipeline (cmd1 | cmd2 | ...) - def self.execute_piped(*args) - Open3.pipeline(*args) - end -end diff --git a/spec/lib/mongo_exporter_spec.rb b/spec/lib/mongo_exporter_spec.rb deleted file mode 100644 index 9bdb8008..00000000 --- a/spec/lib/mongo_exporter_spec.rb +++ /dev/null @@ -1,46 +0,0 @@ -require "rails_helper" - -describe MongoExporter do - before do - allow(MongoExporter).to receive(:execute_piped).and_return(true) - allow(FileUtils).to receive(:mkdir_p) - end - - describe ".collection_names" do - it "returns an array of collection names" do - %w[content_items publish_intents scheduled_publishing_log_entries users].each do |coll| - expect(described_class.collection_names).to include(coll) - end - end - - it "does not export data_migrations" do - expect(described_class.collection_names).not_to include("data_migrations") - end - end - - describe ".export" do - it "makes the given path if it does not exist" do - expect(FileUtils).to receive(:mkdir_p).with("/my/path") - described_class.export(collection: "my_collection", path: "/my/path") - end - - it "executes mongoexport with the correct arguments" do - expect(described_class).to receive(:execute_piped).with( - ["mongoexport", - "--uri=#{ENV['MONGODB_URI']}", - "--collection=my_collection", - "--type=json"], - anything, - ) - described_class.export(collection: "my_collection", path: "/my/path") - end - - it "pipes the mongoexport output to gzip" do - expect(described_class).to receive(:execute_piped).with( - anything, - ["gzip > /my/path/my_collection.json.gz"], - ) - described_class.export(collection: "my_collection", path: "/my/path") - end - end -end From 9c345350d89254f68f3de3a2f38444b317446634 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Fri, 16 Jun 2023 12:24:48 +0100 Subject: [PATCH 21/34] Allow JsonImporter to import all files in a given dir, and add specs --- app/lib/json_importer.rb | 114 ++++++++++-- lib/tasks/import.rake | 9 +- spec/lib/json_importer_spec.rb | 328 +++++++++++++++++++++++++++++++++ 3 files changed, 431 insertions(+), 20 deletions(-) create mode 100644 spec/lib/json_importer_spec.rb diff --git a/app/lib/json_importer.rb b/app/lib/json_importer.rb index 9ad7de33..e7e96834 100644 --- a/app/lib/json_importer.rb +++ b/app/lib/json_importer.rb @@ -7,8 +7,12 @@ # and so on class JsonImporter - def initialize(model_class:, file:, batch_size: 1) - @model_class = model_class.constantize + def initialize(file:, model_class: nil, batch_size: 1, 
offline_table_class: nil) + @model_class = model_class || infer_model_class(file) + raise ArgumentError, "Could not infer class from #{file}" unless @model_class + + @offline_table_class = offline_table_class || create_offline_table_class + @mapper = MongoFieldMapper.new(@model_class) @file = file @batch_size = batch_size @@ -17,32 +21,104 @@ def initialize(model_class:, file:, batch_size: 1) def call line_no = 0 lines = [] - IO.foreach(@file) do |line| - log line_no, "Processing" - lines << process_line(line) - log line_no, "Completed" - line_no += 1 - if lines.size >= @batch_size - log(" saving batch of #{@batch_size}") - @model_class.insert_all(lines) - log(" saved") - lines = [] + log "Importing file #{@file}" + + begin + IO.foreach(@file) do |line| + log line_no, "Processing" + lines << process_line(line) + log line_no, "Completed" + line_no += 1 + if lines.size >= @batch_size + log(" saving batch of #{@batch_size}") + insert_lines(lines) + log(" saved") + lines = [] + end + end + + @model_class.transaction do + update_model_table_from_offline_table end + ensure + drop_offline_table + end + end + + def self.import_all_in(path) + files = Dir.glob("*.json", base: path) + files.each do |file| + import_file(File.join(path, file)) end end + def self.import_file(path) + new(file: path).call + end + private + def insert_lines(lines) + @offline_table_class.insert_all(lines, unique_by: [@model_class.primary_key.to_sym], record_timestamps: false) + end + + def update_model_table_from_offline_table + log("truncating #{@model_class.table_name}") + @model_class.connection.truncate(@model_class.table_name) + log("insert-selecting all records from #{@offline_table_class.table_name} to #{@model_class.table_name}") + @model_class.connection.execute insert_select_statement + end + + def insert_select_statement + # id is auto-generated - we have to exclude it from INSERT statements + columns = @model_class.column_names - [@model_class.primary_key] + <<-SQL + INSERT INTO #{@model_class.table_name} + (#{columns.join(',')}) + SELECT + #{columns.join(',')} + FROM #{@offline_table_class.table_name} + ; + SQL + end + + def drop_offline_table + @offline_table_class.connection.execute("DROP TABLE #{@offline_table_class.table_name}") + end + + def create_offline_table_class + klass = Class.new(@model_class) + klass.table_name = "offline_import_#{@model_class.table_name}_#{SecureRandom.hex(4)}" + log("creating table #{klass.table_name}") + create_offline_table(klass) + klass + end + + def create_offline_table(klass) + klass.connection.execute("CREATE TABLE #{klass.table_name} AS TABLE #{@model_class.table_name} WITH NO DATA") + klass.connection.execute("CREATE UNIQUE INDEX ON #{klass.table_name} (#{@model_class.primary_key}) ") + end + + def infer_model_class(file) + klass = to_class(File.basename(file)) + klass && is_an_application_model?(klass) ? 
klass : nil + end + + # Take a given file name like 'content-items.json' and convert it to + # either a Class (if possible) or nil (if not) + def to_class(file) + file.split(".").first.underscore.classify.safe_constantize + end + + def is_an_application_model?(klass) + klass.ancestors.include?(ApplicationRecord) + end + def process_line(line) log("parsing...") obj = JSON.parse(line) - id = id_value(obj) - log(id, " checking existence") - if exists?(id) - log(id, " exists, skipping") - else - @mapper.active_record_attributes(obj) - end + log("id", id_value(obj)) + @mapper.active_record_attributes(obj) end def id_value(obj) diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 3e620657..2cc2edfc 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -3,6 +3,13 @@ namespace :import do Import a JSON file which has been generated by mongoexport " task json: :environment do |_t, args| - JsonImporter.new(model_class: args.extras[0], file: args.extras[1]).call + JsonImporter.new(model_class: args.extras[0].safe_constantize, file: args.extras[1]).call + end + + desc " + Import all .json files in the given path + " + task all: :environment do |_t, args| + JsonImporter.import_all_in(args.extras[0]) end end diff --git a/spec/lib/json_importer_spec.rb b/spec/lib/json_importer_spec.rb new file mode 100644 index 00000000..5dbfad7b --- /dev/null +++ b/spec/lib/json_importer_spec.rb @@ -0,0 +1,328 @@ +require "rails_helper" + +describe JsonImporter do + subject { JsonImporter.new(model_class:, file: "content-items.json", offline_table_class:, batch_size:) } + let(:model_class) { ContentItem } + let(:batch_size) { 1 } + let(:mock_connection) { double(ActiveRecord::Base.connection) } + let(:offline_table_class) { double(ContentItem, connection: mock_connection, table_name: "offline_table") } + + before do + allow(mock_connection).to receive(:execute) + end + + describe "#exists?" 
do + subject { JsonImporter.new(model_class:, file: "") } + + context "when @model_class is a ContentItem" do + let(:model_class) { ContentItem } + + context "and the given id does not exist in the DB as a base_path" do + let(:id) { "/no/base/path/here" } + + it "returns false" do + expect(subject.send(:exists?, id)).to eq(false) + end + end + + context "and the given id does exist in the DB as a base_path" do + let(:id) { create(:content_item).base_path } + + it "returns true" do + expect(subject.send(:exists?, id)).to eq(true) + end + end + end + + context "when @model_class is not a ContentItem" do + let(:model_class) { User } + + context "and no @model_class with the given id exists" do + let(:id) { 9_999_999 } + + it "returns false" do + expect(subject.send(:exists?, id)).to eq(false) + end + end + + context "and the model exists in the DB" do + let(:id) { create(:user).id } + + it "returns true" do + expect(subject.send(:exists?, id)).to eq(true) + end + end + end + end + + describe "#log" do + context "when given some arguments" do + let(:args) { ["string to log", %w[an array]] } + before do + allow(Rails.logger).to receive(:info) + end + + it "logs a tab-separated line of the arguments, with the timestamp at the start" do + Timecop.freeze do + timestamp = Time.zone.now.iso8601 + expect(Rails.logger).to receive(:info).with([timestamp, "string to log", "an", "array"].join("\t")) + subject.send(:log, args) + end + end + end + end + + describe "#id_value" do + context "when given a nested hash" do + let(:obj) { { "_id" => { "$oid" => "12345" } } } + + it "returns the value of the _id=>$oid key" do + expect(subject.send(:id_value, obj)).to eq("12345") + end + end + + context "when given a single-level hash" do + let(:obj) { { "_id" => "12345" } } + + it "returns the value of the _id key" do + expect(subject.send(:id_value, obj)).to eq("12345") + end + end + end + + describe "#process_line" do + context "when given a line of JSON" do + let(:line) do + { + id: "1234", + base_path: "/my/base/path", + key2: { key2_sub1: "key2 sub1", key2_sub2: "key2 sub2" }, + title: "My title", + }.to_json + end + + it "returns the keys (stringified) & values that match the ActiveRecord attributes of the model_class" do + expect(subject.send(:process_line, line)).to eq("id" => "1234", "base_path" => "/my/base/path", "title" => "My title") + end + end + end + + describe "#is_an_application_model?" 
do + let(:return_value) { subject.send(:is_an_application_model?, klass) } + + context "when given a class that is a descendant of ApplicationRecord" do + let(:klass) { User } + + it "returns true" do + expect(return_value).to eq(true) + end + end + + context "when given a class that is not a descendant of ApplicationRecord" do + let(:klass) { Hash } + + it "returns false" do + expect(return_value).to eq(false) + end + end + end + + describe "#to_class" do + context "when given a file name that maps to a class name" do + let(:file) { "content-items.json" } + + it "returns the class" do + expect(subject.send(:to_class, file)).to eq(ContentItem) + end + end + + context "when given a file name that does not map to a class name" do + let(:file) { "Untitled FINAL - Final (2).doc" } + + it "returns nil" do + expect(subject.send(:to_class, file)).to be_nil + end + end + end + + describe "#infer_model_class" do + context "when given a file name that maps to a class name" do + context "and the class is not a descendant of ApplicationRecord" do + let(:file) { "hashes.json" } + + it "returns nil" do + expect(subject.send(:infer_model_class, file)).to be_nil + end + end + + context "and the class is a descendant of ApplicationRecord" do + let(:file) { "content-items.json" } + + it "returns the class" do + expect(subject.send(:infer_model_class, file)).to eq(ContentItem) + end + end + end + + context "when given a file name that does not map to a class name" do + let(:file) { "thingies.json" } + + it "returns nil" do + expect(subject.send(:infer_model_class, file)).to be_nil + end + end + end + + describe ".import_file" do + let(:path) { "/my/path" } + before do + allow(described_class).to receive(:new).and_return(subject) + allow(subject).to receive(:call) + end + + it "creates a new JsonImporter, passing the given path" do + expect(described_class).to receive(:new).with(file: path).and_return(subject) + described_class.import_file(path) + end + + it "calls the new JsonImporter" do + expect(subject).to receive(:call) + described_class.import_file(path) + end + end + + describe ".import_all_in" do + let(:path) { "/my/path" } + before do + allow(Dir).to receive(:glob).and_return(%w[file1 file2]) + allow(described_class).to receive(:import_file) + end + + it "globs the given directory for json files" do + expect(Dir).to receive(:glob).with("*.json", base: "/my/path").and_return(%w[file1 file2]) + described_class.import_all_in(path) + end + + it "imports all files in that directory" do + expect(described_class).to receive(:import_file).with("/my/path/file1").ordered + expect(described_class).to receive(:import_file).with("/my/path/file2").ordered + described_class.import_all_in(path) + end + end + + describe "#call" do + before do + allow(subject).to receive(:insert_lines) + allow(subject).to receive(:update_model_table_from_offline_table) + allow(offline_table_class).to receive(:insert_all) + end + + context "for each line in the file" do + before do + allow(IO).to receive(:foreach).and_yield("line1").and_yield("line2") + allow(offline_table_class).to receive(:insert_all) + end + + it "processes the line" do + expect(subject).to receive(:process_line).with("line1").ordered + expect(subject).to receive(:process_line).with("line2").ordered + subject.call + end + + context "when it has processed batch_size lines" do + let(:batch_size) { 2 } + before do + allow(subject).to receive(:process_line).with("line1").and_return("line1") + allow(subject).to receive(:process_line).with("line2").and_return("line2") + 
end + + it "inserts the lines" do + expect(subject).to receive(:insert_lines).once.with(%w[line1 line2]) + subject.call + end + end + end + end + + describe "#insert_lines" do + before do + allow(model_class).to receive(:primary_key).and_return(:model_primary_key) + end + + it "inserts the lines into the offline table, unique by the primary key" do + expect(offline_table_class).to receive(:insert_all).with(%w[line1 line2], unique_by: [:model_primary_key], record_timestamps: false) + subject.send(:insert_lines, %w[line1 line2]) + end + end + + describe "#update_model_table_from_offline_table" do + before do + allow(model_class.connection).to receive(:execute) + allow(model_class.connection).to receive(:truncate) + allow(subject).to receive(:insert_select_statement).and_return("insert-select statement") + end + it "truncates the model_class table" do + expect(model_class.connection).to receive(:truncate).with(model_class.table_name) + subject.send(:update_model_table_from_offline_table) + end + + it "executes the insert_select_statement" do + expect(model_class.connection).to receive(:execute).with("insert-select statement") + subject.send(:update_model_table_from_offline_table) + end + end + + describe "#insert_select_statement" do + let(:return_value) { subject.send(:insert_select_statement) } + + it "inserts into the model_class table from the offline_class table" do + expect(return_value).to match(/\s*INSERT INTO\s+#{model_class.table_name}.+SELECT.*FROM\s+#{offline_table_class.table_name}.*/im) + end + + it "includes all the columns in the same order, except primary_key" do + allow(model_class).to receive(:column_names).and_return(%w[column1 column2]) + allow(model_class).to receive(:primary_key).and_return("primary_key") + expect(return_value).to match(/INSERT .*(column1,column2).*SELECT.*column1,column2.* FROM.*/im) + end + end + + describe "#drop_offline_table" do + it "drops the offline table" do + expect(mock_connection).to receive(:execute).with("DROP TABLE #{offline_table_class.table_name}") + subject.send(:drop_offline_table) + end + end + + describe "#create_offline_table_class" do + describe "the return value" do + let(:return_value) { subject.send(:create_offline_table_class) } + + it "is a class" do + expect(return_value).to be_a(Class) + end + + it "is a subclass of model_class" do + expect(return_value.ancestors).to include(model_class) + end + + describe "the table_name" do + it "has a prefix of offline_import_" do + expect(return_value.table_name).to start_with("offline_import_") + end + + it "includes the model_class table_name" do + expect(return_value.table_name).to match(/.*_#{model_class.table_name}_.*/) + end + + it "ends in an 8-char hex string" do + expect(return_value.table_name).to match(/.*_[a-f0-9]{8}/) + end + end + end + + it "creates the offline table" do + expect(subject).to receive(:create_offline_table) + subject.send(:create_offline_table_class) + end + end +end From 13757bcfbb52af87e0a0a838f9f59934d2cda003 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Wed, 30 Aug 2023 10:46:02 +0100 Subject: [PATCH 22/34] Allow null timestamps on records imported from mongo Some records in the MongoDB have nil values in `created_at` or `updated_at`. ActiveRecord's `timestamps` migration method by default creates these fields without allowing nil values, so we must explicitly add support for this after the fact. 
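
A sketch of the failure mode this avoids (illustrative values, not real
data):

```
# The importer uses insert_all with record_timestamps: false, so Rails
# does not backfill created_at/updated_at. A Mongo record that never had
# them would then hit the NOT NULL constraint unless the columns allow nulls.
ContentItem.insert_all(
  [{ base_path: "/example-imported-item" }], # no timestamps, as in some Mongo exports
  record_timestamps: false,
) # raises ActiveRecord::NotNullViolation before this migration
```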
--- .../20230830093643_allow_null_timestamps.rb | 12 ++++++++++++ db/schema.rb | 18 +++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) create mode 100644 db/migrate/20230830093643_allow_null_timestamps.rb diff --git a/db/migrate/20230830093643_allow_null_timestamps.rb b/db/migrate/20230830093643_allow_null_timestamps.rb new file mode 100644 index 00000000..1a2ab825 --- /dev/null +++ b/db/migrate/20230830093643_allow_null_timestamps.rb @@ -0,0 +1,12 @@ +class AllowNullTimestamps < ActiveRecord::Migration[7.0] + def change + change_column_null :content_items, :created_at, true + change_column_null :content_items, :updated_at, true + + change_column_null :publish_intents, :created_at, true + change_column_null :publish_intents, :updated_at, true + + change_column_null :scheduled_publishing_log_entries, :created_at, true + change_column_null :scheduled_publishing_log_entries, :updated_at, true + end +end diff --git a/db/schema.rb b/db/schema.rb index 396b09d7..94d31a23 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2023_04_28_144838) do +ActiveRecord::Schema[7.0].define(version: 2023_08_30_093643) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -49,8 +49,8 @@ t.integer "payload_version" t.jsonb "withdrawn_notice", default: {} t.string "publishing_request_id" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false + t.datetime "created_at" + t.datetime "updated_at" t.string "_id" t.index ["base_path"], name: "index_content_items_on_base_path", unique: true t.index ["content_id"], name: "index_content_items_on_content_id" @@ -62,18 +62,14 @@ t.index ["updated_at"], name: "index_content_items_on_updated_at" end - create_table "content_items_import", id: false, force: :cascade do |t| - t.jsonb "data" - end - create_table "publish_intents", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| t.string "base_path" t.datetime "publish_time" t.string "publishing_app" t.string "rendering_app" t.jsonb "routes", default: [] - t.datetime "created_at", null: false - t.datetime "updated_at", null: false + t.datetime "created_at" + t.datetime "updated_at" t.index ["base_path"], name: "index_publish_intents_on_base_path", unique: true t.index ["created_at"], name: "index_publish_intents_on_created_at" t.index ["publish_time"], name: "index_publish_intents_on_publish_time" @@ -87,8 +83,8 @@ t.string "document_type" t.datetime "scheduled_publication_time" t.bigint "delay_in_milliseconds" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false + t.datetime "created_at" + t.datetime "updated_at" t.string "mongo_id" t.index ["base_path"], name: "ix_scheduled_pub_log_base_path" t.index ["created_at"], name: "ix_scheduled_pub_log_created" From a4e4e96a4404a92da91e31201386a87c3556f0cb Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Mon, 14 Aug 2023 11:33:02 +0100 Subject: [PATCH 23/34] Fix issue that can cause doubly-nested description Hashes on PostgreSQL Some records in the old MongoDb have `description` as a simple value, some have it as a Hash. We need to support both, and make sure that we only wrap the given value in a Hash if it's not already like that. 
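
A console sketch of the intended setter behaviour (example values):

```
item = ContentItem.new

item.description = "VAT rates"                # plain value: gets wrapped
item["description"] # => { "value" => "VAT rates" }

item.description = { "value" => "VAT rates" } # already a Hash: passed through
item["description"] # => { "value" => "VAT rates" }, not doubly nested

item.description    # => "VAT rates" in both cases
```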
--- app/models/content_item.rb | 3 ++- spec/models/content_item_spec.rb | 25 ++++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/app/models/content_item.rb b/app/models/content_item.rb index 83b09c94..b6251cc5 100644 --- a/app/models/content_item.rb +++ b/app/models/content_item.rb @@ -81,7 +81,8 @@ def as_json(options = nil) # We store the description in a hash because Publishing API can send through # multiple content types. def description=(value) - super("value" => value) + # ...but only wrap the given value in a Hash if it's not already a Hash + value.is_a?(Hash) ? super : super("value" => value) end def description diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb index c0265200..2a361854 100644 --- a/spec/models/content_item_spec.rb +++ b/spec/models/content_item_spec.rb @@ -40,7 +40,7 @@ it "does not retain values of any attributes which were not given" do @item.reload expect(@item.title).to be_nil - expect(@item.description).to eq("value" => nil) + expect(@item["description"]).to eq("value" => nil) end end @@ -434,11 +434,26 @@ end describe "description" do - it "wraps the description as a hash" do - content_item = FactoryBot.create(:content_item, description: "foo") + context "when given a simple-valued description" do + let(:description) { "foo" } - expect(content_item.description).to eq("foo") - expect(content_item["description"]).to eq("value" => "foo") + it "wraps the description as a hash" do + content_item = FactoryBot.create(:content_item, description:) + + expect(content_item.description).to eq("foo") + expect(content_item["description"]).to eq("value" => "foo") + end + end + + context "when given a description that is already a Hash" do + let(:description) { { "value" => "foo" } } + + it "does not wrap the description hash in another hash" do + content_item = FactoryBot.create(:content_item, description:) + + expect(content_item.description).to eq("foo") + expect(content_item["description"]).to eq("value" => "foo") + end end end From fbaa77d12850af7716c11d817b64ee1db27d2dc6 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 29 Aug 2023 21:38:38 +0100 Subject: [PATCH 24/34] Make JsonImporter only import gzipped files --- app/lib/json_importer.rb | 28 ++++++++++++++-------------- spec/lib/json_importer_spec.rb | 29 ++++++++++++----------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/app/lib/json_importer.rb b/app/lib/json_importer.rb index e7e96834..500cedec 100644 --- a/app/lib/json_importer.rb +++ b/app/lib/json_importer.rb @@ -7,7 +7,7 @@ # and so on class JsonImporter - def initialize(file:, model_class: nil, batch_size: 1, offline_table_class: nil) + def initialize(file:, model_class: nil, offline_table_class: nil) @model_class = model_class || infer_model_class(file) raise ArgumentError, "Could not infer class from #{file}" unless @model_class @@ -15,25 +15,25 @@ def initialize(file:, model_class: nil, batch_size: 1, offline_table_class: nil) @mapper = MongoFieldMapper.new(@model_class) @file = file - @batch_size = batch_size end def call line_no = 0 - lines = [] log "Importing file #{@file}" begin - IO.foreach(@file) do |line| - log line_no, "Processing" - lines << process_line(line) - log line_no, "Completed" - line_no += 1 - if lines.size >= @batch_size - log(" saving batch of #{@batch_size}") - insert_lines(lines) - log(" saved") - lines = [] + # `gunzip -c` writes the gunzip output to STDOUT, `IO.popen` then + # uses pipes to make that an IO stream in Ruby + # see 
https://stackoverflow.com/a/31857743 + # Net result - we decompress and process a gzipped file line by line + IO.popen({}, "gunzip -c #{@file}", "rb") do |io| + while (line = io.gets) + log line_no, "Processing" + processed_line = process_line(line) + log line_no, "Completed, saving..." + insert_lines([processed_line]) + log line_no, "Saved" + line_no += 1 end end @@ -46,7 +46,7 @@ def call end def self.import_all_in(path) - files = Dir.glob("*.json", base: path) + files = Dir.glob("*.json.gz", base: path) files.each do |file| import_file(File.join(path, file)) end diff --git a/spec/lib/json_importer_spec.rb b/spec/lib/json_importer_spec.rb index 5dbfad7b..82c2a2e7 100644 --- a/spec/lib/json_importer_spec.rb +++ b/spec/lib/json_importer_spec.rb @@ -1,9 +1,8 @@ require "rails_helper" describe JsonImporter do - subject { JsonImporter.new(model_class:, file: "content-items.json", offline_table_class:, batch_size:) } + subject { JsonImporter.new(model_class:, file: "content-items.json", offline_table_class:) } let(:model_class) { ContentItem } - let(:batch_size) { 1 } let(:mock_connection) { double(ActiveRecord::Base.connection) } let(:offline_table_class) { double(ContentItem, connection: mock_connection, table_name: "offline_table") } @@ -25,7 +24,7 @@ end end - context "and the given id does exist in the DB as a base_path" do + context "and the given id does exist in the DB as a base_path" do let(:id) { create(:content_item).base_path } it "returns true" do @@ -198,8 +197,8 @@ allow(described_class).to receive(:import_file) end - it "globs the given directory for json files" do - expect(Dir).to receive(:glob).with("*.json", base: "/my/path").and_return(%w[file1 file2]) + it "globs the given directory for .json.gz files" do + expect(Dir).to receive(:glob).with("*.json.gz", base: "/my/path").and_return(%w[file1 file2]) described_class.import_all_in(path) end @@ -211,15 +210,18 @@ end describe "#call" do + let(:mock_stream) { instance_double(IO) } before do allow(subject).to receive(:insert_lines) + allow(subject).to receive(:process_line).and_return("line1", "line2") allow(subject).to receive(:update_model_table_from_offline_table) allow(offline_table_class).to receive(:insert_all) end context "for each line in the file" do before do - allow(IO).to receive(:foreach).and_yield("line1").and_yield("line2") + allow(IO).to receive(:popen).and_yield(mock_stream) + allow(mock_stream).to receive(:gets).and_return("line1", "line2", nil) allow(offline_table_class).to receive(:insert_all) end @@ -229,17 +231,10 @@ subject.call end - context "when it has processed batch_size lines" do - let(:batch_size) { 2 } - before do - allow(subject).to receive(:process_line).with("line1").and_return("line1") - allow(subject).to receive(:process_line).with("line2").and_return("line2") - end - - it "inserts the lines" do - expect(subject).to receive(:insert_lines).once.with(%w[line1 line2]) - subject.call - end + it "inserts the lines" do + expect(subject).to receive(:insert_lines).with(%w[line1]).ordered + expect(subject).to receive(:insert_lines).with(%w[line2]).ordered + subject.call end end end From d5c3e843adde35512ac3c7c445f9cb20d16603d3 Mon Sep 17 00:00:00 2001 From: Richard Towers Date: Fri, 25 Aug 2023 16:10:32 +0000 Subject: [PATCH 25/34] Don't override default attributes with nil This fixes a bug where unpublished redirects in short URL manager aren't removed from the content store (so continue to work on the website). Users of the content-store API (i.e. 
publishing-api) might make API calls with values they want to reset
provided as `nil`. For example, if you wanted to clear some redirects on
a content item, you might do something like:

```
PUT /content/some-made-up-url
{
  ...
  "redirects": null
  ...
}
```

(JSON has no `nil`; a cleared field is sent as `null`, which parses to
Ruby's `nil`.)

The intent of the user of the API is clear here - they want no
redirects. However, ContentItem has a default value for redirects:

```
field :redirects, type: Array, default: []
```

And the rest of the content-store expects this value to be an Array, not
nil.

By passing potentially nil values into assign_attributes, we allow a
situation where fields that content-store expects not to be nil (because
they have defaults) can be nil. This tends to result in NoMethodErrors,
such as this one:

```
NoMethodError
undefined method `map' for nil:NilClass

    redirects = item.redirects.map(&:to_h).map(&:deep_symbolize_keys)
                                  ^^^^
/app/app/models/route_set.rb:30:in `from_content_item'
/app/app/models/content_item.rb:215:in `route_set'
/app/app/models/content_item.rb:225:in `should_register_routes?'
/app/app/models/content_item.rb:193:in `register_routes'
/app/app/models/content_item.rb:33:in `create_or_replace'
/app/app/controllers/content_items_controller.rb:32:in `block in update'
```

I can't think of any valid reason for overriding default attributes with
nils, so it feels like calling .compact is the right thing to do here.
---
 app/models/content_item.rb       | 4 +++-
 spec/models/content_item_spec.rb | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/app/models/content_item.rb b/app/models/content_item.rb
index b6251cc5..0963b1fc 100644
--- a/app/models/content_item.rb
+++ b/app/models/content_item.rb
@@ -27,7 +27,9 @@ def self.create_or_replace(base_path, attributes, log_entry)
       item = ContentItem.new(base_path:)
       item.assign_attributes(
         attributes
-          .merge(scheduled_publication_details(log_entry)),
+          .merge(scheduled_publication_details(log_entry))
+          .merge(created_at:)
+          .compact,
       )
 
       if previous_item
diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb
index 2a361854..d7755e71 100644
--- a/spec/models/content_item_spec.rb
+++ b/spec/models/content_item_spec.rb
@@ -44,6 +44,11 @@
       end
     end
 
+    it "does not overwrite default attribute values if called with nil attributes" do
+      _, item = ContentItem.create_or_replace(@item.base_path, { schema_name: "redirect", redirects: nil }, nil)
+      expect(item.redirects).to eq([])
+    end
+
     describe "exceptions" do
       context "when unknown attributes are provided" do
         let(:attributes) { { "foo" => "foo", "bar" => "bar" } }

From 572a0fc84e20f295032359963acf3be0bfb0014b Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Thu, 12 Oct 2023 10:00:13 +0100
Subject: [PATCH 26/34] Speed up FindByPath.find on K8s/RDS environments

The `.any?` method, when called on a Relation, seems to instantiate the
objects in the result set, which is very slow on Kubernetes. It worked
fine in Mongoid, but not in ActiveRecord. If we replace this with
`.count.positive?`, it's much faster.
---
 app/lib/find_by_path.rb | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/lib/find_by_path.rb b/app/lib/find_by_path.rb
index e41cb7b4..8d3a85ab 100644
--- a/app/lib/find_by_path.rb
+++ b/app/lib/find_by_path.rb
@@ -17,7 +17,10 @@ def find(path)
     return exact_match if exact_match
 
     matches = find_route_matches(path)
-    matches.any? ? best_route_match(matches, path) : nil
+
+    if matches.count.positive?
+ best_route_match(matches, path) + end end private From 175f5cc0f10dae518af7bcc9952f13bd9dc56b60 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Mon, 16 Oct 2023 12:39:26 +0100 Subject: [PATCH 27/34] Fix Integer field overflow on scheduled_publishing_delay_seconds Whitehall doesn't do much validation of a given scheduled publishing time. As a result, it can sometimes send us really extreme values for `scheduled_publishing_delay_seconds` (e.g. 400 years into the future). This can cause problems in the importer when Mongo has accepted the value, but PostgreSQL can't. Changing the field type to `bigint` fixes the issue. --- ...uled_publishing_delay_seconds_to_bigint.rb | 58 +++++++++++++++++++ db/schema.rb | 4 +- 2 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 db/migrate/20231016112610_change_scheduled_publishing_delay_seconds_to_bigint.rb diff --git a/db/migrate/20231016112610_change_scheduled_publishing_delay_seconds_to_bigint.rb b/db/migrate/20231016112610_change_scheduled_publishing_delay_seconds_to_bigint.rb new file mode 100644 index 00000000..a3720c78 --- /dev/null +++ b/db/migrate/20231016112610_change_scheduled_publishing_delay_seconds_to_bigint.rb @@ -0,0 +1,58 @@ +class ChangeScheduledPublishingDelaySecondsToBigint < ActiveRecord::Migration[7.0] + # we want manual control of transactions to minimise exclusive locks + disable_ddl_transaction! + + def up + add_column :content_items, :scheduled_publishing_delay_seconds_bigint, :bigint, null: true + + # populate temporary column in small batches, non-transactionally, to minimise locks + done = false + until done == true + rows_updated = ContentItem.connection.update <<-SQL + UPDATE content_items SET scheduled_publishing_delay_seconds_bigint = scheduled_publishing_delay_seconds + WHERE id IN ( + SELECT id FROM content_items ci2 + WHERE ci2.scheduled_publishing_delay_seconds IS NOT NULL + AND ci2.scheduled_publishing_delay_seconds_bigint IS NULL + LIMIT 5000 + ); + SQL + remaining = ContentItem.where("scheduled_publishing_delay_seconds IS NOT NULL AND scheduled_publishing_delay_seconds_bigint IS NULL").count + Rails.logger.debug "#{rows_updated} rows updated, #{remaining} remaining" + done = remaining.zero? + end + + ContentItem.transaction do + rename_column :content_items, :scheduled_publishing_delay_seconds, :scheduled_publishing_delay_seconds_int + rename_column :content_items, :scheduled_publishing_delay_seconds_bigint, :scheduled_publishing_delay_seconds + remove_column :content_items, :scheduled_publishing_delay_seconds_int + end + end + + def down + add_column :content_items, :scheduled_publishing_delay_seconds_int, :integer, null: true + + # populate temporary column in small batches, non-transactionally, to minimise locks + done = false + until done == true + rows_updated = ContentItem.connection.update <<-SQL + UPDATE content_items SET scheduled_publishing_delay_seconds_int = scheduled_publishing_delay_seconds + WHERE id IN ( + SELECT id FROM content_items ci2 + WHERE ci2.scheduled_publishing_delay_seconds IS NOT NULL + AND ci2.scheduled_publishing_delay_seconds_int IS NULL + LIMIT 5000 + ); + SQL + remaining = ContentItem.where("scheduled_publishing_delay_seconds IS NOT NULL AND scheduled_publishing_delay_seconds_int IS NULL").count + Rails.logger.debug "#{rows_updated} rows updated, #{remaining} remaining" + done = remaining.zero? 
+ end + + ContentItem.transaction do + rename_column :content_items, :scheduled_publishing_delay_seconds, :scheduled_publishing_delay_seconds_bigint + rename_column :content_items, :scheduled_publishing_delay_seconds_int, :scheduled_publishing_delay_seconds + remove_column :content_items, :scheduled_publishing_delay_seconds_bigint + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 94d31a23..d5f2cc3a 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2023_08_30_093643) do +ActiveRecord::Schema[7.0].define(version: 2023_10_16_112610) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -35,7 +35,6 @@ t.datetime "first_published_at" t.datetime "public_updated_at" t.datetime "publishing_scheduled_at" - t.integer "scheduled_publishing_delay_seconds" t.jsonb "details", default: {} t.string "publishing_app" t.string "rendering_app" @@ -52,6 +51,7 @@ t.datetime "created_at" t.datetime "updated_at" t.string "_id" + t.bigint "scheduled_publishing_delay_seconds" t.index ["base_path"], name: "index_content_items_on_base_path", unique: true t.index ["content_id"], name: "index_content_items_on_content_id" t.index ["created_at"], name: "index_content_items_on_created_at" From f2f67e323ec45d7e1fa126aa02f0d1bae90c0239 Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 17 Oct 2023 13:32:20 +0100 Subject: [PATCH 28/34] Don't dump the schema after running migrations in production ...it will fail due to the read-only filesystem in prod. Also, some whitespace-only corrections to the migration --- config/environments/production.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/environments/production.rb b/config/environments/production.rb index 57c665fb..b1c1aacd 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -67,4 +67,7 @@ logger.formatter = config.log_formatter config.logger = ActiveSupport::TaggedLogging.new(logger) end + + # don't dump the schema after running migrations + config.active_record.dump_schema_after_migration = false end From 624012b527dad4745b272c1cef2c9c5dadd0deee Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 24 Oct 2023 16:15:40 +0100 Subject: [PATCH 29/34] Fix error when auth_bypass_ids is nil --- app/models/content_item.rb | 2 +- spec/models/content_item_spec.rb | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/app/models/content_item.rb b/app/models/content_item.rb index 0963b1fc..645d2379 100644 --- a/app/models/content_item.rb +++ b/app/models/content_item.rb @@ -131,7 +131,7 @@ def router_rendering_app def valid_auth_bypass_id?(auth_bypass_id) return false unless auth_bypass_id - return true if auth_bypass_ids.include?(auth_bypass_id) + return true if auth_bypass_ids&.include?(auth_bypass_id) return false if access_limited? 
# check for linked auth_bypass_id in top level expanded links diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb index d7755e71..7a4143a2 100644 --- a/spec/models/content_item_spec.rb +++ b/spec/models/content_item_spec.rb @@ -436,6 +436,22 @@ expect(content_item.valid_auth_bypass_id?(auth_bypass_id)).to be(false) end end + + context "when auth_bypass_ids is nil" do + let(:content_item) { build(:content_item, auth_bypass_ids: nil) } + + context "given an auth_bypass_id" do + let(:auth_bypass_id) { SecureRandom.uuid } + + it "does not raise an error" do + expect { content_item.valid_auth_bypass_id?(auth_bypass_id) }.not_to raise_error + end + + it "returns false" do + expect(content_item.valid_auth_bypass_id?(auth_bypass_id)).to eq(false) + end + end + end end describe "description" do From 29ba6447ab15e9e1f9d223436f948ba28622f9fc Mon Sep 17 00:00:00 2001 From: Al Davidson Date: Tue, 31 Oct 2023 14:37:08 +0000 Subject: [PATCH 30/34] Upgrade rails from 7.0.8 to 7.1.2 --- Gemfile | 2 +- Gemfile.lock | 172 +++++++++++------- bin/setup | 2 +- config/application.rb | 16 +- config/boot.rb | 2 +- config/environments/development.rb | 5 +- config/environments/production.rb | 22 ++- config/environments/test.rb | 20 +- .../initializers/filter_parameter_logging.rb | 6 +- config/initializers/permissions_policy.rb | 20 +- .../submitting_content_item_spec.rb | 1 - spec/models/content_item_spec.rb | 1 - 12 files changed, 163 insertions(+), 106 deletions(-) diff --git a/Gemfile b/Gemfile index 824510f2..418cdc5c 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,6 @@ source "https://rubygems.org" -gem "rails", "7.0.8" +gem "rails", "7.1.2" gem "bootsnap", require: false gem "deepsort" diff --git a/Gemfile.lock b/Gemfile.lock index 0714c0c7..235bab5a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,75 +1,86 @@ GEM remote: https://rubygems.org/ specs: - actioncable (7.0.8) - actionpack (= 7.0.8) - activesupport (= 7.0.8) + actioncable (7.1.2) + actionpack (= 7.1.2) + activesupport (= 7.1.2) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailbox (7.0.8) - actionpack (= 7.0.8) - activejob (= 7.0.8) - activerecord (= 7.0.8) - activestorage (= 7.0.8) - activesupport (= 7.0.8) + zeitwerk (~> 2.6) + actionmailbox (7.1.2) + actionpack (= 7.1.2) + activejob (= 7.1.2) + activerecord (= 7.1.2) + activestorage (= 7.1.2) + activesupport (= 7.1.2) mail (>= 2.7.1) net-imap net-pop net-smtp - actionmailer (7.0.8) - actionpack (= 7.0.8) - actionview (= 7.0.8) - activejob (= 7.0.8) - activesupport (= 7.0.8) + actionmailer (7.1.2) + actionpack (= 7.1.2) + actionview (= 7.1.2) + activejob (= 7.1.2) + activesupport (= 7.1.2) mail (~> 2.5, >= 2.5.4) net-imap net-pop net-smtp - rails-dom-testing (~> 2.0) - actionpack (7.0.8) - actionview (= 7.0.8) - activesupport (= 7.0.8) - rack (~> 2.0, >= 2.2.4) + rails-dom-testing (~> 2.2) + actionpack (7.1.2) + actionview (= 7.1.2) + activesupport (= 7.1.2) + nokogiri (>= 1.8.5) + racc + rack (>= 2.2.4) + rack-session (>= 1.0.1) rack-test (>= 0.6.3) - rails-dom-testing (~> 2.0) - rails-html-sanitizer (~> 1.0, >= 1.2.0) - actiontext (7.0.8) - actionpack (= 7.0.8) - activerecord (= 7.0.8) - activestorage (= 7.0.8) - activesupport (= 7.0.8) + rails-dom-testing (~> 2.2) + rails-html-sanitizer (~> 1.6) + actiontext (7.1.2) + actionpack (= 7.1.2) + activerecord (= 7.1.2) + activestorage (= 7.1.2) + activesupport (= 7.1.2) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (7.0.8) - activesupport (= 7.0.8) + actionview (7.1.2) + activesupport (= 7.1.2) builder (~> 
3.1) - erubi (~> 1.4) - rails-dom-testing (~> 2.0) - rails-html-sanitizer (~> 1.1, >= 1.2.0) - activejob (7.0.8) - activesupport (= 7.0.8) + erubi (~> 1.11) + rails-dom-testing (~> 2.2) + rails-html-sanitizer (~> 1.6) + activejob (7.1.2) + activesupport (= 7.1.2) globalid (>= 0.3.6) - activemodel (7.0.8) - activesupport (= 7.0.8) - activerecord (7.0.8) - activemodel (= 7.0.8) - activesupport (= 7.0.8) - activestorage (7.0.8) - actionpack (= 7.0.8) - activejob (= 7.0.8) - activerecord (= 7.0.8) - activesupport (= 7.0.8) + activemodel (7.1.2) + activesupport (= 7.1.2) + activerecord (7.1.2) + activemodel (= 7.1.2) + activesupport (= 7.1.2) + timeout (>= 0.4.0) + activestorage (7.1.2) + actionpack (= 7.1.2) + activejob (= 7.1.2) + activerecord (= 7.1.2) + activesupport (= 7.1.2) marcel (~> 1.0) - mini_mime (>= 1.1.0) - activesupport (7.0.8) + activesupport (7.1.2) + base64 + bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) + connection_pool (>= 2.2.5) + drb i18n (>= 1.6, < 2) minitest (>= 5.1) + mutex_m tzinfo (~> 2.0) addressable (2.8.6) public_suffix (>= 2.0.2, < 6.0) ast (2.4.2) awesome_print (1.9.2) + base64 (0.2.0) + bigdecimal (3.1.5) bootsnap (1.17.0) msgpack (~> 1.2) brakeman (6.1.0) @@ -93,6 +104,7 @@ GEM climate_control (1.2.0) coderay (1.1.3) concurrent-ruby (1.2.2) + connection_pool (2.4.1) crack (0.4.5) rexml crass (1.0.6) @@ -100,11 +112,13 @@ GEM activerecord (>= 5.a) database_cleaner-core (~> 2.0.0) database_cleaner-core (2.0.1) - date (3.3.3) + date (3.3.4) deepsort (0.5.0) diff-lcs (1.5.0) docile (1.4.0) domain_name (0.6.20231109) + drb (2.2.0) + ruby2_keywords erubi (1.12.0) expgen (0.1.1) parslet @@ -164,6 +178,10 @@ GEM domain_name (~> 0.5) i18n (1.14.1) concurrent-ruby (~> 1.0) + io-console (0.7.1) + irb (1.11.0) + rdoc + reline (>= 0.3.8) json (2.7.1) json-schema (4.1.1) addressable (>= 2.8) @@ -195,14 +213,15 @@ GEM minitest (5.20.0) msgpack (1.7.2) multi_xml (0.6.0) - net-imap (0.3.7) + mutex_m (0.2.0) + net-imap (0.4.9) date net-protocol net-pop (0.1.2) net-protocol - net-protocol (0.2.1) + net-protocol (0.2.2) timeout - net-smtp (0.3.3) + net-smtp (0.4.0) net-protocol netrc (0.11.0) nio4r (2.7.0) @@ -460,6 +479,8 @@ GEM pry-byebug (3.10.1) byebug (~> 11.0) pry (>= 0.13, < 0.15) + psych (5.1.2) + stringio public_suffix (5.0.4) puma (6.4.0) nio4r (~> 2.0) @@ -471,22 +492,27 @@ GEM rack (~> 2.2, >= 2.2.4) rack-proxy (0.7.7) rack + rack-session (1.0.2) + rack (< 3) rack-test (2.1.0) rack (>= 1.3) - rails (7.0.8) - actioncable (= 7.0.8) - actionmailbox (= 7.0.8) - actionmailer (= 7.0.8) - actionpack (= 7.0.8) - actiontext (= 7.0.8) - actionview (= 7.0.8) - activejob (= 7.0.8) - activemodel (= 7.0.8) - activerecord (= 7.0.8) - activestorage (= 7.0.8) - activesupport (= 7.0.8) + rackup (1.0.0) + rack (< 3) + webrick + rails (7.1.2) + actioncable (= 7.1.2) + actionmailbox (= 7.1.2) + actionmailer (= 7.1.2) + actionpack (= 7.1.2) + actiontext (= 7.1.2) + actionview (= 7.1.2) + activejob (= 7.1.2) + activemodel (= 7.1.2) + activerecord (= 7.1.2) + activestorage (= 7.1.2) + activesupport (= 7.1.2) bundler (>= 1.15.0) - railties (= 7.0.8) + railties (= 7.1.2) rails-dom-testing (2.2.0) activesupport (>= 5.0.0) minitest @@ -494,19 +520,24 @@ GEM rails-html-sanitizer (1.6.0) loofah (~> 2.21) nokogiri (~> 1.14) - railties (7.0.8) - actionpack (= 7.0.8) - activesupport (= 7.0.8) - method_source + railties (7.1.2) + actionpack (= 7.1.2) + activesupport (= 7.1.2) + irb + rackup (>= 1.0.0) rake (>= 12.2) - thor (~> 1.0) - zeitwerk (~> 2.5) + thor (~> 1.0, >= 1.2.2) + zeitwerk (~> 2.6) 
rainbow (3.1.1) rake (13.1.0) rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) + rdoc (6.6.2) + psych (>= 4.0.0) regexp_parser (2.8.3) + reline (0.4.1) + io-console (~> 0.5) request_store (1.5.1) rack (>= 1.4) rest-client (2.1.0) @@ -594,12 +625,13 @@ GEM hashie version_gem (~> 1.1, >= 1.1.1) statsd-ruby (1.5.0) + stringio (3.1.0) sync (0.5.0) term-ansicolor (1.7.1) tins (~> 1.0) thor (1.3.0) timecop (0.9.8) - timeout (0.4.0) + timeout (0.4.1) tins (1.32.1) sync tzinfo (2.0.6) @@ -650,7 +682,7 @@ DEPENDENCIES plek pry-byebug rack-cache - rails (= 7.0.8) + rails (= 7.1.2) rspec-rails rubocop-govuk simplecov diff --git a/bin/setup b/bin/setup index 57b65c85..cf2acd18 100755 --- a/bin/setup +++ b/bin/setup @@ -5,7 +5,7 @@ require "fileutils" APP_ROOT = File.expand_path("..", __dir__) def system!(*args) - system(*args) || abort("\n== Command #{args} failed ==") + system(*args, exception: true) end FileUtils.chdir APP_ROOT do diff --git a/config/application.rb b/config/application.rb index c11a063d..5fcb299e 100644 --- a/config/application.rb +++ b/config/application.rb @@ -13,6 +13,7 @@ # require "action_view/railtie" # require "action_cable/engine" # require "rails/test_unit/railtie" +require "active_support/core_ext/integer/time" # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. @@ -24,7 +25,17 @@ module ContentStore class Application < Rails::Application # Initialize configuration defaults for originally generated Rails version. - config.load_defaults 7.0 + config.load_defaults 7.1 + + # Once this application is fully deployed to Rails 7.1 and you have no plans to rollback + # replace the line below with config.active_support.cache_format_version = 7.1 + # This will mean that we can revert back to rails 7.0 if there is an issue + config.active_support.cache_format_version = 7.0 + + # Please, add to the `ignore` list any other `lib` subdirectories that do + # not contain `.rb` files, or that should not be reloaded or eager loaded. + # Common ones are `templates`, `generators`, or `middleware`, for example. + config.autoload_lib(ignore: %w[assets tasks]) # Configuration for the application, engines, and railties goes here. # @@ -34,6 +45,9 @@ class Application < Rails::Application # config.time_zone = "Central Time (US & Canada)" # config.eager_load_paths << Rails.root.join("extras") + # Don't generate system test files. + config.generators.system_tests = nil + config.i18n.enforce_available_locales = true config.i18n.available_locales = %i[ ar diff --git a/config/boot.rb b/config/boot.rb index 997563c2..988a5ddc 100644 --- a/config/boot.rb +++ b/config/boot.rb @@ -1,4 +1,4 @@ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__) require "bundler/setup" # Set up gems listed in the Gemfile. -require "bootsnap/setup" +require "bootsnap/setup" # Speed up boot time by caching expensive operations. diff --git a/config/environments/development.rb b/config/environments/development.rb index 0520a391..2d77ae88 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -6,7 +6,7 @@ # In the development environment your application's code is reloaded any time # it changes. This slows down response time but is perfect for development # since you don't have to restart the web server when you make code changes. - config.cache_classes = false + config.enable_reloading = true # Do not eager load code on boot. 
config.eager_load = false @@ -51,6 +51,9 @@ # Uncomment if you wish to allow Action Cable access from any origin. # config.action_cable.disable_request_forgery_protection = true + # Raise error when a before_action's only/except options reference missing actions + config.action_controller.raise_on_missing_callback_actions = true + # Allow requests from any domain config.hosts.clear end diff --git a/config/environments/production.rb b/config/environments/production.rb index b1c1aacd..f3570da6 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -4,7 +4,7 @@ # Settings specified here will take precedence over those in config/application.rb. # Code is not reloaded between requests. - config.cache_classes = true + config.enable_reloading = false # Eager load code on boot. This eager loads most of Rails and # your application in memory, allowing both threaded web servers @@ -13,15 +13,14 @@ config.eager_load = true # Full error reports are disabled and caching is turned on. - config.consider_all_requests_local = false + config.consider_all_requests_local = false config.action_controller.perform_caching = true - # Ensures that a master key has been made available in either ENV["RAILS_MASTER_KEY"] - # or in config/master.key. This key is used to decrypt credentials (and other encrypted files). + # Ensures that a master key has been made available in ENV["RAILS_MASTER_KEY"], config/master.key, or an environment + # key such as config/credentials/production.key. This key is used to decrypt credentials (and other encrypted files). # config.require_master_key = true - # Disable serving static files from the `/public` folder by default since - # Apache or NGINX already handles this. + # Disable serving static files from `public/`, relying on NGINX/Apache to do so instead. config.public_file_server.enabled = ENV["RAILS_SERVE_STATIC_FILES"].present? # Enable serving of images, stylesheets, and JavaScripts from an asset server. @@ -31,12 +30,17 @@ # config.action_dispatch.x_sendfile_header = "X-Sendfile" # for Apache # config.action_dispatch.x_sendfile_header = "X-Accel-Redirect" # for NGINX + # Assume all access to the app is happening through a SSL-terminating reverse proxy. + # Can be used together with config.force_ssl for Strict-Transport-Security and secure cookies. + # config.assume_ssl = true + # Force all access to the app over SSL, use Strict-Transport-Security, and use secure cookies. # config.force_ssl = true - # Include generic and useful information about system operation, but avoid logging too much - # information to avoid inadvertent exposure of personally identifiable information (PII). - config.log_level = ENV.fetch("RAILS_LOG_LEVEL", :info) + # Info include generic and useful information about system operation, but avoids logging too much + # information to avoid inadvertent exposure of personally identifiable information (PII). If you + # want to log everything, set the level to "debug". + config.log_level = ENV.fetch("RAILS_LOG_LEVEL", "info") # Prepend all log lines with the following tags. # config.log_tags = [ :request_id ] diff --git a/config/environments/test.rb b/config/environments/test.rb index 0fd6280c..ac7ae63f 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -8,12 +8,13 @@ Rails.application.configure do # Settings specified here will take precedence over those in config/application.rb. - # Turn false under Spring and add config.action_view.cache_template_loading = true. 
- config.cache_classes = true + # While tests run files are not watched, reloading is not necessary. + config.enable_reloading = false - # Eager loading loads your whole application. When running a single test locally, - # this probably isn't necessary. It's a good idea to do in a continuous integration - # system, or in some way before deploying your code. + # Eager loading loads your entire application. When running a single test locally + # this is usually not necessary, and can slow down your test suite. However, it's + # recommended that you enable it in continuous integration systems to ensure eager + # loading is working properly before deploying your code. config.eager_load = ENV["CI"].present? # Configure public file server for tests with Cache-Control for performance. @@ -23,12 +24,12 @@ } # Show full error reports and disable caching. - config.consider_all_requests_local = true + config.consider_all_requests_local = true config.action_controller.perform_caching = false config.cache_store = :null_store - # Raise exceptions instead of rendering exception templates. - config.action_dispatch.show_exceptions = false + # Render exception templates for rescuable exceptions and raise for other exceptions. + config.action_dispatch.show_exceptions = :rescuable # Disable request forgery protection in test environment. config.action_controller.allow_forgery_protection = false @@ -47,4 +48,7 @@ # Annotate rendered view with file names. # config.action_view.annotate_rendered_view_with_filenames = true + + # Raise error when a before_action's only/except options reference missing actions + config.action_controller.raise_on_missing_callback_actions = true end diff --git a/config/initializers/filter_parameter_logging.rb b/config/initializers/filter_parameter_logging.rb index 166997c5..262e8620 100644 --- a/config/initializers/filter_parameter_logging.rb +++ b/config/initializers/filter_parameter_logging.rb @@ -1,8 +1,8 @@ # Be sure to restart your server when you modify this file. -# Configure parameters to be filtered from the log file. Use this to limit dissemination of -# sensitive information. See the ActiveSupport::ParameterFilter documentation for supported -# notations and behaviors. +# Configure parameters to be partially matched (e.g. passw matches password) and filtered from the log file. +# Use this to limit dissemination of sensitive information. +# See the ActiveSupport::ParameterFilter documentation for supported notations and behaviors. Rails.application.config.filter_parameters += %i[ passw secret token _key crypt salt certificate otp ssn ] diff --git a/config/initializers/permissions_policy.rb b/config/initializers/permissions_policy.rb index 00f64d71..7db3b957 100644 --- a/config/initializers/permissions_policy.rb +++ b/config/initializers/permissions_policy.rb @@ -1,11 +1,13 @@ +# Be sure to restart your server when you modify this file. + # Define an application-wide HTTP permissions policy. 
For further
-# information see https://developers.google.com/web/updates/2018/06/feature-policy
-#
-# Rails.application.config.permissions_policy do |f|
-#   f.camera :none
-#   f.gyroscope :none
-#   f.microphone :none
-#   f.usb :none
-#   f.fullscreen :self
-#   f.payment :self, "https://secure.example.com"
+# information see: https://developers.google.com/web/updates/2018/06/feature-policy
+
+# Rails.application.config.permissions_policy do |policy|
+#   policy.camera :none
+#   policy.gyroscope :none
+#   policy.microphone :none
+#   policy.usb :none
+#   policy.fullscreen :self
+#   policy.payment :self, "https://secure.example.com"
 # end
diff --git a/spec/integration/submitting_content_item_spec.rb b/spec/integration/submitting_content_item_spec.rb
index 6b245d25..2984927d 100644
--- a/spec/integration/submitting_content_item_spec.rb
+++ b/spec/integration/submitting_content_item_spec.rb
@@ -1,5 +1,4 @@
 require "rails_helper"
-require "update_lock"
 
 describe "content item write API", type: :request do
   before :each do
diff --git a/spec/models/content_item_spec.rb b/spec/models/content_item_spec.rb
index 7a4143a2..b2214c68 100644
--- a/spec/models/content_item_spec.rb
+++ b/spec/models/content_item_spec.rb
@@ -1,5 +1,4 @@
 require "rails_helper"
-require "update_lock"
 
 describe ContentItem, type: :model do
   describe ".create_or_replace" do

From a4f1d63c31da096e5d3bb0db3bc8c7535fca2186 Mon Sep 17 00:00:00 2001
From: Mike Patrick
Date: Thu, 23 Nov 2023 16:35:11 +0000
Subject: [PATCH 31/34] Fix logger custom field config

It turns out that you can't call `LogStasher.add_custom_fields` twice.

Because Content Store's custom fields config was being run after the
govuk_app_config gem's own custom fields config, the gem's config was
being overwritten. So, fields like `govuk_request_id` and `varnish_id`
weren't appearing in Content Store's controller request logs (but
`govuk_dependency_resolution_source_content_id` was).

The gem (version 9.7.0) now provides a mechanism for setting custom
fields that doesn't overwrite the gem's own settings (see
https://docs.publishing.service.gov.uk/repos/govuk_app_config.html#logger-configuration):

```
GovukJsonLogging.configure do
  add_custom_fields do |fields|
    fields[:govuk_custom_field] = request.headers["GOVUK-Custom-Header"]
  end
end
```
---
 config/initializers/logstasher.rb | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/config/initializers/logstasher.rb b/config/initializers/logstasher.rb
index a0cbeeaf..459111d8 100644
--- a/config/initializers/logstasher.rb
+++ b/config/initializers/logstasher.rb
@@ -1,7 +1,5 @@
-unless Rails.env.test?
-  GovukJsonLogging.configure do
-    add_custom_fields do |fields|
-      fields[:govuk_dependency_resolution_source_content_id] = request.headers["GOVUK-Dependency-Resolution-Source-Content-Id"]
-    end
+GovukJsonLogging.configure do
+  add_custom_fields do |fields|
+    fields[:govuk_dependency_resolution_source_content_id] = request.headers["GOVUK-Dependency-Resolution-Source-Content-Id"]
   end
 end

From 9820d4bb39c1a62ad75141087d5611ddf309cddd Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Fri, 22 Dec 2023 13:45:16 +0000
Subject: [PATCH 32/34] Maintain compatibility with earlier nil-values fix from main (#1136)

An earlier commit on main (#d5422b46 in PR #1136) fixed a subtle issue
when overriding default values with nil, by explicitly setting
`.created_at` and other attributes from the existing item when it was
being replaced.
This caused issues on this PostgreSQL branch after rebasing, as
ActiveRecord behaves more as expected with respect to `created_at`, and
therefore the line creating a local `created_at` variable had been
removed.

This commit reintroduces that variable, and tests now pass again.
---
 app/models/content_item.rb | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/models/content_item.rb b/app/models/content_item.rb
index 645d2379..eb17e87f 100644
--- a/app/models/content_item.rb
+++ b/app/models/content_item.rb
@@ -21,6 +21,10 @@ def self.create_or_replace(base_path, attributes, log_entry)
 
       result = previous_item ? :replaced : :created
 
+      # created_at doesn't seem to get set correctly on an upsert, so set it
+      # explicitly here to preserve it
+      created_at = previous_item ? previous_item.created_at : Time.zone.now.utc
+
       # This awkward construction is necessary to maintain the required behaviour -
       # a content item sent to Content Store is a complete entity (as defined in a schema)
       # and no remnants of the item it replaces should remain.

From bf06dd05ec07f8a17d9c809e32e4694096a9b232 Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Fri, 22 Dec 2023 14:34:05 +0000
Subject: [PATCH 33/34] Retain compatibility with #bbe2fda / PR #1152 on main

---
 app/models/content_item.rb | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/app/models/content_item.rb b/app/models/content_item.rb
index eb17e87f..26051cd7 100644
--- a/app/models/content_item.rb
+++ b/app/models/content_item.rb
@@ -155,8 +155,8 @@ def access_limited?
     access_limited_user_ids.any? || access_limited_organisation_ids.any?
   end
 
-  def register_routes
-    return unless should_register_routes?
+  def register_routes(previous_item: nil)
+    return unless should_register_routes?(previous_item:)
 
     tries = Rails.application.config.register_router_retries
     begin
@@ -183,13 +183,8 @@ def route_set
 
   private
 
-  def should_register_routes?
-    return false if schema_name.to_s.start_with?("placeholder")
-
-    if previous_item
-      return previous_item.schema_name == "placeholder" ||
-        previous_item.route_set != route_set
-    end
+  def should_register_routes?(previous_item: nil)
+    return previous_item.route_set != route_set if previous_item
 
     true
   end

From 79194089fd9f57983efc1f49d2e12cb40981dc31 Mon Sep 17 00:00:00 2001
From: Al Davidson
Date: Tue, 2 Jan 2024 11:30:05 +0000
Subject: [PATCH 34/34] Update comment about exposing id outside the model

---
 app/models/content_item.rb | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/models/content_item.rb b/app/models/content_item.rb
index 26051cd7..222d7841 100644
--- a/app/models/content_item.rb
+++ b/app/models/content_item.rb
@@ -78,8 +78,9 @@ def self.find_by_path(path)
     ::FindByPath.new(self).find(path)
   end
 
-  # We want to force the JSON representation to use "base_path"
-  # and prevent "id" being exposed outside of the model.
+  # Prevent "id" being exposed outside of the model - it's synthetic and
+  # only of internal use. Other applications should use `content_id` or
+  # `base_path` as their unique identifiers.
   def as_json(options = nil)
     super(options).except("id")
   end
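As an illustration of the behaviour the reworded comment describes, a
minimal plain-Ruby sketch (no ActiveRecord; the hash literal and its
values are illustrative, not real model data):

```
# Serialized attributes include the synthetic "id"; the as_json override
# strips it so only content_id and base_path identify the record publicly.
serialized = {
  "id" => "00000000-0000-0000-0000-000000000000",
  "content_id" => "f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a",
  "base_path" => "/vat-rates",
}

serialized.except("id")
# => { "content_id" => "f3bbdec2-0e62-4520-a7fd-6ffd5d36e03a",
#      "base_path" => "/vat-rates" }
```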