diff --git a/README.md b/README.md index 6605753dc..50e29ff73 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ## Development -A supplementary `docker-compose.development.yml`-file is provided in order to tweak the stack-setup for development purposes. Among other changes, this configuration will for instance prevent crashed services from restarting automatically, in order to catch errors quicker. +A supplementary `docker-compose.development.yml`-file is provided in order to tweak the stack-setup for development purposes. Among other changes, this configuration will for instance prevent crashed services from restarting automatically, in order to catch errors quicker. It also disables certain services that generally aren't required for development purposes. This is done by replacing them with "sink" services that accept all requests but do nothing. You can start the stack in development mode by running @@ -13,16 +13,24 @@ You can start the stack in development mode by running docker-compose -f docker-compose.yml -f docker-compose.development.yml up ``` -*Pro tip: The stack consists of some services such as `mu-search` that can potentially consume a lot of resources and often aren't required for basic development-tasks. Adding the following snippet to your `docker-compose.override.yml`-file under `services`, will disable the most resource-consuming services.* +*Pro tip: The stack consists of some services such as `mu-search` that can potentially consume a lot of resources and often aren't required for basic development-tasks. Adding the following snippet to your `docker-compose.override.yml`-file under `services` will replace the most resource-consuming services with "sink" services that accept all requests but do nothing. 
This saves on your resources and prevents certain errors from occurring due to the services being down.* ```yml search: - entrypoint: "echo 'service disabled'" + image: lblod/sink-service:1.0.0 tika: - entrypoint: "echo 'service disabled'" + image: lblod/sink-service:1.0.0 elasticsearch: - entrypoint: "echo 'service disabled'" + image: lblod/sink-service:1.0.0 yggdrasil: - entrypoint: "echo 'service disabled'" + image: lblod/sink-service:1.0.0 +``` + +If you need to re-enable a service that is disabled in `docker-compose.development.yml` you can do so by adding the service to your `docker-compose.override.yml` with the image that's defined in `docker-compose.yml`. + +``` yml +# e.g. delta-consumer is disabled but you want to re-enable it + delta-consumer: + image: kanselarij/delta-consumer:0.0.1 ``` ## Data diff --git a/config/consumer/example-custom-dispatching/delta-sync-dispatching.js b/config/consumer/example-custom-dispatching/delta-sync-dispatching.js new file mode 100644 index 000000000..c5e1b1151 --- /dev/null +++ b/config/consumer/example-custom-dispatching/delta-sync-dispatching.js @@ -0,0 +1,33 @@ +/** + * Dispatch the fetched information to a target graph. + * @param { mu, muAuthSudo } lib - The provided libraries from the host service. 
/**
 * Example delta-sync dispatcher.
 *
 * This example does not write anything to a graph: it only logs every delete
 * and every insert of each received change set, so the incoming data can be
 * inspected before a real dispatching strategy is implemented.
 *
 * @param {{ mu: *, muAuthSudo: * }} lib - The provided libraries from the host service.
 * @param {{ termObjectChangeSets: Array<{ deletes: Array<Object>, inserts: Array<Object> }> }} data -
 *   The fetched change sets. Every entry of `deletes` / `inserts` is a
 *   serialized term object exposing the string fields `graph`, `subject`,
 *   `predicate` and `object`.
 * @return {Promise<void>} Resolves once all change sets have been logged.
 */
async function dispatch(lib, data) {
  // Not used by this logging example; kept so that custom implementations
  // based on this template see which libraries are available.
  const { mu, muAuthSudo } = lib;
  const { termObjectChangeSets } = data;

  // Single formatter so that deletes and inserts are logged in the same shape
  // (the insert lines used to carry a stray trailing '.').
  const describeQuad = (term) =>
    `In graph: ${term.graph}, triple: ${term.subject} ${term.predicate} ${term.object}`;

  console.log(`Found an amount of ${termObjectChangeSets.length} changesets`);
  for (const { deletes, inserts } of termObjectChangeSets) {
    console.log(`Logging delete information: `);
    for (const term of deletes) {
      console.log(describeQuad(term));
    }

    console.log(`Logging insert information: `);
    for (const term of inserts) {
      console.log(describeQuad(term));
    }
  }
  console.log(`All changeSets were logged, waiting for next update!`);
}
/**
 * Example initial-sync dispatcher.
 *
 * This example does not write anything to a graph: it reports how many term
 * objects were received and logs at most the first ten of them.
 *
 * @param {{ mu: *, muAuthSudo: * }} lib - The provided libraries from the host service.
 * @param {{ termObjects: Array<Object> }} data - The fetched quad information.
 *   Every entry is a serialized term object; this example reads its
 *   `subject`, `predicate` and `object` fields.
 * @return {Promise<void>} Resolves once the sample has been logged.
 */
async function dispatch(lib, data) {
  // Not used by this logging example; shows which libraries are available.
  const { mu, muAuthSudo } = lib;
  const { termObjects } = data;

  console.log(`Found ${termObjects.length} to be processed`);
  console.log(`Showing only the first 10.`);

  // Build all lines first, then print them, limited to the first ten entries.
  const sampleLines = [];
  for (const term of termObjects.slice(0, 10)) {
    sampleLines.push(`triple: ${term.subject} ${term.predicate} ${term.object}`);
  }
  for (const line of sampleLines) {
    console.log(line);
  }

  console.log(`All triples were logged`);
}
"description": "A data diff will be taken of the data resulting from a grep on each file. Parameters:\n config: the config file to use\n", + "arguments": ["config"] + }, + "environment": { + "image": "ubuntu", + "interactive": false, + "script": "virtuoso/data-diff.sh", + "join_networks": true + }, + "mounts": { + "app": "/project/" + } + }, + { + "documentation": { + "command": "dump-database", + "description": "A virtuoso dump will be created db/dumps.\n Parameters:\n hostname: default triplestore\n username: default dba\n password: default dba", + "arguments": ["hostname", "username", "password"] + }, + "environment": { + "image": "redpencil/virtuoso", + "interactive": false, + "script": "virtuoso/dump-database.sh", + "join_networks": true + }, + "mounts": { + "app": "/project/" + } + } + ] +} diff --git a/config/project-scripts/virtuoso/data-diff.sh b/config/project-scripts/virtuoso/data-diff.sh new file mode 100755 index 000000000..dd54c907c --- /dev/null +++ b/config/project-scripts/virtuoso/data-diff.sh @@ -0,0 +1,11 @@ +#!/bin/bash +apt-get update > /dev/null +apt-get -y install jq python3 > /dev/null + +config=$1 +source=$(jq -r ".source" $config) +target=$(jq -r ".target" $config) + +command=$(python3 generate-datadiff.py $config) + +diff <(cat $source | eval $command | sort) <(cat $target | eval $command | sort) diff --git a/config/project-scripts/virtuoso/dump-database.sh b/config/project-scripts/virtuoso/dump-database.sh new file mode 100755 index 000000000..14ff35e84 --- /dev/null +++ b/config/project-scripts/virtuoso/dump-database.sh @@ -0,0 +1,27 @@ +#!/bin/bash +USERNAME=${2:-"dba"} +PASSWORD=${3:-"dba"} +TRIPLESTORE=${1:-"triplestore"} + +if [[ "$#" -ge 3 ]]; then + echo "Usage:" + echo " mu script triplestore [hostname] [username] [password]" + exit -1; +fi + +if [[ -d "/project/data/db" ]];then + mkdir -p /project/data/db/dumps +else + echo "WARNING:" + echo " did not find data/db folder in your project, so did not create data/db/dumps!" 
"""Build the grep pipeline used by data-diff.sh to filter a quads file.

Usage: python3 generate-datadiff.py <config.json>

The JSON config is expected to contain:
  graphs        list of graph URIs; a line is kept when it ends with
                "<graph> ." for any of the listed graphs
  graphRegexes  optional list of extra regexes; each one adds a further
                "| egrep" filter stage

The resulting shell command is printed on stdout.
"""
import sys
import json


def build_grep_command(config):
    """Return the egrep pipeline (as a shell string) for the given config dict."""
    # The alternation must be grouped: without the parentheses "<a|b> .$" is
    # parsed as "<a" OR "b> .$", so the first graph would match anywhere on
    # the line instead of only in the trailing graph position.
    graph_regex = "<({})> .$".format("|".join(config['graphs']))
    grep_commands = f'egrep "{graph_regex}"'
    # graphRegexes is optional; without it only the graph filter is applied.
    for regex in config.get('graphRegexes', []):
        grep_commands += f'| egrep "{regex}"'
    return grep_commands


def main(argv):
    """Load the config file named by argv[1] and print the grep pipeline."""
    config_file = argv[1]
    with open(config_file) as f:
        config = json.load(f)
    print(build_grep_command(config))


if __name__ == "__main__":
    main(sys.argv)
+# +# Make sure to use config/delta/mirror-rules.js instead of rules.js when in +# mirror mode. +services: + # Consumer extras + delta-consumer: + image: kanselarij/delta-consumer:0.0.1 + environment: + DCR_SERVICE_NAME: 'consume-from-sync' + DCR_SYNC_BASE_URL: 'https://localhost' # replace with link the application hosting the producer server + DCR_DELTA_SYNC_JOB_OPERATION: "http://delta-consumer.services.semantic.works/id/JobOperation/DeltaFileSyncing" + DCR_JOB_CREATOR_URI: "http://delta-consumer.services.semantic.works/me" + DCR_START_FROM_DELTA_TIMESTAMP: "2023-09-13T14:40:00Z" + DCR_WAIT_FOR_INITIAL_SYNC: "false" + DCR_DISABLE_INITIAL_SYNC: "true" + DCR_KEEP_DELTA_FILES: "true" + DCR_SECRET_KEY: "secret-sync-key" + DCR_CRON_PATTERN_DELTA_SYNC: "*/15 * * * * *" + DOWNLOAD_SHARE_LINKS: "true" + volumes: + - ./data/files:/share + - ./config/consumer/example-custom-dispatching:/config + + # Required services -- database, auth, search, deltas + database: + environment: + QUERY_MAX_PROCESSING_TIME: "60000" + QUERY_MAX_EXECUTION_TIME: "45000" + DATABASE_OVERLOAD_RECOVERY: "on" + LOG_DATABASE_OVERLOAD_TICK: "on" + LOG_ACCESS_RIGHTS: "false" + LOG_OUTGOING_SPARQL_QUERIES: "true" + LOG_INCOMING_SPARQL_QUERIES: "true" + LOG_OUTGOING_SPARQL_QUERY_ROUNDTRIP: "false" + triplestore: + volumes: + - ./config/db/virtuoso.production.ini:/data/virtuoso.ini + search: + environment: + NUMBER_OF_THREADS: 32 + elasticsearch: + environment: + ES_JAVA_OPTS: "-Xms40g -Xmx40g" + http.max_content_length: 2000M + + # Disable extraneous services + frontend: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + identifier: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + dispatcher: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + migrations: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + cache-warmup: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + cache: + entrypoint: "echo 'service disabled; mirror'" + 
restart: "no" + resource: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + file: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + agenda-comparison: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + agenda-approve: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + mocklogin: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + login: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + newsletter: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + yggdrasil: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + sink: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + file-bundling: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + file-bundling-job-creation: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + database-healthcheck: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + case-documents-sync: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + document-versions: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + document-release: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + mail-delivery: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + lod-sbmb: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + staatsblad-import: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + staatsblad-uuid-generation: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + staatsblad-linking: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" + publication-report: + entrypoint: "echo 'service disabled; mirror'" + restart: "no" diff --git a/docker-compose.yml b/docker-compose.yml index da3c5346f..02c14a709 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,10 @@ x-extended-logging: max-size: "50m" max-file: "20" services: + project-scripts: + image: 
semtech/simple-script-store:1.0.0 + volumes: + - ./config/project-scripts:/app/scripts identifier: image: semtech/mu-identifier:1.10.0 logging: *default-logging