Support S3-compatible blob storage (#1071)
alxndrsn authored Sep 13, 2024
1 parent 33f2213 commit 3ae8a69
Showing 77 changed files with 3,294 additions and 230 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/s3-e2e.yml
@@ -0,0 +1,53 @@
name: S3 E2E Tests

on: push

jobs:
  s3-e2e:
    timeout-minutes: 15
    # TODO should we use the same container as circle & central?
    runs-on: ubuntu-latest
    services:
      # see: https://docs.github.com/en/enterprise-server@3.5/actions/using-containerized-services/creating-postgresql-service-containers
      postgres:
        image: postgres:14.10
        env:
          POSTGRES_PASSWORD: odktest
        ports:
          - 5432:5432
        # Set health checks to wait until postgres has started
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      minio:
        # see: https://github.com/minio/minio/discussions/16099
        image: minio/minio:edge-cicd
        env:
          MINIO_ROOT_USER: odk-central-dev
          MINIO_ROOT_PASSWORD: topSecret123
          # Enable encryption - this changes how s3 ETags work
          # See: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html
          # See: https://github.com/minio/minio/discussions/19012
          MINIO_KMS_AUTO_ENCRYPTION: on
          MINIO_KMS_SECRET_KEY: odk-minio-test-key:QfdUCrn3UQ58W5pqCS5SX4SOlec9sT8yb4rZ4zK24w0=
        ports:
          - 9000:9000
        options: >-
          --health-cmd "curl -s http://localhost:9000/minio/health/live"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - uses: actions/checkout@v4
      - name: Use Node.js 20
        uses: actions/setup-node@v4
        with:
          node-version: 20.10.0
          cache: 'npm'
      - run: npm ci --legacy-peer-deps
      - run: node lib/bin/create-docker-databases.js
      - name: E2E Test
        timeout-minutes: 10
        run: ./test/e2e/s3/run-tests.sh
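Note on the MinIO service settings: MINIO_KMS_AUTO_ENCRYPTION together with MINIO_KMS_SECRET_KEY turns on server-side encryption for the test object store. As the inline comments point out, encrypted objects no longer carry MD5-based ETags, so running the E2E suite against an encrypting store presumably exercises the code path where an ETag cannot be treated as a content hash.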
26 changes: 26 additions & 0 deletions Makefile
@@ -26,6 +26,32 @@ fake-oidc-server-ci:
	cd test/e2e/oidc/fake-oidc-server && \
	node index.mjs

.PHONY: fake-s3-accounts
fake-s3-accounts: node_version
	NODE_CONFIG_ENV=s3-dev node lib/bin/s3-create-bucket.js

.PHONY: dev-s3
dev-s3: fake-s3-accounts base
	NODE_CONFIG_ENV=s3-dev npx nodemon --watch lib --watch config lib/bin/run-server.js

# default admin credentials: minioadmin:minioadmin
# See: https://hub.docker.com/r/minio/minio/
# MINIO_KMS_SECRET_KEY, MINIO_KMS_AUTO_ENCRYPTION enable encryption - this changes how s3 ETags are generated.
# See: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html
# See: https://github.com/minio/minio/discussions/19012
S3_SERVER_ARGS := --network host \
	-e MINIO_ROOT_USER=odk-central-dev \
	-e MINIO_ROOT_PASSWORD=topSecret123 \
	-e MINIO_KMS_AUTO_ENCRYPTION=on \
	-e MINIO_KMS_SECRET_KEY=odk-minio-test-key:QfdUCrn3UQ58W5pqCS5SX4SOlec9sT8yb4rZ4zK24w0= \
	minio/minio server /data --console-address ":9001"
.PHONY: fake-s3-server-ephemeral
fake-s3-server-ephemeral:
	docker run --rm $(S3_SERVER_ARGS)
.PHONY: fake-s3-server-persistent
fake-s3-server-persistent:
	docker run --detach $(S3_SERVER_ARGS)

.PHONY: node_version
node_version: node_modules
	node lib/bin/enforce-node-version.js
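Taken together, these targets give a local S3 workflow: `make fake-s3-server-ephemeral` (or `make fake-s3-server-persistent`) starts a MinIO container with the same credentials and encryption settings as the CI service, `make fake-s3-accounts` creates the bucket named in config/s3-dev.json via lib/bin/s3-create-bucket.js, and `make dev-s3` runs the backend with NODE_CONFIG_ENV=s3-dev so it talks to that local store.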
2 changes: 2 additions & 0 deletions README.md
@@ -53,10 +53,12 @@ CREATE DATABASE jubilant_test WITH OWNER=jubilant ENCODING=UTF8;
\c jubilant_test;
CREATE EXTENSION IF NOT EXISTS CITEXT;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS pgrowlocks;
CREATE DATABASE jubilant WITH OWNER=jubilant ENCODING=UTF8;
\c jubilant;
CREATE EXTENSION IF NOT EXISTS CITEXT;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS pgrowlocks;
```

If you are using Docker, you may find it easiest to run the database in Docker by running `make run-docker-postgres`.
3 changes: 2 additions & 1 deletion config/default.json
@@ -31,7 +31,8 @@
"url": "https://data.getodk.cloud/v1/key/eOZ7S4bzyUW!g1PF6dIXsnSqktRuewzLTpmc6ipBtRq$LDfIMTUKswCexvE0UwJ9/projects/1/forms/odk-analytics/submissions",
"formId": "odk-analytics",
"version": "v2024.1.0_1"
}
},
"s3blobStore": {}
}
},
"test": {
13 changes: 13 additions & 0 deletions config/s3-dev.json
@@ -0,0 +1,13 @@
{
  "default": {
    "external": {
      "s3blobStore": {
        "server": "http://localhost:9000",
        "accessKey": "odk-central-dev",
        "secretKey": "topSecret123",
        "bucketName": "odk-central-bucket",
        "requestTimeout": 60000
      }
    }
  }
}
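For context, a minimal sketch of how these settings reach the server, assuming the standard behaviour of the `config` npm package (with NODE_CONFIG_ENV=s3-dev, this file is merged over config/default.json):

```js
// Sketch only: reads the merged s3-dev settings the same way run-server.js does.
const config = require('config');

const s3Settings = config.get('default.external.s3blobStore');
console.log(s3Settings.server);     // http://localhost:9000
console.log(s3Settings.bucketName); // odk-central-bucket
```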
1 change: 1 addition & 0 deletions lib/bin/create-docker-databases.js
@@ -31,6 +31,7 @@ const { log } = program.opts();
const dbj = connect(database);
await dbj.raw('create extension citext;');
await dbj.raw('create extension pg_trgm;');
await dbj.raw('create extension pgrowlocks;');
dbj.destroy();
}));

5 changes: 4 additions & 1 deletion lib/bin/run-server.js
@@ -38,13 +38,16 @@ const xlsform = require('../external/xlsform').init(config.get('default.xlsform'
// get an Enketo client
const enketo = require('../external/enketo').init(config.get('default.enketo'));

// get an S3 client.
const s3 = require('../external/s3').init(config.get('default.external.s3blobStore'));


////////////////////////////////////////////////////////////////////////////////
// START HTTP SERVICE

// initialize our container, then generate an http service out of it.
const container = require('../model/container')
.withDefaults({ db, mail, env, Sentry, xlsform, enketo });
.withDefaults({ db, mail, env, Sentry, xlsform, enketo, s3 });
const service = require('../http/service')(container);

// insert the graceful exit middleware.
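The S3 client is built once at startup from `default.external.s3blobStore` and injected into the model container alongside db, mail, xlsform, and enketo, so the rest of the backend presumably reaches blob storage through the same dependency-injection mechanism as the other external services.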
39 changes: 39 additions & 0 deletions lib/bin/s3-create-bucket.js
@@ -0,0 +1,39 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const Minio = require('minio');

const { server, bucketName, accessKey, secretKey } = require('config').get('default.external.s3blobStore');

const minioClient = (() => {
  const url = new URL(server);
  const useSSL = url.protocol === 'https:';
  const endPoint = (url.hostname + url.pathname).replace(/\/$/, '');
  const port = parseInt(url.port, 10);

  return new Minio.Client({ endPoint, port, useSSL, accessKey, secretKey });
})();

const log = (...args) => console.log(__filename, ...args);

minioClient.bucketExists(bucketName)
  .then(exists => {
    if (exists) {
      log('Bucket already exists.');
      return;
    }

    log('Creating bucket:', bucketName);
    return minioClient.makeBucket(bucketName)
      .then(() => log('Bucket created OK.'));
  })
  .catch(err => {
    log('ERROR CREATING MINIO BUCKET:', err);
    process.exit(1);
  });
19 changes: 19 additions & 0 deletions lib/bin/s3.js
@@ -0,0 +1,19 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const { program, Argument } = require('commander');

const { getCount, setFailedToPending, uploadPending } = require('../task/s3');

program.command('count-blobs')
  .addArgument(new Argument('status').choices(['pending', 'in_progress', 'uploaded', 'failed']))
  .action(getCount);
program.command('reset-failed-to-pending').action(setFailedToPending);
program.command('upload-pending').action(uploadPending);
program.parse();
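As defined above, the CLI is invoked with a subcommand, e.g. `node lib/bin/s3.js count-blobs pending` to count blobs in a given upload status, `node lib/bin/s3.js reset-failed-to-pending` to requeue failed uploads, or `node lib/bin/s3.js upload-pending` to push pending blobs to the configured store; the underlying task implementations live in lib/task/s3, which is not shown in this excerpt.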
5 changes: 2 additions & 3 deletions lib/data/attachments.js
@@ -10,8 +10,7 @@

const { join } = require('path');
const { compose, identity } = require('ramda');
const { Writable, pipeline } = require('stream');
const { rejectIfError } = require('../util/promise');
const { Writable } = require('stream');
const { zipPart } = require('../util/zip');
const sanitize = require('sanitize-filename');

@@ -47,7 +46,7 @@ const streamAttachments = (inStream, decryptor) => {
done();
}
});
pipeline(inStream, writable, rejectIfError(archive.error.bind(archive)));
inStream.with(writable).pipeline(archive.error.bind(archive));

return archive;
};
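This call site (and the similar one in lib/data/client-audits.js below) replaces Node's `stream.pipeline(a, b, cb)` with a chainable `.with(...).pipeline(onError)` style. A hypothetical sketch of a helper consistent with these call sites, purely for illustration and not the project's actual implementation (which is outside this diff):

```js
// Hypothetical chainable pipe (assumption): .with() accumulates streams,
// .pipeline(onError) connects them and reports only errors.
const { pipeline } = require('stream');

class ChainablePipe {
  constructor(streams) { this.streams = streams; }
  static of(stream) { return new ChainablePipe([stream]); }
  with(stream) { return new ChainablePipe([...this.streams, stream]); }
  pipeline(onError) {
    // stream.pipeline accepts an array of streams and returns the destination;
    // its callback fires on completion, with an error argument only on failure.
    return pipeline(this.streams, (err) => { if (err != null) onError(err); });
  }
}

// Usage mirroring the call above:
// ChainablePipe.of(inStream).with(writable).pipeline(archive.error.bind(archive));
```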
7 changes: 4 additions & 3 deletions lib/data/client-audits.js
@@ -123,9 +123,10 @@ const streamClientAudits = (inStream, form, decryptor) => {
});

// only appended (above, in transform()) if data comes in.
const outStream = pipeline(inStream, csvifier, csv(), (err) => {
if (err != null) archive.error(err);
});
const outStream = inStream
.with(csvifier)
.with(csv())
.pipeline(archive.error.bind(archive));
return archive;
};
