From 86fc7ad96e835e88fe32643e58b9040abc14a763 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Tue, 25 Oct 2022 12:40:12 +1100 Subject: [PATCH] BROKEN: Add initial first-class-files attempt # Conflicts: # db/project.xml --- db/project.xml | 39 ++++++++++++++++++++++++++++++++++++++- db/python/tables/file.py | 38 ++++++++++++++++++++++++++++++++++++++ models/models/file.py | 19 +++++++++++++++++++ scripts/migrate_files.py | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 db/python/tables/file.py create mode 100644 models/models/file.py create mode 100644 scripts/migrate_files.py diff --git a/db/project.xml b/db/project.xml index c411216d9..0b56ba598 100644 --- a/db/project.xml +++ b/db/project.xml @@ -695,5 +695,42 @@ INSERT INTO analysis_type (id, name) VALUES ('analysis-runner', 'analysis-runner'); - + + SET @@system_versioning_alter_history = 1; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/db/python/tables/file.py b/db/python/tables/file.py new file mode 100644 index 000000000..315024f12 --- /dev/null +++ b/db/python/tables/file.py @@ -0,0 +1,38 @@ +from db.python.connect import DbBase +from models.models.file import File + + +class FileTable(DbBase): + """ + Capture File table operations and queries + """ + + table_name = 'file' + + async def get_file_by_id(self, file_id: int) -> File: + pass + + async def get_files_by_ids(self, file_ids: list[int]) -> list[File]: + pass + + async def get_file_by_path(self, path: str) -> File: + pass + + async def get_files_by_paths(self, paths: list[str]) -> list[File]: + pass + + # region CREATE + + async def create_files(self, files: list[File]): + """ + Can insert and get by paths, will need to consider what + should happen if files exist, eg should it: + + - Archive all old analysis / sequences at this location + + notify user of this + - Fail, and make this a precondition + + """ + pass + + # endregion CREATE \ No newline at end of file diff --git a/models/models/file.py b/models/models/file.py new file mode 100644 index 000000000..1149af9f9 --- /dev/null +++ b/models/models/file.py @@ -0,0 +1,19 @@ +from enum import Enum +from typing import Optional + +from pydantic import BaseModel + +class FileType(Enum): + FILE = 'file' + DIRECTORY = 'directory' + + +class File(BaseModel): + """Model to represent File""" + + id: int + type: FileType + path: str + # in bytes + size: int + checksum: str | None diff --git a/scripts/migrate_files.py b/scripts/migrate_files.py new file mode 100644 index 000000000..4daeb025e --- /dev/null +++ b/scripts/migrate_files.py @@ -0,0 +1,36 @@ +""" +We need to apply this migration in three steps: + +- Add schema for files +- migrate all the data to new schema +- Remove old columns + + MariaDB will probably complain about deleting columns in archived data + +This is also the time we should consider archiving analysis with duplicate output +paths, and also +""" +from collections import defaultdict + +from models.models.file import File + +connection = None + + +def main(): + pass + + +async def migrate_analysis(): + # collect all analysis-entries that are gs:// files + get_query = 'SELECT * FROM analysis WHERE output LIKE "gs://%"' + rows = await connection.fetch_all(get_query) + mapped_files = defaultdict(list) + for r in rows: + \mapped_files[r['output']].append(r['id']) + + inserted_files = insert_files(list(mapped_files.keys())) + + for f in inserted_files: + File(path=) + +