diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1c81caa00..faed4cd2f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -66,6 +66,7 @@ jobs: - name: "Run unit tests" id: runtests run: | + export SM_ENVIRONMENT=local coverage run -m unittest discover -p 'test*.py' -s '.' rc=$? coverage xml diff --git a/.gitignore b/.gitignore index d5fa95d5a..c4a999435 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,9 @@ web/src/__generated__ # pulumi config files Pulumi*.yaml + +# pnpm package manager +pnpm-lock.yaml + +# env +.env diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 436a1c334..d19021fca 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -235,27 +235,29 @@ class GraphQLAnalysis: id: int type: str status: strawberry.enum(AnalysisStatus) - output: str | None timestamp_completed: datetime.datetime | None = None active: bool meta: strawberry.scalars.JSON - + output: strawberry.scalars.JSON + outputs: strawberry.scalars.JSON @staticmethod def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis': return GraphQLAnalysis( id=internal.id, type=internal.type, status=internal.status, - output=internal.output, timestamp_completed=internal.timestamp_completed, active=internal.active, meta=internal.meta, + output=internal.output, + outputs=internal.outputs, ) @strawberry.field async def sequencing_groups( self, info: Info, root: 'GraphQLAnalysis' ) -> list['GraphQLSequencingGroup']: + loader = info.context[LoaderKeys.SEQUENCING_GROUPS_FOR_ANALYSIS] sgs = await loader.load(root.id) return [GraphQLSequencingGroup.from_internal(sg) for sg in sgs] diff --git a/api/routes/analysis.py b/api/routes/analysis.py index b3298d2c2..7fd00a11d 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -1,7 +1,7 @@ import csv import io from datetime import date -from typing import Any +from typing import Any, Optional, Union from fastapi import APIRouter from fastapi.params import Body, Query @@ -45,7 +45,7 @@ class AnalysisModel(BaseModel): type: str status: AnalysisStatus meta: dict[str, Any] | None = None - output: str | None = None + outputs: Optional[Union[str, dict]] = None active: bool = True # please don't use this, unless you're the analysis-runner, # the usage is tracked ... 
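With this change the API accepts the legacy `output` string alongside the richer `outputs` value, which may be a plain path or a nested dict of `basename` entries with optional `secondaryFiles`, the same shapes the PR's test data generators use. A small illustration of the two request payloads; the bucket, file and key names here are placeholders, not values from this PR:

```python
import json

# Legacy shape: a single output path as a string (still accepted, but
# deprecated in favour of `outputs`).
legacy_payload = {
    'type': 'cram',
    'status': 'completed',
    'output': 'gs://example-bucket/crams/CPGXXXX.cram',
}

# New shape: `outputs` can be a plain string, or a nested dict of `basename`
# entries, optionally grouping `secondaryFiles` under their primary file.
structured_payload = {
    'type': 'cram',
    'status': 'completed',
    'outputs': {
        'cram': {
            'basename': 'gs://example-bucket/crams/CPGXXXX.cram',
            'secondaryFiles': {
                'cram_index': {'basename': 'gs://example-bucket/crams/CPGXXXX.cram.crai'},
            },
        },
    },
}

print(json.dumps(structured_payload, indent=2))
```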
(Ծ_Ծ) @@ -56,7 +56,7 @@ class AnalysisUpdateModel(BaseModel): """Update analysis model""" status: AnalysisStatus - output: str | None = None + outputs: str | None = None meta: dict[str, Any] | None = None active: bool | None = None @@ -73,7 +73,7 @@ class AnalysisQueryModel(BaseModel): type: str | None = None status: AnalysisStatus | None = None meta: dict[str, Any] | None = None - output: str | None = None + outputs: str | None = None active: bool | None = None def to_filter(self, project_id_map: dict[str, int]) -> AnalysisFilter: @@ -130,7 +130,7 @@ async def update_analysis( """Update status of analysis""" atable = AnalysisLayer(connection) await atable.update_analysis( - analysis_id, status=analysis.status, output=analysis.output, meta=analysis.meta + analysis_id, status=analysis.status, outputs=analysis.outputs, meta=analysis.meta ) return True diff --git a/db/project.xml b/db/project.xml index 118a934ba..dccc0ab9d 100644 --- a/db/project.xml +++ b/db/project.xml @@ -1107,4 +1107,90 @@ ALTER TABLE sequencing_group_assay CHANGE author author VARCHAR(255) NULL; ALTER TABLE sequencing_group_external_id CHANGE author author VARCHAR(255) NULL; + + SET @@system_versioning_alter_history = 1; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ALTER TABLE `output_file` ADD SYSTEM VERSIONING; + ALTER TABLE `analysis_outputs` ADD SYSTEM VERSIONING; + ALTER TABLE `analysis_outputs` ADD CONSTRAINT `chk_file_id_output` CHECK ((file_id IS NOT NULL AND output IS NULL) OR (file_id IS NULL AND output IS NOT NULL)); + diff --git a/db/python/connect.py b/db/python/connect.py index 4a5abcba4..f647b8d6a 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -26,6 +26,7 @@ 'sequencing_group', 'assay', 'sequencing_group_assay', + 'analysis_outputs', 'analysis_sequencing_group', 'analysis_sample', 'assay_external_id', diff --git a/db/python/layers/analysis.py b/db/python/layers/analysis.py index 878873386..851f8d021 100644 --- a/db/python/layers/analysis.py +++ b/db/python/layers/analysis.py @@ -1,4 +1,5 @@ import datetime +import warnings from collections import defaultdict from typing import Any @@ -7,6 +8,7 @@ from db.python.layers.base import BaseLayer from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.analysis import AnalysisFilter, AnalysisTable +from db.python.tables.output_file import OutputFileTable from db.python.tables.sample import SampleTable from db.python.tables.sequencing_group import SequencingGroupFilter from db.python.utils import GenericFilter, get_logger @@ -46,8 +48,9 @@ class AnalysisLayer(BaseLayer): def __init__(self, connection: Connection): super().__init__(connection) - self.sampt = SampleTable(connection) - self.at = AnalysisTable(connection) + self.sample_table = SampleTable(connection) + self.analysis_table = AnalysisTable(connection) + self.output_file_table = OutputFileTable(connection) # GETS @@ -62,7 +65,7 @@ async def get_analyses_for_samples( Get a list of all analysis that relevant for samples """ - projects, analysis = await self.at.get_analyses_for_samples( + projects, analysis = await self.analysis_table.get_analyses_for_samples( sample_ids, analysis_type=analysis_type, status=status, @@ -80,7 +83,7 @@ async def get_analyses_for_samples( async def get_analysis_by_id(self, analysis_id: int, check_project_id=True): """Get analysis by ID""" - project, analysis = await self.at.get_analysis_by_id(analysis_id) + project, analysis = await 
self.analysis_table.get_analysis_by_id(analysis_id) if check_project_id: await self.ptable.check_access_to_project_id( self.author, project, readonly=True @@ -95,7 +98,7 @@ async def get_latest_complete_analysis_for_type( meta: dict[str, Any] = None, ) -> AnalysisInternal: """Get SINGLE latest complete analysis for some analysis type""" - return await self.at.get_latest_complete_analysis_for_type( + return await self.analysis_table.get_latest_complete_analysis_for_type( project=project, analysis_type=analysis_type, meta=meta ) @@ -105,7 +108,7 @@ async def get_all_sequencing_group_ids_without_analysis_type( """ Find all the sequencing_groups that don't have an "analysis_type" """ - return await self.at.get_all_sequencing_group_ids_without_analysis_type( + return await self.analysis_table.get_all_sequencing_group_ids_without_analysis_type( analysis_type=analysis_type, project=project ) @@ -115,7 +118,7 @@ async def get_incomplete_analyses( """ Gets details of analysis with status queued or in-progress """ - return await self.at.get_incomplete_analyses(project=project) + return await self.analysis_table.get_incomplete_analyses(project=project) async def get_sample_cram_path_map_for_seqr( self, @@ -124,7 +127,7 @@ async def get_sample_cram_path_map_for_seqr( participant_ids: list[int] = None, ) -> list[dict[str, Any]]: """Get (ext_participant_id, cram_path, internal_id) map""" - return await self.at.get_sample_cram_path_map_for_seqr( + return await self.analysis_table.get_sample_cram_path_map_for_seqr( project=project, sequencing_types=sequencing_types, participant_ids=participant_ids, @@ -132,7 +135,7 @@ async def get_sample_cram_path_map_for_seqr( async def query(self, filter_: AnalysisFilter, check_project_ids=True): """Query analyses""" - analyses = await self.at.query(filter_) + analyses = await self.analysis_table.query(filter_) if not analyses: return [] @@ -186,7 +189,7 @@ async def get_cram_size_proportionate_map( sg_by_id = {sg.id: sg for sg in sequencing_groups} sg_to_project = {sg.id: sg.project for sg in sequencing_groups} - cram_list = await self.at.query( + cram_list = await self.analysis_table.query( AnalysisFilter( sequencing_group_id=GenericFilter(in_=list(sg_to_project.keys())), type=GenericFilter(eq='cram'), @@ -493,13 +496,13 @@ async def get_sgs_added_by_day_by_es_indices( # was removed. So we'll sum up all SGs up to the start date and then use that # as the starting point for the prop map. 
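The comment above explains the seeding strategy for the prop map: every sequencing group seen up to `start` is collapsed into a single baseline entry, and the lookups below only add what appears on later days. A toy sketch of that kind of accumulation, with made-up dates and ids rather than anything from this PR:

```python
import datetime
from collections import defaultdict

by_day: dict[datetime.date, set[int]] = defaultdict(set)

start = datetime.date(2023, 1, 1)
by_day[start] = {1, 2, 3}                       # baseline: all SGs up to `start`
by_day[datetime.date(2023, 1, 5)].update({4})   # later days only add new SGs
by_day[datetime.date(2023, 1, 9)].update({5, 6})

# Roll the per-day additions up into a cumulative view.
seen: set[int] = set()
cumulative: dict[datetime.date, set[int]] = {}
for day in sorted(by_day):
    seen |= by_day[day]
    cumulative[day] = set(seen)

print({day.isoformat(): sorted(sgs) for day, sgs in cumulative.items()})
```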
- by_day[start] = await self.at.find_sgs_in_joint_call_or_es_index_up_to_date( + by_day[start] = await self.analysis_table.find_sgs_in_joint_call_or_es_index_up_to_date( date=start ) if start < ES_ANALYSIS_OBJ_INTRO_DATE: # do a special check for joint-calling - joint_calls = await self.at.query( + joint_calls = await self.analysis_table.query( AnalysisFilter( type=GenericFilter(eq='joint-calling'), status=GenericFilter(eq=AnalysisStatus.COMPLETED), @@ -514,7 +517,7 @@ async def get_sgs_added_by_day_by_es_indices( for jc in joint_calls: by_day[jc.timestamp_completed.date()].update(jc.sequencing_group_ids) - es_indices = await self.at.query( + es_indices = await self.analysis_table.query( AnalysisFilter( type=GenericFilter(eq='es-index'), status=GenericFilter(eq=AnalysisStatus.COMPLETED), @@ -535,7 +538,7 @@ async def get_audit_logs_by_analysis_ids( self, analysis_ids: list[int] ) -> dict[int, list[AuditLogInternal]]: """Get audit logs for analysis IDs""" - return await self.at.get_audit_log_for_analysis_ids(analysis_ids) + return await self.analysis_table.get_audit_log_for_analysis_ids(analysis_ids) # CREATE / UPDATE @@ -545,27 +548,37 @@ async def create_analysis( project: ProjectId = None, ) -> int: """Create a new analysis""" - return await self.at.create_analysis( + new_analysis_id = await self.analysis_table.create_analysis( analysis_type=analysis.type, status=analysis.status, sequencing_group_ids=analysis.sequencing_group_ids, meta=analysis.meta, - output=analysis.output, active=analysis.active, project=project, ) + # TODO deprecate the output field + if analysis.output: + warnings.warn('Analysis.output will be deprecated, use Analysis.outputs instead', PendingDeprecationWarning, stacklevel=2) + + await self.output_file_table.create_or_update_analysis_output_files_from_json(analysis_id=new_analysis_id, json_dict=analysis.output) + + elif analysis.outputs: + await self.output_file_table.create_or_update_analysis_output_files_from_json(analysis_id=new_analysis_id, json_dict=analysis.outputs) + + return new_analysis_id + async def add_sequencing_groups_to_analysis( self, analysis_id: int, sequencing_group_ids: list[int], check_project_id=True ): """Add samples to an analysis (through the linked table)""" if check_project_id: - project_ids = await self.at.get_project_ids_for_analysis_ids([analysis_id]) + project_ids = await self.analysis_table.get_project_ids_for_analysis_ids([analysis_id]) await self.ptable.check_access_to_project_ids( self.author, project_ids, readonly=False ) - return await self.at.add_sequencing_groups_to_analysis( + return await self.analysis_table.add_sequencing_groups_to_analysis( analysis_id=analysis_id, sequencing_group_ids=sequencing_group_ids ) @@ -575,24 +588,30 @@ async def update_analysis( status: AnalysisStatus, meta: dict[str, Any] = None, output: str | None = None, + outputs: str | None = None, check_project_id=True, ): """ Update the status of an analysis, set timestamp_completed if relevant """ if check_project_id: - project_ids = await self.at.get_project_ids_for_analysis_ids([analysis_id]) + project_ids = await self.analysis_table.get_project_ids_for_analysis_ids([analysis_id]) await self.ptable.check_access_to_project_ids( self.author, project_ids, readonly=False ) - await self.at.update_analysis( + await self.analysis_table.update_analysis( analysis_id=analysis_id, status=status, meta=meta, - output=output, ) + if output: + warnings.warn('Analysis.output will be deprecated, use Analysis.outputs instead', PendingDeprecationWarning, stacklevel=2) + 
await self.output_file_table.create_or_update_analysis_output_files_from_json(analysis_id=analysis_id, json_dict=output) + elif outputs: + await self.output_file_table.create_or_update_analysis_output_files_from_json(analysis_id=analysis_id, json_dict=outputs) + async def get_analysis_runner_log( self, project_ids: list[int] = None, @@ -603,7 +622,7 @@ async def get_analysis_runner_log( """ Get log for the analysis-runner, useful for checking this history of analysis """ - return await self.at.get_analysis_runner_log( + return await self.analysis_table.get_analysis_runner_log( project_ids, # author=author, output_dir=output_dir, diff --git a/db/python/tables/analysis.py b/db/python/tables/analysis.py index 7955cfadb..22967d1c2 100644 --- a/db/python/tables/analysis.py +++ b/db/python/tables/analysis.py @@ -2,7 +2,7 @@ import dataclasses import datetime from collections import defaultdict -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple, Union from db.python.tables.base import DbBase from db.python.utils import ( @@ -15,6 +15,7 @@ from models.enums import AnalysisStatus from models.models.analysis import AnalysisInternal from models.models.audit_log import AuditLogInternal +from models.models.output_file import OutputFileInternal from models.models.project import ProjectId @@ -60,7 +61,6 @@ async def create_analysis( status: AnalysisStatus, sequencing_group_ids: List[int], meta: Optional[Dict[str, Any]] = None, - output: str = None, active: bool = True, project: ProjectId = None, ) -> int: @@ -73,7 +73,6 @@ async def create_analysis( ('type', analysis_type), ('status', status.value), ('meta', to_db_json(meta or {})), - ('output', output), ('audit_log_id', await self.audit_log_id()), ('project', project or self.project), ('active', active if active is not None else True), @@ -145,7 +144,6 @@ async def update_analysis( status: AnalysisStatus, meta: Dict[str, Any] = None, active: bool = None, - output: Optional[str] = None, ): """ Update the status of an analysis, set timestamp_completed if relevant @@ -169,10 +167,6 @@ async def update_analysis( fields['timestamp_completed'] = datetime.datetime.utcnow() setters.append('timestamp_completed = :timestamp_completed') - if output: - fields['output'] = output - setters.append('output = :output') - if meta is not None and len(meta) > 0: fields['meta'] = to_db_json(meta) setters.append('meta = JSON_MERGE_PATCH(COALESCE(meta, "{}"), :meta)') @@ -209,14 +203,13 @@ async def query(self, filter_: AnalysisFilter) -> List[AnalysisInternal]: 'type': 'a.type', 'status': 'a.status', 'meta': 'a.meta', - 'output': 'a.output', 'active': 'a.active', }, ) _query = f""" SELECT a.id as id, a.type as type, a.status as status, - a.output as output, a_sg.sequencing_group_id as sequencing_group_id, + a_sg.sequencing_group_id as sequencing_group_id, a.project as project, a.timestamp_completed as timestamp_completed, a.active as active, a.meta as meta, a.author as author FROM analysis a @@ -226,15 +219,83 @@ async def query(self, filter_: AnalysisFilter) -> List[AnalysisInternal]: rows = await self.connection.fetch_all(_query, values) retvals: Dict[int, AnalysisInternal] = {} + analysis_ids: list = [row['id'] for row in rows] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) for row in rows: key = row['id'] if key in retvals: retvals[key].sequencing_group_ids.append(row['sequencing_group_id']) else: - retvals[key] = AnalysisInternal.from_db(**dict(row)) - + # Pydantic 
doesn't allow item assignment on the model + # TODO: Deprecate `output` eventually + retvals[key] = AnalysisInternal.from_db(**dict(row)).copy(update={'output': analysis_outputs_by_aid.get(key, []), 'outputs': analysis_outputs_by_aid.get(key, [])}) return list(retvals.values()) + async def get_file_outputs_by_analysis_ids(self, analysis_ids: list[int]) -> dict[int, Union[dict[str, Any], str]]: + """Fetches all output files for a list of analysis IDs""" + + _query = """ + SELECT ao.analysis_id, f.*, ao.json_structure, ao.output + FROM analysis_outputs ao + LEFT JOIN output_file f ON ao.file_id = f.id + WHERE ao.analysis_id IN :analysis_ids + """ + rows = await self.connection.fetch_all(_query, {'analysis_ids': analysis_ids}) + + # Preparing to accumulate analysis files + analysis_files: dict[int, Union[list[Tuple[OutputFileInternal, str]], str]] = defaultdict(list) + + # Extracting parent IDs with a guard for None values + parent_ids = [row['id'] for row in rows if row['id'] is not None] + + # Fetching secondary files only if there are parent IDs to look up + secondary_files = await self.get_secondary_files_for_file_output(parent_ids) if parent_ids else {} + + for row in rows: + file_id = row['id'] + if file_id: + # If no json_structure, just set to the output. + if row['json_structure'] is None: + analysis_files[row['analysis_id']] = row['path'] + # Building OutputFileInternal object with secondary files if available + file_internal = OutputFileInternal.from_db(**dict(row)) + file_internal_with_secondary = file_internal.copy(update={ + 'secondary_files': secondary_files.get(file_id, []) + }) + if isinstance(analysis_files[row['analysis_id']], list): + analysis_files[row['analysis_id']].append((file_internal_with_secondary, row['json_structure'])) # type: ignore [union-attr] + else: + # If no file_id, just set to the output. 
+ analysis_files[row['analysis_id']] = row['output'] + + # Transforming analysis_files into the desired output format + analysis_output_files = {a_id: OutputFileInternal.reconstruct_json(files) for a_id, files in analysis_files.items()} + + return analysis_output_files + + async def get_secondary_files_for_file_output(self, parent_file_ids: list[int]) -> dict[int, list[OutputFileInternal]]: + """Fetches all secondary files for a list of parent files""" + + _query = """ + SELECT f.* + FROM output_file f + WHERE f.parent_id IN :parent_file_ids + """ + # Ensure parent_file_ids is a list to prevent SQL injection and errors + if not isinstance(parent_file_ids, list): + raise ValueError('parent_file_ids must be a list of integers') + + # Fetching rows from the database + rows = await self.connection.fetch_all(_query, {'parent_file_ids': parent_file_ids}) + + # Accumulating secondary files + secondary_files: Dict[int, List[OutputFileInternal]] = defaultdict(list) + for row in rows: + secondary_file = OutputFileInternal.from_db(**dict(row)) + secondary_files[row['parent_id']].append(secondary_file) + + return secondary_files + async def get_latest_complete_analysis_for_type( self, project: ProjectId, @@ -257,7 +318,7 @@ async def get_latest_complete_analysis_for_type( _query = f""" SELECT a.id as id, a.type as type, a.status as status, - a.output as output, a_sg.sample_id as sample_id, + a_sg.sample_id as sample_id, a.project as project, a.timestamp_completed as timestamp_completed, a.meta as meta FROM analysis_sequencing_group a_sg @@ -272,7 +333,11 @@ async def get_latest_complete_analysis_for_type( rows = await self.connection.fetch_all(_query, values) if len(rows) == 0: raise NotFoundError(f"Couldn't find any analysis with type {analysis_type}") + analysis_ids: list = [row['id'] for row in rows[0]] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) a = AnalysisInternal.from_db(**dict(rows[0])) + a.output = analysis_outputs_by_aid.get(rows[0]['id'], []) + a.outputs = analysis_outputs_by_aid.get(rows[0]['id'], []) # .from_db maps 'sample_id' -> sample_ids for row in rows[1:]: a.sample_ids.append(row['sample_id']) @@ -309,7 +374,7 @@ async def get_incomplete_analyses( """ _query = """ SELECT a.id as id, a.type as type, a.status as status, - a.output as output, a_sg.sequencing_group_id as sequencing_group_id, + a_sg.sequencing_group_id as sequencing_group_id, a.project as project, a.meta as meta FROM analysis_sequencing_group a_sg INNER JOIN analysis a ON a_sg.analysis_id = a.id @@ -319,10 +384,14 @@ async def get_incomplete_analyses( _query, {'project': project or self.project} ) analysis_by_id = {} + analysis_ids: list = [row['id'] for row in rows] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) for row in rows: aid = row['id'] if aid not in analysis_by_id: analysis_by_id[aid] = AnalysisInternal.from_db(**dict(row)) + analysis_by_id[aid].output = analysis_outputs_by_aid.get(aid, []) + analysis_by_id[aid].outputs = analysis_outputs_by_aid.get(aid, []) else: analysis_by_id[aid].sample_ids.append(row['sequencing_group_id']) @@ -334,7 +403,7 @@ async def get_latest_complete_analysis_for_sequencing_group_ids_by_type( """Get the latest complete analysis for samples (one per sample)""" _query = """ SELECT - a.id AS id, a.type as type, a.status as status, a.output as output, + a.id AS id, a.type as type, a.status as status, a.project as project, a_sg.sequencing_group_id as sample_id, a.timestamp_completed as timestamp_completed, 
a.meta as meta FROM analysis a @@ -357,11 +426,16 @@ async def get_latest_complete_analysis_for_sequencing_group_ids_by_type( rows = await self.connection.fetch_all(_query, values) seen_sequencing_group_ids = set() analyses: List[AnalysisInternal] = [] + analysis_ids: list = [row['id'] for row in rows] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) for row in rows: if row['sequencing_group_id'] in seen_sequencing_group_ids: continue seen_sequencing_group_ids.add(row['sequencing_group_id']) - analyses.append(AnalysisInternal.from_db(**dict(row))) + analysis = AnalysisInternal.from_db(**dict(row)) + analysis.output = analysis_outputs_by_aid.get(row['id'], []) + analysis.outputs = analysis_outputs_by_aid.get(row['id'], []) + analyses.append(analysis) # reverse after timestamp_completed return analyses[::-1] @@ -373,7 +447,7 @@ async def get_analysis_by_id( _query = """ SELECT a.id as id, a.type as type, a.status as status, - a.output as output, a.project as project, + a.project as project, a_sg.sequencing_group_id as sequencing_group_id, a.timestamp_completed as timestamp_completed, a.meta as meta FROM analysis a @@ -385,8 +459,11 @@ async def get_analysis_by_id( raise NotFoundError(f"Couldn't find analysis with id = {analysis_id}") project = rows[0]['project'] - + analysis_ids: list = [rows[0]['id']] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) a = AnalysisInternal.from_db(**dict(rows[0])) + a.output = analysis_outputs_by_aid.get(rows[0], []) + a.outputs = analysis_outputs_by_aid.get(rows[0], []) for row in rows[1:]: a.sample_ids.append(row['sequencing_group_id']) @@ -417,7 +494,7 @@ async def get_analyses_for_samples( _query = f""" SELECT a.id as id, a.type as type, a.status as status, - a.output as output, a.project as project, + a.project as project, a_sg.sequencing_group_id as sequencing_group_id, a.timestamp_completed as timestamp_completed, a.meta as meta FROM analysis a @@ -428,13 +505,17 @@ async def get_analyses_for_samples( rows = await self.connection.fetch_all(_query, values) analyses = {} projects: set[ProjectId] = set() - for a in rows: - a_id = a['id'] + analysis_ids: list = [row['id'] for row in rows] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) + for row in rows: + a_id = row['id'] if a_id not in analyses: - analyses[a_id] = AnalysisInternal.from_db(**dict(a)) - projects.add(a['project']) + analyses[a_id] = AnalysisInternal.from_db(**dict(row)) + analyses[a_id].output = analysis_outputs_by_aid.get(a_id, []) + analyses[a_id].outputs = analysis_outputs_by_aid.get(a_id, []) + projects.add(row['project']) - analyses[a_id].sample_ids.append(a['sample_id']) + analyses[a_id].sample_ids.append(row['sample_id']) return projects, list(analyses.values()) @@ -468,7 +549,7 @@ async def get_sample_cram_path_map_for_seqr( values['pids'] = list(participant_ids) _query = f""" -SELECT p.external_id as participant_id, a.output as output, sg.id as sequencing_group_id +SELECT p.external_id as participant_id, sg.id as sequencing_group_id FROM analysis a INNER JOIN analysis_sequencing_group a_sg ON a_sg.analysis_id = a.id INNER JOIN sequencing_group sg ON a_sg.sequencing_group_id = sg.id @@ -480,8 +561,10 @@ async def get_sample_cram_path_map_for_seqr( """ rows = await self.connection.fetch_all(_query, values) + analysis_ids: list = [row['id'] for row in rows[0]] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) # many per analysis - return 
[dict(d) for d in rows] + return [dict(d).update({'output': analysis_outputs_by_aid.get(d['id'], []), 'outputs': analysis_outputs_by_aid.get(d['id'], [])}) for d in rows] async def get_analysis_runner_log( self, @@ -498,12 +581,15 @@ async def get_analysis_runner_log( "type = 'analysis-runner'", 'active', ] + joins = [] if project_ids: wheres.append('project in :project_ids') values['project_ids'] = project_ids if output_dir: - wheres.append('(output = :output OR output LIKE :output_like)') + joins.append('LEFT JOIN analysis_outputs ao ON analysis.id = ao.analysis_id',) + joins.append('LEFT JOIN output_file f ON ao.file_id = f.id') + wheres.append('(f.path = :output OR ao.output LIKE :output_like)') values['output'] = output_dir values['output_like'] = f'%{output_dir}' @@ -512,9 +598,18 @@ async def get_analysis_runner_log( values['ar_guid'] = ar_guid wheres_str = ' AND '.join(wheres) - _query = f'SELECT * FROM analysis WHERE {wheres_str}' + joins_str = ' '.join(joins) + _query = f'SELECT * FROM analysis {joins_str} WHERE {wheres_str}' rows = await self.connection.fetch_all(_query, values) - return [AnalysisInternal.from_db(**dict(r)) for r in rows] + analysis_ids: list = [row['id'] for row in rows] + analysis_outputs_by_aid = await self.get_file_outputs_by_analysis_ids(analysis_ids) + analyses: List[AnalysisInternal] = [] + for row in rows: + analysis = AnalysisInternal.from_db(**dict(row)) + analysis.output = analysis_outputs_by_aid.get(row['id'], []) + analysis.outputs = analysis_outputs_by_aid.get(row['id'], []) + analyses.append(analysis) + return analyses # region STATS diff --git a/db/python/tables/output_file.py b/db/python/tables/output_file.py new file mode 100644 index 000000000..ba0975315 --- /dev/null +++ b/db/python/tables/output_file.py @@ -0,0 +1,170 @@ +import os +from textwrap import dedent +from typing import Optional, Union + +from cloudpathlib import AnyPath, GSClient +from google.auth.credentials import AnonymousCredentials +from google.cloud.storage import Client + +from db.python.tables.base import DbBase +from models.models.output_file import OutputFileInternal + + +class OutputFileTable(DbBase): + """ + Capture Analysis table operations and queries + """ + + table_name = 'output_file' + + async def create_or_update_output_file( + self, + path: str, + parent_id: Optional[int] = None, + client: Optional[Client] = None + ) -> int | None: + """ + Create a new file, and add it to database + """ + file_obj = AnyPath(path, client=GSClient(storage_client=client)) + + file_info = await OutputFileInternal.get_file_info(file_obj=file_obj, client=client) + + if not file_info or not file_info.get('valid', False): + return None + + kv_pairs = [ + ('path', path), + ('basename', file_info['basename']), + ('dirname', file_info['dirname']), + ('nameroot', file_info['nameroot']), + ('nameext', file_info['nameext']), + ('file_checksum', file_info['checksum']), + ('size', file_info['size']), + ('valid', file_info['valid']), + ('parent_id', parent_id) + ] + + kv_pairs = [(k, v) for k, v in kv_pairs if v is not None] + keys = [k for k, _ in kv_pairs] + cs_keys = ', '.join(keys) + cs_id_keys = ', '.join(f':{k}' for k in keys) + non_pk_keys = [k for k in keys if k != 'path'] + update_clause = ', '.join([f'{k} = VALUES({k})' for k in non_pk_keys]) # ON DUPLICATE KEY UPDATE {update_clause} + + async with self.connection.transaction(): + _query = dedent(f"""INSERT INTO output_file ({cs_keys}) VALUES ({cs_id_keys}) ON DUPLICATE KEY UPDATE {update_clause} RETURNING id""") + id_of_new_file = 
await self.connection.fetch_val( + _query, + dict(kv_pairs), + ) + + return id_of_new_file + + async def add_output_file_to_analysis(self, analysis_id: int, file_id: int, json_structure: Optional[str] = None, output: Optional[str] = None): + """Add file to an analysis (through the join table)""" + _query = dedent(""" + INSERT INTO analysis_outputs + (analysis_id, file_id, json_structure, output) + VALUES (:analysis_id, :file_id, :json_structure, :output) + """) + await self.connection.execute( + _query, + {'analysis_id': analysis_id, 'file_id': file_id, 'json_structure': json_structure, 'output': output} + ) + + async def create_or_update_analysis_output_files_from_json( + self, + analysis_id: int, + json_dict: Union[dict, str], + ) -> None: + """ + Create analysis files from JSON + """ + files = await self.find_files_from_dict(json_dict=json_dict) + file_ids : list[int] = [] + + if os.environ.get('SM_ENVIRONMENT').lower() in ('development', 'local'): + client = Client( + credentials=AnonymousCredentials(), + project='test', + # Alternatively instead of using the global env STORAGE_EMULATOR_HOST. You can define it here. + # This will set this client object to point to the local google cloud storage. + client_options={'api_endpoint': 'http://localhost:4443'}, + ) + else: + client = Client() + + async with self.connection.transaction(): + if 'main_files' in files: + for primary_file in files['main_files']: + parent_file_id = await self.create_or_update_output_file(path=primary_file['basename'], client=client) + await self.add_output_file_to_analysis( + analysis_id, + parent_file_id, + json_structure=primary_file['json_path'], + output=None if parent_file_id else primary_file['basename'] + ) + if 'secondary_files_grouped' in files: + secondary_files = files['secondary_files_grouped'] + if primary_file['basename'] in secondary_files: + for secondary_file in secondary_files[primary_file['basename']]: + await self.create_or_update_output_file(path=secondary_file, parent_id=parent_file_id, client=client) + file_ids.append(parent_file_id) + + client.close() + # check that only the files in this json_dict should be in the analysis. Remove what isn't in this dict. 
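Before the cleanup query that follows, each file row is written by `create_or_update_output_file` with an `INSERT ... ON DUPLICATE KEY UPDATE ... RETURNING id`, so the caller gets the row id back in one round trip whether or not the path already existed. A minimal sketch of that upsert pattern with the `databases` driver; the `demo_file` table, its columns and the UNIQUE key are illustrative assumptions, and the statement shape needs MariaDB 10.5+:

```python
from textwrap import dedent

from databases import Database


async def upsert_demo_file(db: Database, path: str, size: int) -> int:
    """Create or refresh a row keyed by `path` and return its id.

    Illustrative only: assumes a `demo_file` table with a UNIQUE key on `path`.
    """
    query = dedent(
        """
        INSERT INTO demo_file (path, size)
        VALUES (:path, :size)
        ON DUPLICATE KEY UPDATE size = VALUES(size)
        RETURNING id
        """
    )
    # fetch_val returns the first column of the first row, i.e. the new or existing id.
    return await db.fetch_val(query, {'path': path, 'size': size})
```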
+ _update_query = dedent(""" + DELETE ao FROM analysis_outputs ao + WHERE (ao.analysis_id = :analysis_id) + AND (ao.file_id NOT IN :file_ids) + """) + + await self.connection.execute( + _update_query, + { + 'analysis_id': analysis_id, + 'file_ids': file_ids + } + ) + + async def find_files_from_dict(self, json_dict, json_path=None, collected=None) -> dict: + """Retrieve filepaths from a dict of outputs""" + if collected is None: + collected = {'main_files': [], 'secondary_files_grouped': {}} + + if json_path is None: + json_path = [] # Initialize path for tracking key path + + if isinstance(json_dict, str): + # If the data is a plain string, return it as the basename with None as its keypath + collected['main_files'].append({'json_path': None, 'basename': json_dict}) + return collected + + if isinstance(json_dict, dict): + # Check if current dict contains 'basename' + if 'basename' in json_dict: + # Add current item to main_files + collected['main_files'].append({'json_path': '.'.join(json_path), 'basename': json_dict['basename']}) + current_basename = json_dict['basename'] # Keep track of current basename for secondary files + + # Handle secondary files if present + if 'secondaryFiles' in json_dict: + secondary = json_dict['secondaryFiles'] + if current_basename not in collected['secondary_files_grouped']: + collected['secondary_files_grouped'][current_basename] = [] + for _, value in secondary.items(): + # Append each secondary file to the list in secondary_files under its parent basename + collected['secondary_files_grouped'][current_basename].append(value['basename']) + + else: + for key, value in json_dict.items(): + # Recur for each sub-dictionary, updating the path + await self.find_files_from_dict(value, json_path + [key], collected) + + elif isinstance(json_dict, list): + # Recur for each item in the list, without updating the path (as lists don't contribute to JSON path) + for item in json_dict: + await self.find_files_from_dict(item, json_path, collected) + + return collected diff --git a/db/python/tables/project.py b/db/python/tables/project.py index d87c28960..2f401af6e 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -442,6 +442,9 @@ async def delete_project_data( INNER JOIN sample ON sample.id = sg.sample_id WHERE sample.project = :project ); +DELETE FROM analysis_outputs WHERE analysis_id in ( + SELECT id FROM analysis WHERE project = :project +); DELETE FROM analysis_sample WHERE sample_id in ( SELECT s.id FROM sample s WHERE s.project = :project diff --git a/metamist/audit/README.md b/metamist/audit/README.md index b099376c8..bbbe8c626 100644 --- a/metamist/audit/README.md +++ b/metamist/audit/README.md @@ -80,7 +80,9 @@ print(json.dumps(participant_data)) "analyses": [ { "id": 456, - "output": "gs://cpg-test-dataset-main/cram/CPG123456.cram", + "outputs": { + "basename": "gs://cpg-test-dataset-main/cram/CPG123456.cram" + }, "timestampCompleted": "2023-09-01T05:04:24" } ] diff --git a/metamist/audit/generic_auditor.py b/metamist/audit/generic_auditor.py index c69eacf40..82e873740 100644 --- a/metamist/audit/generic_auditor.py +++ b/metamist/audit/generic_auditor.py @@ -53,7 +53,7 @@ analyses(status: {eq: COMPLETED}, type: {in_: $analysisTypes}, project: {eq: $dataset}) { id meta - output + outputs type timestampCompleted } @@ -277,14 +277,15 @@ async def get_analysis_cram_paths_for_dataset_sgs( # For each sg id, collect the analysis id and cram paths sg_cram_paths: dict[str, dict[int, str]] = defaultdict(dict) for sg_id, analysis in 
analyses.items(): - cram_path = analysis['output'] + logging.info(analysis['outputs']) + cram_path = analysis['outputs']['basename'] if not cram_path.startswith('gs://') or not cram_path.endswith('.cram'): logging.warning( - f'Analysis {analysis["id"]} invalid output path: {analysis["output"]}' + f'Analysis {analysis["id"]} invalid output path: {analysis["outputs"]["basename"]}' ) continue - sg_cram_paths[sg_id][analysis['id']] = analysis['output'] + sg_cram_paths[sg_id][analysis['id']] = analysis['outputs'] return sg_cram_paths @@ -313,7 +314,7 @@ async def analyses_for_sgs_without_crams(self, sgs_without_crams: list[str]): analysis_entry = { 'analysis_id': analysis['id'], 'analysis_type': analysis['type'], - 'analysis_output': analysis['output'], + 'analysis_output': analysis['outputs'], 'timestamp_completed': analysis['timestampCompleted'], } all_sg_analyses[sg_id].append(analysis_entry) diff --git a/models/models/__init__.py b/models/models/__init__.py index ce5868cb1..c5b71c2f0 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -27,6 +27,7 @@ FamilySimpleInternal, PedRowInternal, ) +from models.models.output_file import OutputFile, OutputFileInternal from models.models.participant import ( NestedParticipant, NestedParticipantInternal, diff --git a/models/models/analysis.py b/models/models/analysis.py index fb6e3152d..8ef56de2d 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -1,9 +1,9 @@ import enum import json from datetime import date, datetime -from typing import Any +from typing import Any, Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from models.base import SMBase from models.enums import AnalysisStatus @@ -15,17 +15,19 @@ class AnalysisInternal(SMBase): """Model for Analysis""" + model_config = ConfigDict(extra='allow') - id: int | None = None + id: Optional[int] = None type: str status: AnalysisStatus - output: str = None + output: Optional[Union[str, dict]] = None + outputs: Optional[Union[str, dict]] = {} sequencing_group_ids: list[int] = [] - timestamp_completed: datetime | None = None - project: int | None = None - active: bool | None = None + timestamp_completed: Optional[datetime] = None + project: Optional[int] = None + active: Optional[bool] = None meta: dict[str, Any] = {} - author: str | None = None + author: Optional[str] = None @staticmethod def from_db(**kwargs): @@ -52,7 +54,8 @@ def from_db(**kwargs): type=analysis_type, status=AnalysisStatus(status), sequencing_group_ids=sequencing_group_ids or [], - output=kwargs.pop('output', []), + output=kwargs.pop('output', None), # TODO deprecate + outputs=kwargs.pop('outputs', []), timestamp_completed=timestamp_completed, project=kwargs.get('project'), meta=meta, @@ -72,6 +75,7 @@ def to_external(self): self.sequencing_group_ids ), output=self.output, + outputs=self.outputs, timestamp_completed=self.timestamp_completed.isoformat() if self.timestamp_completed else None, @@ -87,13 +91,14 @@ class Analysis(BaseModel): type: str status: AnalysisStatus - id: int | None = None - output: str | None = None + id: Optional[int] = None + output: Optional[Union[str, dict]] = None + outputs: Optional[Union[str, dict]] = {} sequencing_group_ids: list[str] = [] - author: str | None = None - timestamp_completed: str | None = None - project: int | None = None - active: bool | None = None + author: Optional[str] = None + timestamp_completed: Optional[str] = None + project: Optional[int] = None + active: Optional[bool] = None meta: dict[str, Any] 
= {} def to_internal(self): @@ -108,6 +113,7 @@ def to_internal(self): self.sequencing_group_ids ), output=self.output, + outputs=self.outputs, # don't allow this to be set timestamp_completed=None, project=self.project, diff --git a/models/models/output_file.py b/models/models/output_file.py new file mode 100644 index 000000000..fd36fe98c --- /dev/null +++ b/models/models/output_file.py @@ -0,0 +1,174 @@ +from typing import Any, Optional, Union + +from cloudpathlib import AnyPath, GSPath +from google.cloud.storage import Client +from pydantic import BaseModel + +from models.base import SMBase, parse_sql_bool + + +class OutputFileInternal(SMBase): + """File model for internal use""" + + id: int + path: str + basename: str + dirname: str + nameroot: str + nameext: Optional[str] + file_checksum: Optional[str] + size: int + meta: Optional[str] = None + valid: bool = False + secondary_files: list[dict[str, Any]] | None = None + + @staticmethod + def from_db(**kwargs): + """ + Convert from db keys, mainly converting id to id_ + """ + + return OutputFileInternal( + id=kwargs.pop('id'), + path=kwargs.get('path'), + basename=kwargs.get('basename'), + dirname=kwargs.get('dirname'), + nameroot=kwargs.get('nameroot'), + nameext=kwargs.get('nameext'), + file_checksum=kwargs.get('file_checksum'), + size=kwargs.get('size'), + meta=kwargs.get('meta'), + valid=parse_sql_bool(kwargs.get('valid')), + ) + + def to_external(self): + """ + Convert to external model + """ + return OutputFile( + id=self.id, + path=self.path, + basename=self.basename, + dirname=self.dirname, + nameroot=self.nameroot, + nameext=self.nameext, + file_checksum=self.file_checksum, + size=self.size, + meta=self.meta, + valid=self.valid, + ) + + @staticmethod + async def get_file_info(file_obj: AnyPath | GSPath, client: Client) -> dict | None: + """Get file info for file at given path""" + try: + file_checksum = None + valid = False + size = 0 + # try: + if isinstance(file_obj, GSPath) and client: + bucket = client.get_bucket(file_obj.bucket) # pylint: disable=E1101 + blob = bucket.get_blob(file_obj.blob) # pylint: disable=E1101 + if file_obj.suffix != '.mt': + file_checksum = blob.crc32c # pylint: disable=E1101 + valid = True + size = blob.size # pylint: disable=E1101 + + return { + 'basename': file_obj.name, # pylint: disable=E1101 + 'dirname': str(file_obj.parent), # pylint: disable=E1101 + 'nameroot': file_obj.stem, # pylint: disable=E1101 + 'nameext': file_obj.suffix, # pylint: disable=E1101 + 'checksum': file_checksum, + 'size': size, # pylint: disable=E1101 + 'valid': valid, + } + except (FileNotFoundError, ValueError): + return None + + @staticmethod + def reconstruct_json(data: list | str) -> Union[dict[str, Any], str]: + """_summary_ + + Args: + data (list): A tuple of (OutputFileInternal, json_structure) if file_id is not None, else just the output string based on the + analysis_outputs table + + Returns: + dict: Should return the JSON structure based on the input data. 
+ """ + root: dict = {} + if isinstance(data, str): + return data + for file in data: + file_root: dict = {} + + # Check if the file is a tuple or a string + # If it's a tuple, it's a file object and a json structure + if isinstance(file, tuple): + file_obj, json_structure = file + file_obj = file_obj.dict() + fields = OutputFileInternal.__fields__.keys() # type:ignore[attr-defined] + for field in fields: + file_root[field] = file_obj.get(field) + + if json_structure: + if isinstance(json_structure, str): + json_structure = json_structure.split('.') + # Split the path into components and parse the JSON content + path = json_structure + content = file_root + + # Navigate down the tree to the correct position, creating dictionaries as needed + current = root + for key in path[:-1]: + current = current.setdefault(key, {}) + + if path[-1] in current: + current[path[-1]].update(content) + else: + current[path[-1]] = content + else: + root.update(file_root) + + # If it's a string, it's just the output + else: + file_root['output'] = file + if 'output' in root: + root['output'].append(file) + else: + root['output'] = [file] + return root + + +class OutputFile(BaseModel): + """File model for external use""" + + id: int + path: str + basename: str + dirname: str + nameroot: str + nameext: Optional[str] + file_checksum: Optional[str] + size: int + meta: Optional[str] = None + valid: bool = False + secondary_files: list[dict[str, Any]] | None = None + + def to_internal(self): + """ + Convert to internal model + """ + return OutputFileInternal( + id=self.id, + path=self.path, + basename=self.basename, + dirname=self.dirname, + nameroot=self.nameroot, + nameext=self.nameext, + file_checksum=self.file_checksum, + size=self.size, + meta=self.meta, + valid=self.valid, + ) diff --git a/requirements.txt b/requirements.txt index 172fa8fe0..19517939f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,8 @@ botocore==1.31.56 cpg-utils aiohttp async_lru -cloudpathlib +# later cloudpathlib versions require a more recent google-cloud-storage +cloudpathlib==0.17.0 requests google-auth>=2.19.0 google-cloud-secret-manager==2.8.0 diff --git a/scripts/20240124_migrate_output_to_file.py b/scripts/20240124_migrate_output_to_file.py new file mode 100644 index 000000000..47ea14e08 --- /dev/null +++ b/scripts/20240124_migrate_output_to_file.py @@ -0,0 +1,168 @@ +import asyncio +import json +import re +from textwrap import dedent +from typing import Dict + +import click +from cloudpathlib import AnyPath +from databases import Database +from google.cloud.storage import Client + +from db.python.connect import CredentialedDatabaseConfiguration # pylint: disable=C0415 +from models.models.output_file import OutputFileInternal + + +def _get_connection_string(): + """Get connection string""" + + config = CredentialedDatabaseConfiguration.dev_config() + + # config = CredentialedDatabaseConfiguration(dbname='sm_dev', username='root') + + return config.get_connection_string() + + +async def get_analyses_without_fileid(connection): + """Get analyses without fileid""" + query = dedent( + """ + SELECT a.id, a.output + FROM analysis a + LEFT JOIN analysis_outputs ao ON ao.analysis_id = a.id + LEFT JOIN output_file f ON f.id = ao.file_id + WHERE f.id IS NULL AND ao.output IS NULL + """ + ) + print('Fetching...') + rows = await connection.fetch_all(query=query) + print(f'Found {len(rows)} analyses without file_id and output fields set.') + + return rows + + +async def execute(connection, query, inserts): + """Executes 
inserts""" + await connection.execute(query, inserts) + + +async def get_file_info(path: str, client: Client) -> Dict: + """Get file dict""" + print('Extracting file dict') + file_obj = AnyPath(path) + + file_info = await OutputFileInternal.get_file_info(file_obj=file_obj, client=client) + + if not file_info: + return None + + return { + 'path': path, + 'basename': file_info['basename'], + 'dirname': file_info['dirname'], + 'nameroot': file_info['nameroot'], + 'nameext': file_info['nameext'], + 'file_checksum': file_info['checksum'], + 'valid': file_info['valid'], + 'size': file_info['size'], + } + + +def extract_file_paths(input_str): + """Extract file paths from JSON-like string as well as plain strings into a dict""" + # Check if the input string matches the JSON-like pattern + json_pattern = r'^\{.*\}$' + if re.match(json_pattern, input_str): + try: + # Attempt to parse the modified JSON string + pattern = r"GSPath\('([^']+)'\)" + + matches = re.findall(pattern, input_str) + file_paths = dict(zip(matches[::2], matches[1::2])) + return file_paths + except json.JSONDecodeError: + print('JSON Error') + + # Treat input as a plain file path + return {'plain_file_path': input_str} + + +async def prepare_files(analyses): + """Serialize files for insertion""" + files = [] + client = Client() + print(f'Preparing files...{len(analyses)} analyses to process.') + for analysis in analyses: + path = analysis['output'] + if path is None: + print('Path is None') + continue + + path_dict = extract_file_paths(path) + print(path_dict) + if path_dict: + print('Found path dict') + for _, path in path_dict.items(): + print(path) + files.append(( + analysis['id'], + await get_file_info(path=path, client=client) + )) + print('Extracted and added.') + return files + + +async def insert_files(connection, files): + """Insert files""" + query = dedent( + """INSERT INTO output_file (path, basename, dirname, nameroot, nameext, file_checksum, size, valid) + VALUES (:path, :basename, :dirname, :nameroot, :nameext, :file_checksum, :size, :valid) + RETURNING id""" + ) + af_query = dedent( + """ + INSERT INTO analysis_outputs (analysis_id, file_id, output, json_structure) VALUES (:analysis_id, :file_id, :output, :json_structure) + """ + ) + for analysis_id, file in files: + print('Inserting...') + file_id = await connection.fetch_val( + query, + file, + ) + if not file_id: + join_inserts = {'analysis_id': analysis_id, 'file_id': None, 'output': file.get('path'), 'json_structure': None} + else: + join_inserts = {'analysis_id': analysis_id, 'file_id': file_id, 'output': None, 'json_structure': None} + await execute( + connection=connection, + query=af_query, + inserts=join_inserts, + ) + print(f'Inserted {len(files)} files') + + +@click.command() +# @click.option('--dry-run/--no-dry-run', default=True) +@click.option('--connection-string', default=None) +# @click.argument('author', default='sequencing-group-migration') +def main_sync(connection_string: str = None): + """Run synchronisation""" + asyncio.get_event_loop().run_until_complete( + main(connection_string=connection_string) + ) + + +async def main(connection_string: str = None): + """Run synchronisation""" + connection = Database(connection_string or _get_connection_string(), echo=True) + await connection.connect() + async with connection.transaction(): + analyses = await get_analyses_without_fileid(connection=connection) + files = await prepare_files(analyses) + await insert_files(connection=connection, files=files) + await connection.disconnect() + + +if 
__name__ == '__main__': + main_sync() # pylint: disable=no-value-for-parameter diff --git a/test/data/fakegcs/file1.cram b/test/data/fakegcs/file1.cram new file mode 100644 index 000000000..99304afb9 --- /dev/null +++ b/test/data/fakegcs/file1.cram @@ -0,0 +1 @@ +SAMPLE CRAM CONTENT diff --git a/test/data/fakegcs/file1.qc b/test/data/fakegcs/file1.qc new file mode 100644 index 000000000..303bc3e08 --- /dev/null +++ b/test/data/fakegcs/file1.qc @@ -0,0 +1 @@ +SAMPLE QC FILE diff --git a/test/data/fakegcs/file1.qc.ext b/test/data/fakegcs/file1.qc.ext new file mode 100644 index 000000000..6f1cd4372 --- /dev/null +++ b/test/data/fakegcs/file1.qc.ext @@ -0,0 +1 @@ +SAMPLE QC EXT diff --git a/test/data/fakegcs/file1.qc.meta b/test/data/fakegcs/file1.qc.meta new file mode 100644 index 000000000..c549c3e09 --- /dev/null +++ b/test/data/fakegcs/file1.qc.meta @@ -0,0 +1 @@ +SAMPLE QC META diff --git a/test/data/fakegcs/file1.txt b/test/data/fakegcs/file1.txt new file mode 100644 index 000000000..088eccbc7 --- /dev/null +++ b/test/data/fakegcs/file1.txt @@ -0,0 +1 @@ +SMAPLE RESULTS TXT diff --git a/test/data/fakegcs/file2.cram b/test/data/fakegcs/file2.cram new file mode 100644 index 000000000..99304afb9 --- /dev/null +++ b/test/data/fakegcs/file2.cram @@ -0,0 +1 @@ +SAMPLE CRAM CONTENT diff --git a/test/data/fakegcs/file2.cram.ext b/test/data/fakegcs/file2.cram.ext new file mode 100644 index 000000000..7fe99d84c --- /dev/null +++ b/test/data/fakegcs/file2.cram.ext @@ -0,0 +1 @@ +SAMPLE CRAM EXT FILE diff --git a/test/data/fakegcs/file2.cram.meta b/test/data/fakegcs/file2.cram.meta new file mode 100644 index 000000000..1664e7db8 --- /dev/null +++ b/test/data/fakegcs/file2.cram.meta @@ -0,0 +1 @@ +SAMPLE CRAM META diff --git a/test/data/fakegcs/file3.cram b/test/data/fakegcs/file3.cram new file mode 100644 index 000000000..99304afb9 --- /dev/null +++ b/test/data/fakegcs/file3.cram @@ -0,0 +1 @@ +SAMPLE CRAM CONTENT diff --git a/test/data/fakegcs/file3.cram.ext b/test/data/fakegcs/file3.cram.ext new file mode 100644 index 000000000..1d2089a5d --- /dev/null +++ b/test/data/fakegcs/file3.cram.ext @@ -0,0 +1 @@ +SAMPLE CRAM EXT diff --git a/test/data/fakegcs/file3.cram.meta b/test/data/fakegcs/file3.cram.meta new file mode 100644 index 000000000..1664e7db8 --- /dev/null +++ b/test/data/fakegcs/file3.cram.meta @@ -0,0 +1 @@ +SAMPLE CRAM META diff --git a/test/data/generate_data.py b/test/data/generate_data.py index b04f270bc..38b80cace 100755 --- a/test/data/generate_data.py +++ b/test/data/generate_data.py @@ -167,11 +167,68 @@ def generate_random_number_within_distribution(): ] analyses_to_insert = [ + Analysis( + sequencing_group_ids=random.sample( + sequencing_group_ids, len(sequencing_group_ids) // 2 + ), + type='es-index', + status=AnalysisStatus('completed'), + outputs={ + 'qc_results': { + 'basename': 'gs://sm-dev-test/file1.txt' + }, + 'cram': { + 'basename': 'gs://sm-dev-test/file1.cram' + }, + 'qc': { + 'cram': { + 'basename': 'gs://sm-dev-test/file2.cram', + 'secondaryFiles': + { + 'cram_ext': { + 'basename': 'gs://sm-dev-test/file2.cram.ext' + }, + 'cram_meta': { + 'basename': 'gs://sm-dev-test/file2.cram.meta' + } + } + }, + 'aggregate': { + 'cram': { + 'basename': 'gs://sm-dev-test/file3.cram', + 'secondaryFiles': { + 'cram_ext': { + 'basename': 'gs://sm-dev-test/file3.cram.ext' + }, + 'cram_meta': { + 'basename': 'gs://sm-dev-test/file3.cram.meta' + } + } + }, + 'qc': { + 'basename': 'gs://sm-dev-test/file1.qc', + 'secondaryFiles': { + 'qc_ext': { + 'basename': 
'gs://sm-dev-test/file1.qc.ext' + }, + 'qc_meta': { + 'basename': 'gs://sm-dev-test/file1.qc.meta' + } + } + }, + } + } + }, + meta={}, + ) + ] + + analyses_to_insert.extend([ Analysis( sequencing_group_ids=[s], type='cram', status=AnalysisStatus('completed'), - output=f'FAKE://greek-myth/crams/{s}.cram', + outputs={'basename': f'FAKE://greek-myth/crams/{s}.cram'}, timestamp_completed=( datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 15)) ).isoformat(), @@ -182,7 +239,7 @@ def generate_random_number_within_distribution(): }, ) for s in sequencing_group_ids - ] + ]) analyses_to_insert.extend( [ @@ -190,7 +247,7 @@ def generate_random_number_within_distribution(): sample_ids=[], type='analysis-runner', status=AnalysisStatus('completed'), - output=f'FAKE://greek-myth-test/joint-calling/{s}.joint', + outputs={'basename': f'FAKE://greek-myth-test/joint-calling/{s}.joint'}, active=True, meta={ 'accessLevel': 'full', @@ -217,7 +274,7 @@ def generate_random_number_within_distribution(): ), type='es-index', status=AnalysisStatus('completed'), - output=f'FAKE::greek-myth-genome-{datetime.date.today()}', + outputs={'basename': f'FAKE::greek-myth-genome-{datetime.date.today()}'}, meta={}, ) ) @@ -237,7 +294,7 @@ def generate_random_number_within_distribution(): sequencing_group_ids=[], type='analysis-runner', status=AnalysisStatus('unknown'), - output='gs://cpg-fake-bucket/output', + outputs={'basename': 'FAKE://cpg-fake-bucket/output'}, meta={ 'timestamp': f'2022-08-{i+1}T10:00:00.0000+00:00', 'accessLevel': 'standard', diff --git a/test/data/generate_seqr_project_data.py b/test/data/generate_seqr_project_data.py index 5a3f7f5bf..34885876e 100644 --- a/test/data/generate_seqr_project_data.py +++ b/test/data/generate_seqr_project_data.py @@ -349,7 +349,7 @@ async def generate_cram_analyses(project: str, analyses_to_insert: list[Analysis sequencing_group_ids=[sg['id']], type='cram', status=AnalysisStatus('completed'), - output=f'FAKE://{project}/crams/{sg["id"]}.cram', + outputs={'basename': f'FAKE://{project}/crams/{sg["id"]}.cram'}, timestamp_completed=( datetime.datetime.now() - datetime.timedelta(days=random.randint(1, 15)) ).isoformat(), @@ -386,14 +386,14 @@ async def generate_joint_called_analyses(project: str, aligned_sgs: list[dict], sequencing_group_ids=joint_called_sgs, type='custom', status=AnalysisStatus('completed'), - output=f'FAKE::{project}-{seq_type}-{datetime.date.today()}.mt', + outputs={'basename': f'FAKE::{project}-{seq_type}-{datetime.date.today()}.mt'}, meta={'stage': 'AnnotateDataset', 'sequencing_type': seq_type}, ), Analysis( sequencing_group_ids=joint_called_sgs, type='es-index', status=AnalysisStatus('completed'), - output=f'FAKE::{project}-{seq_type}-es-{datetime.date.today()}', + outputs={'basename': f'FAKE::{project}-{seq_type}-es-{datetime.date.today()}'}, meta={'stage': 'MtToEs', 'sequencing_type': seq_type}, ) ] diff --git a/test/test_analysis.py b/test/test_analysis.py index be7a15e90..a1b841f50 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -1,6 +1,11 @@ # pylint: disable=invalid-overridden-method +import logging +import os from test.testbase import DbIsolatedTest, run_as_sync +import requests +from testcontainers.core.container import DockerContainer + from db.python.layers.analysis import AnalysisLayer from db.python.layers.assay import AssayLayer from db.python.layers.sample import SampleLayer @@ -19,6 +24,17 @@ class TestAnalysis(DbIsolatedTest): """Test sample class""" + gcs: DockerContainer + + @classmethod + def 
setUpClass(cls) -> None: + super().setUpClass() + # Convert the relative path to an absolute path + absolute_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data') + gcs = DockerContainer('fsouza/fake-gcs-server').with_bind_ports(4443, 4443).with_volume_mapping(absolute_path, '/data',).with_command('-scheme http') + gcs.start() + cls.gcs = gcs + @run_as_sync async def setUp(self) -> None: # don't need to await because it's tagged @run_as_sync @@ -76,6 +92,12 @@ async def setUp(self) -> None: self.genome_sequencing_group_id = sample.sequencing_groups[0].id self.exome_sequencing_group_id = sample.sequencing_groups[self.project_id].id + @classmethod + def tearDownClass(cls) -> None: + if cls.gcs: + cls.gcs.stop() + super().tearDownClass() + @run_as_sync async def test_get_analysis_by_id(self): """ @@ -148,6 +170,581 @@ async def test_get_analysis(self): status=AnalysisStatus.UNKNOWN, sequencing_group_ids=[], output=None, + outputs=None, + timestamp_completed=None, + project=1, + meta={}, + active=True, + author=None, + ).copy(update={'output': [], 'outputs': []}) + ] + + self.assertEqual(analyses, expected) + + @run_as_sync + async def test_analysis_output_str(self): + """ + Test adding an analysis of type ANALYSIS_RUNNER + with output file as a string via `output` field + """ + + a_id = await self.al.create_analysis( + AnalysisInternal( + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + meta={}, + output='RANDOM_OUTPUT_STRING' + ) + ) + + analyses = await self.al.query( + AnalysisFilter( + id=GenericFilter(eq=a_id), + ) + ) + expected = [ + AnalysisInternal( + id=a_id, + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + output='RANDOM_OUTPUT_STRING', + outputs='RANDOM_OUTPUT_STRING', + timestamp_completed=None, + project=1, + meta={}, + active=True, + author=None, + ) + ] + + self.assertEqual(analyses, expected) + + @run_as_sync + async def test_analysis_outputs_str(self): + """ + Test adding an analysis of type ANALYSIS_RUNNER + with output file as a string via `outputs` field + """ + + a_id = await self.al.create_analysis( + AnalysisInternal( + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + meta={}, + outputs='RANDOM_OUTPUT_STRING' + ) + ) + + analyses = await self.al.query( + AnalysisFilter( + id=GenericFilter(eq=a_id), + ) + ) + expected = [ + AnalysisInternal( + id=a_id, + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + output='RANDOM_OUTPUT_STRING', + outputs='RANDOM_OUTPUT_STRING', + timestamp_completed=None, + project=1, + meta={}, + active=True, + author=None, + ) + ] + + self.assertEqual(analyses, expected) + + @run_as_sync + async def test_analysis_output_gspath_str(self): + """ + Test adding an analysis of type ANALYSIS_RUNNER + with valid gs path output file as a string via `output` field + """ + + a_id = await self.al.create_analysis( + AnalysisInternal( + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + meta={}, + output='gs://fakegcs/file1.txt' + ) + ) + + analyses = await self.al.query( + AnalysisFilter( + id=GenericFilter(eq=a_id), + ) + ) + expected = [ + AnalysisInternal( + id=a_id, + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + output='gs://fakegcs/file1.txt', + outputs='gs://fakegcs/file1.txt', + timestamp_completed=None, + project=1, + meta={}, + active=True, + author=None, + ) + ] + + self.assertEqual(analyses, expected) + + 
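The tests above spin up `fsouza/fake-gcs-server` with testcontainers and point the google-cloud-storage client at it with anonymous credentials, mirroring what `OutputFileTable` does when `SM_ENVIRONMENT` is `development` or `local`. A condensed sketch of that wiring; the host data path is a placeholder, and the mounted directory's sub-folders (e.g. `fakegcs/`) show up as buckets in the emulator:

```python
from google.auth.credentials import AnonymousCredentials
from google.cloud.storage import Client
from testcontainers.core.container import DockerContainer

# Start the emulator with the test fixtures mounted as preloaded buckets.
gcs = (
    DockerContainer('fsouza/fake-gcs-server')
    .with_bind_ports(4443, 4443)
    .with_volume_mapping('/path/to/test/data', '/data')
    .with_command('-scheme http')
)
gcs.start()

# Point the storage client at the emulator instead of real GCS.
client = Client(
    credentials=AnonymousCredentials(),
    project='test',
    client_options={'api_endpoint': 'http://localhost:4443'},
)
print([bucket.name for bucket in client.list_buckets()])

gcs.stop()
```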
@run_as_sync + async def test_analysis_output_gspath_dict(self): + """ + Test adding an analysis of type ANALYSIS_RUNNER + with a valid gs path output file as a dict via the `outputs` field + """ + + a_id = await self.al.create_analysis( + AnalysisInternal( + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + meta={}, + outputs={'basename': 'gs://fakegcs/file1.txt'} + ) + ) + + analyses = await self.al.query( + AnalysisFilter( + id=GenericFilter(eq=a_id), + ) + ) + expected = [ + AnalysisInternal( + id=a_id, + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + output={ + 'id': 1, + 'path': 'gs://fakegcs/file1.txt', + 'basename': 'file1.txt', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.txt', + 'file_checksum': 'DG+fhg==', + 'size': 19, + 'meta': None, + 'valid': True, + 'secondary_files': [] + }, + outputs={ + 'id': 1, + 'path': 'gs://fakegcs/file1.txt', + 'basename': 'file1.txt', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.txt', + 'file_checksum': 'DG+fhg==', + 'size': 19, + 'meta': None, + 'valid': True, + 'secondary_files': [] + }, + timestamp_completed=None, + project=1, + meta={}, + active=True, + author=None, + ) + ] + + self.assertEqual(analyses, expected) + + @run_as_sync + async def test_analysis_output_files_json(self): + """ + Test adding an analysis of type ANALYSIS_RUNNER + with output files as a dict/json via `outputs` field + """ + + # Log the fake GCS bucket listing to help debug the fake-gcs-server fixture + logger = logging.getLogger() + logger.info(requests.get('http://0.0.0.0:4443/storage/v1/b/fakegcs/o', timeout=10).text) + + a_id = await self.al.create_analysis( + AnalysisInternal( + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + meta={}, + outputs={ + 'qc_results': { + 'basename': 'gs://fakegcs/file1.txt' + }, + 'cram': { + 'basename': 'gs://fakegcs/file1.cram' + }, + 'qc': { + 'cram': { + 'basename': 'gs://fakegcs/file2.cram', + 'secondaryFiles': + { + 'cram_ext': { + 'basename': 'gs://fakegcs/file2.cram.ext' + }, + 'cram_meta': { + 'basename': 'gs://fakegcs/file2.cram.meta' + } + } + }, + 'aggregate': { + 'cram': { + 'basename': 'gs://fakegcs/file3.cram', + 'secondaryFiles': { + 'cram_ext': { + 'basename': 'gs://fakegcs/file3.cram.ext' + }, + 'cram_meta': { + 'basename': 'gs://fakegcs/file3.cram.meta' + } + } + }, + 'qc': { + 'basename': 'gs://fakegcs/file1.qc', + 'secondaryFiles': { + 'qc_ext': { + 'basename': 'gs://fakegcs/file1.qc.ext' + }, + 'qc_meta': { + 'basename': 'gs://fakegcs/file1.qc.meta' + } + } + }, + } + } + } + ) + ) + + analyses = await self.al.query( + AnalysisFilter( + id=GenericFilter(eq=a_id), + ) + ) + expected = [ + AnalysisInternal( + id=a_id, + type='analysis-runner', + status=AnalysisStatus.UNKNOWN, + sequencing_group_ids=[], + output={ + 'qc_results': { + 'id': 1, + 'path': 'gs://fakegcs/file1.txt', + 'basename': 'file1.txt', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.txt', + 'file_checksum': 'DG+fhg==', + 'size': 19, + 'meta': None, + 'valid': True, + 'secondary_files': [] + }, + 'cram': { + 'id': 2, + 'path': 'gs://fakegcs/file1.cram', + 'basename': 'file1.cram', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.cram', + 'file_checksum': 'sl7SXw==', + 'size': 20, + 'meta': None, + 'valid': True, + 'secondary_files': [] + }, + 'qc': { + 'cram': { + 'id': 3, + 'path': 'gs://fakegcs/file2.cram', + 'basename': 'file2.cram', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file2', + 'nameext': '.cram', + 'file_checksum': 'sl7SXw==', +
'size': 20, + 'meta': None, + 'valid': True, + 'secondary_files': [ + { + 'id': 4, + 'path': 'gs://fakegcs/file2.cram.ext', + 'basename': 'file2.cram.ext', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file2.cram', + 'nameext': '.ext', + 'file_checksum': 'gb1EbA==', + 'size': 21, + 'meta': None, + 'valid': True, + 'secondary_files': None + }, + { + 'id': 5, + 'path': 'gs://fakegcs/file2.cram.meta', + 'basename': 'file2.cram.meta', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file2.cram', + 'nameext': '.meta', + 'file_checksum': 'af/YSw==', + 'size': 17, + 'meta': None, + 'valid': True, + 'secondary_files': None + } + ] + }, + 'aggregate': { + 'cram': { + 'id': 6, + 'path': 'gs://fakegcs/file3.cram', + 'basename': 'file3.cram', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file3', + 'nameext': '.cram', + 'file_checksum': 'sl7SXw==', + 'size': 20, + 'meta': None, + 'valid': True, + 'secondary_files': [ + { + 'id': 7, + 'path': 'gs://fakegcs/file3.cram.ext', + 'basename': 'file3.cram.ext', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file3.cram', + 'nameext': '.ext', + 'file_checksum': 'HU8n6w==', + 'size': 16, + 'meta': None, + 'valid': True, + 'secondary_files': None + }, + { + 'id': 8, + 'path': 'gs://fakegcs/file3.cram.meta', + 'basename': 'file3.cram.meta', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file3.cram', + 'nameext': '.meta', + 'file_checksum': 'af/YSw==', + 'size': 17, + 'meta': None, + 'valid': True, + 'secondary_files': None + } + ] + }, + 'qc': { + 'id': 9, + 'path': 'gs://fakegcs/file1.qc', + 'basename': 'file1.qc', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.qc', + 'file_checksum': 'uZe/hQ==', + 'size': 15, + 'meta': None, + 'valid': True, + 'secondary_files': [ + { + 'id': 10, + 'path': 'gs://fakegcs/file1.qc.ext', + 'basename': 'file1.qc.ext', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1.qc', + 'nameext': '.ext', + 'file_checksum': '/18MDg==', + 'size': 14, + 'meta': None, + 'valid': True, + 'secondary_files': None + }, + { + 'id': 11, + 'path': 'gs://fakegcs/file1.qc.meta', + 'basename': 'file1.qc.meta', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1.qc', + 'nameext': '.meta', + 'file_checksum': 'v9x0Zg==', + 'size': 15, + 'meta': None, + 'valid': True, + 'secondary_files': None + } + ] + } + } + } + }, + outputs={ + 'qc_results': { + 'id': 1, + 'path': 'gs://fakegcs/file1.txt', + 'basename': 'file1.txt', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.txt', + 'file_checksum': 'DG+fhg==', + 'size': 19, + 'meta': None, + 'valid': True, + 'secondary_files': [] + }, + 'cram': { + 'id': 2, + 'path': 'gs://fakegcs/file1.cram', + 'basename': 'file1.cram', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.cram', + 'file_checksum': 'sl7SXw==', + 'size': 20, + 'meta': None, + 'valid': True, + 'secondary_files': [] + }, + 'qc': { + 'cram': { + 'id': 3, + 'path': 'gs://fakegcs/file2.cram', + 'basename': 'file2.cram', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file2', + 'nameext': '.cram', + 'file_checksum': 'sl7SXw==', + 'size': 20, + 'meta': None, + 'valid': True, + 'secondary_files': [ + { + 'id': 4, + 'path': 'gs://fakegcs/file2.cram.ext', + 'basename': 'file2.cram.ext', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file2.cram', + 'nameext': '.ext', + 'file_checksum': 'gb1EbA==', + 'size': 21, + 'meta': None, + 'valid': True, + 'secondary_files': None + }, + { + 'id': 5, + 'path': 'gs://fakegcs/file2.cram.meta', + 'basename': 'file2.cram.meta', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file2.cram', + 'nameext': 
'.meta', + 'file_checksum': 'af/YSw==', + 'size': 17, + 'meta': None, + 'valid': True, + 'secondary_files': None + } + ] + }, + 'aggregate': { + 'cram': { + 'id': 6, + 'path': 'gs://fakegcs/file3.cram', + 'basename': 'file3.cram', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file3', + 'nameext': '.cram', + 'file_checksum': 'sl7SXw==', + 'size': 20, + 'meta': None, + 'valid': True, + 'secondary_files': [ + { + 'id': 7, + 'path': 'gs://fakegcs/file3.cram.ext', + 'basename': 'file3.cram.ext', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file3.cram', + 'nameext': '.ext', + 'file_checksum': 'HU8n6w==', + 'size': 16, + 'meta': None, + 'valid': True, + 'secondary_files': None + }, + { + 'id': 8, + 'path': 'gs://fakegcs/file3.cram.meta', + 'basename': 'file3.cram.meta', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file3.cram', + 'nameext': '.meta', + 'file_checksum': 'af/YSw==', + 'size': 17, + 'meta': None, + 'valid': True, + 'secondary_files': None + } + ] + }, + 'qc': { + 'id': 9, + 'path': 'gs://fakegcs/file1.qc', + 'basename': 'file1.qc', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1', + 'nameext': '.qc', + 'file_checksum': 'uZe/hQ==', + 'size': 15, + 'meta': None, + 'valid': True, + 'secondary_files': [ + { + 'id': 10, + 'path': 'gs://fakegcs/file1.qc.ext', + 'basename': 'file1.qc.ext', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1.qc', + 'nameext': '.ext', + 'file_checksum': '/18MDg==', + 'size': 14, + 'meta': None, + 'valid': True, + 'secondary_files': None + }, + { + 'id': 11, + 'path': 'gs://fakegcs/file1.qc.meta', + 'basename': 'file1.qc.meta', + 'dirname': 'gs://fakegcs', + 'nameroot': 'file1.qc', + 'nameext': '.meta', + 'file_checksum': 'v9x0Zg==', + 'size': 15, + 'meta': None, + 'valid': True, + 'secondary_files': None + } + ] + } + } + } + }, timestamp_completed=None, project=1, meta={}, diff --git a/test/test_generic_auditor.py b/test/test_generic_auditor.py index 491c97d05..574be2e57 100644 --- a/test/test_generic_auditor.py +++ b/test/test_generic_auditor.py @@ -384,7 +384,9 @@ async def test_query_genome_analyses_crams(self, mock_query): 'CPGaaa', ], }, - 'output': 'gs://cpg-dataset-main/cram/CPGaaa.cram', + 'outputs': { + 'basename': 'gs://cpg-dataset-main/cram/CPGaaa.cram' + }, }, ], }, @@ -395,7 +397,7 @@ async def test_query_genome_analyses_crams(self, mock_query): test_result = await auditor.get_analysis_cram_paths_for_dataset_sgs( assay_sg_id_map={1: 'CPGaaa'} ) - expected_result = {'CPGaaa': {1: 'gs://cpg-dataset-main/cram/CPGaaa.cram'}} + expected_result = {'CPGaaa': {1: {'basename': 'gs://cpg-dataset-main/cram/CPGaaa.cram'}}} self.assertDictEqual(test_result, expected_result) @@ -421,7 +423,9 @@ async def test_query_genome_and_exome_analyses_crams(self, mock_query): 'CPGaaa', ], }, - 'output': 'gs://cpg-dataset-main/cram/CPGaaa.cram', + 'outputs': { + 'basename': 'gs://cpg-dataset-main/cram/CPGaaa.cram' + }, }, ], }, @@ -437,7 +441,9 @@ async def test_query_genome_and_exome_analyses_crams(self, mock_query): 'CPGbbb', ], }, - 'output': 'gs://cpg-dataset-main/exome/cram/CPGaaa.cram', + 'outputs': { + 'basename': 'gs://cpg-dataset-main/exome/cram/CPGaaa.cram'} + , }, ], }, @@ -450,8 +456,8 @@ async def test_query_genome_and_exome_analyses_crams(self, mock_query): ) expected_result = { - 'CPGaaa': {1: 'gs://cpg-dataset-main/cram/CPGaaa.cram'}, - 'CPGbbb': {2: 'gs://cpg-dataset-main/exome/cram/CPGaaa.cram'}, + 'CPGaaa': {1: {'basename': 'gs://cpg-dataset-main/cram/CPGaaa.cram'}}, + 'CPGbbb': {2: {'basename': 'gs://cpg-dataset-main/exome/cram/CPGaaa.cram'}}, } 
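+ # expected_result above wraps each CRAM path in a {'basename': ...} dict, + # matching the structured `outputs` shape now returned by the query instead of a bare path string.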
self.assertDictEqual(test_result, expected_result) @@ -479,7 +485,7 @@ async def test_query_broken_analyses_crams(self, mock_query): 'sample_ids': [ 'CPGaaa', ], - 'output': '', + 'outputs': {}, }, ], }, @@ -508,7 +514,9 @@ async def test_query_analyses_crams_warning(self, mock_query): 'meta': { 'sequencing_type': 'genome', }, - 'output': 'gs://cpg-dataset-main/cram/CPGaaa.notcram', + 'outputs': { + 'basename': 'gs://cpg-dataset-main/cram/CPGaaa.notcram' + }, }, ], } @@ -563,7 +571,9 @@ async def test_analyses_for_sgs_without_crams(self, mock_query): { 'id': 1, 'meta': {'sequencing_type': 'genome', 'sample': 'CPGaaa'}, - 'output': 'gs://cpg-dataset-main/gvcf/CPGaaa.g.vcf.gz', + 'outputs': { + 'basename': 'gs://cpg-dataset-main/gvcf/CPGaaa.g.vcf.gz' + }, 'type': 'gvcf', 'timestampCompleted': '2023-05-11T16:33:00', } @@ -584,7 +594,7 @@ async def test_analyses_for_sgs_without_crams(self, mock_query): self.assertEqual(len(log.output), 1) self.assertEqual(len(log.records), 1) self.assertIn( - "WARNING:root:dev :: SG CPGaaa missing CRAM but has analysis {'analysis_id': 1, 'analysis_type': 'gvcf', 'analysis_output': 'gs://cpg-dataset-main/gvcf/CPGaaa.g.vcf.gz', 'timestamp_completed': '2023-05-11T16:33:00'}", + "WARNING:root:dev :: SG CPGaaa missing CRAM but has analysis {'analysis_id': 1, 'analysis_type': 'gvcf', 'analysis_output': {'basename': 'gs://cpg-dataset-main/gvcf/CPGaaa.g.vcf.gz'}, 'timestamp_completed': '2023-05-11T16:33:00'}", log.output[0], ) diff --git a/test/test_graphql.py b/test/test_graphql.py index 294e1ad56..8ecf05433 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -203,7 +203,7 @@ async def test_sg_analyses_query(self): type='cram', status=AnalysisStatus.COMPLETED, meta={}, - output='some-output', + outputs={'basename': 'some-output'}, ) ) @@ -213,7 +213,7 @@ async def test_sg_analyses_query(self): analyses(project: {eq: $project}) { id meta - output + outputs } } }""" @@ -229,7 +229,7 @@ async def test_sg_analyses_query(self): analyses = resp['sequencingGroups'][0]['analyses'] self.assertIn('id', analyses[0]) self.assertIn('meta', analyses[0]) - self.assertIn('output', analyses[0]) + self.assertIn('outputs', analyses[0]) @run_as_sync async def test_participant_phenotypes(self): diff --git a/test/testbase.py b/test/testbase.py index fa60ee66f..3019a2961 100644 --- a/test/testbase.py +++ b/test/testbase.py @@ -179,6 +179,9 @@ def tearDownClass(cls) -> None: db.exec(f'DROP DATABASE {db.dbname};') db.stop() + # gcs = cls.gcs + # gcs.stop() + def setUp(self) -> None: self._connection = self.connections[self.__class__.__name__] # create a connection on each test so we can generate a new diff --git a/web/src/pages/project/AnalysisRunnerView/AnalysisRunnerSummary.tsx b/web/src/pages/project/AnalysisRunnerView/AnalysisRunnerSummary.tsx index 7bb4f8db2..17588bbd6 100644 --- a/web/src/pages/project/AnalysisRunnerView/AnalysisRunnerSummary.tsx +++ b/web/src/pages/project/AnalysisRunnerView/AnalysisRunnerSummary.tsx @@ -23,7 +23,7 @@ query AnalysisRunnerLogs($project_name: String!) { analyses(type: { eq: "analysis-runner" }, status: {eq: UNKNOWN}) { id meta - output + outputs auditLogs { author timestamp