Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding manipulation of tags #129

Merged
merged 5 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
name: coverage-report
path: coverage-report

Codecov:
Code-Coverage:
needs: Unit-Tests
runs-on: ubuntu-latest
steps:
Expand Down
24 changes: 24 additions & 0 deletions src/nbiatoolkit/dicomtags/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from .tags import (
convert_element_to_int,
convert_int_to_element,
LOOKUP_TAG,
element_VR_lookup,
getSeriesModality,
)

from .tags import (
subsetSeriesTags,
getReferencedFrameOfReferenceSequence,
getReferencedSeriesUIDS,
)

# Names exported by ``from <this package> import *``; each entry is one of the
# names imported from ``.tags`` at the top of this module.
__all__ = [
"convert_element_to_int",
"convert_int_to_element",
"LOOKUP_TAG",
"element_VR_lookup",
"getSeriesModality",
"subsetSeriesTags",
"getReferencedFrameOfReferenceSequence",
"getReferencedSeriesUIDS",
]
144 changes: 141 additions & 3 deletions src/nbiatoolkit/dicomtags/tags.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from math import log
from pydicom.datadict import dictionary_VR
from pydicom.datadict import tag_for_keyword
from pydicom._dicom_dict import DicomDictionary
import pandas as pd
from typing import Any, Union, List
from typing import List


def convert_element_to_int(element_str: str) -> int:
Expand Down Expand Up @@ -154,6 +154,17 @@ def getSeriesModality(series_tags_df: pd.DataFrame) -> str:
def subsetSeriesTags(series_tags_df: pd.DataFrame, element: str) -> pd.DataFrame:
"""
Subsets a DataFrame containing DICOM series tags based on the start and end elements.

Args:
series_tags_df (pd.DataFrame): A DataFrame containing DICOM series tags.
element (str): The element to subset the DataFrame.

Returns:
pd.DataFrame: A DataFrame containing the subset of the series tags.

Raises:
ValueError: If the element is not found in the series tags.
ValueError: If more than two elements are found in the series tags.
"""

locs: pd.DataFrame
Expand All @@ -162,13 +173,31 @@ def subsetSeriesTags(series_tags_df: pd.DataFrame, element: str) -> pd.DataFrame
if len(locs) == 0:
raise ValueError("Element not found in the series tags.")

if len(locs) == 1:
raise ValueError(
"Only one element found in the series tags. Ensure element is a sequence"
)

if len(locs) > 2:
raise ValueError("More than two elements found in the series tags.")

return series_tags_df.iloc[locs.index[0] : locs.index[1]]
return series_tags_df.iloc[locs.index[0] : locs.index[1] + 1]


def getReferencedFrameOfReferenceSequence(series_tags_df: pd.DataFrame) -> pd.DataFrame:
"""
Given a DataFrame containing DICOM series tags, retrieves the ReferencedFrameOfReferenceSequence.

Args:
series_tags_df (pd.DataFrame): A DataFrame containing DICOM series tags.

Returns:
pd.DataFrame: A DataFrame containing the ReferencedFrameOfReferenceSequence.

Raises:
ValueError: If the series is not an RTSTRUCT.

"""
modality = getSeriesModality(series_tags_df=series_tags_df)
if modality != "RTSTRUCT":
raise ValueError("Series is not an RTSTRUCT.")
Expand Down Expand Up @@ -220,3 +249,112 @@ def getReferencedSeriesUIDS(series_tags_df: pd.DataFrame) -> List[str]:
UIDS: list[str] = value["data"].to_list()

return UIDS


def getSequenceElement(
    sequence_tags_df: pd.DataFrame, element_keyword: str
) -> pd.DataFrame:
    """
    Slice out the rows of a tags DataFrame that belong to one DICOM sequence.

    The keyword is resolved to its integer tag with ``LOOKUP_TAG``, turned
    into an element string with ``convert_int_to_element``, and that element
    is handed to ``subsetSeriesTags``, which performs the actual subsetting.

    Args:
        sequence_tags_df (pd.DataFrame): A DataFrame containing DICOM tags.
        element_keyword (str): DICOM keyword of the sequence to extract,
            e.g. ``"StructureSetROISequence"``.

    Returns:
        pd.DataFrame: Whatever ``subsetSeriesTags`` returns for the resolved
            element.

    Raises:
        ValueError: Propagated from ``subsetSeriesTags`` when the element is
            matched zero times, exactly once, or more than twice.
    """
    return subsetSeriesTags(
        series_tags_df=sequence_tags_df,
        element=convert_int_to_element(
            combined_int=LOOKUP_TAG(keyword=element_keyword)
        ),
    )


def camel_case_tag(string: str) -> str:
    """
    Remove all whitespace from a tag name, leaving the letter case untouched.

    Despite the name, no capitalisation is applied: the words are joined
    exactly as written so that names coming from the DICOM dictionary are not
    modified beyond dropping the whitespace between (and around) words.

    Args:
        string (str): The input string to be converted.

    Returns:
        str: The input with every run of whitespace removed.

    Example:
        >>> camel_case_tag("ROI Number")
        'ROINumber'
        >>> camel_case_tag("hello world")
        'helloworld'
    """
    # str.split() with no argument splits on any whitespace run and discards
    # leading/trailing whitespace, so joining the pieces strips it all.
    return "".join(string.split())


def extract_ROI_info(StructureSetROISequence) -> dict[str, dict[str, str]]:
    """
    Extracts ROI information from the StructureSetROISequence.

    Every row whose ``name`` is ``"ROI Number"`` starts a new ROI. The rows
    that follow it, up to (but excluding) the next ``"ROI Number"`` row or the
    end of the DataFrame, are collected as that ROI's attributes.

    Args:
        StructureSetROISequence (pandas.DataFrame): A pandas DataFrame
            representing the StructureSetROISequence. It must have a ``name``
            and a ``data`` column.

    Returns:
        dict[str, dict[str, str]]: A dictionary keyed by the ``data`` value of
            each ``"ROI Number"`` row. Each value maps the whitespace-stripped
            ``name`` of a following row to that row's ``data``.

    Raises:
        ValueError: If ROI Number is not found in the StructureSetROISequence.
    """
    names = StructureSetROISequence["name"]
    data = StructureSetROISequence["data"]

    # Work with row positions rather than index labels so the result does not
    # depend on the DataFrame carrying a contiguous integer index.
    roi_positions = [
        pos for pos, is_roi in enumerate(names == "ROI Number") if is_roi
    ]

    if not roi_positions:
        raise ValueError("ROI Number not found in the StructureSetROISequence.")

    # Each ROI ends where the next one starts; the last ROI runs to the end of
    # the frame. Pairing only consecutive ROI rows would silently drop the
    # final ROI (and return nothing at all for a single-ROI sequence).
    # NOTE(review): if the caller's frame has trailing rows that are not part
    # of the last ROI item, they will be attributed to it -- confirm against
    # the shape of the subset produced upstream.
    block_ends = roi_positions[1:] + [len(StructureSetROISequence)]

    ROISet: dict[str, dict[str, str]] = {}
    for start, end in zip(roi_positions, block_ends):
        ROI_number: str = data.iloc[start]

        ROISet[ROI_number] = {
            camel_case_tag(string=name): value
            for name, value in zip(
                names.iloc[start + 1 : end], data.iloc[start + 1 : end]
            )
        }

    return ROISet


# def getRTSTRUCT_ROI_info(seriesUID: str) -> dict[str, dict[str, str]]:
# """
# Given a SeriesInstanceUID of an RTSTRUCT, retrieves the ROI information.

# Args:
# seriesUID (str): The SeriesInstanceUID of the RTSTRUCT.

# Returns:
# dict[str, dict[str, str]]: A dictionary containing the ROI information.
# """

# RTSTRUCT_Tags = client.getDICOMTags(seriesUID)

# StructureSetROISequence = getSequenceElement(sequence_tags_df=RTSTRUCT_Tags, element_keyword="StructureSetROISequence")

# return extract_ROI_info(StructureSetROISequence)
22 changes: 22 additions & 0 deletions src/nbiatoolkit/nbia.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
ReturnType,
conv_response_list,
)

from .dicomtags.tags import (
getReferencedSeriesUIDS,
extract_ROI_info,
getSequenceElement,
)

import pandas as pd
import requests
from requests.exceptions import JSONDecodeError as JSONDecodeError
Expand Down Expand Up @@ -615,6 +622,21 @@ def getDICOMTags(

return conv_response_list(response, returnType)

def getRefSeriesUIDs(
    self,
    SeriesInstanceUID: str,
) -> List[str]:
    """
    Return the series UIDs referenced by the given series.

    The DICOM tags of ``SeriesInstanceUID`` are requested as a DataFrame via
    ``getDICOMTags`` and passed to ``getReferencedSeriesUIDS``.

    Args:
        SeriesInstanceUID (str): UID of the series whose references are
            wanted.

    Returns:
        List[str]: The list produced by ``getReferencedSeriesUIDS``.

    Raises:
        ValueError: If ``getDICOMTags`` does not return a DataFrame.
    """
    tags_df = self.getDICOMTags(
        SeriesInstanceUID=SeriesInstanceUID,
        return_type=ReturnType.DATAFRAME,
    )

    # isinstance instead of an exact type() comparison: idiomatic, and it also
    # accepts DataFrame subclasses and narrows the type for checkers.
    if not isinstance(tags_df, pd.DataFrame):
        raise ValueError("DICOM Tags not df or not found in the response.")

    return getReferencedSeriesUIDS(series_tags_df=tags_df)

def downloadSeries(
self,
SeriesInstanceUID: Union[str, list],
Expand Down
37 changes: 37 additions & 0 deletions tests/test_tags.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pandas import DataFrame
import pytest
from src.nbiatoolkit import NBIAClient
from src.nbiatoolkit.dicomtags.tags import convert_int_to_element
Expand Down Expand Up @@ -154,3 +155,39 @@ def test_getSeriesModality(RTSTRUCT_Tags):
def test_failsubsetSeriesTags(RTSTRUCT_Series):
with pytest.raises(KeyError) as e:
subsetSeriesTags(RTSTRUCT_Series, "(0008,0060)")


def test_extract_ROI_info(RTSTRUCT_Tags):
    """Exercise getSequenceElement and extract_ROI_info on the RTSTRUCT fixture."""

    StructureSetROISequence: DataFrame = getSequenceElement(
        sequence_tags_df=RTSTRUCT_Tags, element_keyword="StructureSetROISequence"
    )

    # make sure that the StructureSetROISequence is not empty
    assert (
        not StructureSetROISequence.empty
    ), "Expected StructureSetROISequence to not be empty, but got empty"

    ROI_info: dict[str, dict[str, str]] = extract_ROI_info(StructureSetROISequence)

    assert ROI_info is not None, "Expected ROI_info to not be None, but got None"

    # ROI_info should have at least 26 keys, all of which are strings of ints
    assert len(ROI_info) >= 26, f"Expected at least 26 keys, but got {len(ROI_info)}"
    keys = [int(key) for key in ROI_info.keys()]

    # assert all keys are between 1 and 29 (inclusive)
    assert all(
        1 <= key <= 29 for key in keys
    ), "Expected all keys to be between 1 and 29"

    print("All test cases passed!")


def test_getReferencedSeriesUIDS(client, RTSTRUCT_Series):
    """Check client.getRefSeriesUIDs against the known reference of the RTSTRUCT fixture."""
    expected = ["1.3.6.1.4.1.14519.5.2.1.133742245714270925254982946723351496764"]

    # Use the first SeriesInstanceUID of the fixture as the query series.
    series_uid = RTSTRUCT_Series["SeriesInstanceUID"].values[0]
    result = client.getRefSeriesUIDs(series_uid)

    assert result == expected, f"Expected {expected}, but got {result}"
Loading