Skip to content

Commit

Permalink
Merge pull request #449 from datalad/enh-rich-data
Browse files Browse the repository at this point in the history
Rich data enrichment v2
  • Loading branch information
jsheunis authored Apr 27, 2024
2 parents 53ac02a + 8e52e4c commit 821b12f
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 8 deletions.
2 changes: 2 additions & 0 deletions datalad_catalog/catalog/assets/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ var datacat = new Vue({
links: {},
dataset_options: {},
config_ready: false,
catalog_config: {},
},
methods: {
gotoHome() {
Expand Down Expand Up @@ -45,6 +46,7 @@ var datacat = new Vue({
obj[key] = value;
}
}
this.catalog_config = obj
// set social links
this.social_links = obj.social_links
// set dataset options
Expand Down
168 changes: 165 additions & 3 deletions datalad_catalog/catalog/assets/app_component_dataset.js
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,39 @@ const datasetView = () =>
scripttag.setAttribute("id", "structured-data");
document.head.appendChild(scripttag);
}
keys_to_populate = [
"name", // Text
"description", // Text
"alternateName", // Text
"creator", // Person or Organization
"citation", // Text or CreativeWork
"funder", // Person or Organization
"hasPart", // URL or Dataset
// "isPartOf", // URL or Dataset
"identifier", // URL, Text, or PropertyValue
// "isAccessibleForFree", // Boolean
"keywords", // Text
"license", // URL or CreativeWork
// "measurementTechnique", // Text or URL
"sameAs", // URL
// "spatialCoverage", // Text or Place
// "temporalCoverage", // Text
// "variableMeasured", // Text or PropertyValue
"version", // Text or Number
// "url", // URL
"includedInDataCatalog", // DataCatalog
// "distribution", // DataDownload
]
obj = {
"@context": "https://schema.org/",
"@type": "Dataset",
"name": this.displayData.display_name ? this.displayData.display_name : "",
"description": this.selectedDataset.description ? this.selectedDataset.description : ""
}
scripttag.textContent = JSON.stringify(obj);
for (var k=0; k<keys_to_populate.length; k++) {
key = keys_to_populate[k]
obj[key] = this.getRichData(key, dataset, disp_dataset)
}

scripttag.textContent = JSON.stringify(pruneObject(obj));

dataset_id_path = getFilePath(this.selectedDataset.dataset_id)
fetch(dataset_id_path)
Expand Down Expand Up @@ -403,6 +429,142 @@ const datasetView = () =>
let url_qp2 = new URL(document.location.toString()).searchParams
console.debug("- After: URL query string: %s", url_qp2.toString())
},
getRichData(key, selectedDS, displayDS) {
switch (key) {
case "name":
return displayDS.display_name ? displayDS.display_name : ""
case "description":
return selectedDS.description ? selectedDS.description : ""
case "alternateName":
// use alias if present
return [selectedDS.alias ? selectedDS.alias : ""]
case "creator":
// authors
return selectedDS.authors?.map( (auth) => {
return {
"@type": "Person",
"givenName": auth.givenName ? auth.givenName : null,
"familyName": auth.familyName ? auth.familyName : null,
"name": auth.name ? auth.name : null,
"sameAs": this.getAuthorORCID(auth),
}
})
case "citation":
// from publications
return selectedDS.publications?.map( (pub) => {
return pub.doi
})
case "funder":
// from funding
return selectedDS.funding?.map( (fund) => {
var fund_obj = {
"@type": "Organization",
"name": fund.funder ? fund.funder : (fund.name ? fund.name : (fund.description ? fund.description : null)),
}
var sameas = this.getFunderSameAs(fund)
if (sameas) {
fund_obj["sameAs"] = sameas
}
return fund_obj
})
case "hasPart":
// from subdatasets
var parts = selectedDS.subdatasets?.map( (ds) => {
return {
"@type": "Dataset",
"name": ds.dirs_from_path[ds.dirs_from_path.length - 1]
}
})
return parts.length ? parts : null
// case "isPartOf":
case "identifier":
// use DOI
return selectedDS.doi ? selectedDS.doi : null
// "isAccessibleForFree", // Boolean
case "keywords":
return selectedDS.keywords?.length ? selectedDS.keywords : null
case "license":
return selectedDS.license?.url ? selectedDS.license.url : null
// "measurementTechnique", // Text or URL
case "sameAs":
// homepage
if (selectedDS.additional_display && selectedDS.additional_display.length) {
for (var t=0; t<selectedDS.additional_display.length; t++) {
var current_display = selectedDS.additional_display[t]
var homepage = current_display.content?.homepage?.["@value"]
if (homepage) {
return homepage
}
}
} else {
return null
}
selectedDS.additional_display[0]["content"]["homepage"]["@value"]
return
// "spatialCoverage", // Text or Place
// "temporalCoverage", // Text
// "variableMeasured", // Text or PropertyValue
case "version":
return selectedDS.dataset_version
// "url", // URL
case "includedInDataCatalog":
var obj = {
"@type":"DataCatalog",
"name": this.$root.catalog_config?.catalog_name ? this.$root.catalog_config.catalog_name : null,
"url": this.$root.catalog_config?.catalog_url ? this.$root.catalog_config.catalog_url : null,
}
if (obj.name == null && obj.url == null) {
return null
} else {
return obj
}
// "distribution", // DataDownload
default:
return null
}
},
getAuthorORCID(author) {
if (author.hasOwnProperty("identifiers") && author.identifiers.length > 0) {
orcid_element = author.identifiers.filter(
(x) => x.name === "ORCID"
);
if (orcid_element.length > 0) {
orcid_code = orcid_element[0].identifier
const prefix = "https://orcid.org/"
return orcid_code.indexOf(prefix) >= 0 ? orcid_code : prefix + orcid_code
} else {
return null
}
} else {
return null
}
},
getFunderSameAs(fund) {
const common_funders = [
{
"name": "Deutsche Forschungsgemeinschaft",
"alternate_name": "DFG",
"ror": "https://ror.org/018mejw64"
},
{
"name": "National Science Foundation",
"alternate_name": "NSF",
"ror": "https://ror.org/021nxhr62"
}
]
for (var i=0; i<common_funders.length; i++) {
var cf = common_funders[i]
if (fund.funder?.indexOf(cf.name) >= 0 ||
fund.name?.indexOf(cf.name) >= 0 ||
fund.description?.indexOf(cf.name) >= 0 ||
fund.funder?.indexOf(cf.alternate_name) >= 0 ||
fund.name?.indexOf(cf.alternate_name) >= 0 ||
fund.description?.indexOf(cf.alternate_name) >= 0 ) {
return cf.ror
}
}
return null
},
copyCloneCommand(index) {
// https://stackoverflow.com/questions/60581285/execcommand-is-now-obsolete-whats-the-alternative
// https://www.sitepoint.com/clipboard-api/
Expand Down
28 changes: 26 additions & 2 deletions datalad_catalog/catalog/assets/app_globals.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ const superdatasets_file = metadata_dir + "/super.json";
const SPLIT_INDEX = 3;
const SHORT_NAME_LENGTH = 0; // number of characters in name to display, zero if all
const default_config = {
catalog_name: "DataCat",
catalog_name: "DataCat Demo",
catalog_url: "https://datalad-catalog.netlify.app/",
link_color: "#fba304",
link_hover_color: "#af7714",
logo_path: "/artwork/catalog_logo.svg",
Expand Down Expand Up @@ -101,4 +102,27 @@ async function checkFileExists(url) {
} catch (error) {
return false;
}
}
}

/**
 * Return a copy of `obj` without null/undefined values.
 *
 * - Properties whose value is null or undefined are dropped.
 * - Nested non-array objects are pruned recursively.
 * - In non-empty arrays, null/undefined elements are dropped and non-array
 *   object elements are pruned recursively; all other elements (including
 *   nested arrays) are kept as they are.
 * - Every other value, including empty arrays, is copied unchanged.
 *
 * The input object is not modified.
 *
 * @param {Object} obj - object to prune.
 * @returns {Object} new, pruned object.
 */
function pruneObject(obj) {
  const isNonArrayObject = (value) =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const newObj = {};
  for (const [k, v] of Object.entries(obj)) {
    if (isNonArrayObject(v)) {
      newObj[k] = pruneObject(v);
    } else if (Array.isArray(v) && v.length > 0) {
      // The accumulator must be local: the recursive calls below build
      // their own arrays and must not share state with this one.
      const newArr = [];
      for (const el of v) {
        if (isNonArrayObject(el)) {
          newArr.push(pruneObject(el));
        } else if (el != null) {
          newArr.push(el);
        }
      }
      newObj[k] = newArr;
    } else if (v != null) {
      newObj[k] = v;
    }
  }
  return newObj;
}

3 changes: 2 additions & 1 deletion datalad_catalog/catalog/config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"catalog_name": "DataCat",
"catalog_name": "DataCat Demo",
"catalog_url": "https://datalad-catalog.netlify.app/",
"link_color": "#fba304",
"link_hover_color": "#af7714",
"social_links": {
Expand Down
3 changes: 2 additions & 1 deletion datalad_catalog/config/config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"catalog_name": "DataCat",
"catalog_name": "DataCat Demo",
"catalog_url": "https://datalad-catalog.netlify.app/",
"logo_path": "",
"link_color": "#fba304",
"link_hover_color": "#af7714",
Expand Down
2 changes: 1 addition & 1 deletion datalad_catalog/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_config_without_file(demo_catalog_without_config):
assert demo_catalog_without_config.config_path == default_config_path
assert hasattr(demo_catalog_without_config, "config")
assert demo_catalog_without_config.config is not None
assert demo_catalog_without_config.config[CATALOG_NAME] == "DataCat"
assert demo_catalog_without_config.config[CATALOG_NAME] == "DataCat Demo"


def test_dataset_config(tmp_path):
Expand Down

0 comments on commit 821b12f

Please sign in to comment.