Skip to content

Commit

Permalink
Use glossarist for reading concepts (#20)
Browse files Browse the repository at this point in the history
* using glossarist for reading concepts

* updated specs

* code refactoring and updated specs
  • Loading branch information
HassanAkbar authored Mar 6, 2024
1 parent af501c8 commit d17ba8e
Show file tree
Hide file tree
Showing 11 changed files with 758 additions and 393 deletions.
1 change: 1 addition & 0 deletions jekyll-geolexica.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
spec.require_paths = ["lib"]

spec.add_runtime_dependency "glossarist", "~> 2.0"
# Jekyll 4.1.0 adds some breaking changes which are reverted in 4.1.1.
# I doubt we should be worried, but there is no good reason to allow them
# either.
Expand Down
1 change: 1 addition & 0 deletions lib/jekyll/geolexica.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#

require "jekyll"
require "glossarist"

module Jekyll
module Geolexica
Expand Down
5 changes: 5 additions & 0 deletions lib/jekyll/geolexica/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ def localized_concepts_path
File.expand_path(path, site.source)
end

# Absolute location of the glossary: the "glossary_path" entry of
# glossary_config, expanded relative to the site source directory.
def glossary_path
  File.expand_path(glossary_config["glossary_path"], site.source)
end

# Returns the value stored under the "format" key of glossary_config
# (nil when a plain Hash has no such key).
def glossary_format
glossary_config["format"]
end
Expand Down
82 changes: 17 additions & 65 deletions lib/jekyll/geolexica/glossary.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,27 @@ class Glossary < Hash

# Keeps a reference to the given site in @site and creates a new
# Glossarist::ManagedConceptCollection in @collection.
def initialize(site)
@site = site
@collection = Glossarist::ManagedConceptCollection.new
end

# Loads concepts and stores each of them as a Concept.
#
# Steps, in order: log an info message, run load_concept for every file
# matched by concepts_glob, load the Glossarist collection from
# glossary_path, then convert every managed concept into a hash and store it.
#
# NOTE(review): the Dir.glob/load_concept line and the
# @collection.load_from_files line look like the old and the new loading
# paths of the same change. Confirm both are meant to run; if both do,
# concepts may be read twice.
def load_glossary
Jekyll.logger.info('Geolexica:', 'Loading concepts')
Dir.glob(concepts_glob).each { |path| load_concept(path) }
@collection.load_from_files(glossary_path)

@collection.each do |managed_concept|
# Keys coming from managed_concept.to_h win over "id", "term" and "termid"
# when they collide, because they are merged on top.
concept_hash = {
"id" => managed_concept.uuid,
"term" => managed_concept.default_designation,
"termid" => managed_concept.id,
}.merge(managed_concept.to_h)

# Expose each localization's "data" entry under its language key.
managed_concept.localizations.each do |lang, localization|
concept_hash[lang] = localization.to_h["data"]
end

# Site-specific hook; runs before the Concept is built.
preprocess_concept_hash(concept_hash)
store(Concept.new(concept_hash))
end
end

def store(concept)
Expand All @@ -37,70 +53,6 @@ def to_liquid

protected

# Reads a single concept file, passes the parsed hash through
# preprocess_concept_hash and stores the resulting Concept.
#
# The 'paneron' glossary format is read with read_paneron_concept_file; any
# other format falls back to read_concept_file. Any StandardError is logged
# together with the offending file path and then re-raised.
def load_concept(concept_file_path)
  Jekyll.logger.debug('Geolexica:',
                      "reading concept from file #{concept_file_path}")

  reader = glossary_format == 'paneron' ? :read_paneron_concept_file : :read_concept_file
  parsed = send(reader, concept_file_path)

  preprocess_concept_hash(parsed)
  store(Concept.new(parsed))
rescue StandardError
  Jekyll.logger.error('Geolexica:',
                      "failed to read concept from file #{concept_file_path}")
  raise
end

# Reads and parses the concept file located at the given path.
#
# Both Date and Time are permitted, matching read_paneron_concept_file, so
# that a date-only YAML scalar (e.g. 2008-11-15) is parsed instead of raising
# Psych::DisallowedClass.
#
# path - location of the YAML concept file.
#
# Returns whatever the YAML document parses to. File and YAML parsing errors
# are propagated to the caller.
def read_concept_file(path)
  YAML.safe_load(File.read(path), permitted_classes: [Date, Time])
end

# Reads a Paneron-style concept file and pulls in its localized concepts.
#
# The concept's data.identifier is copied to 'termid'. For every entry of
# data.localizedConcepts the file "<id>.yaml" under localized_concepts_path
# is parsed and its 'data' section is stored under the language key. Present
# localizations are passed to normalize_sources, and the first designation
# of the 'eng' localization becomes the concept's 'term'.
#
# Returns the assembled concept hash.
def read_paneron_concept_file(path)
  yaml_options = { permitted_classes: [Date, Time] }
  parse_file = ->(file) { YAML.safe_load(File.read(file), **yaml_options) }

  concept = parse_file.call(path)
  concept['termid'] = concept['data']['identifier']

  localized_ids = concept['data']['localizedConcepts'] || []
  localized_ids.each do |lang, local_concept_id|
    localized_file = File.join(localized_concepts_path, "#{local_concept_id}.yaml")
    localized = parse_file.call(localized_file)['data']
    concept[lang] = localized

    # A localized file without a 'data' section is kept as nil and skipped.
    next unless localized

    normalize_sources(localized)
    concept['term'] = localized['terms'].first['designation'] if lang == 'eng'
  end

  concept
end

# Moves the entries of concept['authoritativeSource'] into concept['sources'].
#
# The 'authoritativeSource' key is removed from the concept. Each of its
# entries is appended to concept['sources'] (created when missing) as a hash
# of 'status', 'modification', 'origin' and 'type' => 'authoritative', with
# nil values dropped. 'status' and 'modification' come from the entry's
# 'relationship' when it has one; 'origin' carries 'ref', 'clause' and 'link'.
def normalize_sources(concept)
  raw_sources = concept.delete('authoritativeSource') || []
  concept['sources'] ||= []

  raw_sources.each do |raw|
    # A missing relationship simply yields nil status and modification.
    relationship = raw['relationship'] || {}
    origin = {
      'ref' => raw['ref'],
      'clause' => raw['clause'],
      'link' => raw['link'],
    }.compact

    concept['sources'] << {
      "status" => relationship['type'],
      "modification" => relationship['modification'],
      "origin" => origin,
      'type' => 'authoritative',
    }.compact
  end
end

# Extension hook: a no-op here, but individual sites may replace this method
# to adjust a concept hash before it is turned into a Concept.
def preprocess_concept_hash(concept_hash)
end

Expand Down
147 changes: 71 additions & 76 deletions spec/concept_fixtures/concept-10.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,26 @@
"type" : "expression"
}
],
"authoritative_source" : {
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"ref" : "ISO 1087-1:2000",
"link" : "https://www.iso.org/standard/20057.html"
},
"authoritative_source" : [
{
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"ref" : "ISO 1087-1:2000",
"link" : "https://www.iso.org/standard/20057.html"
}
],
"lineage_source_similarity" : 1,
"review_decision_event" : "Publication of ISO 19104:2016",
"notes" : [],
"definition" : "término clasificado de acuerdo a la escala de aceptanción como un sinónimo de un término preferente",
"definition" : ["término clasificado de acuerdo a la escala de aceptanción como un sinónimo de un término preferente"],
"language_code" : "spa",
"lineage_source" : "ISO/TS 19104:2008",
"review_decision_date" : "2016-10-01 00:00:00 +0800",
"examples" : [],
"release" : "2",
"review_date" : "2013-01-29 00:00:00 +0800",
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"review_status" : "final",
"entry_status" : "valid",
"id" : 10,
"review_decision" : "accepted"
"id" : "10"
},
"ara" : {
"terms" : [
Expand All @@ -41,27 +40,26 @@
"type" : "expression"
}
],
"authoritative_source" : {
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"link" : "https://www.iso.org/standard/20057.html"
},
"authoritative_source" : [
{
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"link" : "https://www.iso.org/standard/20057.html"
}
],
"lineage_source_similarity" : 1,
"review_decision_event" : "Publication of ISO 19104:2016",
"notes" : [],
"language_code" : "ara",
"definition" : "مصطلح صنف وفقا لمقياس تصنيف قبول المصطلحات كمرادف للمصطلح المفضل",
"definition" : ["مصطلح صنف وفقا لمقياس تصنيف قبول المصطلحات كمرادف للمصطلح المفضل"],
"lineage_source" : "ISO/TS 19104:2008",
"review_decision_date" : "2016-10-01 00:00:00 +0800",
"examples" : [],
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"release" : "2",
"review_date" : "2013-01-29 00:00:00 +0800",
"entry_status" : "valid",
"review_decision" : "accepted",
"id" : 10,
"review_status" : "final"
"id" : "10"
},
"msa" : null,
"dut" : null,
Expand All @@ -74,37 +72,38 @@
"designation" : "승인 용어"
}
],
"authoritative_source" : {
"link" : "https://www.iso.org/standard/20057.html",
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added."
},
"authoritative_source" : [
{
"link" : "https://www.iso.org/standard/20057.html",
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added."
}
],
"review_decision_event" : "Publication of ISO 19104:2016",
"notes" : [],
"definition" : "상용 용어의 동의어로써 용어 수용가능성 등급체계에 의거하여 등급이 부여된 용어",
"definition" : ["상용 용어의 동의어로써 용어 수용가능성 등급체계에 의거하여 등급이 부여된 용어"],
"language_code" : "kor",
"review_decision_date" : "2016-10-01 00:00:00 +0800",
"lineage_source" : "ISO/TS 19104:2008",
"examples" : [],
"review_date" : "2013-01-29 00:00:00 +0800",
"release" : "2",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"review_status" : "final",
"id" : 10,
"review_decision" : "accepted",
"id" : "10",
"entry_status" : "valid"
},
"eng" : {
"notes" : [],
"language_code" : "eng",
"definition" : "term rated according to the scale of the term acceptability rating as a synonym for a preferred term",
"definition" : ["term rated according to the scale of the term acceptability rating as a synonym for a preferred term"],
"lineage_source_similarity" : 1,
"authoritative_source" : {
"link" : "https://www.iso.org/standard/20057.html",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"ref" : "ISO 1087-1:2000"
},
"authoritative_source" : [
{
"link" : "https://www.iso.org/standard/20057.html",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"ref" : "ISO 1087-1:2000"
}
],
"terms" : [
{
"designation" : "admitted term",
Expand All @@ -114,13 +113,10 @@
],
"review_decision_event" : "Publication of ISO 19104:2016",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"review_date" : "2013-01-29 00:00:00 +0800",
"release" : "2",
"review_decision" : "accepted",
"id" : 10,
"id" : "10",
"entry_status" : "valid",
"review_status" : "final",
"review_decision_date" : "2016-10-01 00:00:00 +0800",
"lineage_source" : "ISO/TS 19104:2008",
"examples" : []
Expand All @@ -132,17 +128,16 @@
"review_date" : "2013-01-29 00:00:00 +0800",
"release" : "2",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"review_status" : "final",
"review_decision" : "accepted",
"id" : 10,
"id" : "10",
"entry_status" : "valid",
"lineage_source_similarity" : 1,
"authoritative_source" : {
"link" : "https://www.iso.org/standard/20057.html",
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added."
},
"authoritative_source" : [
{
"link" : "https://www.iso.org/standard/20057.html",
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added."
}
],
"terms" : [
{
"designation" : "tilladt term",
Expand All @@ -152,7 +147,7 @@
],
"review_decision_event" : "Publication of ISO 19104:2016",
"notes" : [],
"definition" : "term, klassificeret efter en skala for tilladte termer, anvendt som synonym for en foretrukket term",
"definition" : ["term, klassificeret efter en skala for tilladte termer, anvendt som synonym for en foretrukket term"],
"language_code" : "dan"
},
"fin" : null,
Expand All @@ -165,11 +160,13 @@
"type" : "expression"
}
],
"authoritative_source" : {
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"link" : "https://www.iso.org/standard/20057.html"
},
"authoritative_source" : [
{
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"link" : "https://www.iso.org/standard/20057.html"
}
],
"review_decision_event" : "Publication of ISO 19104:2016",
"notes" : [],
"language_code" : "ger",
Expand All @@ -179,10 +176,7 @@
"review_date" : "2013-01-29 00:00:00 +0800",
"release" : "2",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"review_status" : "final",
"review_decision" : "accepted",
"id" : 10,
"id" : "10",
"entry_status" : "valid"
},
"jpn" : null,
Expand All @@ -194,10 +188,7 @@
"review_date" : "2013-01-29 00:00:00 +0800",
"release" : "2",
"date_accepted" : "2008-11-15 00:00:00 +0800",
"review_decision_notes" : "Authoritative reference changed from ISO 1087-1:2000 to ISO 1087-1:2000, 3.4.16, modified — the Note 1 to entry has been added.. Lineage source added as ISO/TS 19104:2008",
"review_status" : "final",
"id" : 10,
"review_decision" : "accepted",
"id" : "10",
"entry_status" : "valid",
"lineage_source_similarity" : 1,
"terms" : [
Expand All @@ -207,22 +198,24 @@
"type" : "expression"
}
],
"authoritative_source" : {
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"link" : "https://www.iso.org/standard/20057.html"
},
"authoritative_source" : [
{
"ref" : "ISO 1087-1:2000",
"clause" : "3.4.16, modified — the Note 1 to entry has been added.",
"link" : "https://www.iso.org/standard/20057.html"
}
],
"review_decision_event" : "Publication of ISO 19104:2016",
"notes" : [],
"definition" : "term som bedömts vara lämplig för ett visst begrepp och som används vid sidan av en rekommenderad term",
"definition" : ["term som bedömts vara lämplig för ett visst begrepp och som används vid sidan av en rekommenderad term"],
"language_code" : "swe"
},
"termid" : 10,
"termid" : "10",
"rus" : {
"id" : 10,
"id" : "10",
"entry_status" : "valid",
"language_code" : "rus",
"definition" : "термин, оцененный по шкале рейтинга приемлемости термина как синоним предпочтительного термина",
"definition" : ["термин, оцененный по шкале рейтинга приемлемости термина как синоним предпочтительного термина"],
"date_accepted" : "2008-11-15 00:00:00 +0800",
"notes" : [],
"release" : "2",
Expand All @@ -236,9 +229,11 @@
"normative_status" : "preferred"
}
],
"authoritative_source" : {
"ref" : "ISO 1087-1:2000",
"link" : "https://www.iso.org/standard/20057.html"
}
"authoritative_source" : [
{
"ref" : "ISO 1087-1:2000",
"link" : "https://www.iso.org/standard/20057.html"
}
]
}
}
Loading

0 comments on commit d17ba8e

Please sign in to comment.