From bbf478c410c523bed94cbc0546b805574149c397 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 24 Sep 2021 14:24:00 +0300 Subject: [PATCH] Add notes for 2021-09-24 --- content/posts/2021-09.md | 47 +++++++++++++++++++++ docs/2021-09/index.html | 55 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/sitemap.xml | 10 ++--- 26 files changed, 127 insertions(+), 31 deletions(-) diff --git a/content/posts/2021-09.md b/content/posts/2021-09.md index 0f6f93c6f..2334068b9 100644 --- a/content/posts/2021-09.md +++ b/content/posts/2021-09.md @@ -241,4 +241,51 @@ localhost/dspace63= > \COPY (SELECT collection_id,uuid FROM collection WHERE col COPY 1139 ``` +## 2021-09-24 + +- Peter and Abenet agreed that we should consider converting more of our UPPER CASE metadata values to Title Case + - It seems that these fields are all still using UPPER CASE: + - cg.subject.alliancebiovciat + - cg.species.breed + - cg.subject.bioversity + - cg.subject.ccafs + - cg.subject.ciat + - cg.subject.cip + - cg.identifier.iitatheme + - cg.subject.iita + - cg.subject.ilri + - cg.subject.pabra + - cg.river.basin + - cg.coverage.subregion (done) + - dcterms.audience (done) + - cg.subject.wle + - We can do some 
of these without even asking anyone, for example `cg.coverage.subregion`, `cg.river.basin`, and `dcterms.audience` +- First, I will look at `cg.coverage.subregion` + - These should ideally come from ISO 3166-2 subdivisions + - I will title case them and then create a controlled vocabulary from those that are matching (and worry about cleaning the rest up later) + +```console +localhost/dspace63= > UPDATE metadatavalue SET text_value=INITCAP(text_value) WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=231; +UPDATE 2903 +localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.coverage.subregion" FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id = 231) to /tmp/2021-09-24-subregions.txt; +COPY 1200 +``` + +- Then I process the list for matches with my `subdivision-lookup.py` script, and extract only the values that matched: + +```console +$ ./ilri/subdivision-lookup.py -i /tmp/2021-09-24-subregions.txt -o /tmp/subregions.csv +$ csvgrep -c matched -m 'true' /tmp/subregions.csv | csvcut -c 1 | sed 1d > /tmp/subregions-matched.txt +$ wc -l /tmp/subregions-matched.txt +81 /tmp/subregions-matched.txt +``` + +- Then I updated the controlled vocabulary in the submission forms +- I did the same for `dcterms.audience`, taking special care with a few all-caps values: + +```console +localhost/dspace63= > UPDATE metadatavalue SET text_value=INITCAP(text_value) WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=144 AND text_value != 'NGOS' AND text_value != 'CGIAR'; +localhost/dspace63= > UPDATE metadatavalue SET text_value='NGOs' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=144 AND text_value = 'NGOS'; +``` + diff --git a/docs/2021-09/index.html b/docs/2021-09/index.html index d54ea9ed8..08eb32d5e 100644 --- a/docs/2021-09/index.html +++ b/docs/2021-09/index.html @@ -26,7 +26,7 @@ The syntax Moayad showed me last month doesn’t seem to honor the search qu - + @@ 
-58,9 +58,9 @@ The syntax Moayad showed me last month doesn’t seem to honor the search qu "@type": "BlogPosting", "headline": "September, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-09/", - "wordCount": "1775", + "wordCount": "2030", "datePublished": "2021-09-01T09:14:07+03:00", - "dateModified": "2021-09-23T18:19:11+03:00", + "dateModified": "2021-09-23T18:32:47+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -403,6 +403,55 @@ $ csvcut -c 1 /tmp/2021-09-23-affiliations.csv | sed 1d > /tmp/affiliations.t
localhost/dspace63= > \COPY (SELECT collection_id,uuid FROM collection WHERE collection_id IS NOT NULL) TO /tmp/2021-09-23-collection-id2uuid.csv WITH CSV HEADER;
 COPY 1139
+

2021-09-24

+ +
localhost/dspace63= > UPDATE metadatavalue SET text_value=INITCAP(text_value) WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=231;
+UPDATE 2903
+localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.coverage.subregion" FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id = 231) to /tmp/2021-09-24-subregions.txt;
+COPY 1200
+
+
$ ./ilri/subdivision-lookup.py -i /tmp/2021-09-24-subregions.txt -o /tmp/subregions.csv
+$ csvgrep -c matched -m 'true' /tmp/subregions.csv | csvcut -c 1 | sed 1d > /tmp/subregions-matched.txt
+$ wc -l /tmp/subregions-matched.txt 
+81 /tmp/subregions-matched.txt
+
+
localhost/dspace63= > UPDATE metadatavalue SET text_value=INITCAP(text_value) WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=144 AND text_value != 'NGOS' AND text_value != 'CGIAR';
+localhost/dspace63= > UPDATE metadatavalue SET text_value='NGOs' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=144 AND text_value = 'NGOS';
 
diff --git a/docs/categories/index.html b/docs/categories/index.html index f733b4139..0d6dd7eae 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 827a5ea7f..706839f8d 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index b4044db81..5b91ad5b4 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 7a27f1539..2a4376618 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index d06f49b23..4405c6e7d 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 46784451c..1b38833f7 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 6ced66fa2..51481c44d 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index dd217f6c9..1558f0306 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 32aac3441..cb3b8a567 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html 
b/docs/page/3/index.html index 77b79c9c4..151d49fd3 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index d1fde4baf..20f6d0cdc 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index ad2cbe137..663fb7161 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 91867eedc..98fbeda6e 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 0a523be6d..50c84df14 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 562e86095..2fb129b56 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 05b913456..b11b76a03 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index a18ba362f..d0f0f8c2a 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 5f59c2081..ea0ba95fb 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 65f3a9f21..889ed5cf2 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 7708db7d6..b0a59b639 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index b9a11e864..90176b9f6 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 7ddca79a1..cf7974358 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index a3d641c6b..19fe5bbb3 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 96e5c36b9..d1d2d6e37 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2021-09-23T18:19:11+03:00 + 2021-09-23T18:32:47+03:00 https://alanorth.github.io/cgspace-notes/ - 2021-09-23T18:19:11+03:00 + 2021-09-23T18:32:47+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2021-09-23T18:19:11+03:00 + 2021-09-23T18:32:47+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2021-09-23T18:19:11+03:00 + 2021-09-23T18:32:47+03:00 https://alanorth.github.io/cgspace-notes/2021-09/ - 2021-09-23T18:19:11+03:00 + 2021-09-23T18:32:47+03:00 https://alanorth.github.io/cgspace-notes/2021-08/ 2021-09-02T17:06:28+03:00