diff --git a/content/posts/2020-01.md b/content/posts/2020-01.md index 05ee007e2..f29bffe82 100644 --- a/content/posts/2020-01.md +++ b/content/posts/2020-01.md @@ -93,4 +93,23 @@ In [8]: unicodedata.is_normalized('NFC', 'é') Out[8]: True ``` +## 2020-01-15 + +- I added support for Unicode normalization to my [csv-metadata-quality](https://github.com/ilri/csv-metadata-quality) tool in [v0.4.0](https://github.com/ilri/csv-metadata-quality/releases/tag/v0.4.0) +- Generate ILRI and Bioversity subject lists for Elizabeth Arnaud from Bioversity: + +``` +dspace=# \COPY (SELECT DISTINCT text_value as "cg.subject.ilri", count(*) FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 203 GROUP BY text_value ORDER BY count DESC) to /tmp/2020-01-15-ilri-subjects.csv WITH CSV HEADER; +COPY 144 +dspace=# \COPY (SELECT DISTINCT text_value as "cg.subject.bioversity", count(*) FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 120 GROUP BY text_value ORDER BY count DESC) to /tmp/2020-01-15-bioversity-subjects.csv WITH CSV HEADER; +COPY 1325 +``` + +- She will be meeting with FAO and will look over the terms to see if they can add some to AGROVOC +- I noticed a few errors in the ILRI subjects so I fixed them locally and on CGSpace (linode18) using my `fix-metadata-values.py` script: + +``` +$ ./fix-metadata-values.py -i 2020-01-15-fix-8-ilri-subjects.csv -db dspace -u dspace -p 'fuuu' -f cg.subject.ilri -m 203 -t correct -d +``` + diff --git a/docs/2020-01/index.html b/docs/2020-01/index.html index d8b52f38c..38abb0d2f 100644 --- a/docs/2020-01/index.html +++ b/docs/2020-01/index.html @@ -29,7 +29,7 @@ I tweeted the CGSpace repository link - + @@ -63,9 +63,9 @@ I tweeted the CGSpace repository link "@type": "BlogPosting", "headline": "January, 2020", "url": "https:\/\/alanorth.github.io\/cgspace-notes\/2020-01\/", - "wordCount": "617", + "wordCount": "765", "datePublished": "2020-01-06T10:48:30+02:00", - "dateModified": "2020-01-08T15:33:56+02:00", + 
"dateModified": "2020-01-14T20:40:41+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -222,6 +222,20 @@ Out[7]: False In [8]: unicodedata.is_normalized('NFC', 'é') Out[8]: True +

2020-01-15

+ +
dspace=# \COPY (SELECT DISTINCT text_value as "cg.subject.ilri", count(*) FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 203 GROUP BY text_value ORDER BY count DESC) to /tmp/2020-01-15-ilri-subjects.csv WITH CSV HEADER;
+COPY 144
+dspace=# \COPY (SELECT DISTINCT text_value as "cg.subject.bioversity", count(*) FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 120 GROUP BY text_value ORDER BY count DESC) to /tmp/2020-01-15-bioversity-subjects.csv WITH CSV HEADER;
+COPY 1325
+
+
$ ./fix-metadata-values.py -i 2020-01-15-fix-8-ilri-subjects.csv -db dspace -u dspace -p 'fuuu' -f cg.subject.ilri -m 203 -t correct -d
 
diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 59a2dbc97..66194b81b 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-01-08T15:33:56+02:00 + 2020-01-14T20:40:41+02:00 https://alanorth.github.io/cgspace-notes/ - 2020-01-08T15:33:56+02:00 + 2020-01-14T20:40:41+02:00 https://alanorth.github.io/cgspace-notes/2020-01/ - 2020-01-08T15:33:56+02:00 + 2020-01-14T20:40:41+02:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-01-08T15:33:56+02:00 + 2020-01-14T20:40:41+02:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-01-08T15:33:56+02:00 + 2020-01-14T20:40:41+02:00