diff --git a/content/posts/2021-09.md b/content/posts/2021-09.md index 511f00e5c..38872c366 100644 --- a/content/posts/2021-09.md +++ b/content/posts/2021-09.md @@ -211,4 +211,28 @@ localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.contributor.affil COPY 8091 ``` +## 2021-09-23 + +- Peter sent me back the corrections for the affiliations + - It is about 1,280 corrections and fourteen deletions + - I cleaned them up in csv-metadata-quality and then extracted the deletes and fixes to separate files to run with `fix-metadata-values.py` and `delete-metadata-values.py`: + +```console +$ csv-metadata-quality -i ~/Downloads/2021-09-20-affiliations.csv -o /tmp/affiliations.csv -x cg.contributor.affiliation +$ csvgrep -c 'correct' -m 'DELETE' /tmp/affiliations.csv > /tmp/affiliations-delete.csv +$ csvgrep -c 'correct' -r '^.+$' /tmp/affiliations.csv | csvgrep -i -c 'correct' -m 'DELETE' > /tmp/affiliations-fix.csv +$ ./ilri/fix-metadata-values.py -i /tmp/affiliations-fix.csv -db dspace -u dspace -p 'fuuu' -f cg.contributor.affiliation -t 'correct' -m 211 +$ ./ilri/delete-metadata-values.py -i /tmp/affiliations-delete.csv -db dspace -u dspace -p 'fuuu' -f cg.contributor.affiliation -m 211 +``` + +- Then I updated the controlled vocabulary for affiliations by exporting the top 1,000 used terms: + +```console +localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.contributor.affiliation", count(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id = 211 GROUP BY text_value ORDER BY count DESC LIMIT 1000) to /tmp/2021-09-23-affiliations.csv WITH CSV HEADER; +$ csvcut -c 1 /tmp/2021-09-23-affiliations.csv | sed 1d > /tmp/affiliations.txt +``` + +- Peter also sent me 310 corrections and 234 deletions for donors so I applied those and updated the controlled vocabularies too +- Moved some One CGIAR-related collections around the CGSpace hierarchy for Peter Ballantyne + diff --git a/docs/2021-09/index.html 
b/docs/2021-09/index.html index b70321420..942e2e9e7 100644 --- a/docs/2021-09/index.html +++ b/docs/2021-09/index.html @@ -58,7 +58,7 @@ The syntax Moayad showed me last month doesn’t seem to honor the search qu "@type": "BlogPosting", "headline": "September, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-09/", - "wordCount": "1532", + "wordCount": "1729", "datePublished": "2021-09-01T09:14:07+03:00", "dateModified": "2021-09-20T17:31:45+03:00", "author": { @@ -377,7 +377,30 @@ localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.contribut COPY 1274 localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.contributor.affiliation", count(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id = 211 GROUP BY text_value ORDER BY count DESC) to /tmp/2021-09-20-affiliations.csv WITH CSV HEADER; COPY 8091 - +

2021-09-23

+ +
$ csv-metadata-quality -i ~/Downloads/2021-09-20-affiliations.csv -o /tmp/affiliations.csv -x cg.contributor.affiliation
+$ csvgrep -c 'correct' -m 'DELETE' /tmp/affiliations.csv > /tmp/affiliations-delete.csv
+$ csvgrep -c 'correct' -r '^.+$' /tmp/affiliations.csv | csvgrep -i -c 'correct' -m 'DELETE' > /tmp/affiliations-fix.csv
+$ ./ilri/fix-metadata-values.py -i /tmp/affiliations-fix.csv -db dspace -u dspace -p 'fuuu' -f cg.contributor.affiliation -t 'correct' -m 211
+$ ./ilri/delete-metadata-values.py -i /tmp/affiliations-delete.csv -db dspace -u dspace -p 'fuuu' -f cg.contributor.affiliation -m 211
+
+
localhost/dspace63= > \COPY (SELECT DISTINCT text_value as "cg.contributor.affiliation", count(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id = 211 GROUP BY text_value ORDER BY count DESC LIMIT 1000) to /tmp/2021-09-23-affiliations.csv WITH CSV HEADER;
+$ csvcut -c 1 /tmp/2021-09-23-affiliations.csv | sed 1d > /tmp/affiliations.txt
+
+