From 04359a12cc61fcf6964e71244dc1ed576ed42955 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 14 Feb 2018 16:45:03 +0200 Subject: [PATCH] Update notes for 2018-02-14 --- content/post/2018-02.md | 38 +++++++++++++++++++++++++++++++- docs/2018-02/index.html | 48 +++++++++++++++++++++++++++++++++++++---- docs/sitemap.xml | 10 ++++----- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/content/post/2018-02.md b/content/post/2018-02.md index 3730f3324..7e1057a11 100644 --- a/content/post/2018-02.md +++ b/content/post/2018-02.md @@ -486,4 +486,40 @@ $ tidy -xml -iq -m -w 0 dspace/config/controlled-vocabularies/cg-creator-id.xml $ tidy -xml -utf8 -iq -m -w 0 dspace/config/controlled-vocabularies/cg-creator-id.xml ``` -- Then it preserves them and submitting them is fine +- This preserves special accent characters +- I tested the display and store of these in the XMLUI and PostgreSQL and it looks good +- Sisay exported all ILRI, CIAT, etc authors from ORCID and sent a list of 600+ +- Peter combined it with mine and we have 1204 unique ORCIDs! + +``` +$ grep -coE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' CGcenter_ORCID_ID_combined.csv +1204 +$ grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' CGcenter_ORCID_ID_combined.csv | sort | uniq | wc -l +1204 +``` + +- Also, save that regex for the future because it will be very useful! 
+- CIAT sent a list of their authors' ORCIDs and combined with ours there are now 1227: + +``` +$ cat CGcenter_ORCID_ID_combined.csv ciat-orcids.txt | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort | uniq | wc -l +1227 +``` + +- There are some formatting issues with names in Peter's list, so I should remember to re-generate the list of names from ORCID's API once we're done +- The `dspace cleanup -v` currently fails on CGSpace with the following: + +``` + - Deleting bitstream record from database (ID: 149473) +Error: ERROR: update or delete on table "bitstream" violates foreign key constraint "bundle_primary_bitstream_id_fkey" on table "bundle" + Detail: Key (bitstream_id)=(149473) is still referenced from table "bundle". +``` + +- The solution is to update the bundle table, as I've discovered several other times in 2016 and 2017: + +``` +$ psql dspace -c 'update bundle set primary_bitstream_id=NULL where primary_bitstream_id in (149473);' +UPDATE 1 +``` + +- Then the cleanup process will continue for a while and hit another foreign key conflict, and eventually it will complete after you manually resolve them all diff --git a/docs/2018-02/index.html b/docs/2018-02/index.html index cf91c9462..4f6b95b29 100644 --- a/docs/2018-02/index.html +++ b/docs/2018-02/index.html @@ -23,7 +23,7 @@ I copied the logic in the jmx_tomcat_dbpools provided by Ubuntu’s munin-pl - + @@ -57,9 +57,9 @@ I copied the logic in the jmx_tomcat_dbpools provided by Ubuntu’s munin-pl "@type": "BlogPosting", "headline": "February, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-02/", - "wordCount": "3297", + "wordCount": "3527", "datePublished": "2018-02-01T16:28:54+02:00", - "dateModified": "2018-02-13T17:50:12+02:00", + "dateModified": "2018-02-14T13:56:18+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -667,7 +667,47 @@ $ tidy -xml -iq -m -w 0 dspace/config/controlled-vocabularies/cg-creator-id.xml + +
$ grep -coE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' CGcenter_ORCID_ID_combined.csv
+1204
+$ grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' CGcenter_ORCID_ID_combined.csv | sort | uniq | wc -l
+1204
+
+ + + +
$ cat CGcenter_ORCID_ID_combined.csv ciat-orcids.txt | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort | uniq | wc -l
+1227
+
+ + + +
 - Deleting bitstream record from database (ID: 149473)
+Error: ERROR: update or delete on table "bitstream" violates foreign key constraint "bundle_primary_bitstream_id_fkey" on table "bundle"
+  Detail: Key (bitstream_id)=(149473) is still referenced from table "bundle".
+
+ + + +
$ psql dspace -c 'update bundle set primary_bitstream_id=NULL where primary_bitstream_id in (149473);'
+UPDATE 1
+
+ + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index bd7f67d24..dd0f09e83 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2018-02/ - 2018-02-13T17:50:12+02:00 + 2018-02-14T13:56:18+02:00 @@ -149,7 +149,7 @@ https://alanorth.github.io/cgspace-notes/ - 2018-02-13T17:50:12+02:00 + 2018-02-14T13:56:18+02:00 0 @@ -160,7 +160,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2018-02-13T17:50:12+02:00 + 2018-02-14T13:56:18+02:00 0 @@ -172,13 +172,13 @@ https://alanorth.github.io/cgspace-notes/post/ - 2018-02-13T17:50:12+02:00 + 2018-02-14T13:56:18+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2018-02-13T17:50:12+02:00 + 2018-02-14T13:56:18+02:00 0