From 49d08e2db914df135b2d1fadd752b013b5bbd444 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 15 Jul 2020 15:42:23 +0300 Subject: [PATCH] Add notes for 2020-07-15 --- content/posts/2020-07.md | 65 ++++++++++++++++++++++ docs/2020-07/index.html | 73 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/sitemap.xml | 10 ++-- 20 files changed, 156 insertions(+), 26 deletions(-) diff --git a/content/posts/2020-07.md b/content/posts/2020-07.md index fb21e6511..04a18c2b2 100644 --- a/content/posts/2020-07.md +++ b/content/posts/2020-07.md @@ -475,4 +475,69 @@ $ psql -d dspace -U dspace -c 'update bundle set primary_bitstream_id=NULL where UPDATE 1 ``` +- Udana from WLE asked me about some items that didn't show Altmetric donuts + - I checked his list and at least three of them actually *did* show donuts, and for four others I tweeted them manually to see if they would get a donut in a few hours: + - https://hdl.handle.net/10568/108477 + - https://hdl.handle.net/10568/108475 + - https://hdl.handle.net/10568/108361 + - https://hdl.handle.net/10568/108360 + +## 2020-07-15 + +- All four IWMI items that I tweeted yesterday have Altmetric donuts with a score of 1 now... 
+- Export CGSpace countries to check them against ISO 3166-1 and ISO 3166-3 (historic countries): + +``` +dspace=# \COPY (SELECT DISTINCT text_value FROM metadatavalue WHERE resource_type_id=2 AND metadata_field_id=228) TO /tmp/2020-07-15-countries.csv; +COPY 194 +``` + +- I wrote a script `iso3166-1-lookup.py` to check them: + +``` +$ ./iso3166-1-lookup.py -i /tmp/2020-07-15-countries.csv -o /tmp/2020-07-15-countries-resolved.csv +$ csvgrep -c matched -m false /tmp/2020-07-15-countries-resolved.csv +country,match type,matched +CAPE VERDE,,false +"KOREA, REPUBLIC",,false +PALESTINE,,false +"CONGO, DR",,false +COTE D'IVOIRE,,false +RUSSIA,,false +SYRIA,,false +"KOREA, DPR",,false +SWAZILAND,,false +MICRONESIA,,false +TIBET,,false +ZAIRE,,false +COCOS ISLANDS,,false +LAOS,,false +IRAN,,false +``` + +- Check the database for DOIs that are not in the preferred "https://doi.org/" format: + +``` +dspace=# \COPY (SELECT text_value as "cg.identifier.doi" FROM metadatavalue WHERE resource_type_id=2 AND metadata_field_id=220 AND text_value NOT LIKE 'https://doi.org/%') TO /tmp/2020-07-15-doi.csv WITH CSV HEADER; +COPY 186 +``` + +- Then I imported them into OpenRefine and replaced them in a new "correct" column using this GREL transform: + +``` +value.replace("dx.doi.org", "doi.org").replace("http://", "https://").replace("https://dx,doi,org", "https://doi.org").replace("https://doi.dx.org", "https://doi.org").replace("https://dx.doi:", "https://doi.org").replace("DOI: ", "https://doi.org/").replace("doi: ", "https://doi.org/").replace("http:/​/​dx.​doi.​org", "https://doi.org").replace("https://dx. doi.org. 
", "https://doi.org").replace("https://dx.doi", "https://doi.org").replace("https://dx.doi:", "https://doi.org/").replace("hdl.handle.net", "doi.org") +``` + +- Then I fixed the DOIs on CGSpace: + +``` +$ ./fix-metadata-values.py -i /tmp/2020-07-15-fix-164-DOIs.csv -db dspace -u dspace -p 'fuuu' -f cg.identifier.doi -t 'correct' -m 220 +``` + +- I filed [an issue on Debian's iso-codes](https://salsa.debian.org/iso-codes-team/iso-codes/-/issues/10) project to ask why "Swaziland" does not appear in the ISO 3166-3 list of historical country names despite it being changed to "Eswatini" in 2018. +- Atmire responded about the Solr issue + - They said that it seems like a DSpace issue so it's not their responsibility, and nobody responded to my question on the dspace-tech mailing list... + - I said I would try to do a migration on DSpace Test with more of CGSpace's Solr data to try and approximate how much of our data would be affected + - I also asked them about the Tomcat 8.5 issue with CUA as well as the CUA group name issue that I had originally asked about in April + diff --git a/docs/2020-07/index.html b/docs/2020-07/index.html index 18aab2b10..ed84a5696 100644 --- a/docs/2020-07/index.html +++ b/docs/2020-07/index.html @@ -20,7 +20,7 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f - + @@ -45,9 +45,9 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f "@type": "BlogPosting", "headline": "July, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-07/", - "wordCount": "2991", + "wordCount": "3347", "datePublished": "2020-07-01T10:53:54+03:00", - "dateModified": "2020-07-13T12:31:34+03:00", + "dateModified": "2020-07-14T10:57:49+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -594,7 +594,72 @@ $ tidy -xml -utf8 -m -iq -w 0 dspace/config/controlled-vocabularies/dc-descripti
$ psql -d dspace -U dspace -c 'update bundle set primary_bitstream_id=NULL where primary_bitstream_id in (189618, 188837);'
 UPDATE 1
-
+ +

2020-07-15

+ +
dspace=# \COPY (SELECT DISTINCT text_value FROM metadatavalue WHERE resource_type_id=2 AND metadata_field_id=228) TO /tmp/2020-07-15-countries.csv;
+COPY 194
+
+
$ ./iso3166-1-lookup.py -i /tmp/2020-07-15-countries.csv -o /tmp/2020-07-15-countries-resolved.csv
+$ csvgrep -c matched -m false /tmp/2020-07-15-countries-resolved.csv       
+country,match type,matched
+CAPE VERDE,,false
+"KOREA, REPUBLIC",,false
+PALESTINE,,false
+"CONGO, DR",,false
+COTE D'IVOIRE,,false
+RUSSIA,,false
+SYRIA,,false
+"KOREA, DPR",,false
+SWAZILAND,,false
+MICRONESIA,,false
+TIBET,,false
+ZAIRE,,false
+COCOS ISLANDS,,false
+LAOS,,false
+IRAN,,false
+
+
dspace=# \COPY (SELECT text_value as "cg.identifier.doi" FROM metadatavalue WHERE resource_type_id=2 AND metadata_field_id=220 AND text_value NOT LIKE 'https://doi.org/%') TO /tmp/2020-07-15-doi.csv WITH CSV HEADER;
+COPY 186
+
+
value.replace("dx.doi.org", "doi.org").replace("http://", "https://").replace("https://dx,doi,org", "https://doi.org").replace("https://doi.dx.org", "https://doi.org").replace("https://dx.doi:", "https://doi.org").replace("DOI: ", "https://doi.org/").replace("doi: ", "https://doi.org/").replace("http:/​/​dx.​doi.​org", "https://doi.org").replace("https://dx. doi.org. ", "https://doi.org").replace("https://dx.doi", "https://doi.org").replace("https://dx.doi:", "https://doi.org/").replace("hdl.handle.net", "doi.org")
+
+
$ ./fix-metadata-values.py -i /tmp/2020-07-15-fix-164-DOIs.csv -db dspace -u dspace -p 'fuuu' -f cg.identifier.doi -t 'correct' -m 220
+
+ diff --git a/docs/categories/index.html b/docs/categories/index.html index 9cb03b2ec..50a0a6a59 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 2d9e681bb..f3c14617b 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 1557cbaa4..ae931dd8e 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 977a06c46..8ae151cdc 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 5a472d1c3..b510b6aff 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index fd3b205e7..f63c5e5db 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index b0fbf151e..d049dc5c7 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index dc08f4b4a..8539c6d5a 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index dc48d261d..ca9de4b14 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index b8900c34d..988e6be24 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/6/index.html 
b/docs/page/6/index.html index f9acdfdf6..a012b98ee 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index bbc3dc5da..3bbb9d666 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 016e9b6f2..97da8ffe3 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index e2e0d0cd8..38eae8e17 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 674254cd7..5bb5106e1 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 65999825e..aa3cb73ba 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index c128ab0ad..53867d52a 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a75dc4a4a..d0bf6b929 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-07-13T12:31:34+03:00 + 2020-07-14T10:57:49+03:00 https://alanorth.github.io/cgspace-notes/ - 2020-07-13T12:31:34+03:00 + 2020-07-14T10:57:49+03:00 https://alanorth.github.io/cgspace-notes/2020-07/ - 2020-07-13T12:31:34+03:00 + 2020-07-14T10:57:49+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-07-13T12:31:34+03:00 + 2020-07-14T10:57:49+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-07-13T12:31:34+03:00 + 2020-07-14T10:57:49+03:00