From 8199de67ad95c117ed070e1a463239daa1d1d044 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Mon, 28 Nov 2022 17:42:46 +0300 Subject: [PATCH] Add notes for 2022-11-28 --- content/posts/2022-11.md | 52 ++++++++++++++++++ docs/2022-11/index.html | 71 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/categories/notes/page/7/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/page/9/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/posts/page/9/index.html | 2 +- docs/sitemap.xml | 10 ++-- 29 files changed, 151 insertions(+), 34 deletions(-) diff --git a/content/posts/2022-11.md b/content/posts/2022-11.md index 725fd7a0e..c18ea9293 100644 --- a/content/posts/2022-11.md +++ b/content/posts/2022-11.md @@ -367,4 +367,56 @@ java.lang.IndexOutOfBoundsException: 1-based index out of bounds: 2 - I synced DSpace 7 Test with CGSpace - I had to follow my notes from 2022-03 to delete the missing Atmire migrations +## 2022-11-28 + +- Update `ilri/fix-metadata-values.py` to update the `last_modified` date for items when it updates metadata + - This should allow us to use the normal `index-discovery` (without `-b`) as well as having REST API responses showing a correct last modified date +- Maria asked me to add some ORCID identifiers for Alliance 
staff to the controlled vocabulary + - I also updated the `add-orcid-identifiers-csv.py` to update the `last_modified` timestamp of the item +- I re-factored my CGSpace Python scripts to use a helper `util.py` module with common functions + - For now it only has the one for updating an item's `last_modified` timestamp but I will gradually add more +- I also ran our list of ORCID identifiers against ORCID's API to see if anyone changed their name format + - Then I ran them on CGSpace with `ilri/update-orcids.py` to fix them +- Normalize the `text_lang` values for CGSpace metadata again: + +```console +localhost/dspacetest= ☘ SELECT DISTINCT text_lang, count(text_lang) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) GROUP BY text_lang ORDER BY count DESC; + text_lang │ count +───────────┼───────── + en_US │ 2912429 + │ 108387 + en │ 12457 + fr │ 2 + vi │ 2 + es │ 1 + ␀ │ 0 +(7 rows) + +Time: 624.651 ms +localhost/dspacetest= ☘ BEGIN; +BEGIN +Time: 0.130 ms +localhost/dspacetest= ☘ UPDATE metadatavalue SET text_lang='en_US' WHERE dspace_object_id IN (SELECT uuid FROM item) AND text_lang IN ('en', ''); +UPDATE 120844 +Time: 4074.879 ms (00:04.075) +localhost/dspacetest= ☘ SELECT DISTINCT text_lang, count(text_lang) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) GROUP BY text_lang ORDER BY count DESC; + text_lang │ count +───────────┼───────── + en_US │ 3033273 + fr │ 2 + vi │ 2 + es │ 1 + ␀ │ 0 +(5 rows) + +Time: 346.913 ms +localhost/dspacetest= ☘ COMMIT; +``` + +- Discussing the UN M.49 regions on CGSpace with Valentina and Abenet + - The PRMS team is confused about our regions, which are mostly UN M.49 with some legacy stuff using different ones + - I think we can fix all the stuff for Initiatives from this year very easily, then work on the legacy stuff later + - Also, I noticed that [country_converter was using the wrong UN M.49 region for Myanmar](https://github.com/konstantinstadler/country_converter/issues/124) 
- I submitted a [pull request](https://github.com/konstantinstadler/country_converter/pull/125) + diff --git a/docs/2022-11/index.html b/docs/2022-11/index.html index 1daee6137..7ef1be585 100644 --- a/docs/2022-11/index.html +++ b/docs/2022-11/index.html @@ -24,7 +24,7 @@ I reverted the Cocoon autosave change because it was more of a nuissance that Pe - + @@ -54,9 +54,9 @@ I reverted the Cocoon autosave change because it was more of a nuissance that Pe "@type": "BlogPosting", "headline": "November, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-11/", - "wordCount": "2297", + "wordCount": "2640", "datePublished": "2022-11-01T09:11:36+03:00", - "dateModified": "2022-11-27T12:38:48+03:00", + "dateModified": "2022-11-27T13:52:43+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -544,6 +544,71 @@ I reverted the Cocoon autosave change because it was more of a nuissance that Pe +

2022-11-28

+ +
localhost/dspacetest= ☘ SELECT DISTINCT text_lang, count(text_lang) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) GROUP BY text_lang ORDER BY count DESC;
+ text_lang │  count
+───────────┼─────────
+ en_US     │ 2912429
+           │  108387
+ en        │   12457
+ fr        │       2
+ vi        │       2
+ es        │       1
+ ␀         │       0
+(7 rows)
+
+Time: 624.651 ms
+localhost/dspacetest= ☘ BEGIN;
+BEGIN
+Time: 0.130 ms
+localhost/dspacetest= ☘ UPDATE metadatavalue SET text_lang='en_US' WHERE dspace_object_id IN (SELECT uuid FROM item) AND text_lang IN ('en', '');
+UPDATE 120844
+Time: 4074.879 ms (00:04.075)
+localhost/dspacetest= ☘ SELECT DISTINCT text_lang, count(text_lang) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) GROUP BY text_lang ORDER BY count DESC;
+ text_lang │  count  
+───────────┼─────────
+ en_US     │ 3033273
+ fr        │       2
+ vi        │       2
+ es        │       1
+ ␀         │       0
+(5 rows)
+
+Time: 346.913 ms
+localhost/dspacetest= ☘ COMMIT;
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index 0a2fa15e6..ba76e8d3e 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index e0c84c903..2ff5b7864 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 5446b7cc7..363df615f 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 80c42d9ad..8af035917 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index e276070b6..6b354a8e9 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 8d35003e0..a8253e4d9 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 42948102b..46e9df976 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index cf58b03f4..9c571812b 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index a2593a4de..203abcab0 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 
+10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 8ca112017..445a2819c 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index f01327100..8f46c7f30 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 09332e337..7276f95f1 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index d249403f2..ba8873b28 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index f051b0bdf..35d36c6dd 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 6c640329e..45f189e99 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index afc80d2b6..c214d37c3 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index f8769a95e..ebfa8c75c 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index fc6281f15..db750095a 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 3e5d4b8c5..a138e3a3d 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 88bb6da78..8404fbb6d 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 8c492c85f..d01b62030 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 1f79f3424..637d99549 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 4db1912e4..ef42c95a7 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index cee796510..070cf0b72 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index a146b1356..0d00aabc4 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 8ff31afa0..52a91a7c3 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0ad56dc5f..3b484d3bb 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-11-27T12:38:48+03:00 + 2022-11-27T13:52:43+03:00 https://alanorth.github.io/cgspace-notes/ - 2022-11-27T12:38:48+03:00 + 2022-11-27T13:52:43+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-11-27T12:38:48+03:00 + 2022-11-27T13:52:43+03:00 https://alanorth.github.io/cgspace-notes/2022-11/ - 2022-11-27T12:38:48+03:00 + 2022-11-27T13:52:43+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-11-27T12:38:48+03:00 + 2022-11-27T13:52:43+03:00 https://alanorth.github.io/cgspace-notes/2022-10/ 2022-10-31T16:59:47+03:00