From 300b2e4271464869d3b628bfb1cb3e163392e85c Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 24 Jan 2024 08:24:50 +0300 Subject: [PATCH] Notes for 2024-01-23 --- content/posts/2024-01.md | 45 ++++++++++++++++++ docs/2024-01/index.html | 62 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/categories/notes/page/7/index.html | 2 +- docs/categories/notes/page/8/index.html | 2 +- docs/index.html | 2 +- docs/page/10/index.html | 2 +- docs/page/11/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/page/9/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/10/index.html | 2 +- docs/posts/page/11/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/posts/page/9/index.html | 2 +- docs/sitemap.xml | 10 ++-- 34 files changed, 140 insertions(+), 39 deletions(-) diff --git a/content/posts/2024-01.md b/content/posts/2024-01.md index b14d969b8..066c7c454 100644 --- a/content/posts/2024-01.md +++ b/content/posts/2024-01.md @@ -372,4 +372,49 @@ tee /tmp/ips.txt | wc -l - 396982 Google Cloud - The load on the server *immediately* dropped +## 2024-01-17 + +- It turns out AS701 (UUNET) is Verizon Business, which is used as an ISP for many staff at IFPRI + - This was causing them to see HTTP 429 "too many requests" errors on CGSpace + - I removed this ASN from the rate limiting + +## 
2024-01-18 + +- Start looking at Solr stats again + - I found one statistics record that has 22,000 of the same collection in `owningColl` and 22,000 of the same community in `owningComm` + - The record is from 2015 and I think it would be easier to delete it than fix it: + +```console +$ curl http://localhost:8983/solr/statistics/update -H "Content-type: text/xml" --data-binary '<delete><query>uid:3b4eefba-a302-4172-a286-dcb25d70129e</query></delete>' +``` + +- Looking again, there are at least 1,000 of these so I will need to come up with an actual solution to fix these +- I'm noticing we have 1,800+ links to defunct resources on bioversityinternational.org in the `cg.link.permalink` field + - I should ask Alliance if they have any plans to fix those, or upload them to CGSpace + +## 2024-01-22 + +- Meeting with IWMI about ORCID integration on CGSpace now that we've migrated to DSpace 7 +- File an issue for the inaccurate DSpace statistics: https://github.com/DSpace/DSpace/issues/9275 + +## 2024-01-23 + +- Meeting with IWMI about ORCID integration and the DSpace API for use with WordPress +- IFPRI sent me a list of their author ORCIDs to add to our controlled vocabulary + - I joined them with our current list and resolved their names on ORCID and updated them in our database: + +```console +$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml ~/Downloads/IFPRI\ ORCiD\ All.csv | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u > /tmp/2024-01-23-orcids.txt +$ ./ilri/resolve_orcids.py -i /tmp/2024-01-23-orcids.txt -o /tmp/2024-01-23-orcids-names.txt -d +$ ./ilri/update_orcids.py -i /tmp/2024-01-23-orcids-names.txt -db dspace -u dspace -p fuuu +``` + +- This adds about 400 new identifiers to the controlled vocabulary +- I consolidated our various project identifier fields for closed programs into one `cg.identifier.project`: + - `cg.identifier.ccafsproject` + - `cg.identifier.ccafsprojectpii` + - `cg.identifier.ciatproject` + - 
`cg.identifier.cpwfproject` +- I prefixed the existing 2,644 metadata values with "CCAFS", "CIAT", or "CPWF" so we can figure out where they came from if need be, and deleted the old fields from the metadata registry + diff --git a/docs/2024-01/index.html b/docs/2024-01/index.html index fa640728b..3a760a9c7 100644 --- a/docs/2024-01/index.html +++ b/docs/2024-01/index.html @@ -22,7 +22,7 @@ Work on IFPRI ISNAR archive cleanup - + @@ -50,9 +50,9 @@ Work on IFPRI ISNAR archive cleanup "@type": "BlogPosting", "headline": "January, 2024", "url": "https://alanorth.github.io/cgspace-notes/2024-01/", - "wordCount": "1847", + "wordCount": "2164", "datePublished": "2024-01-02T10:08:00+03:00", - "dateModified": "2024-01-10T17:21:12+03:00", + "dateModified": "2024-01-18T15:59:49+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -511,6 +511,62 @@ Work on IFPRI ISNAR archive cleanup +

2024-01-17

+ +

2024-01-18

+ +
$ curl http://localhost:8983/solr/statistics/update -H "Content-type: text/xml" --data-binary '<delete><query>uid:3b4eefba-a302-4172-a286-dcb25d70129e</query></delete>'
+
+

2024-01-22

+ +

2024-01-23

+ +
$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml ~/Downloads/IFPRI\ ORCiD\ All.csv | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u > /tmp/2024-01-23-orcids.txt
+$ ./ilri/resolve_orcids.py -i /tmp/2024-01-23-orcids.txt -o /tmp/2024-01-23-orcids-names.txt -d
+$ ./ilri/update_orcids.py -i /tmp/2024-01-23-orcids-names.txt -db dspace -u dspace -p fuuu
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index c19241b7c..19ceabeac 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index d5ae56049..741c450eb 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 18e01a1e3..c5e9dc49a 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 9f8462c1f..a793ff647 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index f9c84ba75..f57d1de3e 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 033add2b7..ab3d67a31 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 8177fb6b4..2b72507ce 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index b18d48ebc..cc104e8a2 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/8/index.html b/docs/categories/notes/page/8/index.html index 76955a0e6..ce5275b11 100644 --- 
a/docs/categories/notes/page/8/index.html +++ b/docs/categories/notes/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 8fd286292..77a8d2b64 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/10/index.html b/docs/page/10/index.html index 203825c2d..ec29dac79 100644 --- a/docs/page/10/index.html +++ b/docs/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/11/index.html b/docs/page/11/index.html index c2c5065b8..7c6e994cc 100644 --- a/docs/page/11/index.html +++ b/docs/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 78afc6967..0421b2c50 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 823821979..70a0883d3 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 010a4dfc7..a3aa3c2b0 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index a2440fbad..11de95656 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index d406d2f87..778e17c0b 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index fdb64d575..b4eb05c0e 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 63b8dc48f..bcf41b18f 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index b23f337ee..e1400611b 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ 
-10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 57591ba3f..982eb74c8 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html index 7132ecef7..cb75c2029 100644 --- a/docs/posts/page/10/index.html +++ b/docs/posts/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/11/index.html b/docs/posts/page/11/index.html index 8842f60a9..393c4b4cf 100644 --- a/docs/posts/page/11/index.html +++ b/docs/posts/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 6dcdd1465..360e7e3f8 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index cc7952675..a528972f2 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 90eaafd18..4a0e10c96 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index c7a92ffc2..fec037daa 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 94e2ef98c..5e2e0fa01 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index da86ac8d8..4af30dce9 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index 9fed04a6f..feaf9107a 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 63d8e1be9..433d1edfc 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 200175e9f..a7b267025 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2024-01-10T17:21:12+03:00 + 2024-01-18T15:59:49+03:00 https://alanorth.github.io/cgspace-notes/ - 2024-01-10T17:21:12+03:00 + 2024-01-18T15:59:49+03:00 https://alanorth.github.io/cgspace-notes/2024-01/ - 2024-01-10T17:21:12+03:00 + 2024-01-18T15:59:49+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2024-01-10T17:21:12+03:00 + 2024-01-18T15:59:49+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2024-01-10T17:21:12+03:00 + 2024-01-18T15:59:49+03:00 https://alanorth.github.io/cgspace-notes/2023-12/ 2023-12-29T12:08:57+03:00