diff --git a/content/posts/2022-06.md b/content/posts/2022-06.md
index 6df39a9e6..bf209134d 100644
--- a/content/posts/2022-06.md
+++ b/content/posts/2022-06.md
@@ -169,7 +169,7 @@ $ csvjoin --outer -c alpha2 ~/Downloads/clarisa-countries.csv ~/Downloads/UNSD\
- Then re-export the UN M.49 countries to a clean list because the one I did yesterday somehow has errors:
```console
-csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv
+$ csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv
```
- Check the number of lines in each file:
diff --git a/content/posts/2024-03.md b/content/posts/2024-03.md
index 52bfbdb82..d85c0eb50 100644
--- a/content/posts/2024-03.md
+++ b/content/posts/2024-03.md
@@ -113,4 +113,48 @@ $ csvcut -c 'id,dc.title[en_US],dc.identifier.uri[en_US],cg.link.permalink[en_US
SELECT ds6_item2itemhandle(dspace_object_id) AS handle FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item WHERE NOT discoverable) AND metadata_field_id=28 AND text_value LIKE 'Submitted by Alliance TIP Submit%';
```
+## 2024-03-14
+
+- Looking into reports of rate limiting of Altmetric's bot on CGSpace
+ - I don't see any HTTP 429 responses for their user agents in any of our logs...
+ - I tried myself on an item page and never hit a limit...
+
+```console
+$ for num in {1..60}; do echo -n "Request ${num}: "; curl -s -o /dev/null -w "%{http_code}" https://dspace7test.ilri.org/items/c9b8999d-3001-42ba-a267-14f4bfa90b53 && echo; done
+Request 1: 200
+Request 2: 200
+Request 3: 200
+Request 4: 200
+...
+Request 60: 200
+```
+
+- All responses were HTTP 200...
+- In any case, I whitelisted their production IPs and told them to try again
+- I imported into CGSpace 468 of IFPRI's 2023 records that were confirmed not to be duplicates
+ - I also spent some time merging metadata from 415 of the remaining 432 duplicates with the metadata for the existing items on CGSpace
+ - This was a bit of dirty work using csvkit, xsv, and OpenRefine
+
+## 2024-03-17
+
+- There are 17 records from IFPRI's 2023 batch that are remaining from the 432 that I identified as already being on CGSpace
+ - These are different in that they are duplicates on CGSpace as well, so the csvjoin failed and the metadata got messed up in my migration
+ - I looked closer and whittled this down to 14 actual records, and spent some time working on them
+ - I isolated 12 of these items that existed on CGSpace and added publication ranks, project identifiers, and provenance links
+ - Now there only remain two confusing records about the Inkomati catchment
+
+## 2024-03-18
+
+- Checking to see how many IFPRI records we have migrated so far:
+
+```console
+$ csvgrep -c 'dc.description.provenance[en_US]' -m 'Original URL from IFPRI CONTENTdm' cgspace.csv \
+ | csvcut -c 'id,dc.title[en_US],dc.identifier.uri[en_US],dc.description.provenance[en_US],dcterms.type[en_US]' \
+ | tee /tmp/ifpri-records.csv \
+ | csvstat --count
+898
+```
+
+- I finalized the remaining two on the Inkomati catchment and now we are at 900!
+
diff --git a/docs/2022-06/index.html b/docs/2022-06/index.html
index 309ce988a..89b96b7c3 100644
--- a/docs/2022-06/index.html
+++ b/docs/2022-06/index.html
@@ -58,7 +58,7 @@ There seem to be many more of these:
"@type": "BlogPosting",
"headline": "June, 2022",
"url": "https://alanorth.github.io/cgspace-notes/2022-06/",
- "wordCount": "1788",
+ "wordCount": "1789",
"datePublished": "2022-06-06T09:01:36+03:00",
"dateModified": "2023-04-27T13:10:13-07:00",
"author": {
@@ -321,7 +321,7 @@ There seem to be many more of these:
Spent some more time working on my countries-to-csv.py
script to fix some logic errors
Then re-export the UN M.49 countries to a clean list because the one I did yesterday somehow has errors:
-csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv
+$ csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv
- Check the number of lines in each file:
diff --git a/docs/2024-03/index.html b/docs/2024-03/index.html
index 4a1359116..6892bbd9f 100644
--- a/docs/2024-03/index.html
+++ b/docs/2024-03/index.html
@@ -19,7 +19,7 @@ It might be this issue: https://github.com/DSpace/dspace-angular/issues/2808
-
+
@@ -44,9 +44,9 @@ It might be this issue: https://github.com/DSpace/dspace-angular/issues/2808
"@type": "BlogPosting",
"headline": "March, 2024",
"url": "https://alanorth.github.io/cgspace-notes/2024-03/",
- "wordCount": "627",
+ "wordCount": "923",
"datePublished": "2024-03-01T09:55:00+03:00",
- "dateModified": "2024-03-11T21:58:15+03:00",
+ "dateModified": "2024-03-14T09:29:05+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -236,7 +236,56 @@ It might be this issue: https://github.com/DSpace/dspace-angular/issues/2808
SELECT ds6_item2itemhandle(dspace_object_id) AS handle FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item WHERE NOT discoverable) AND metadata_field_id=28 AND text_value LIKE 'Submitted by Alliance TIP Submit%';
-
+
2024-03-14
+
+- Looking into reports of rate limiting of Altmetric’s bot on CGSpace
+
+- I don’t see any HTTP 429 responses for their user agents in any of our logs…
+- I tried myself on an item page and never hit a limit…
+
+
+
+$ for num in {1..60}; do echo -n "Request ${num}: "; curl -s -o /dev/null -w "%{http_code}" https://dspace7test.ilri.org/items/c9b8999d-3001-42ba-a267-14f4bfa90b53 && echo; done
+Request 1: 200
+Request 2: 200
+Request 3: 200
+Request 4: 200
+...
+Request 60: 200
+
+- All responses were HTTP 200…
+- In any case, I whitelisted their production IPs and told them to try again
+- I imported into CGSpace 468 of IFPRI’s 2023 records that were confirmed not to be duplicates
+
+- I also spent some time merging metadata from 415 of the remaining 432 duplicates with the metadata for the existing items on CGSpace
+- This was a bit of dirty work using csvkit, xsv, and OpenRefine
+
+
+
+2024-03-17
+
+- There are 17 records from IFPRI’s 2023 batch that are remaining from the 432 that I identified as already being on CGSpace
+
+- These are different in that they are duplicates on CGSpace as well, so the csvjoin failed and the metadata got messed up in my migration
+- I looked closer and whittled this down to 14 actual records, and spent some time working on them
+- I isolated 12 of these items that existed on CGSpace and added publication ranks, project identifiers, and provenance links
+- Now there only remain two confusing records about the Inkomati catchment
+
+
+
+2024-03-18
+
+- Checking to see how many IFPRI records we have migrated so far:
+
+$ csvgrep -c 'dc.description.provenance[en_US]' -m 'Original URL from IFPRI CONTENTdm' cgspace.csv \
+ | csvcut -c 'id,dc.title[en_US],dc.identifier.uri[en_US],dc.description.provenance[en_US],dcterms.type[en_US]' \
+ | tee /tmp/ifpri-records.csv \
+ | csvstat --count
+898
+
+- I finalized the remaining two on the Inkomati catchment and now we are at 900!
+
+
diff --git a/docs/categories/index.html b/docs/categories/index.html
index 6a3d6ef7b..abd8b4c45 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/index.xml b/docs/categories/index.xml
index a7a729ef5..f40c8c48b 100644
--- a/docs/categories/index.xml
+++ b/docs/categories/index.xml
@@ -6,7 +6,7 @@
Recent content in Categories on CGSpace Notes
Hugo -- gohugo.io
en-us
- Mon, 11 Mar 2024 21:58:15 +0300
+ Thu, 14 Mar 2024 09:29:05 +0300
-
Notes
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index 77e56c655..6a00e6bd1 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/index.xml b/docs/categories/notes/index.xml
index c3ee2f04a..da57c587a 100644
--- a/docs/categories/notes/index.xml
+++ b/docs/categories/notes/index.xml
@@ -6,7 +6,7 @@
Recent content in Notes on CGSpace Notes
Hugo -- gohugo.io
en-us
- Mon, 11 Mar 2024 21:58:15 +0300
+ Thu, 14 Mar 2024 09:29:05 +0300
-
March, 2024
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index 29eaf6f1e..c1198436e 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index bfabddbfc..5f2264266 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index 398689773..49e3d03f2 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html
index a4ab74294..5290be160 100644
--- a/docs/categories/notes/page/5/index.html
+++ b/docs/categories/notes/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html
index 4f37395e4..43eb13d3d 100644
--- a/docs/categories/notes/page/6/index.html
+++ b/docs/categories/notes/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html
index c538362fb..8a4738477 100644
--- a/docs/categories/notes/page/7/index.html
+++ b/docs/categories/notes/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/8/index.html b/docs/categories/notes/page/8/index.html
index 47c39bf5f..8fe70f5d1 100644
--- a/docs/categories/notes/page/8/index.html
+++ b/docs/categories/notes/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/9/index.html b/docs/categories/notes/page/9/index.html
index 3987ca776..91f2e8b3f 100644
--- a/docs/categories/notes/page/9/index.html
+++ b/docs/categories/notes/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index 463050e9c..20854154f 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.xml b/docs/index.xml
index c6b13404b..7a1618f61 100644
--- a/docs/index.xml
+++ b/docs/index.xml
@@ -6,7 +6,7 @@
Recent content on CGSpace Notes
Hugo -- gohugo.io
en-us
- Mon, 11 Mar 2024 21:58:15 +0300
+ Thu, 14 Mar 2024 09:29:05 +0300
-
March, 2024
diff --git a/docs/page/10/index.html b/docs/page/10/index.html
index bb75ed9c0..4ef343262 100644
--- a/docs/page/10/index.html
+++ b/docs/page/10/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/11/index.html b/docs/page/11/index.html
index a51c65a68..17308d5fd 100644
--- a/docs/page/11/index.html
+++ b/docs/page/11/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index 8dbd9b1a1..16a8056f0 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index 4f12125c7..7376519e3 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index 8cb88619f..e4212e207 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index beff27150..9c105071a 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index b709ad6f3..07df53fcd 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index bfed31dc0..1948fe33e 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/8/index.html b/docs/page/8/index.html
index d701733bb..986276a33 100644
--- a/docs/page/8/index.html
+++ b/docs/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/9/index.html b/docs/page/9/index.html
index 2d0d6bff5..40a864824 100644
--- a/docs/page/9/index.html
+++ b/docs/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index f0dfc01fc..54abe9686 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.xml b/docs/posts/index.xml
index f4cb22640..9bb27f7c2 100644
--- a/docs/posts/index.xml
+++ b/docs/posts/index.xml
@@ -6,7 +6,7 @@
Recent content in Posts on CGSpace Notes
Hugo -- gohugo.io
en-us
- Mon, 11 Mar 2024 21:58:15 +0300
+ Thu, 14 Mar 2024 09:29:05 +0300
-
March, 2024
diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html
index e1d64bfe4..776b1dacc 100644
--- a/docs/posts/page/10/index.html
+++ b/docs/posts/page/10/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/11/index.html b/docs/posts/page/11/index.html
index e4fd8820e..e48bc6405 100644
--- a/docs/posts/page/11/index.html
+++ b/docs/posts/page/11/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index 0e05b2d0f..9d8b71c3b 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index e94d41976..3c232b13a 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index dee2e9e23..41e785583 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index 8a9cc8481..ac6ff649d 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index 5b03e7cd6..052b5f4be 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index 54982036b..0ad23e5d9 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html
index d9e33b836..de88bc74f 100644
--- a/docs/posts/page/8/index.html
+++ b/docs/posts/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html
index e7877efd0..19f7e423e 100644
--- a/docs/posts/page/9/index.html
+++ b/docs/posts/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 06db4702d..b14f87eca 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,19 +3,19 @@
xmlns:xhtml="http://www.w3.org/1999/xhtml">
https://alanorth.github.io/cgspace-notes/categories/
- 2024-03-11T21:58:15+03:00
+ 2024-03-14T09:29:05+03:00
https://alanorth.github.io/cgspace-notes/
- 2024-03-11T21:58:15+03:00
+ 2024-03-14T09:29:05+03:00
https://alanorth.github.io/cgspace-notes/2024-03/
- 2024-03-11T21:58:15+03:00
+ 2024-03-14T09:29:05+03:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2024-03-11T21:58:15+03:00
+ 2024-03-14T09:29:05+03:00
https://alanorth.github.io/cgspace-notes/posts/
- 2024-03-11T21:58:15+03:00
+ 2024-03-14T09:29:05+03:00
https://alanorth.github.io/cgspace-notes/2024-02/
2024-03-01T09:55:02+03:00