From 90c4d4660765829142ea1552af629d3ced7ae8f6 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 19 Mar 2024 09:01:13 +0300 Subject: [PATCH] Add notes --- content/posts/2022-06.md | 2 +- content/posts/2024-03.md | 44 +++++++++++++++++++ docs/2022-06/index.html | 4 +- docs/2024-03/index.html | 57 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/index.xml | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/index.xml | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/categories/notes/page/7/index.html | 2 +- docs/categories/notes/page/8/index.html | 2 +- docs/categories/notes/page/9/index.html | 2 +- docs/index.html | 2 +- docs/index.xml | 2 +- docs/page/10/index.html | 2 +- docs/page/11/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/page/9/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/index.xml | 2 +- docs/posts/page/10/index.html | 2 +- docs/posts/page/11/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/posts/page/9/index.html | 2 +- docs/sitemap.xml | 10 ++--- 41 files changed, 141 insertions(+), 48 deletions(-) diff --git a/content/posts/2022-06.md b/content/posts/2022-06.md index 6df39a9e6..bf209134d 100644 --- a/content/posts/2022-06.md +++ b/content/posts/2022-06.md @@ -169,7 +169,7 @@ $ csvjoin --outer -c alpha2 ~/Downloads/clarisa-countries.csv ~/Downloads/UNSD\ - Then re-export the UN M.49 countries to a 
clean list because the one I did yesterday somehow has errors: ```console -csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv +$ csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv ``` - Check the number of lines in each file: diff --git a/content/posts/2024-03.md b/content/posts/2024-03.md index 52bfbdb82..d85c0eb50 100644 --- a/content/posts/2024-03.md +++ b/content/posts/2024-03.md @@ -113,4 +113,48 @@ $ csvcut -c 'id,dc.title[en_US],dc.identifier.uri[en_US],cg.link.permalink[en_US SELECT ds6_item2itemhandle(dspace_object_id) AS handle FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item WHERE NOT discoverable) AND metadata_field_id=28 AND text_value LIKE 'Submitted by Alliance TIP Submit%'; ``` +## 2024-03-14 + +- Looking in to reports of rate limiting of Altmetric's bot on CGSpace + - I don't see any HTTP 429 responses for their user agents in any of our logs... + - I tried myself on an item page and never hit a limit... + +```console +$ for num in {1..60}; do echo -n "Request ${num}: "; curl -s -o /dev/null -w "%{http_code}" https://dspace7test.ilri.org/items/c9b8999d-3001-42ba-a267-14f4bfa90b53 && echo; done +Request 1: 200 +Request 2: 200 +Request 3: 200 +Request 4: 200 +... +Request 60: 200 +``` + +- All responses were HTTP 200... 
+- In any case, I whitelisted their production IPs and told them to try again +- I imported 468 of IFPRI's 2023 records that were confirmed to not be duplicates to CGSpace + - I also spent some time merging metadata from 415 of the remaining 432 duplicates with the metadata for the existing items on CGSpace + - This was a bit of dirty work using csvkit, xsv, and OpenRefine + +## 2024-03-17 + +- There are 17 records from IFPRI's 2023 batch that are remaining from the 432 that I identified as already being on CGSpace + - These are different in that they are duplicates on CGSpace as well, so the csvjoin failed and the metadata got messed up in my migration + - I looked closer and whittled this down to 14 actual records, and spent some time working on them + - I isolated 12 of these items that existed on CGSpace and added publication ranks, project identifiers, and provenance links + - Now there only remain two confusing records about the Inkomati catchment + +## 2024-03-18 + +- Checking to see how many IFPRI records we have migrated so far: + +```console +$ csvgrep -c 'dc.description.provenance[en_US]' -m 'Original URL from IFPRI CONTENTdm' cgspace.csv \ + | csvcut -c 'id,dc.title[en_US],dc.identifier.uri[en_US],dc.description.provenance[en_US],dcterms.type[en_US]' \ + | tee /tmp/ifpri-records.csv \ + | csvstat --count +898 +``` + +- I finalized the remaining two on Inkomati catchment and now we are at 900! + diff --git a/docs/2022-06/index.html b/docs/2022-06/index.html index 309ce988a..89b96b7c3 100644 --- a/docs/2022-06/index.html +++ b/docs/2022-06/index.html @@ -58,7 +58,7 @@ There seem to be many more of these: "@type": "BlogPosting", "headline": "June, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-06/", - "wordCount": "1788", + "wordCount": "1789", "datePublished": "2022-06-06T09:01:36+03:00", "dateModified": "2023-04-27T13:10:13-07:00", "author": { @@ -321,7 +321,7 @@ There seem to be many more of these:
  • Spent some more time working on my countries-to-csv.py script to fix some logic errors
  • Then re-export the UN M.49 countries to a clean list because the one I did yesterday somehow has errors:
  • -
    csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv
    +
    $ csvcut -d ';' -c 'ISO-alpha2 Code,Country or Area' ~/Downloads/UNSD\ —\ Methodology.csv | sed -e '1s/ISO-alpha2 Code/alpha2/' -e '1s/Country or Area/UN M.49 Name/' > ~/Downloads/un-countries.csv
     
    • Check the number of lines in each file:
    diff --git a/docs/2024-03/index.html b/docs/2024-03/index.html index 4a1359116..6892bbd9f 100644 --- a/docs/2024-03/index.html +++ b/docs/2024-03/index.html @@ -19,7 +19,7 @@ It might be this issue: https://github.com/DSpace/dspace-angular/issues/2808 - + @@ -44,9 +44,9 @@ It might be this issue: https://github.com/DSpace/dspace-angular/issues/2808 "@type": "BlogPosting", "headline": "March, 2024", "url": "https://alanorth.github.io/cgspace-notes/2024-03/", - "wordCount": "627", + "wordCount": "923", "datePublished": "2024-03-01T09:55:00+03:00", - "dateModified": "2024-03-11T21:58:15+03:00", + "dateModified": "2024-03-14T09:29:05+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -236,7 +236,56 @@ It might be this issue: https://github.com/DSpace/dspace-angular/issues/2808
    SELECT ds6_item2itemhandle(dspace_object_id) AS handle FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item WHERE NOT discoverable) AND metadata_field_id=28 AND text_value LIKE 'Submitted by Alliance TIP Submit%';
    -
    +

    2024-03-14

    + +
    $ for num in {1..60}; do echo -n "Request ${num}: "; curl -s -o /dev/null -w "%{http_code}" https://dspace7test.ilri.org/items/c9b8999d-3001-42ba-a267-14f4bfa90b53 && echo; done
    +Request 1: 200
    +Request 2: 200
    +Request 3: 200
    +Request 4: 200
    +...
    +Request 60: 200
    +
    +

    2024-03-17

    + +

    2024-03-18

    + +
    $ csvgrep -c 'dc.description.provenance[en_US]' -m 'Original URL from IFPRI CONTENTdm' cgspace.csv \
    +  | csvcut -c 'id,dc.title[en_US],dc.identifier.uri[en_US],dc.description.provenance[en_US],dcterms.type[en_US]' \
    +  | tee /tmp/ifpri-records.csv \
    +  | csvstat --count
    +898
    +
    + diff --git a/docs/categories/index.html b/docs/categories/index.html index 6a3d6ef7b..abd8b4c45 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/index.xml b/docs/categories/index.xml index a7a729ef5..f40c8c48b 100644 --- a/docs/categories/index.xml +++ b/docs/categories/index.xml @@ -6,7 +6,7 @@ Recent content in Categories on CGSpace Notes Hugo -- gohugo.io en-us - Mon, 11 Mar 2024 21:58:15 +0300 + Thu, 14 Mar 2024 09:29:05 +0300 Notes diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 77e56c655..6a00e6bd1 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.xml b/docs/categories/notes/index.xml index c3ee2f04a..da57c587a 100644 --- a/docs/categories/notes/index.xml +++ b/docs/categories/notes/index.xml @@ -6,7 +6,7 @@ Recent content in Notes on CGSpace Notes Hugo -- gohugo.io en-us - Mon, 11 Mar 2024 21:58:15 +0300 + Thu, 14 Mar 2024 09:29:05 +0300 March, 2024 diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 29eaf6f1e..c1198436e 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index bfabddbfc..5f2264266 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 398689773..49e3d03f2 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index a4ab74294..5290be160 100644 --- a/docs/categories/notes/page/5/index.html +++ 
b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 4f37395e4..43eb13d3d 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index c538362fb..8a4738477 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/8/index.html b/docs/categories/notes/page/8/index.html index 47c39bf5f..8fe70f5d1 100644 --- a/docs/categories/notes/page/8/index.html +++ b/docs/categories/notes/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/9/index.html b/docs/categories/notes/page/9/index.html index 3987ca776..91f2e8b3f 100644 --- a/docs/categories/notes/page/9/index.html +++ b/docs/categories/notes/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 463050e9c..20854154f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.xml b/docs/index.xml index c6b13404b..7a1618f61 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -6,7 +6,7 @@ Recent content on CGSpace Notes Hugo -- gohugo.io en-us - Mon, 11 Mar 2024 21:58:15 +0300 + Thu, 14 Mar 2024 09:29:05 +0300 March, 2024 diff --git a/docs/page/10/index.html b/docs/page/10/index.html index bb75ed9c0..4ef343262 100644 --- a/docs/page/10/index.html +++ b/docs/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/11/index.html b/docs/page/11/index.html index a51c65a68..17308d5fd 100644 --- a/docs/page/11/index.html +++ b/docs/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 8dbd9b1a1..16a8056f0 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/page/3/index.html b/docs/page/3/index.html index 4f12125c7..7376519e3 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 8cb88619f..e4212e207 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index beff27150..9c105071a 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index b709ad6f3..07df53fcd 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index bfed31dc0..1948fe33e 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index d701733bb..986276a33 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index 2d0d6bff5..40a864824 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index f0dfc01fc..54abe9686 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.xml b/docs/posts/index.xml index f4cb22640..9bb27f7c2 100644 --- a/docs/posts/index.xml +++ b/docs/posts/index.xml @@ -6,7 +6,7 @@ Recent content in Posts on CGSpace Notes Hugo -- gohugo.io en-us - Mon, 11 Mar 2024 21:58:15 +0300 + Thu, 14 Mar 2024 09:29:05 +0300 March, 2024 diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html index e1d64bfe4..776b1dacc 100644 --- a/docs/posts/page/10/index.html +++ b/docs/posts/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/11/index.html b/docs/posts/page/11/index.html index e4fd8820e..e48bc6405 
100644 --- a/docs/posts/page/11/index.html +++ b/docs/posts/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 0e05b2d0f..9d8b71c3b 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index e94d41976..3c232b13a 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index dee2e9e23..41e785583 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 8a9cc8481..ac6ff649d 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 5b03e7cd6..052b5f4be 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 54982036b..0ad23e5d9 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index d9e33b836..de88bc74f 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index e7877efd0..19f7e423e 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 06db4702d..b14f87eca 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2024-03-11T21:58:15+03:00 + 2024-03-14T09:29:05+03:00 
https://alanorth.github.io/cgspace-notes/ - 2024-03-11T21:58:15+03:00 + 2024-03-14T09:29:05+03:00 https://alanorth.github.io/cgspace-notes/2024-03/ - 2024-03-11T21:58:15+03:00 + 2024-03-14T09:29:05+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2024-03-11T21:58:15+03:00 + 2024-03-14T09:29:05+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2024-03-11T21:58:15+03:00 + 2024-03-14T09:29:05+03:00 https://alanorth.github.io/cgspace-notes/2024-02/ 2024-03-01T09:55:02+03:00