diff --git a/content/posts/2024-04.md b/content/posts/2024-04.md index ad758a4f9..edb2a2bf4 100644 --- a/content/posts/2024-04.md +++ b/content/posts/2024-04.md @@ -25,5 +25,35 @@ categories: ["Notes"] - Finish working on the 650 IFPRI 2022 records that were not already on CGSpace, then uploaded them - I need to merge the metadata for the remaining 212 that are already on CGSpace +- Spend some time looking at duplicate DOIs again... + +## 2024-04-13 + +- Spend some time looking at duplicate DOIs again... + +## 2024-04-14 + +- Spend some time looking at duplicate DOIs again... + +## 2024-04-15 + +- Spend some time looking at duplicate DOIs again... +- Delete ~260 duplicate metadata values using the elaborate SQL and sort method I documented here: https://github.com/DSpace/DSpace/issues/8253#issuecomment-1331756418 +- Tony noticed that the DSpace 7 REST API is very slow with the embeds so I profiled a bit: + +``` +$ time curl -s -o /dev/null 'https://cgspace.cgiar.org/server/api/discover/search/objects?query=cg.identifier.project%3AIFPRI*&scope=8f1e9650-fe87-4e6e-889a-1cacfb747408&page=0&size=100&embed=thumbnail,bundles/bitstreams&sort=dcterms.issued,desc' +curl -s -o /dev/null 0.01s user 0.01s system 0% cpu 47.515 total +$ time curl -s -o /dev/null 'https://cgspace.cgiar.org/server/api/discover/search/objects?query=cg.identifier.project%3AIFPRI*&scope=8f1e9650-fe87-4e6e-889a-1cacfb747408&page=0&size=100&sort=dcterms.issued,desc' +curl -s -o /dev/null 0.01s user 0.01s system 0% cpu 4.764 total +``` + +- Finalize processing the remaining 206 items from the IFPRI 2022 batch set that already existed on CGSpace + - I merged metadata with the existing items + - There are still six remaining items that I identified as being duplicates (3x2) in the IFPRI set itself + +## 2024-04-16 + +- Spend some time looking at duplicate DOIs again... diff --git a/docs/2024-04/index.html b/docs/2024-04/index.html index 8504a54a2..6dddaf8d6 100644 --- a/docs/2024-04/index.html +++ b/docs/2024-04/index.html @@ -14,7 +14,7 @@ Work on CGSpace duplicate DOIs more - + @@ -34,9 +34,9 @@ Work on CGSpace duplicate DOIs more "@type": "BlogPosting", "headline": "April, 2024", "url": "https://alanorth.github.io/cgspace-notes/2024-04/", - "wordCount": "77", + "wordCount": "236", "datePublished": "2024-04-04T10:23:00+03:00", - "dateModified": "2024-04-09T16:50:56+03:00", + "dateModified": "2024-04-12T20:40:52+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -137,6 +137,37 @@ Work on CGSpace duplicate DOIs more
  • I need to merge the metadata for the remaining 212 that are already on CGSpace
  • +
  • Spend some time looking at duplicate DOIs again…
  • + +

    2024-04-13

    + +

    2024-04-14

    + +

    2024-04-15

    + +
    $ time curl -s -o /dev/null 'https://cgspace.cgiar.org/server/api/discover/search/objects?query=cg.identifier.project%3AIFPRI*&scope=8f1e9650-fe87-4e6e-889a-1cacfb747408&page=0&size=100&embed=thumbnail,bundles/bitstreams&sort=dcterms.issued,desc'
    +curl -s -o /dev/null   0.01s user 0.01s system 0% cpu 47.515 total
    +$ time curl -s -o /dev/null 'https://cgspace.cgiar.org/server/api/discover/search/objects?query=cg.identifier.project%3AIFPRI*&scope=8f1e9650-fe87-4e6e-889a-1cacfb747408&page=0&size=100&sort=dcterms.issued,desc' 
    +curl -s -o /dev/null   0.01s user 0.01s system 0% cpu 4.764 total
    +
    +

    2024-04-16

    + diff --git a/docs/categories/index.html b/docs/categories/index.html index 63d2ef7dd..f10061cdd 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/index.xml b/docs/categories/index.xml index 76c02ef75..7e84178ef 100644 --- a/docs/categories/index.xml +++ b/docs/categories/index.xml @@ -6,7 +6,7 @@ Recent content in Categories on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 09 Apr 2024 16:50:56 +0300 + Fri, 12 Apr 2024 20:40:52 +0300 Notes diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 115440fd0..301c05cf5 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.xml b/docs/categories/notes/index.xml index 9bd1de549..c71c3bdc6 100644 --- a/docs/categories/notes/index.xml +++ b/docs/categories/notes/index.xml @@ -6,7 +6,7 @@ Recent content in Notes on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 09 Apr 2024 16:50:56 +0300 + Fri, 12 Apr 2024 20:40:52 +0300 April, 2024 diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 85920ff45..a29da8aab 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 5ad383279..1c1dc9942 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index a5699aedf..4ac350b34 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index c06b7fa45..91dc7dd52 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index f1ac066a6..927351af3 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index fa286e75e..2664550bf 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/8/index.html b/docs/categories/notes/page/8/index.html index f6974b95b..a52bc145b 100644 --- a/docs/categories/notes/page/8/index.html +++ b/docs/categories/notes/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/9/index.html b/docs/categories/notes/page/9/index.html index c9f76f75f..5e70d93e8 100644 --- a/docs/categories/notes/page/9/index.html +++ b/docs/categories/notes/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 506521715..b72929034 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.xml b/docs/index.xml index f5df0c8a9..a67d76222 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -6,7 +6,7 @@ Recent content on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 09 Apr 2024 16:50:56 +0300 + Fri, 12 Apr 2024 20:40:52 +0300 April, 2024 diff --git a/docs/page/10/index.html b/docs/page/10/index.html index e2ef55b2a..8770757f6 100644 --- a/docs/page/10/index.html +++ b/docs/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/11/index.html b/docs/page/11/index.html index 24c4e2916..6b662e3c2 100644 --- a/docs/page/11/index.html +++ b/docs/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 8dfa9b229..37d6a3238 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 945f1013e..dc139de7b 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index f8a21b673..71421d9e2 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 1bdbbabf1..a0ed06a4c 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 28cdee151..f912b6ebd 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 505ba9a6b..19d4590db 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 9ff91117d..a27b68744 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index 0d0fda7c7..d5b8259b9 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index a14029db4..d4286b24e 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.xml b/docs/posts/index.xml index 06b7b3ff9..7cce67669 100644 --- a/docs/posts/index.xml +++ b/docs/posts/index.xml @@ -6,7 +6,7 @@ Recent content in Posts on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 09 Apr 2024 16:50:56 +0300 + Fri, 12 Apr 2024 20:40:52 +0300 April, 2024 diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html index 6d5e6bae0..d1a65e6c0 100644 --- a/docs/posts/page/10/index.html +++ b/docs/posts/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/11/index.html b/docs/posts/page/11/index.html index 36d9a65ad..9cb6022ba 100644 --- a/docs/posts/page/11/index.html +++ b/docs/posts/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index ccae5405d..e83ecfa11 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 5fa76cd66..a793c83a0 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 4c69dac11..1d9508b86 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 1b3ebbf5c..9bf316817 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 5a93b93d6..ea23df44a 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index f6d111988..5899ba707 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index d0991ec6f..b7f56a04b 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 232264b3d..01c230fb6 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 22dd672e7..5f1f5358f 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/2024-04/ - 2024-04-09T16:50:56+03:00 + 2024-04-12T20:40:52+03:00 https://alanorth.github.io/cgspace-notes/categories/ - 2024-04-09T16:50:56+03:00 + 2024-04-12T20:40:52+03:00 https://alanorth.github.io/cgspace-notes/ - 2024-04-09T16:50:56+03:00 + 2024-04-12T20:40:52+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2024-04-09T16:50:56+03:00 + 2024-04-12T20:40:52+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2024-04-09T16:50:56+03:00 + 2024-04-12T20:40:52+03:00 https://alanorth.github.io/cgspace-notes/2024-03/ 2024-04-04T10:23:49+03:00