From 1bafe6ce71491a0ab50e008b7d6f128ea47ce201 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Thu, 8 Dec 2022 18:59:57 +0200 Subject: [PATCH] Add notes for 2022-12-08 --- content/posts/2022-12.md | 35 ++++++++++++++++++++ docs/2022-12/index.html | 44 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/categories/notes/page/7/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/page/9/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/posts/page/9/index.html | 2 +- docs/sitemap.xml | 10 +++--- 29 files changed, 107 insertions(+), 34 deletions(-) diff --git a/content/posts/2022-12.md b/content/posts/2022-12.md index 1a9955cbd..d215689a5 100644 --- a/content/posts/2022-12.md +++ b/content/posts/2022-12.md @@ -88,5 +88,40 @@ $ csvgrep -c matched -m true /tmp/cgspace-matches.csv | wc -l - This means I've added a few thousand UN M.49 regions to the `cg.coverage.subregion` field in the last few days - I had to extract them from CGSpace and delete them using `delete-metadata-values.py` - My [DSpace 7.x pull request to tell ImageMagick about the PDF CropBox](https://github.com/DSpace/DSpace/pull/8550) was merged +- Start a harvest on AReS + +## 2022-12-08 + +- While on the plane I decided to fix some ORCID identifiers, as I had seen some poorly 
formatted ones + - I couldn't remember the exact XPath syntax, so this was a bit of a hack: + +```console +$ xmllint --xpath '//node/isComposedBy/node()' dspace/config/controlled-vocabularies/cg-creator-identifier.xml | grep -oE 'label=".*"' | sed -e 's/label="//' -e 's/"$//' > /tmp/orcid-names.txt +$ ./ilri/update-orcids.py -i /tmp/orcid-names.txt -db dspace -u dspace -p 'fuuu' -m 247 +``` + +- After that there were still some poorly formatted ones that my script didn't fix, so perhaps these are new ones not in our list + - I dumped them and combined them with the existing ones to resolve later: + +```console +localhost/dspace= ☘ \COPY (SELECT dspace_object_id,text_value FROM metadatavalue WHERE metadata_field_id=247 AND text_value LIKE '%http%') to /tmp/orcid-formatting.txt; +COPY 36 +``` + +- I think there are really just some new ones... + +```console +$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml /tmp/orcid-formatting.txt| grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u > /tmp/2022-12-08-orcids.txt +$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u | wc -l +1907 +$ wc -l /tmp/2022-12-08-orcids.txt +1939 /tmp/2022-12-08-orcids.txt +``` + +- Then I applied these updates on CGSpace +- Maria mentioned that she was getting a lot more items in her daily subscription emails + - I had a hunch it was related to me updating the `last_modified` timestamp after updating a bunch of countries, regions, etc. in items + - Then today I noticed this option in `dspace.cfg`: `eperson.subscription.onlynew` + - By default DSpace sends notifications for modified items too! I've disabled it now... 
diff --git a/docs/2022-12/index.html b/docs/2022-12/index.html index d66a011ae..d25d987ec 100644 --- a/docs/2022-12/index.html +++ b/docs/2022-12/index.html @@ -20,7 +20,7 @@ Replace “East Asia” with “Eastern Asia” region on CGSpac - + @@ -46,9 +46,9 @@ Replace “East Asia” with “Eastern Asia” region on CGSpac "@type": "BlogPosting", "headline": "December, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-12/", - "wordCount": "617", + "wordCount": "843", "datePublished": "2022-12-01T08:52:36+03:00", - "dateModified": "2022-12-04T03:19:49+03:00", + "dateModified": "2022-12-07T22:59:37+01:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -209,6 +209,44 @@ Replace “East Asia” with “Eastern Asia” region on CGSpac
  • My DSpace 7.x pull request to tell ImageMagick about the PDF CropBox was merged
  • +
  • Start a harvest on AReS
  • + +

    2022-12-08

    + +
    $ xmllint --xpath '//node/isComposedBy/node()' dspace/config/controlled-vocabularies/cg-creator-identifier.xml | grep -oE 'label=".*"' | sed -e 's/label="//' -e 's/"$//' > /tmp/orcid-names.txt
    +$ ./ilri/update-orcids.py -i /tmp/orcid-names.txt -db dspace -u dspace -p 'fuuu' -m 247
    +
    +
    localhost/dspace= ☘ \COPY (SELECT dspace_object_id,text_value FROM metadatavalue WHERE metadata_field_id=247 AND text_value LIKE '%http%') to /tmp/orcid-formatting.txt;
    +COPY 36
    +
    +
    $ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml /tmp/orcid-formatting.txt| grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u > /tmp/2022-12-08-orcids.txt 
    +$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u | wc -l
    +1907
    +$ wc -l /tmp/2022-12-08-orcids.txt
    +1939 /tmp/2022-12-08-orcids.txt
    +
    diff --git a/docs/categories/index.html b/docs/categories/index.html index 249b9ff6a..6d5a409df 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index d94a28163..8a1a11dc7 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 89301f95f..923223da6 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 5a4e593e2..2409fe364 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index c0d2ef153..2ed57c34a 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index cdc4322b6..43e8d721a 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index e8107cad0..8166eb548 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index df501f1d9..6435d5cdb 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index c90903c9e..8113adf96 100644 --- a/docs/index.html +++ b/docs/index.html @@ 
-10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 36009afa6..24639b9ca 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index e5960c957..05c19daba 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 97e4e6bf5..3f06fd96d 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 5cff45a81..68001b922 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index f5df23338..e50cb69c7 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index ddf7078d8..223fa854e 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 63a1bac2f..acccbb117 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index 9ba622361..af5f88519 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 8df53a652..5f5d95f86 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 0bd5727cb..edd2e93e5 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 9edc1f2fb..d32ebf452 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 94ad1288d..14c88d648 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index b20552436..97a4688cd 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 7e605b2be..a64da3b94 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index c46a17551..5b7b10c67 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index 229d84bf1..673520e99 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index b5addddfd..c9a8ff9dd 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0d3fcfc1c..db6cea72c 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-12-04T03:19:49+03:00 + 2022-12-07T22:59:37+01:00 https://alanorth.github.io/cgspace-notes/ - 2022-12-04T03:19:49+03:00 + 2022-12-07T22:59:37+01:00 https://alanorth.github.io/cgspace-notes/2022-12/ - 2022-12-04T03:19:49+03:00 + 2022-12-07T22:59:37+01:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-12-04T03:19:49+03:00 + 2022-12-07T22:59:37+01:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-12-04T03:19:49+03:00 + 2022-12-07T22:59:37+01:00 https://alanorth.github.io/cgspace-notes/2022-11/ 2022-12-03T10:46:29+03:00