From bee6532af2800505f1c6d5a076d012b00b4e2897 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Thu, 9 Mar 2023 17:01:50 +0300 Subject: [PATCH] Add notes for 2023-03-09 --- content/posts/2023-03.md | 12 ++++++++++++ docs/2023-03/index.html | 19 ++++++++++++++++--- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/categories/notes/page/7/index.html | 2 +- docs/index.html | 2 +- docs/page/10/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/page/9/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/10/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/posts/page/9/index.html | 2 +- docs/sitemap.xml | 10 +++++----- 31 files changed, 61 insertions(+), 36 deletions(-) diff --git a/content/posts/2023-03.md b/content/posts/2023-03.md index e8a65d7c9..835251524 100644 --- a/content/posts/2023-03.md +++ b/content/posts/2023-03.md @@ -164,6 +164,12 @@ value.replace("","").replace("", "").replace(""," ``` - I uploaded the 350 items to DSpace Test so Peter and Abenet can explore them +- I exported a list of authors, affiliations, and funders from the new items to let Peter correct them: + +```console +$ csvcut -c dc.contributor.author /tmp/new-items.csv | sed -e 1d -e 's/"//g' -e 's/||/\n/g' | sort | uniq -c | sort -nr | awk '{$1=""; print $0}' | sed -e 's/^ //' > /tmp/new-authors.csv +``` + - Meeting with FAO AGRIS team about how to detect duplicates - They are currently using a sha256 hash on titles, which will work, but will only return exact matches - I told them to try to normalize the string, drop stop words, etc to increase the possibility that the hash matches @@ -172,4 +178,10 @@ value.replace("","").replace("", "").replace(""," - I said I prefer to write a small script for her that will check the first author and first affiliation... I could do it easily in Python, but would need to put a web frontend on it for her - Unless we could do that in AReS reports somehow +## 2023-03-09 + +- Apply a bunch of corrections to authors, affiliations, and donors on the new items on DSpace Test +- Meeting with Peter and Abenet about future OpenRXV developments, DSpace 7, etc + - I submitted an [issue on MEL asking them to add provenance metadata when submitting to CGSpace](https://github.com/CodeObia/MEL/issues/11173) + diff --git a/docs/2023-03/index.html b/docs/2023-03/index.html index aff01e3ec..b1715f92b 100644 --- a/docs/2023-03/index.html +++ b/docs/2023-03/index.html @@ -16,7 +16,7 @@ I finally got through with porting the input form from DSpace 6 to DSpace 7 - + @@ -38,9 +38,9 @@ I finally got through with porting the input form from DSpace 6 to DSpace 7 "@type": "BlogPosting", "headline": "March, 2023", "url": "https://alanorth.github.io/cgspace-notes/2023-03/", - "wordCount": "1336", + "wordCount": "1433", "datePublished": "2023-03-01T07:58:36+03:00", - "dateModified": "2023-03-07T17:15:26+03:00", + "dateModified": "2023-03-08T18:53:32+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -294,6 +294,10 @@ pd.options.mode.nullable_dtypes = True value.replace("<jats:sub>","").replace("</jats:sub>", "").replace("<jats:sup>","").replace("</jats:sup>", "")
  • I uploaded the 350 items to DSpace Test so Peter and Abenet can explore them
  • +
  • I exported a list of authors, affiliations, and funders from the new items to let Peter correct them:
  • +
+
$ csvcut -c dc.contributor.author /tmp/new-items.csv | sed -e 1d -e 's/"//g' -e 's/||/\n/g' | sort | uniq -c | sort -nr | awk '{$1=""; print $0}' | sed -e 's/^ //' > /tmp/new-authors.csv
+
  • Meeting with FAO AGRIS team about how to detect duplicates
    • They are currently using a sha256 hash on titles, which will work, but will only return exact matches
    • @@ -308,6 +312,15 @@ pd.options.mode.nullable_dtypes = True
+

2023-03-09

+ diff --git a/docs/categories/index.html b/docs/categories/index.html index 2e65b06c1..3dfd2bb28 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 9433eb25c..0b2727708 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 770676fcd..de7392e26 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index a222b9973..6a9bfa8b1 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index d80601517..61c05825e 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index b2ae27660..d2bf75ee8 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 1dc73c6f3..2b5a8c919 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index fcf1e9881..120d60231 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index ae2f7c725..944198db7 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/10/index.html b/docs/page/10/index.html index 60d974a46..a8df7c61a 100644 --- a/docs/page/10/index.html +++ b/docs/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 6545f0222..89ffb37fa 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 221e5a8c0..fcb573172 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index eeba2729f..067b2406d 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 6c69e5c65..a2b554247 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 56d88998b..8e6721e9d 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 4e00eec9c..6f7232a29 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 9d6995b78..96be74467 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index d776c18a3..7252db147 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index fc7ffca33..5b3949527 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html index a8b1753e0..dfad92914 100644 --- a/docs/posts/page/10/index.html +++ b/docs/posts/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 634f9ca16..08f1e6b24 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index a76f8cdf8..a6bb8ec79 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 171df0687..cb7d9df1e 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index fdd1c2b36..64c4598ea 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 495c3a4fa..8c36a9dcd 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 89c72b79f..6b9f681bf 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index f2922bdd0..08c79cec9 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index e95b1e5b4..9c743823c 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 49989e83f..98e9e17f5 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2023-03-07T17:15:26+03:00 + 2023-03-08T18:53:32+03:00 https://alanorth.github.io/cgspace-notes/ - 2023-03-07T17:15:26+03:00 + 2023-03-08T18:53:32+03:00 https://alanorth.github.io/cgspace-notes/2023-03/ - 2023-03-07T17:15:26+03:00 + 2023-03-08T18:53:32+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2023-03-07T17:15:26+03:00 + 2023-03-08T18:53:32+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2023-03-07T17:15:26+03:00 + 2023-03-08T18:53:32+03:00 https://alanorth.github.io/cgspace-notes/2023-02/ 2023-03-01T08:30:25+03:00