From cad7ceaba1a849e9c14daa29c77fc1fd2cc314f1 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 2 Dec 2018 17:55:32 +0200 Subject: [PATCH] Update notes for 2018-12-02 --- content/posts/2018-12.md | 24 ++++++++++++++++++++++++ docs/2018-12/index.html | 34 +++++++++++++++++++++++++++++++--- docs/sitemap.xml | 10 +++++----- 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/content/posts/2018-12.md b/content/posts/2018-12.md index 8d0dc9322..1a670c3d5 100644 --- a/content/posts/2018-12.md +++ b/content/posts/2018-12.md @@ -56,4 +56,28 @@ $ gs -q -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dAli DEBUG: FC_WEIGHT didn't match ``` +- Start proofing the latest round of 226 IITA archive records that Bosede sent last week and Sisay uploaded to DSpace Test this weekend ([IITA_Dec_1_1997 aka Daniel1807](https://dspacetest.cgiar.org/handle/10568/108298)) + - One item missing the authorship type + - Some invalid countries (smart quotes, misspellings) + - Added countries to some items that mentioned research in particular countries in their abstracts + - One item had "MADAGASCAR" for ISI Journal + - Minor corrections in IITA subject (LIVELIHOOD→LIVELIHOODS) + - Trim whitespace in abstract field + - Fix some sponsors (though some with "Governments of Canada" etc I'm not sure why those are plural) + - Eighteen items had `en||fr` for the language, but the content was only in French so changed them to just `fr` + - Six items had encoding errors in French text so I will ask Bosede to re-do them carefully + - Correct and normalize a few AGROVOC subjects +- Expand my "encoding error" detection GREL to include `~` as I saw a lot of that in some copy pasted French text recently: + +``` +or( + isNotNull(value.match(/.*\uFFFD.*/)), + isNotNull(value.match(/.*\u00A0.*/)), + isNotNull(value.match(/.*\u200A.*/)), + isNotNull(value.match(/.*\u2019.*/)), + isNotNull(value.match(/.*\u00b4.*/)), + isNotNull(value.match(/.*\u007e.*/)) +) +``` + diff --git 
a/docs/2018-12/index.html b/docs/2018-12/index.html index 0fc1bd8dc..35f0cbdca 100644 --- a/docs/2018-12/index.html +++ b/docs/2018-12/index.html @@ -21,7 +21,7 @@ I noticed that there is another issue with PDF thumbnails on CGSpace, and I see " /> - + @@ -48,9 +48,9 @@ I noticed that there is another issue with PDF thumbnails on CGSpace, and I see "@type": "BlogPosting", "headline": "December, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-12/", - "wordCount": "301", + "wordCount": "463", "datePublished": "2018-12-02T02:09:30+02:00", - "dateModified": "2018-12-02T10:47:41+02:00", + "dateModified": "2018-12-02T10:57:41+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -172,6 +172,34 @@ zsh: segmentation fault (core dumped) gs -q -dQUIET -dSAFER -dBATCH -dNOPAUSE - DEBUG: FC_WEIGHT didn't match + + +
or(
+  isNotNull(value.match(/.*\uFFFD.*/)),
+  isNotNull(value.match(/.*\u00A0.*/)),
+  isNotNull(value.match(/.*\u200A.*/)),
+  isNotNull(value.match(/.*\u2019.*/)),
+  isNotNull(value.match(/.*\u00b4.*/)),
+  isNotNull(value.match(/.*\u007e.*/))
+)
+
+ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 41cd0731e..58cd68683 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2018-12/ - 2018-12-02T10:47:41+02:00 + 2018-12-02T10:57:41+02:00 @@ -199,7 +199,7 @@ https://alanorth.github.io/cgspace-notes/ - 2018-12-02T10:47:41+02:00 + 2018-12-02T10:57:41+02:00 0 @@ -210,7 +210,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2018-12-02T10:47:41+02:00 + 2018-12-02T10:57:41+02:00 0 @@ -222,13 +222,13 @@ https://alanorth.github.io/cgspace-notes/posts/ - 2018-12-02T10:47:41+02:00 + 2018-12-02T10:57:41+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2018-12-02T10:47:41+02:00 + 2018-12-02T10:57:41+02:00 0