From 720c15124b0df969da2851018bd0df6504a152b6 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 16 Aug 2017 12:50:03 +0300 Subject: [PATCH] Update notes for 2017-08-16 --- content/post/2017-08.md | 9 +++++++++ public/2017-08/index.html | 16 +++++++++++++--- public/sitemap.xml | 10 +++++----- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/content/post/2017-08.md b/content/post/2017-08.md index 288d76f37..8014aeef4 100644 --- a/content/post/2017-08.md +++ b/content/post/2017-08.md @@ -224,3 +224,12 @@ isNotNull(value.match(/(CGIAR .+?)\|\|\1/)) ``` - This would be true if the authors were like `CGIAR System Management Office||CGIAR System Management Office`, which some of the CGIAR Library's were +- Unfortunately when you fix these in OpenRefine and then submit the metadata to DSpace it doesn't detect any changes, so you have to edit them all manually via DSpace's "Edit Item" +- Ooh! And an even more interesting regex would match _any_ duplicated author: + +``` +isNotNull(value.match(/(.+?)\|\|\1/)) +``` + +- Which means it can also be used to find items with duplicate `dc.subject` fields... +- Finally sent Peter the final dump of the CGIAR System Organization community so he can have a last look at it diff --git a/public/2017-08/index.html b/public/2017-08/index.html index 88ff25b74..3bd40783e 100644 --- a/public/2017-08/index.html +++ b/public/2017-08/index.html @@ -37,7 +37,7 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s - + @@ -85,9 +85,9 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s "@type": "BlogPosting", "headline": "August, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-08/", - "wordCount": "2449", + "wordCount": "2528", "datePublished": "2017-08-01T11:51:52+03:00", - "dateModified": "2017-08-15T16:44:59+03:00", + "dateModified": "2017-08-16T12:00:37+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -417,6 +417,16 @@ UPDATE 4899 + +
isNotNull(value.match(/(.+?)\|\|\1/))
+
+ + diff --git a/public/sitemap.xml b/public/sitemap.xml index 5f23c767c..2391e555d 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-08/ - 2017-08-15T16:44:59+03:00 + 2017-08-16T12:00:37+03:00 @@ -114,7 +114,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-08-15T16:44:59+03:00 + 2017-08-16T12:00:37+03:00 0 @@ -125,19 +125,19 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-08-15T16:44:59+03:00 + 2017-08-16T12:00:37+03:00 0 https://alanorth.github.io/cgspace-notes/post/ - 2017-08-15T16:44:59+03:00 + 2017-08-16T12:00:37+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-08-15T16:44:59+03:00 + 2017-08-16T12:00:37+03:00 0