From 1e04a7da7212395337389338f0acb63adadaac7e Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 30 Oct 2020 15:24:23 +0200 Subject: [PATCH] Add notes for 2020-10-30 --- content/posts/2020-10.md | 32 ++++++++++++++++++ docs/2020-10/index.html | 45 ++++++++++++++++++++++--- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/sitemap.xml | 10 +++--- 22 files changed, 97 insertions(+), 28 deletions(-) diff --git a/content/posts/2020-10.md b/content/posts/2020-10.md index 6535d7b45..29b602b18 100644 --- a/content/posts/2020-10.md +++ b/content/posts/2020-10.md @@ -935,4 +935,36 @@ $ dspace community-filiator --set --parent 10568/83389 --child 10568/1208 $ dspace community-filiator --set --parent 10568/83389 --child 10568/56924 ``` +## 2020-10-30 + +- The `AtomicStatisticsUpdateCLI` process finished on the current DSpace Test statistics core after about 32 hours + - I started it on the statistics-2019 core +- Atmire responded about the duplicate values in Solr that I had asked about a few days ago + - They said it could be due to the schema and asked if I see it only on old records or even on new ones created in the new CUA with DSpace 6 + - I did a test and found that I got duplicate data after browsing for a minute on DSpace Test (version 6) and sent them a screenshot +- Looking over Peter's corrections to journal titles (dc.source) and publishers (dc.publisher) + - I had to check the corrections for strange Unicode errors and replacements with "|" and ";" in OpenRefine using this GREL: + +``` +or( + isNotNull(value.match(/.*\uFFFD.*/)), + isNotNull(value.match(/.*\u00A0.*/)), + isNotNull(value.match(/.*\u200A.*/)), + isNotNull(value.match(/.*\u2019.*/)), + isNotNull(value.match(/.*\u00b4.*/)), + isNotNull(value.match(/.*\u007e.*/)) +).toString() +``` + +- Then I did a test to apply the corrections and deletions on my local DSpace: + +``` +$ ./fix-metadata-values.py -i 2020-10-30-fix-854-journals.csv -db dspace -u dspace -p 'fuuu' -f dc.source -t 'correct' -m 55 +$ ./delete-metadata-values.py -i 2020-10-30-delete-90-journals.csv -db dspace -u dspace -p 'fuuu' -f dc.source -m 55 +$ ./fix-metadata-values.py -i 2020-10-30-fix-386-publishers.csv -db dspace -u dspace -p 'fuuu' -f dc.publisher -t correct -m 39 +$ ./delete-metadata-values.py -i 2020-10-30-delete-10-publishers.csv -db dspace -u dspace -p 'fuuu' -f dc.publisher -m 39 +``` + +- I will wait to apply them on CGSpace when I have all the other corrections from Peter processed + diff --git a/docs/2020-10/index.html b/docs/2020-10/index.html index 42ce5e0d8..5f06a7e46 100644 --- a/docs/2020-10/index.html +++ b/docs/2020-10/index.html @@ -23,7 +23,7 @@ During the FlywayDB migration I got an error: - + @@ -51,9 +51,9 @@ During the FlywayDB migration I got an error: "@type": "BlogPosting", "headline": "October, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-10/", - "wordCount": "6354", + "wordCount": "6585", "datePublished": "2020-10-06T16:55:54+03:00", - "dateModified": "2020-10-28T17:53:19+03:00", + "dateModified": "2020-10-29T00:05:25+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1165,7 +1165,44 @@ dspace=# COMMIT;
$ dspace community-filiator --set --parent 10568/83389 --child 10568/1208
 $ dspace community-filiator --set --parent 10568/83389 --child 10568/56924
-
+

2020-10-30

+ +
or(
+  isNotNull(value.match(/.*\uFFFD.*/)),
+  isNotNull(value.match(/.*\u00A0.*/)),
+  isNotNull(value.match(/.*\u200A.*/)),
+  isNotNull(value.match(/.*\u2019.*/)),
+  isNotNull(value.match(/.*\u00b4.*/)),
+  isNotNull(value.match(/.*\u007e.*/))
+).toString()
+
+
$ ./fix-metadata-values.py -i 2020-10-30-fix-854-journals.csv -db dspace -u dspace -p 'fuuu' -f dc.source -t 'correct' -m 55
+$ ./delete-metadata-values.py -i 2020-10-30-delete-90-journals.csv -db dspace -u dspace -p 'fuuu' -f dc.source -m 55
+$ ./fix-metadata-values.py -i 2020-10-30-fix-386-publishers.csv -db dspace -u dspace -p 'fuuu' -f dc.publisher -t correct -m 39
+$ ./delete-metadata-values.py -i 2020-10-30-delete-10-publishers.csv -db dspace -u dspace -p 'fuuu' -f dc.publisher -m 39
+
+ diff --git a/docs/categories/index.html b/docs/categories/index.html index 3359fb182..84d7e2987 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 641c1e427..f8f0ce063 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index f77bc7a57..cd9d22584 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index d4ebee0ee..470854f16 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 24bea9b6e..9aa06ede1 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 3c55c40d5..416a14486 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 690bd34db..cd5010bce 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 0044457cb..7ef1ee992 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 8d08c066f..fb3050146 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 8c9d925f1..b3fd32348 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 936e409db..66d630e32 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 042e6eb30..874f522da 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index d9fc6a2a4..ff650616c 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index a63f5425c..a7b9a1757 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index dc8f049c9..e020a59b7 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index e76af8536..978ae47fb 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index ad189af44..99d7f1f59 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index d3d70dc70..ed530cae3 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index de7e3466d..8634cad3d 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 648991b80..f03e634ab 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-10-28T17:53:19+03:00 + 2020-10-29T00:05:25+03:00 https://alanorth.github.io/cgspace-notes/ - 2020-10-28T17:53:19+03:00 + 2020-10-29T00:05:25+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-10-28T17:53:19+03:00 + 2020-10-29T00:05:25+03:00 https://alanorth.github.io/cgspace-notes/2020-10/ - 2020-10-28T17:53:19+03:00 + 2020-10-29T00:05:25+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-10-28T17:53:19+03:00 + 2020-10-29T00:05:25+03:00