diff --git a/content/posts/2021-02.md b/content/posts/2021-02.md index abeab64d8..eb51ab8d1 100644 --- a/content/posts/2021-02.md +++ b/content/posts/2021-02.md @@ -234,4 +234,89 @@ $ curl -XDELETE 'http://localhost:9200/openrxv-items-temp' # start indexing in AReS ``` +## 2021-02-08 + +- Finish rotating the AReS indexes after the harvesting last night: + +```console +$ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty' +{ + "count" : 100983, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + } +} +$ curl -X PUT "localhost:9200/openrxv-items/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write":true}}' +$ curl -s -X POST http://localhost:9200/openrxv-items/_clone/openrxv-items-2021-02-08 +$ curl -XDELETE 'http://localhost:9200/openrxv-items' +$ curl -X PUT "localhost:9200/openrxv-items-temp/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write": true}}' +$ curl -s -X POST http://localhost:9200/openrxv-items-temp/_clone/openrxv-items +$ curl -XDELETE 'http://localhost:9200/openrxv-items-temp' +$ curl -XDELETE 'http://localhost:9200/openrxv-items-2021-02-08' +``` + +## 2021-02-10 + +- Talk to Abdullah from CodeObia about a few of the issues we filed on OpenRXV + - Verify a fix he made for the issue with spaces in template file names + - He says that the [Angular expressions support should be enabled](https://github.com/ilri/OpenRXV/issues/49), but I tried it and couldn't get a few simple examples working +- Atmire responded to a few issues today: + - First, the one about a crash while exporting a community CSV, which appears to be a [vanilla DSpace issue with a patch in DSpace 6.4](https://jira.lyrasis.org/browse/DS-4111) + - Second, the MQM batch consumer issue, which appears to be harmless log spam in *most* cases and they have sent a patch that adjusts the logging as such + - Third, a version bump for CUA to fix the 
`java.lang.UnsupportedOperationException: Multiple update components target the same field:solr_update_time_stamp` error +- I cherry-picked the patches for DS-4111 and was able to export the ILRI community finally, but the results are almost twice as many items as in the community! + - Investigating with csvcut I see there are some ids that appear up to five, six, or seven times! + +```console +$ csvcut -c id /tmp/2021-02-10-ILRI.csv | sed '1d' | wc -l +30354 +$ csvcut -c id /tmp/2021-02-10-ILRI.csv | sed '1d' | sort -u | wc -l +18555 +$ csvcut -c id /tmp/2021-02-10-ILRI.csv | sed '1d' | sort | uniq -c | sort -h | tail + 5 c21a79e5-e24e-4861-aa07-e06703d1deb7 + 5 c2460aa1-ae28-4003-9a99-2d7c5cd7fd38 + 5 d73fb3ae-9fac-4f7e-990f-e394f344246c + 5 dc0e24fa-b7f5-437e-ac09-e15c0704be00 + 5 dc50bcca-0abf-473f-8770-69d5ab95cc33 + 5 e714bdf9-cc0f-4d9a-a808-d572e25c9238 + 6 7dfd1c61-9e8c-4677-8d41-e1c4b11d867d + 6 fb76888c-03ae-4d53-b27d-87d7ca91371a + 6 ff42d1e6-c489-492c-a40a-803cabd901ed + 7 094e9e1d-09ff-40ca-a6b9-eca580936147 +``` + +- I added a comment to that bug to ask if this is a side effect of the patch +- I started working on tagging pre-2010 ILRI items with license information, like we talked about with Peter and Abenet last week + - Due to the export bug I had to sort and remove duplicates first, then use csvgrep to filter out books and journal articles: + +```console +$ csvcut -c 'id,dc.date.issued,dc.date.issued[],dc.date.issued[en_US],dc.rights,dc.rights[],dc.rights[en],dc.rights[en_US],dc.publisher,dc.publisher[],dc.publisher[en_US],dc.type[en_US]' /tmp/2021-02-10-ILRI.csv | csvgrep -c 'dc.type[en_US]' -r '^.+[^(Journal Item|Journal Article|Book|Book Chapter)]' +``` + +- I imported the CSV into OpenRefine and converted the date text values to date types so I could facet by dates before 2010: + +```console +if(diff(value,"01/01/2010".toDate(),"days")<0, true, false) +``` + +- Then I filtered by publisher to make sure they were only ours: + +```console +or( + 
value.contains("International Livestock Research Institute"), + value.contains("ILRI"), + value.contains("International Livestock Centre for Africa"), + value.contains("ILCA"), + value.contains("ILRAD"), + value.contains("International Laboratory for Research on Animal Diseases") +) +``` + +- I tagged these pre-2010 items with "Other" if they didn't already have a license +- I checked 2010 to 2015, and 2016 to date, but they were all tagged already! +- In the end I added the "Other" license to 1,523 items from before 2010 + diff --git a/docs/2021-02/index.html b/docs/2021-02/index.html index 296b1bf7b..2be749242 100644 --- a/docs/2021-02/index.html +++ b/docs/2021-02/index.html @@ -32,7 +32,7 @@ $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty - + @@ -70,9 +70,9 @@ $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty "@type": "BlogPosting", "headline": "February, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-02/", - "wordCount": "1517", + "wordCount": "2017", "datePublished": "2021-02-01T10:13:54+02:00", - "dateModified": "2021-02-06T14:00:36+02:00", + "dateModified": "2021-02-07T16:27:36+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -389,7 +389,93 @@ sys 2m26.050s
$ curl -XDELETE 'http://localhost:9200/openrxv-items-temp'
 # start indexing in AReS
-
+

2021-02-08

+ +
$ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty'
+{
+  "count" : 100983,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "skipped" : 0,
+    "failed" : 0
+  }
+}
+$ curl -X PUT "localhost:9200/openrxv-items/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write":true}}'
+$ curl -s -X POST http://localhost:9200/openrxv-items/_clone/openrxv-items-2021-02-08
+$ curl -XDELETE 'http://localhost:9200/openrxv-items'
+$ curl -X PUT "localhost:9200/openrxv-items-temp/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write": true}}'
+$ curl -s -X POST http://localhost:9200/openrxv-items-temp/_clone/openrxv-items
+$ curl -XDELETE 'http://localhost:9200/openrxv-items-temp'
+$ curl -XDELETE 'http://localhost:9200/openrxv-items-2021-02-08'
+

2021-02-10

+ +
$ csvcut -c id /tmp/2021-02-10-ILRI.csv | sed '1d' | wc -l
+30354
+$ csvcut -c id /tmp/2021-02-10-ILRI.csv | sed '1d' | sort -u | wc -l         
+18555
+$ csvcut -c id /tmp/2021-02-10-ILRI.csv | sed '1d' | sort | uniq -c | sort -h | tail     
+      5 c21a79e5-e24e-4861-aa07-e06703d1deb7
+      5 c2460aa1-ae28-4003-9a99-2d7c5cd7fd38
+      5 d73fb3ae-9fac-4f7e-990f-e394f344246c
+      5 dc0e24fa-b7f5-437e-ac09-e15c0704be00
+      5 dc50bcca-0abf-473f-8770-69d5ab95cc33
+      5 e714bdf9-cc0f-4d9a-a808-d572e25c9238
+      6 7dfd1c61-9e8c-4677-8d41-e1c4b11d867d
+      6 fb76888c-03ae-4d53-b27d-87d7ca91371a
+      6 ff42d1e6-c489-492c-a40a-803cabd901ed
+      7 094e9e1d-09ff-40ca-a6b9-eca580936147
+
+
$ csvcut -c 'id,dc.date.issued,dc.date.issued[],dc.date.issued[en_US],dc.rights,dc.rights[],dc.rights[en],dc.rights[en_US],dc.publisher,dc.publisher[],dc.publisher[en_US],dc.type[en_US]' /tmp/2021-02-10-ILRI.csv | csvgrep -c 'dc.type[en_US]' -r '^.+[^(Journal Item|Journal Article|Book|Book Chapter)]'
+
+
if(diff(value,"01/01/2010".toDate(),"days")<0, true, false)
+
+
or(
+  value.contains("International Livestock Research Institute"),
+  value.contains("ILRI"),
+  value.contains("International Livestock Centre for Africa"),
+  value.contains("ILCA"),
+  value.contains("ILRAD"),
+  value.contains("International Laboratory for Research on Animal Diseases")
+)
+
+ diff --git a/docs/categories/index.html b/docs/categories/index.html index 8840ade1e..59748359a 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 401b49497..2b1163523 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 43396cd4d..8737c885b 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index cfd677c69..f115efcda 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 37270a17b..caeb771d4 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 6aa7a19ca..7683d6275 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 8de9d56ae..445142dff 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index ce0530a29..fa4098f6a 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index a11cdaf74..8e350ba08 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 58c2c2ce8..82c9e9cfe 100644 --- 
a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 8be87c0fe..b7b4cc68f 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 45f7489b3..e47d64c84 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 43ec062a9..0b7107871 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 986282ef2..16f6db219 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index be8bacef3..4879bffea 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index d64c3c173..5909e7037 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 30eb6c7bb..26efb36eb 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 39d4fb827..b82c9cb41 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index c83472730..8d65d22d5 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 91bec0bde..295f72e5a 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml 
b/docs/sitemap.xml index 0350a152d..484229a3f 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2021-02-06T14:00:36+02:00 + 2021-02-07T16:27:36+02:00 https://alanorth.github.io/cgspace-notes/ - 2021-02-06T14:00:36+02:00 + 2021-02-07T16:27:36+02:00 https://alanorth.github.io/cgspace-notes/2021-02/ - 2021-02-06T14:00:36+02:00 + 2021-02-07T16:27:36+02:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2021-02-06T14:00:36+02:00 + 2021-02-07T16:27:36+02:00 https://alanorth.github.io/cgspace-notes/posts/ - 2021-02-06T14:00:36+02:00 + 2021-02-07T16:27:36+02:00