diff --git a/content/posts/2020-12.md b/content/posts/2020-12.md index 9866d3338..8ec3491e2 100644 --- a/content/posts/2020-12.md +++ b/content/posts/2020-12.md @@ -203,4 +203,42 @@ Caused by: org.apache.http.TruncatedChunkException: Truncated chunk ( expected s ![PostgreSQL connections all week](/cgspace-notes/2020/12/postgres_connections_ALL-week.png) ![PostgreSQL locks all week](/cgspace-notes/2020/12/postgres_locks_ALL-week.png) +## 2020-12-13 + +- I tried to harvest a few times on OpenRXV in the last few days and every time it appends all the new records to the items index instead of overwriting it: + +![OpenRXV duplicates](/cgspace-notes/2020/12/openrxv-duplicates.png) + +- I can see it in the `openrxv-items-final` index: + +```console +$ curl -s 'http://localhost:9200/openrxv-items-final/_count?q=*' | json_pp +{ + "_shards" : { + "failed" : 0, + "skipped" : 0, + "successful" : 1, + "total" : 1 + }, + "count" : 299922 +} +``` + +- I filed a bug on OpenRXV: https://github.com/ilri/OpenRXV/issues/64 +- For now I will try to delete the index and start a re-harvest in the Admin UI: + +``` +$ curl -XDELETE http://localhost:9200/openrxv-items-final +{"acknowledged":true}% +``` + +- Moayad said he's working on the harvesting so I stopped it for now to re-deploy his latest changes +- I updated Tomcat to version 7.0.107 on CGSpace (linode18), ran all updates, and restarted the server +- I deleted both items indexes and restarted the harvesting: + +``` +$ curl -XDELETE http://localhost:9200/openrxv-items-final +$ curl -XDELETE http://localhost:9200/openrxv-items-temp +``` + diff --git a/docs/2020-12/index.html b/docs/2020-12/index.html index b7705b789..8b901386b 100644 --- a/docs/2020-12/index.html +++ b/docs/2020-12/index.html @@ -20,7 +20,7 @@ I started processing those (about 411,000 records): - + @@ -46,9 +46,9 @@ I started processing those (about 411,000 records): "@type": "BlogPosting", "headline": "December, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-12/", - "wordCount": "1231", + "wordCount": "1378", "datePublished": "2020-12-01T11:32:54+02:00", - "dateModified": "2020-12-09T22:48:19+02:00", + "dateModified": "2020-12-10T23:43:09+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -333,7 +333,38 @@ Caused by: org.apache.http.TruncatedChunkException: Truncated chunk ( expected s

PostgreSQL connections all week PostgreSQL locks all week

- +

2020-12-13

+ +

OpenRXV duplicates

+ +
$ curl -s 'http://localhost:9200/openrxv-items-final/_count?q=*' | json_pp
+{
+   "_shards" : {
+      "failed" : 0,
+      "skipped" : 0,
+      "successful" : 1,
+      "total" : 1
+   },
+   "count" : 299922
+}
+
+
$ curl -XDELETE http://localhost:9200/openrxv-items-final
+{"acknowledged":true}%
+
+
$ curl -XDELETE http://localhost:9200/openrxv-items-final
+$ curl -XDELETE http://localhost:9200/openrxv-items-temp
+
diff --git a/docs/2020/12/openrxv-duplicates.png b/docs/2020/12/openrxv-duplicates.png new file mode 100644 index 000000000..daa2ac160 Binary files /dev/null and b/docs/2020/12/openrxv-duplicates.png differ diff --git a/docs/categories/index.html b/docs/categories/index.html index 41a551dff..20f3c1e0f 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 8dcd717da..b1ed36e00 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 5de02e639..60de3e9c0 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 4ab1b4ff6..9f3b2d367 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index ae61eacc0..ce0aff192 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 7943ac0a0..ec342e1b5 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index bb4f30fe7..0b6a514a6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 670163fb7..077253f2c 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index fc884b7c8..1521b4c8e 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 31f6dc824..ffee9787d 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index f2b135913..42b731c7f 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 07d9ce82a..5e515503e 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 71adedcb4..0b9f4033d 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 989ca06f8..7e466f22d 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index dacdb1562..9b64074b6 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 13755d0d0..0bf8e3c9a 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index df62a7d11..9fc289170 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 54d4c759e..83d079132 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index d76d4d874..2605d2062 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 13233b24a..696cf2586 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index e3546b7a4..7a2fe863b 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-12-09T22:48:19+02:00 + 2020-12-10T23:43:09+02:00 https://alanorth.github.io/cgspace-notes/ - 2020-12-09T22:48:19+02:00 + 2020-12-10T23:43:09+02:00 https://alanorth.github.io/cgspace-notes/2020-12/ - 2020-12-09T22:48:19+02:00 + 2020-12-10T23:43:09+02:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-12-09T22:48:19+02:00 + 2020-12-10T23:43:09+02:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-12-09T22:48:19+02:00 + 2020-12-10T23:43:09+02:00 diff --git a/static/2020/12/openrxv-duplicates.png b/static/2020/12/openrxv-duplicates.png new file mode 100644 index 000000000..daa2ac160 Binary files /dev/null and b/static/2020/12/openrxv-duplicates.png differ