From f13538360901aad91f68eb2a943104771243c5fb Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 23 Mar 2021 09:34:40 +0200 Subject: [PATCH] Add notes for 2021-03-22 --- content/posts/2021-03.md | 133 +++++++++++++++++++++ docs/2015-11/index.html | 2 +- docs/2015-12/index.html | 2 +- docs/2016-01/index.html | 2 +- docs/2016-02/index.html | 2 +- docs/2016-03/index.html | 2 +- docs/2016-04/index.html | 2 +- docs/2016-05/index.html | 2 +- docs/2016-06/index.html | 2 +- docs/2016-07/index.html | 2 +- docs/2016-08/index.html | 2 +- docs/2016-09/index.html | 2 +- docs/2016-10/index.html | 2 +- docs/2016-11/index.html | 2 +- docs/2016-12/index.html | 2 +- docs/2017-01/index.html | 2 +- docs/2017-02/index.html | 2 +- docs/2017-03/index.html | 2 +- docs/2017-04/index.html | 2 +- docs/2017-05/index.html | 2 +- docs/2017-06/index.html | 2 +- docs/2017-07/index.html | 2 +- docs/2017-08/index.html | 2 +- docs/2017-09/index.html | 2 +- docs/2017-10/index.html | 2 +- docs/2017-11/index.html | 2 +- docs/2017-12/index.html | 2 +- docs/2018-01/index.html | 2 +- docs/2018-02/index.html | 2 +- docs/2018-03/index.html | 2 +- docs/2018-04/index.html | 2 +- docs/2018-05/index.html | 2 +- docs/2018-06/index.html | 2 +- docs/2018-07/index.html | 2 +- docs/2018-08/index.html | 2 +- docs/2018-09/index.html | 2 +- docs/2018-10/index.html | 2 +- docs/2018-11/index.html | 2 +- docs/2018-12/index.html | 2 +- docs/2019-01/index.html | 2 +- docs/2019-02/index.html | 2 +- docs/2019-03/index.html | 2 +- docs/2019-04/index.html | 2 +- docs/2019-05/index.html | 2 +- docs/2019-06/index.html | 2 +- docs/2019-07/index.html | 2 +- docs/2019-08/index.html | 2 +- docs/2019-09/index.html | 2 +- docs/2019-10/index.html | 2 +- docs/2019-11/index.html | 2 +- docs/2019-12/index.html | 2 +- docs/2020-01/index.html | 2 +- docs/2020-02/index.html | 2 +- docs/2020-03/index.html | 2 +- docs/2020-04/index.html | 2 +- docs/2020-05/index.html | 2 +- docs/2020-06/index.html | 2 +- docs/2020-07/index.html | 2 +- 
docs/2020-08/index.html | 2 +- docs/2020-09/index.html | 2 +- docs/2020-10/index.html | 2 +- docs/2020-11/index.html | 2 +- docs/2020-12/index.html | 2 +- docs/2021-01/index.html | 2 +- docs/2021-02/index.html | 2 +- docs/2021-03/index.html | 133 ++++++++++++++++++++- docs/404.html | 2 +- docs/categories/index.html | 4 +- docs/categories/notes/index.html | 4 +- docs/categories/notes/page/2/index.html | 4 +- docs/categories/notes/page/3/index.html | 4 +- docs/categories/notes/page/4/index.html | 4 +- docs/categories/notes/page/5/index.html | 4 +- docs/cgiar-library-migration/index.html | 2 +- docs/cgspace-cgcorev2-migration/index.html | 2 +- docs/cgspace-dspace6-upgrade/index.html | 2 +- docs/index.html | 4 +- docs/page/2/index.html | 4 +- docs/page/3/index.html | 4 +- docs/page/4/index.html | 4 +- docs/page/5/index.html | 4 +- docs/page/6/index.html | 4 +- docs/page/7/index.html | 4 +- docs/posts/index.html | 4 +- docs/posts/page/2/index.html | 4 +- docs/posts/page/3/index.html | 4 +- docs/posts/page/4/index.html | 4 +- docs/posts/page/5/index.html | 4 +- docs/posts/page/6/index.html | 4 +- docs/posts/page/7/index.html | 4 +- docs/sitemap.xml | 10 +- docs/tags/index.html | 2 +- docs/tags/migration/index.html | 2 +- docs/tags/notes/index.html | 2 +- docs/tags/notes/page/2/index.html | 2 +- docs/tags/notes/page/3/index.html | 2 +- 96 files changed, 379 insertions(+), 123 deletions(-) diff --git a/content/posts/2021-03.md b/content/posts/2021-03.md index fe8190485..924db2c47 100644 --- a/content/posts/2021-03.md +++ b/content/posts/2021-03.md @@ -367,4 +367,137 @@ $ curl -s 'http://localhost:9200/_alias/' | python -m json.tool | less - I also made some minor optimizations in the Pandas code - I [tagged version 0.4.7 of csv-metadata-quality on GitHub](https://github.com/ilri/csv-metadata-quality/releases/tag/v0.4.7) +## 2021-03-18 + +- I added the ability to check for, and fix, "mojibake" characters in csv-metadata-quality + +## 2021-03-21 + +- Last week Atmire asked 
me which browser I was using to test the duplicate checker, which I had [reported](https://tracker.atmire.com/tickets-cgiar-ilri/view-ticket?id=934) as not loading + - I tried to load it in Chrome and it works... hmmm +- Back up the current `openrxv-items-final` index to start a fresh AReS Harvest: + +```console +$ curl -X PUT "localhost:9200/openrxv-items-final/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write": true}}' +$ curl -s -X POST http://localhost:9200/openrxv-items-final/_clone/openrxv-items-final-2021-03-21 +$ curl -X PUT "localhost:9200/openrxv-items-final/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write": false}}' +``` + +- Then start harvesting in the AReS Explorer admin UI + +## 2021-03-22 + +- The harvesting on AReS yesterday completed, but somehow I have twice the number of items: + +```console +$ curl -s 'http://localhost:9200/openrxv-items-final/_count?q=*&pretty' +{ + "count" : 206204, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + } +} +``` + +- Hmmm and even my backup index has a strange number of items: + +```console +$ curl -s 'http://localhost:9200/openrxv-items-final-2021-03-21/_count?q=*&pretty' +{ + "count" : 844, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + } +} +``` + +- I deleted all indexes and re-created the openrxv-items alias: + +```console +$ curl -s -X POST 'http://localhost:9200/_aliases' -H 'Content-Type: application/json' -d'{"actions" : [{"add" : { "index" : "openrxv-items-final", "alias" : "openrxv-items"}}]}' +$ curl -s 'http://localhost:9200/_alias/' | python -m json.tool | less +... 
+ "openrxv-items-temp": { + "aliases": {} + }, + "openrxv-items-final": { + "aliases": { + "openrxv-items": {} + } + } +``` + +- Then I started a new harvest +- I switched the Node.js in the [Ansible infrastructure scripts](https://github.com/ilri/rmg-ansible-public) to v12 since v10 will cease to be supported soon + - I re-deployed DSpace Test (linode26) with Node.js 12 and restarted the server +- The AReS harvest finally finished, with 1047 pages of items, but the `openrxv-items-final` index is empty and the `openrxv-items-temp` index has 103,000 items: + +```console +$ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty' +{ + "count" : 103162, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + } +} +``` + +- I tried to clone the temp index to the final, but got an error: + +```console +$ curl -s -X POST http://localhost:9200/openrxv-items-temp/_clone/openrxv-items-final +{"error":{"root_cause":[{"type":"resource_already_exists_exception","reason":"index [openrxv-items-final/LmxH-rQsTRmTyWex2d8jxw] already exists","index_uuid":"LmxH-rQsTRmTyWex2d8jxw","index":"openrxv-items-final"}],"type":"resource_already_exists_exception","reason":"index [openrxv-items-final/LmxH-rQsTRmTyWex2d8jxw] already exists","index_uuid":"LmxH-rQsTRmTyWex2d8jxw","index":"openrxv-items-final"},"status":400}% +``` + +- I looked in the Docker logs for Elasticsearch and saw a few memory errors: + +```console +java.lang.OutOfMemoryError: Java heap space +``` + +- According to `/usr/share/elasticsearch/config/jvm.options` in the Elasticsearch container the default JVM heap is 1g + - I see the running Java process has `-Xms 1g -Xmx 1g` in its process invocation so I guess that it must indeed be using 1g + - We can [change the heap size with the ES_JAVA_OPTS environment variable](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) + - Or perhaps better, we should [use a jvm.options.d 
file](https://www.elastic.co/guide/en/elasticsearch/reference/master/jvm-options.html) because if you use the environment variable it overrides all other JVM options from the default `jvm.options` + - I tried to set memory to 1536m by binding an options file and restarting the container, but it didn't seem to work + - Nevertheless, after restarting I see 103,000 items in the Explorer... + - But the indexes are still kinda messed up... the `openrxv-items` index is an alias of the wrong index! + +```console + "openrxv-items-final": { + "aliases": {} + }, + "openrxv-items-temp": { + "aliases": { + "openrxv-items": {} + } + }, +``` + +## 2021-03-23 + +- For reference you can also get the Elasticsearch JVM stats from the API: + +```console +$ curl -s 'http://localhost:9200/_nodes/jvm?human' | python -m json.tool +``` + +- I re-deployed AReS with 1.5GB of heap using the `ES_JAVA_OPTS` environment variable + - It turns out that this *is* the recommended way to set the heap: https://www.elastic.co/guide/en/elasticsearch/reference/7.6/jvm-options.html +- Then I fixed the aliases to make sure `openrxv-items` was an alias of `openrxv-items-final`, similar to how I did a few weeks ago +- I re-created the temp index: + +```console +$ curl -XPUT 'http://localhost:9200/openrxv-items-temp' +``` + diff --git a/docs/2015-11/index.html b/docs/2015-11/index.html index 1e6595780..0f05cfee3 100644 --- a/docs/2015-11/index.html +++ b/docs/2015-11/index.html @@ -34,7 +34,7 @@ Last week I had increased the limit from 30 to 60, which seemed to help, but now $ psql -c 'SELECT * from pg_stat_activity;' | grep idle | grep -c cgspace 78 "/> - + diff --git a/docs/2015-12/index.html b/docs/2015-12/index.html index 6a6464dfc..a984a224e 100644 --- a/docs/2015-12/index.html +++ b/docs/2015-12/index.html @@ -36,7 +36,7 @@ Replace lzop with xz in log compression cron jobs on DSpace Test—it uses less -rw-rw-r-- 1 tomcat7 tomcat7 387K Nov 18 23:59 dspace.log.2015-11-18.lzo -rw-rw-r-- 1 tomcat7 tomcat7 
169K Nov 18 23:59 dspace.log.2015-11-18.xz "/> - + diff --git a/docs/2016-01/index.html b/docs/2016-01/index.html index 0bf37b1dc..8eacb0082 100644 --- a/docs/2016-01/index.html +++ b/docs/2016-01/index.html @@ -28,7 +28,7 @@ Move ILRI collection 10568/12503 from 10568/27869 to 10568/27629 using the move_ I realized it is only necessary to clear the Cocoon cache after moving collections—rather than reindexing—as no metadata has changed, and therefore no search or browse indexes need to be updated. Update GitHub wiki for documentation of maintenance tasks. "/> - + diff --git a/docs/2016-02/index.html b/docs/2016-02/index.html index c73163210..e62dd3f2c 100644 --- a/docs/2016-02/index.html +++ b/docs/2016-02/index.html @@ -38,7 +38,7 @@ I noticed we have a very interesting list of countries on CGSpace: Not only are there 49,000 countries, we have some blanks (25)… Also, lots of things like “COTE D`LVOIRE” and “COTE D IVOIRE” "/> - + diff --git a/docs/2016-03/index.html b/docs/2016-03/index.html index 5d9a7e984..0eaa927a3 100644 --- a/docs/2016-03/index.html +++ b/docs/2016-03/index.html @@ -28,7 +28,7 @@ Looking at issues with author authorities on CGSpace For some reason we still have the index-lucene-update cron job active on CGSpace, but I’m pretty sure we don’t need it as of the latest few versions of Atmire’s Listings and Reports module Reinstall my local (Mac OS X) DSpace stack with Tomcat 7, PostgreSQL 9.3, and Java JDK 1.7 to match environment on CGSpace server "/> - + diff --git a/docs/2016-04/index.html b/docs/2016-04/index.html index 3bedec55b..1df4d94f0 100644 --- a/docs/2016-04/index.html +++ b/docs/2016-04/index.html @@ -32,7 +32,7 @@ After running DSpace for over five years I’ve never needed to look in any This will save us a few gigs of backup space we’re paying for on S3 Also, I noticed the checker log has some errors we should pay attention to: "/> - + diff --git a/docs/2016-05/index.html b/docs/2016-05/index.html index 50234d372..92cb9b576 100644 
--- a/docs/2016-05/index.html +++ b/docs/2016-05/index.html @@ -34,7 +34,7 @@ There are 3,000 IPs accessing the REST API in a 24-hour period! # awk '{print $1}' /var/log/nginx/rest.log | uniq | wc -l 3168 "/> - + diff --git a/docs/2016-06/index.html b/docs/2016-06/index.html index e85159c3a..97a53f3c3 100644 --- a/docs/2016-06/index.html +++ b/docs/2016-06/index.html @@ -34,7 +34,7 @@ This is their publications set: http://ebrary.ifpri.org/oai/oai.php?verb=ListRec You can see the others by using the OAI ListSets verb: http://ebrary.ifpri.org/oai/oai.php?verb=ListSets Working on second phase of metadata migration, looks like this will work for moving CPWF-specific data in dc.identifier.fund to cg.identifier.cpwfproject and then the rest to dc.description.sponsorship "/> - + diff --git a/docs/2016-07/index.html b/docs/2016-07/index.html index 40e9f6358..8205d90ea 100644 --- a/docs/2016-07/index.html +++ b/docs/2016-07/index.html @@ -44,7 +44,7 @@ dspacetest=# select text_value from metadatavalue where metadata_field_id=3 and In this case the select query was showing 95 results before the update "/> - + diff --git a/docs/2016-08/index.html b/docs/2016-08/index.html index 9c39d420b..7165e2834 100644 --- a/docs/2016-08/index.html +++ b/docs/2016-08/index.html @@ -42,7 +42,7 @@ $ git checkout -b 55new 5_x-prod $ git reset --hard ilri/5_x-prod $ git rebase -i dspace-5.5 "/> - + diff --git a/docs/2016-09/index.html b/docs/2016-09/index.html index 31ded8565..ed1f7a1ca 100644 --- a/docs/2016-09/index.html +++ b/docs/2016-09/index.html @@ -34,7 +34,7 @@ It looks like we might be able to use OUs now, instead of DCs: $ ldapsearch -x -H ldaps://svcgroot2.cgiarad.org:3269/ -b "dc=cgiarad,dc=org" -D "admigration1@cgiarad.org" -W "(sAMAccountName=admigration1)" "/> - + diff --git a/docs/2016-10/index.html b/docs/2016-10/index.html index 5a9145a74..282a4776c 100644 --- a/docs/2016-10/index.html +++ b/docs/2016-10/index.html @@ -42,7 +42,7 @@ I exported a random item’s metadata as 
CSV, deleted all columns except id 0000-0002-6115-0956||0000-0002-3812-8793||0000-0001-7462-405X "/> - + diff --git a/docs/2016-11/index.html b/docs/2016-11/index.html index b57f35ce3..7f7606309 100644 --- a/docs/2016-11/index.html +++ b/docs/2016-11/index.html @@ -26,7 +26,7 @@ Add dc.type to the output options for Atmire’s Listings and Reports module Add dc.type to the output options for Atmire’s Listings and Reports module (#286) "/> - + diff --git a/docs/2016-12/index.html b/docs/2016-12/index.html index 5ebd3ed39..134e5c66e 100644 --- a/docs/2016-12/index.html +++ b/docs/2016-12/index.html @@ -46,7 +46,7 @@ I see thousands of them in the logs for the last few months, so it’s not r I’ve raised a ticket with Atmire to ask Another worrying error from dspace.log is: "/> - + diff --git a/docs/2017-01/index.html b/docs/2017-01/index.html index 89031d36e..eaf9bca32 100644 --- a/docs/2017-01/index.html +++ b/docs/2017-01/index.html @@ -28,7 +28,7 @@ I checked to see if the Solr sharding task that is supposed to run on January 1s I tested on DSpace Test as well and it doesn’t work there either I asked on the dspace-tech mailing list because it seems to be broken, and actually now I’m not sure if we’ve ever had the sharding task run successfully over all these years "/> - + diff --git a/docs/2017-02/index.html b/docs/2017-02/index.html index ea98e1aa7..37b89ccca 100644 --- a/docs/2017-02/index.html +++ b/docs/2017-02/index.html @@ -50,7 +50,7 @@ DELETE 1 Create issue on GitHub to track the addition of CCAFS Phase II project tags (#301) Looks like we’ll be using cg.identifier.ccafsprojectpii as the field name "/> - + diff --git a/docs/2017-03/index.html b/docs/2017-03/index.html index efbd96aa6..17d545116 100644 --- a/docs/2017-03/index.html +++ b/docs/2017-03/index.html @@ -54,7 +54,7 @@ Interestingly, it seems DSpace 4.x’s thumbnails were sRGB, but forcing reg $ identify ~/Desktop/alc_contrastes_desafios.jpg /Users/aorth/Desktop/alc_contrastes_desafios.jpg JPEG 464x600 
464x600+0+0 8-bit CMYK 168KB 0.000u 0:00.000 "/> - + diff --git a/docs/2017-04/index.html b/docs/2017-04/index.html index 53d218168..dab830886 100644 --- a/docs/2017-04/index.html +++ b/docs/2017-04/index.html @@ -40,7 +40,7 @@ Testing the CMYK patch on a collection with 650 items: $ [dspace]/bin/dspace filter-media -f -i 10568/16498 -p "ImageMagick PDF Thumbnail" -v >& /tmp/filter-media-cmyk.txt "/> - + diff --git a/docs/2017-05/index.html b/docs/2017-05/index.html index 1847b0b15..bd754582a 100644 --- a/docs/2017-05/index.html +++ b/docs/2017-05/index.html @@ -18,7 +18,7 @@ - + diff --git a/docs/2017-06/index.html b/docs/2017-06/index.html index f7e67726b..39f7da2a8 100644 --- a/docs/2017-06/index.html +++ b/docs/2017-06/index.html @@ -18,7 +18,7 @@ - + diff --git a/docs/2017-07/index.html b/docs/2017-07/index.html index 3475fce06..e3de54e18 100644 --- a/docs/2017-07/index.html +++ b/docs/2017-07/index.html @@ -36,7 +36,7 @@ Merge changes for WLE Phase II theme rename (#329) Looking at extracting the metadata registries from ICARDA’s MEL DSpace database so we can compare fields with CGSpace We can use PostgreSQL’s extended output format (-x) plus sed to format the output into quasi XML: "/> - + diff --git a/docs/2017-08/index.html b/docs/2017-08/index.html index 1005fd614..3e4c46362 100644 --- a/docs/2017-08/index.html +++ b/docs/2017-08/index.html @@ -60,7 +60,7 @@ This was due to newline characters in the dc.description.abstract column, which I exported a new CSV from the collection on DSpace Test and then manually removed the characters in vim using g/^$/d Then I cleaned up the author authorities and HTML characters in OpenRefine and sent the file back to Abenet "/> - + diff --git a/docs/2017-09/index.html b/docs/2017-09/index.html index 62c3c5cc2..ba87921f8 100644 --- a/docs/2017-09/index.html +++ b/docs/2017-09/index.html @@ -32,7 +32,7 @@ Linode sent an alert that CGSpace (linode18) was using 261% CPU for the past two Ask Sisay to clean up the WLE approvers 
a bit, as Marianne’s user account is both in the approvers step as well as the group "/> - + diff --git a/docs/2017-10/index.html b/docs/2017-10/index.html index 1d8f1e3d6..9e9dba195 100644 --- a/docs/2017-10/index.html +++ b/docs/2017-10/index.html @@ -34,7 +34,7 @@ http://hdl.handle.net/10568/78495||http://hdl.handle.net/10568/79336 There appears to be a pattern but I’ll have to look a bit closer and try to clean them up automatically, either in SQL or in OpenRefine Add Katherine Lutz to the groups for content submission and edit steps of the CGIAR System collections "/> - + diff --git a/docs/2017-11/index.html b/docs/2017-11/index.html index 7112cda21..8e176dd34 100644 --- a/docs/2017-11/index.html +++ b/docs/2017-11/index.html @@ -48,7 +48,7 @@ Generate list of authors on CGSpace for Peter to go through and correct: dspace=# \copy (select distinct text_value, count(*) as count from metadatavalue where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author') AND resource_type_id = 2 group by text_value order by count desc) to /tmp/authors.csv with csv; COPY 54701 "/> - + diff --git a/docs/2017-12/index.html b/docs/2017-12/index.html index e7121eda3..899f40dd1 100644 --- a/docs/2017-12/index.html +++ b/docs/2017-12/index.html @@ -30,7 +30,7 @@ The logs say “Timeout waiting for idle object” PostgreSQL activity says there are 115 connections currently The list of connections to XMLUI and REST API for today: "/> - + diff --git a/docs/2018-01/index.html b/docs/2018-01/index.html index 7f3d3361f..c5e335b30 100644 --- a/docs/2018-01/index.html +++ b/docs/2018-01/index.html @@ -150,7 +150,7 @@ dspace.log.2018-01-02:34 Danny wrote to ask for help renewing the wildcard ilri.org certificate and I advised that we should probably use Let’s Encrypt if it’s just a handful of domains "/> - + diff --git a/docs/2018-02/index.html b/docs/2018-02/index.html index 6b4e9ef1e..7c3430026 100644 --- 
a/docs/2018-02/index.html +++ b/docs/2018-02/index.html @@ -30,7 +30,7 @@ We don’t need to distinguish between internal and external works, so that Yesterday I figured out how to monitor DSpace sessions using JMX I copied the logic in the jmx_tomcat_dbpools provided by Ubuntu’s munin-plugins-java package and used the stuff I discovered about JMX in 2018-01 "/> - + diff --git a/docs/2018-03/index.html b/docs/2018-03/index.html index 082bc4e1b..21d62124f 100644 --- a/docs/2018-03/index.html +++ b/docs/2018-03/index.html @@ -24,7 +24,7 @@ Export a CSV of the IITA community metadata for Martin Mueller Export a CSV of the IITA community metadata for Martin Mueller "/> - + diff --git a/docs/2018-04/index.html b/docs/2018-04/index.html index aa56a9436..871382e89 100644 --- a/docs/2018-04/index.html +++ b/docs/2018-04/index.html @@ -26,7 +26,7 @@ Catalina logs at least show some memory errors yesterday: I tried to test something on DSpace Test but noticed that it’s down since god knows when Catalina logs at least show some memory errors yesterday: "/> - + diff --git a/docs/2018-05/index.html b/docs/2018-05/index.html index 48b7173b0..88cb23252 100644 --- a/docs/2018-05/index.html +++ b/docs/2018-05/index.html @@ -38,7 +38,7 @@ http://localhost:3000/solr/statistics/update?stream.body=%3Ccommit/%3E Then I reduced the JVM heap size from 6144 back to 5120m Also, I switched it to use OpenJDK instead of Oracle Java, as well as re-worked the Ansible infrastructure scripts to support hosts choosing which distribution they want to use "/> - + diff --git a/docs/2018-06/index.html b/docs/2018-06/index.html index e53f77be8..de5da0b5a 100644 --- a/docs/2018-06/index.html +++ b/docs/2018-06/index.html @@ -58,7 +58,7 @@ real 74m42.646s user 8m5.056s sys 2m7.289s "/> - + diff --git a/docs/2018-07/index.html b/docs/2018-07/index.html index db778383c..83065df41 100644 --- a/docs/2018-07/index.html +++ b/docs/2018-07/index.html @@ -36,7 +36,7 @@ During the mvn package stage on the 5.8 branch 
I kept getting issues with java r There is insufficient memory for the Java Runtime Environment to continue. "/> - + diff --git a/docs/2018-08/index.html b/docs/2018-08/index.html index 42a35b7e9..d831c7bb1 100644 --- a/docs/2018-08/index.html +++ b/docs/2018-08/index.html @@ -46,7 +46,7 @@ Anyways, perhaps I should increase the JVM heap from 5120m to 6144m like we did The server only has 8GB of RAM so we’ll eventually need to upgrade to a larger one because we’ll start starving the OS, PostgreSQL, and command line batch processes I ran all system updates on DSpace Test and rebooted it "/> - + diff --git a/docs/2018-09/index.html b/docs/2018-09/index.html index 88b0a5e14..dd5406b81 100644 --- a/docs/2018-09/index.html +++ b/docs/2018-09/index.html @@ -30,7 +30,7 @@ I’ll update the DSpace role in our Ansible infrastructure playbooks and ru Also, I’ll re-run the postgresql tasks because the custom PostgreSQL variables are dynamic according to the system’s RAM, and we never re-ran them after migrating to larger Linodes last month I’m testing the new DSpace 5.8 branch in my Ubuntu 18.04 environment and I’m getting those autowire errors in Tomcat 8.5.30 again: "/> - + diff --git a/docs/2018-10/index.html b/docs/2018-10/index.html index 9c841ed9d..04e1e1b01 100644 --- a/docs/2018-10/index.html +++ b/docs/2018-10/index.html @@ -26,7 +26,7 @@ I created a GitHub issue to track this #389, because I’m super busy in Nai Phil Thornton got an ORCID identifier so we need to add it to the list on CGSpace and tag his existing items I created a GitHub issue to track this #389, because I’m super busy in Nairobi right now "/> - + diff --git a/docs/2018-11/index.html b/docs/2018-11/index.html index c3857ec47..2f356a5bc 100644 --- a/docs/2018-11/index.html +++ b/docs/2018-11/index.html @@ -36,7 +36,7 @@ Send a note about my dspace-statistics-api to the dspace-tech mailing list Linode has been sending mails a few times a day recently that CGSpace (linode18) has had high CPU usage Today 
these are the top 10 IPs: "/> - + diff --git a/docs/2018-12/index.html b/docs/2018-12/index.html index 3a8b796b5..9ca826b97 100644 --- a/docs/2018-12/index.html +++ b/docs/2018-12/index.html @@ -36,7 +36,7 @@ Then I ran all system updates and restarted the server I noticed that there is another issue with PDF thumbnails on CGSpace, and I see there was another Ghostscript vulnerability last week "/> - + diff --git a/docs/2019-01/index.html b/docs/2019-01/index.html index 257d93fd6..4bfc2c6dd 100644 --- a/docs/2019-01/index.html +++ b/docs/2019-01/index.html @@ -50,7 +50,7 @@ I don’t see anything interesting in the web server logs around that time t 357 207.46.13.1 903 54.70.40.11 "/> - + diff --git a/docs/2019-02/index.html b/docs/2019-02/index.html index ea087cbcd..56432d96b 100644 --- a/docs/2019-02/index.html +++ b/docs/2019-02/index.html @@ -72,7 +72,7 @@ real 0m19.873s user 0m22.203s sys 0m1.979s "/> - + diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html index 396f977eb..24f35c68a 100644 --- a/docs/2019-03/index.html +++ b/docs/2019-03/index.html @@ -46,7 +46,7 @@ Most worryingly, there are encoding errors in the abstracts for eleven items, fo I think I will need to ask Udana to re-copy and paste the abstracts with more care using Google Docs "/> - + diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html index c3f134ac7..63eae6a14 100644 --- a/docs/2019-04/index.html +++ b/docs/2019-04/index.html @@ -64,7 +64,7 @@ $ ./fix-metadata-values.py -i /tmp/2019-02-21-fix-4-regions.csv -db dspace -u ds $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-2-countries.csv -db dspace -u dspace -p 'fuuu' -m 228 -f cg.coverage.country -d $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace -u dspace -p 'fuuu' -m 231 -f cg.coverage.region -d "/> - + diff --git a/docs/2019-05/index.html b/docs/2019-05/index.html index 6f31a29ef..612c3fdbe 100644 --- a/docs/2019-05/index.html +++ b/docs/2019-05/index.html @@ -48,7 +48,7 @@ 
DELETE 1 But after this I tried to delete the item from the XMLUI and it is still present… "/> - + diff --git a/docs/2019-06/index.html b/docs/2019-06/index.html index c9654cf6e..b500d89d1 100644 --- a/docs/2019-06/index.html +++ b/docs/2019-06/index.html @@ -34,7 +34,7 @@ Run system updates on CGSpace (linode18) and reboot it Skype with Marie-Angélique and Abenet about CG Core v2 "/> - + diff --git a/docs/2019-07/index.html b/docs/2019-07/index.html index f10d82794..f4e805047 100644 --- a/docs/2019-07/index.html +++ b/docs/2019-07/index.html @@ -38,7 +38,7 @@ CGSpace Abenet had another similar issue a few days ago when trying to find the stats for 2018 in the RTB community "/> - + diff --git a/docs/2019-08/index.html b/docs/2019-08/index.html index d7dbc4810..e4c802aaa 100644 --- a/docs/2019-08/index.html +++ b/docs/2019-08/index.html @@ -46,7 +46,7 @@ After rebooting, all statistics cores were loaded… wow, that’s luck Run system updates on DSpace Test (linode19) and reboot it "/> - + diff --git a/docs/2019-09/index.html b/docs/2019-09/index.html index c86d58fd4..042d252a6 100644 --- a/docs/2019-09/index.html +++ b/docs/2019-09/index.html @@ -72,7 +72,7 @@ Here are the top ten IPs in the nginx XMLUI and REST/OAI logs this morning: 7249 2a01:7e00::f03c:91ff:fe18:7396 9124 45.5.186.2 "/> - + diff --git a/docs/2019-10/index.html b/docs/2019-10/index.html index 54d976b2b..1eac464cb 100644 --- a/docs/2019-10/index.html +++ b/docs/2019-10/index.html @@ -18,7 +18,7 @@ - + diff --git a/docs/2019-11/index.html b/docs/2019-11/index.html index ba0fcae01..fb6851639 100644 --- a/docs/2019-11/index.html +++ b/docs/2019-11/index.html @@ -58,7 +58,7 @@ Let’s see how many of the REST API requests were for bitstreams (because t # zcat --force /var/log/nginx/rest.log.*.gz | grep -E "[0-9]{1,2}/Oct/2019" | grep -c -E "/rest/bitstreams" 106781 "/> - + diff --git a/docs/2019-12/index.html b/docs/2019-12/index.html index 33a4f0c0e..57618b1fc 100644 --- a/docs/2019-12/index.html +++ 
b/docs/2019-12/index.html @@ -46,7 +46,7 @@ Make sure all packages are up to date and the package manager is up to date, the # dpkg -C # reboot "/> - + diff --git a/docs/2020-01/index.html b/docs/2020-01/index.html index ded9f1782..5ff479305 100644 --- a/docs/2020-01/index.html +++ b/docs/2020-01/index.html @@ -56,7 +56,7 @@ I tweeted the CGSpace repository link "/> - + diff --git a/docs/2020-02/index.html b/docs/2020-02/index.html index f1f4cccf9..a65d912f8 100644 --- a/docs/2020-02/index.html +++ b/docs/2020-02/index.html @@ -38,7 +38,7 @@ The code finally builds and runs with a fresh install "/> - + diff --git a/docs/2020-03/index.html b/docs/2020-03/index.html index 4599300c2..9815c85f6 100644 --- a/docs/2020-03/index.html +++ b/docs/2020-03/index.html @@ -42,7 +42,7 @@ You need to download this into the DSpace 6.x source and compile it "/> - + diff --git a/docs/2020-04/index.html b/docs/2020-04/index.html index 6fb0bf9ad..0547a9f05 100644 --- a/docs/2020-04/index.html +++ b/docs/2020-04/index.html @@ -48,7 +48,7 @@ The third item now has a donut with score 1 since I tweeted it last week On the same note, the one item Abenet pointed out last week now has a donut with score of 104 after I tweeted it last week "/> - + diff --git a/docs/2020-05/index.html b/docs/2020-05/index.html index d6855c0c5..6ba3758f1 100644 --- a/docs/2020-05/index.html +++ b/docs/2020-05/index.html @@ -34,7 +34,7 @@ I see that CGSpace (linode18) is still using PostgreSQL JDBC driver version 42.2 "/> - + diff --git a/docs/2020-06/index.html b/docs/2020-06/index.html index 6e1bd6088..49ea8ef9c 100644 --- a/docs/2020-06/index.html +++ b/docs/2020-06/index.html @@ -36,7 +36,7 @@ I sent Atmire the dspace.log from today and told them to log into the server to In other news, I checked the statistics API on DSpace 6 and it’s working I tried to build the OAI registry on the freshly migrated DSpace 6 on DSpace Test and I get an error: "/> - + diff --git a/docs/2020-07/index.html 
b/docs/2020-07/index.html index e280f1f24..34bf17014 100644 --- a/docs/2020-07/index.html +++ b/docs/2020-07/index.html @@ -38,7 +38,7 @@ I restarted Tomcat and PostgreSQL and the issue was gone Since I was restarting Tomcat anyways I decided to redeploy the latest changes from the 5_x-prod branch and I added a note about COVID-19 items to the CGSpace frontpage at Peter’s request "/> - + diff --git a/docs/2020-08/index.html b/docs/2020-08/index.html index 09ed362af..8a1fa96de 100644 --- a/docs/2020-08/index.html +++ b/docs/2020-08/index.html @@ -36,7 +36,7 @@ It is class based so I can easily add support for other vocabularies, and the te "/> - + diff --git a/docs/2020-09/index.html b/docs/2020-09/index.html index c84d8a2ce..c18acba20 100644 --- a/docs/2020-09/index.html +++ b/docs/2020-09/index.html @@ -48,7 +48,7 @@ I filed a bug on OpenRXV: https://github.com/ilri/OpenRXV/issues/39 I filed an issue on OpenRXV to make some minor edits to the admin UI: https://github.com/ilri/OpenRXV/issues/40 "/> - + diff --git a/docs/2020-10/index.html b/docs/2020-10/index.html index c23203f0a..ba2fa9646 100644 --- a/docs/2020-10/index.html +++ b/docs/2020-10/index.html @@ -44,7 +44,7 @@ During the FlywayDB migration I got an error: "/> - + diff --git a/docs/2020-11/index.html b/docs/2020-11/index.html index cab5d45dd..e4a9ba27c 100644 --- a/docs/2020-11/index.html +++ b/docs/2020-11/index.html @@ -32,7 +32,7 @@ So far we’ve spent at least fifty hours to process the statistics and stat "/> - + diff --git a/docs/2020-12/index.html b/docs/2020-12/index.html index 2b83ea8ab..8ed6481d1 100644 --- a/docs/2020-12/index.html +++ b/docs/2020-12/index.html @@ -36,7 +36,7 @@ I started processing those (about 411,000 records): "/> - + diff --git a/docs/2021-01/index.html b/docs/2021-01/index.html index 8b0f61c26..636bddc2f 100644 --- a/docs/2021-01/index.html +++ b/docs/2021-01/index.html @@ -50,7 +50,7 @@ For example, this item has 51 views on CGSpace, but 0 on AReS "/> - + diff --git 
a/docs/2021-02/index.html b/docs/2021-02/index.html index bc4d47c9f..4e376508c 100644 --- a/docs/2021-02/index.html +++ b/docs/2021-02/index.html @@ -60,7 +60,7 @@ $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty } } "/> - + diff --git a/docs/2021-03/index.html b/docs/2021-03/index.html index 8126680bd..1854ab9cb 100644 --- a/docs/2021-03/index.html +++ b/docs/2021-03/index.html @@ -19,7 +19,7 @@ Also, we found some issues building and running OpenRXV currently due to ecosyst - + @@ -34,7 +34,7 @@ Also, we found some issues building and running OpenRXV currently due to ecosyst "/> - + @@ -44,9 +44,9 @@ Also, we found some issues building and running OpenRXV currently due to ecosyst "@type": "BlogPosting", "headline": "March, 2021", "url": "https://alanorth.github.io/cgspace-notes/2021-03/", - "wordCount": "2337", + "wordCount": "2914", "datePublished": "2021-03-01T10:13:54+02:00", - "dateModified": "2021-03-14T21:34:07+02:00", + "dateModified": "2021-03-17T14:57:45+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -522,7 +522,130 @@ $ curl -X PUT "localhost:9200/openrxv-items-final/_settings" -H 'Conte
  • I also made some minor optimizations in the Pandas code
  • I tagged version 0.4.7 of csv-metadata-quality on GitHub
  • - +

    2021-03-18

    + +

    2021-03-21

    + +
    $ curl -X PUT "localhost:9200/openrxv-items-final/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write": true}}'
    +$ curl -s -X POST http://localhost:9200/openrxv-items-final/_clone/openrxv-items-final-2021-03-21
    +$ curl -X PUT "localhost:9200/openrxv-items-final/_settings" -H 'Content-Type: application/json' -d'{"settings": {"index.blocks.write": false}}'
    +
    +

    2021-03-22

    + +
    $ curl -s 'http://localhost:9200/openrxv-items-final/_count?q=*&pretty'
    +{
    +  "count" : 206204,
    +  "_shards" : {
    +    "total" : 1,
    +    "successful" : 1,
    +    "skipped" : 0,
    +    "failed" : 0
    +  }
    +}
    +
    +
    $ curl -s 'http://localhost:9200/openrxv-items-final-2021-03-21/_count?q=*&pretty'
    +{
    +  "count" : 844,
    +  "_shards" : {
    +    "total" : 1,
    +    "successful" : 1,
    +    "skipped" : 0,
    +    "failed" : 0
    +  }
    +}
    +
    +
    $ curl -s -X POST 'http://localhost:9200/_aliases' -H 'Content-Type: application/json' -d'{"actions" : [{"add" : { "index" : "openrxv-items-final", "alias" : "openrxv-items"}}]}'
    +$ curl -s 'http://localhost:9200/_alias/' | python -m json.tool | less
    +...
    +    "openrxv-items-temp": {
    +        "aliases": {}
    +    },
    +    "openrxv-items-final": {
    +        "aliases": {
    +            "openrxv-items": {}
    +        }
    +    }
    +
    +
    $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty'
    +{
    +  "count" : 103162,
    +  "_shards" : {
    +    "total" : 1,
    +    "successful" : 1,
    +    "skipped" : 0,
    +    "failed" : 0
    +  }
    +}
    +
    +
    $ curl -s -X POST http://localhost:9200/openrxv-items-temp/_clone/openrxv-items-final
    +{"error":{"root_cause":[{"type":"resource_already_exists_exception","reason":"index [openrxv-items-final/LmxH-rQsTRmTyWex2d8jxw] already exists","index_uuid":"LmxH-rQsTRmTyWex2d8jxw","index":"openrxv-items-final"}],"type":"resource_already_exists_exception","reason":"index [openrxv-items-final/LmxH-rQsTRmTyWex2d8jxw] already exists","index_uuid":"LmxH-rQsTRmTyWex2d8jxw","index":"openrxv-items-final"},"status":400}% 
    +
    +
    java.lang.OutOfMemoryError: Java heap space
    +
    +
        "openrxv-items-final": {
    +        "aliases": {}
    +    },
    +    "openrxv-items-temp": {
    +        "aliases": {
    +            "openrxv-items": {}
    +        }
    +    },
    +

    2021-03-23

    + +
    $ curl -s 'http://localhost:9200/_nodes/jvm?human' | python -m json.tool
    +
    +
    $ curl -XPUT 'http://localhost:9200/openrxv-items-temp'
    +
    diff --git a/docs/404.html b/docs/404.html index 3261ba8d1..ea4826f2d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -17,7 +17,7 @@ - + diff --git a/docs/categories/index.html b/docs/categories/index.html index d768ea921..810eab254 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 7209f5b57..a3c6063f3 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index efaed241f..d04bfbf46 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index ad41128a9..7b5fbd986 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index ed93ef4e7..d10587f0a 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 8ae76786c..ec68a9363 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/cgiar-library-migration/index.html b/docs/cgiar-library-migration/index.html index 3f161c421..108dce5fd 100644 --- a/docs/cgiar-library-migration/index.html +++ b/docs/cgiar-library-migration/index.html @@ -18,7 +18,7 @@ - + diff --git a/docs/cgspace-cgcorev2-migration/index.html b/docs/cgspace-cgcorev2-migration/index.html index 648cbeaf0..9e3a56e26 100644 --- a/docs/cgspace-cgcorev2-migration/index.html +++ 
b/docs/cgspace-cgcorev2-migration/index.html @@ -18,7 +18,7 @@ - + diff --git a/docs/cgspace-dspace6-upgrade/index.html b/docs/cgspace-dspace6-upgrade/index.html index 6fdbc44b2..d5a92476a 100644 --- a/docs/cgspace-dspace6-upgrade/index.html +++ b/docs/cgspace-dspace6-upgrade/index.html @@ -18,7 +18,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 63b85d544..1d754cc24 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 2ce7ad202..b6181ca5a 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 792aebfc2..6eff501ab 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 90d288626..c782f5264 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index d4d84fbe6..cd5e80553 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 0dc003838..e9374a34f 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index fbd5bf043..354437d4f 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/index.html b/docs/posts/index.html index ad324a830..c26f209c3 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 5e1e03967..9bcfbdbe9 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/page/3/index.html 
b/docs/posts/page/3/index.html index 59921591d..34b8524c3 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index aa434476c..8fa6ef505 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 99dbf27ce..4072b76c7 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 7484f24eb..d5116f51c 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index ce5a27364..f40cb6224 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,14 +10,14 @@ - + - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 119a002f3..8942df60f 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2021-03-14T21:34:07+02:00 + 2021-03-17T14:57:45+02:00 https://alanorth.github.io/cgspace-notes/ - 2021-03-14T21:34:07+02:00 + 2021-03-17T14:57:45+02:00 https://alanorth.github.io/cgspace-notes/2021-03/ - 2021-03-14T21:34:07+02:00 + 2021-03-17T14:57:45+02:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2021-03-14T21:34:07+02:00 + 2021-03-17T14:57:45+02:00 https://alanorth.github.io/cgspace-notes/posts/ - 2021-03-14T21:34:07+02:00 + 2021-03-17T14:57:45+02:00 https://alanorth.github.io/cgspace-notes/2021-02/ 2021-03-04T22:46:05+02:00 diff --git a/docs/tags/index.html b/docs/tags/index.html index 4935669f4..542de3b98 100644 --- a/docs/tags/index.html +++ b/docs/tags/index.html @@ -17,7 +17,7 @@ - + diff --git a/docs/tags/migration/index.html 
b/docs/tags/migration/index.html index df51d1ef0..b5cd38a9e 100644 --- a/docs/tags/migration/index.html +++ b/docs/tags/migration/index.html @@ -17,7 +17,7 @@ - + diff --git a/docs/tags/notes/index.html b/docs/tags/notes/index.html index 1f0277791..dabdecb30 100644 --- a/docs/tags/notes/index.html +++ b/docs/tags/notes/index.html @@ -17,7 +17,7 @@ - + diff --git a/docs/tags/notes/page/2/index.html b/docs/tags/notes/page/2/index.html index e21398976..27f199a27 100644 --- a/docs/tags/notes/page/2/index.html +++ b/docs/tags/notes/page/2/index.html @@ -17,7 +17,7 @@ - + diff --git a/docs/tags/notes/page/3/index.html b/docs/tags/notes/page/3/index.html index 5466e9224..2e2f83ed7 100644 --- a/docs/tags/notes/page/3/index.html +++ b/docs/tags/notes/page/3/index.html @@ -17,7 +17,7 @@ - +