diff --git a/content/posts/2022-09.md b/content/posts/2022-09.md index 8045e61cd..62b048822 100644 --- a/content/posts/2022-09.md +++ b/content/posts/2022-09.md @@ -121,4 +121,20 @@ Total number of hits from bots: 12220 - I had a meeting with Nicky from UNEP to discuss issues they are having with their DSpace - I told her about the meeting of DSpace community people that we're planning at ILRI in the next few weeks +## 2022-09-09 + +- Add some value mappings to AReS because I see a lot of incorrect regions and countries +- I also found some values that were blank in CGSpace so I deleted them: + +```console +dspace=# BEGIN; +BEGIN +dspace=# DELETE FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND text_value=''; +DELETE 70 +dspace=# COMMIT; +COMMIT +``` + +- Start a full Discovery index on CGSpace to catch these changes in the Discovery + diff --git a/docs/2015-11/index.html b/docs/2015-11/index.html index 93d043292..28c39375a 100644 --- a/docs/2015-11/index.html +++ b/docs/2015-11/index.html @@ -64,7 +64,7 @@ $ psql -c 'SELECT * from pg_stat_activity;' | grep idle | grep -c cgspac - + diff --git a/docs/2015-12/index.html b/docs/2015-12/index.html index c9e0edfa0..70946375b 100644 --- a/docs/2015-12/index.html +++ b/docs/2015-12/index.html @@ -66,7 +66,7 @@ Replace lzop with xz in log compression cron jobs on DSpace Test—it uses less - + diff --git a/docs/2016-01/index.html b/docs/2016-01/index.html index d1df24510..bdb0ed3aa 100644 --- a/docs/2016-01/index.html +++ b/docs/2016-01/index.html @@ -58,7 +58,7 @@ Update GitHub wiki for documentation of maintenance tasks. - + diff --git a/docs/2016-02/index.html b/docs/2016-02/index.html index 3f987b8f3..bebaa8b3d 100644 --- a/docs/2016-02/index.html +++ b/docs/2016-02/index.html @@ -68,7 +68,7 @@ Also, lots of things like “COTE D`LVOIRE” and “COTE D IVOIRE&r - + diff --git a/docs/2016-03/index.html b/docs/2016-03/index.html index fc5dacc53..dfd977b37 100644 --- a/docs/2016-03/index.html +++ b/docs/2016-03/index.html @@ -58,7 +58,7 @@ Reinstall my local (Mac OS X) DSpace stack with Tomcat 7, PostgreSQL 9.3, and Ja - + diff --git a/docs/2016-04/index.html b/docs/2016-04/index.html index 4d41cdf9f..579ea5652 100644 --- a/docs/2016-04/index.html +++ b/docs/2016-04/index.html @@ -62,7 +62,7 @@ Also, I noticed the checker log has some errors we should pay attention to: - + diff --git a/docs/2016-05/index.html b/docs/2016-05/index.html index f37a8cd50..e3eda06bd 100644 --- a/docs/2016-05/index.html +++ b/docs/2016-05/index.html @@ -64,7 +64,7 @@ There are 3,000 IPs accessing the REST API in a 24-hour period! - + diff --git a/docs/2016-06/index.html b/docs/2016-06/index.html index b3658a4fc..41024c25d 100644 --- a/docs/2016-06/index.html +++ b/docs/2016-06/index.html @@ -64,7 +64,7 @@ Working on second phase of metadata migration, looks like this will work for mov - + diff --git a/docs/2016-07/index.html b/docs/2016-07/index.html index f8abee1c8..ca0f729fd 100644 --- a/docs/2016-07/index.html +++ b/docs/2016-07/index.html @@ -74,7 +74,7 @@ In this case the select query was showing 95 results before the update - + diff --git a/docs/2016-08/index.html b/docs/2016-08/index.html index b87a4d18e..c5bb99151 100644 --- a/docs/2016-08/index.html +++ b/docs/2016-08/index.html @@ -72,7 +72,7 @@ $ git rebase -i dspace-5.5 - + diff --git a/docs/2016-09/index.html b/docs/2016-09/index.html index 46326c8eb..edbaf193a 100644 --- a/docs/2016-09/index.html +++ b/docs/2016-09/index.html @@ -64,7 +64,7 @@ $ ldapsearch -x -H ldaps://svcgroot2.cgiarad.org:3269/ -b "dc=cgiarad,dc=org - + diff --git a/docs/2016-10/index.html b/docs/2016-10/index.html index 0a123814b..85eedca5d 100644 --- a/docs/2016-10/index.html +++ b/docs/2016-10/index.html @@ -72,7 +72,7 @@ I exported a random item’s metadata as CSV, deleted all columns except id - + diff --git a/docs/2016-11/index.html b/docs/2016-11/index.html index 343235d9f..0240d7c54 100644 --- a/docs/2016-11/index.html +++ b/docs/2016-11/index.html @@ -56,7 +56,7 @@ Add dc.type to the output options for Atmire’s Listings and Reports module - + diff --git a/docs/2016-12/index.html b/docs/2016-12/index.html index ea66a3c1a..76dafba1a 100644 --- a/docs/2016-12/index.html +++ b/docs/2016-12/index.html @@ -76,7 +76,7 @@ Another worrying error from dspace.log is: - + diff --git a/docs/2017-01/index.html b/docs/2017-01/index.html index 44f81dae7..8e2d5a0d3 100644 --- a/docs/2017-01/index.html +++ b/docs/2017-01/index.html @@ -58,7 +58,7 @@ I asked on the dspace-tech mailing list because it seems to be broken, and actua - + diff --git a/docs/2017-02/index.html b/docs/2017-02/index.html index 0ac98c8e0..cd5249c34 100644 --- a/docs/2017-02/index.html +++ b/docs/2017-02/index.html @@ -80,7 +80,7 @@ Looks like we’ll be using cg.identifier.ccafsprojectpii as the field name - + diff --git a/docs/2017-03/index.html b/docs/2017-03/index.html index a7f12c035..49d5d10d4 100644 --- a/docs/2017-03/index.html +++ b/docs/2017-03/index.html @@ -84,7 +84,7 @@ $ identify ~/Desktop/alc_contrastes_desafios.jpg - + diff --git a/docs/2017-04/index.html b/docs/2017-04/index.html index 43cd53f02..b6518bebb 100644 --- a/docs/2017-04/index.html +++ b/docs/2017-04/index.html @@ -70,7 +70,7 @@ $ [dspace]/bin/dspace filter-media -f -i 10568/16498 -p "ImageMagick PDF Thu - + diff --git a/docs/2017-05/index.html b/docs/2017-05/index.html index 19fd73b7d..5796e9479 100644 --- a/docs/2017-05/index.html +++ b/docs/2017-05/index.html @@ -48,7 +48,7 @@ - + diff --git a/docs/2017-06/index.html b/docs/2017-06/index.html index 39669166d..2ca6fdd73 100644 --- a/docs/2017-06/index.html +++ b/docs/2017-06/index.html @@ -48,7 +48,7 @@ - + diff --git a/docs/2017-07/index.html b/docs/2017-07/index.html index 5699fd43c..9868648f0 100644 --- a/docs/2017-07/index.html +++ b/docs/2017-07/index.html @@ -66,7 +66,7 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the - + diff --git a/docs/2017-08/index.html b/docs/2017-08/index.html index 519dea5ea..19f8499fb 100644 --- a/docs/2017-08/index.html +++ b/docs/2017-08/index.html @@ -90,7 +90,7 @@ Then I cleaned up the author authorities and HTML characters in OpenRefine and s - + diff --git a/docs/2017-09/index.html b/docs/2017-09/index.html index 4c311fb6b..7286aca8f 100644 --- a/docs/2017-09/index.html +++ b/docs/2017-09/index.html @@ -62,7 +62,7 @@ Ask Sisay to clean up the WLE approvers a bit, as Marianne’s user account - + diff --git a/docs/2017-10/index.html b/docs/2017-10/index.html index 173b30ea3..4bffbf16d 100644 --- a/docs/2017-10/index.html +++ b/docs/2017-10/index.html @@ -64,7 +64,7 @@ Add Katherine Lutz to the groups for content submission and edit steps of the CG - + diff --git a/docs/2017-11/index.html b/docs/2017-11/index.html index 40b386e0e..5a11554cf 100644 --- a/docs/2017-11/index.html +++ b/docs/2017-11/index.html @@ -78,7 +78,7 @@ COPY 54701 - + diff --git a/docs/2017-12/index.html b/docs/2017-12/index.html index bd334c2dc..363f0aa94 100644 --- a/docs/2017-12/index.html +++ b/docs/2017-12/index.html @@ -60,7 +60,7 @@ The list of connections to XMLUI and REST API for today: - + diff --git a/docs/2018-01/index.html b/docs/2018-01/index.html index 9377e2d45..261e7cbd7 100644 --- a/docs/2018-01/index.html +++ b/docs/2018-01/index.html @@ -180,7 +180,7 @@ Danny wrote to ask for help renewing the wildcard ilri.org certificate and I adv - + diff --git a/docs/2018-02/index.html b/docs/2018-02/index.html index 30c634f1f..884d2cc16 100644 --- a/docs/2018-02/index.html +++ b/docs/2018-02/index.html @@ -60,7 +60,7 @@ I copied the logic in the jmx_tomcat_dbpools provided by Ubuntu’s munin-pl - + diff --git a/docs/2018-03/index.html b/docs/2018-03/index.html index 0fc8ecb9e..f2992c219 100644 --- a/docs/2018-03/index.html +++ b/docs/2018-03/index.html @@ -54,7 +54,7 @@ Export a CSV of the IITA community metadata for Martin Mueller - + diff --git a/docs/2018-04/index.html b/docs/2018-04/index.html index 5c69835a0..1520776a0 100644 --- a/docs/2018-04/index.html +++ b/docs/2018-04/index.html @@ -56,7 +56,7 @@ Catalina logs at least show some memory errors yesterday: - + diff --git a/docs/2018-05/index.html b/docs/2018-05/index.html index 878ef7656..b7a4e7e8b 100644 --- a/docs/2018-05/index.html +++ b/docs/2018-05/index.html @@ -68,7 +68,7 @@ Also, I switched it to use OpenJDK instead of Oracle Java, as well as re-worked - + diff --git a/docs/2018-06/index.html b/docs/2018-06/index.html index 11ccccc68..483f3bd0c 100644 --- a/docs/2018-06/index.html +++ b/docs/2018-06/index.html @@ -88,7 +88,7 @@ sys 2m7.289s - + diff --git a/docs/2018-07/index.html b/docs/2018-07/index.html index 38b2da271..674b58afc 100644 --- a/docs/2018-07/index.html +++ b/docs/2018-07/index.html @@ -66,7 +66,7 @@ There is insufficient memory for the Java Runtime Environment to continue. - + diff --git a/docs/2018-08/index.html b/docs/2018-08/index.html index 7ecc188d9..c4e64eba9 100644 --- a/docs/2018-08/index.html +++ b/docs/2018-08/index.html @@ -76,7 +76,7 @@ I ran all system updates on DSpace Test and rebooted it - + diff --git a/docs/2018-09/index.html b/docs/2018-09/index.html index 2b9187862..868cf1be6 100644 --- a/docs/2018-09/index.html +++ b/docs/2018-09/index.html @@ -60,7 +60,7 @@ I’m testing the new DSpace 5.8 branch in my Ubuntu 18.04 environment and I - + diff --git a/docs/2018-10/index.html b/docs/2018-10/index.html index e409cc526..66adf972f 100644 --- a/docs/2018-10/index.html +++ b/docs/2018-10/index.html @@ -56,7 +56,7 @@ I created a GitHub issue to track this #389, because I’m super busy in Nai - + diff --git a/docs/2018-11/index.html b/docs/2018-11/index.html index 17efc4fd6..722e6e9d8 100644 --- a/docs/2018-11/index.html +++ b/docs/2018-11/index.html @@ -66,7 +66,7 @@ Today these are the top 10 IPs: - + diff --git a/docs/2018-12/index.html b/docs/2018-12/index.html index 43f6088e5..f02c61581 100644 --- a/docs/2018-12/index.html +++ b/docs/2018-12/index.html @@ -66,7 +66,7 @@ I noticed that there is another issue with PDF thumbnails on CGSpace, and I see - + diff --git a/docs/2019-01/index.html b/docs/2019-01/index.html index 00572e294..f95e69602 100644 --- a/docs/2019-01/index.html +++ b/docs/2019-01/index.html @@ -80,7 +80,7 @@ I don’t see anything interesting in the web server logs around that time t - + diff --git a/docs/2019-02/index.html b/docs/2019-02/index.html index 1e33aea75..11b0c1458 100644 --- a/docs/2019-02/index.html +++ b/docs/2019-02/index.html @@ -102,7 +102,7 @@ sys 0m1.979s - + diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html index fa836e607..1d2d1befc 100644 --- a/docs/2019-03/index.html +++ b/docs/2019-03/index.html @@ -76,7 +76,7 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca - + diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html index 3cd334f1d..de7523592 100644 --- a/docs/2019-04/index.html +++ b/docs/2019-04/index.html @@ -94,7 +94,7 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace - + diff --git a/docs/2019-05/index.html b/docs/2019-05/index.html index 05d66a661..c6da2582a 100644 --- a/docs/2019-05/index.html +++ b/docs/2019-05/index.html @@ -78,7 +78,7 @@ But after this I tried to delete the item from the XMLUI and it is still present - + diff --git a/docs/2019-06/index.html b/docs/2019-06/index.html index b53ae8a2c..97e481701 100644 --- a/docs/2019-06/index.html +++ b/docs/2019-06/index.html @@ -64,7 +64,7 @@ Skype with Marie-Angélique and Abenet about CG Core v2 - + diff --git a/docs/2019-07/index.html b/docs/2019-07/index.html index a30bfa35c..09d3a186f 100644 --- a/docs/2019-07/index.html +++ b/docs/2019-07/index.html @@ -68,7 +68,7 @@ Abenet had another similar issue a few days ago when trying to find the stats fo - + diff --git a/docs/2019-08/index.html b/docs/2019-08/index.html index 2548601bd..b910ef884 100644 --- a/docs/2019-08/index.html +++ b/docs/2019-08/index.html @@ -76,7 +76,7 @@ Run system updates on DSpace Test (linode19) and reboot it - + diff --git a/docs/2019-09/index.html b/docs/2019-09/index.html index 6839a9fd3..ca171ee79 100644 --- a/docs/2019-09/index.html +++ b/docs/2019-09/index.html @@ -102,7 +102,7 @@ Here are the top ten IPs in the nginx XMLUI and REST/OAI logs this morning: - + diff --git a/docs/2019-10/index.html b/docs/2019-10/index.html index 4b26f5523..0c9280942 100644 --- a/docs/2019-10/index.html +++ b/docs/2019-10/index.html @@ -48,7 +48,7 @@ - + diff --git a/docs/2019-11/index.html b/docs/2019-11/index.html index d8db39d16..e180b1b54 100644 --- a/docs/2019-11/index.html +++ b/docs/2019-11/index.html @@ -88,7 +88,7 @@ Let’s see how many of the REST API requests were for bitstreams (because t - + diff --git a/docs/2019-12/index.html b/docs/2019-12/index.html index a8f519b14..56dd99888 100644 --- a/docs/2019-12/index.html +++ b/docs/2019-12/index.html @@ -76,7 +76,7 @@ Make sure all packages are up to date and the package manager is up to date, the - + diff --git a/docs/2020-01/index.html b/docs/2020-01/index.html index 2e6450606..1ffc706e3 100644 --- a/docs/2020-01/index.html +++ b/docs/2020-01/index.html @@ -86,7 +86,7 @@ I tweeted the CGSpace repository link - + diff --git a/docs/2020-02/index.html b/docs/2020-02/index.html index 97f2163b8..260a33c2b 100644 --- a/docs/2020-02/index.html +++ b/docs/2020-02/index.html @@ -68,7 +68,7 @@ The code finally builds and runs with a fresh install - + diff --git a/docs/2020-03/index.html b/docs/2020-03/index.html index f7ce58705..be6a6998c 100644 --- a/docs/2020-03/index.html +++ b/docs/2020-03/index.html @@ -72,7 +72,7 @@ You need to download this into the DSpace 6.x source and compile it - + diff --git a/docs/2020-04/index.html b/docs/2020-04/index.html index 6ca75099b..159d20122 100644 --- a/docs/2020-04/index.html +++ b/docs/2020-04/index.html @@ -78,7 +78,7 @@ On the same note, the one item Abenet pointed out last week now has a donut with - + diff --git a/docs/2020-05/index.html b/docs/2020-05/index.html index da953f1d0..8ae72a11e 100644 --- a/docs/2020-05/index.html +++ b/docs/2020-05/index.html @@ -64,7 +64,7 @@ I see that CGSpace (linode18) is still using PostgreSQL JDBC driver version 42.2 - + diff --git a/docs/2020-06/index.html b/docs/2020-06/index.html index 4b29cc630..5b6656d8a 100644 --- a/docs/2020-06/index.html +++ b/docs/2020-06/index.html @@ -66,7 +66,7 @@ I tried to build the OAI registry on the freshly migrated DSpace 6 on DSpace Tes - + diff --git a/docs/2020-07/index.html b/docs/2020-07/index.html index 5bcdf3a8d..18c6c54ca 100644 --- a/docs/2020-07/index.html +++ b/docs/2020-07/index.html @@ -68,7 +68,7 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f - + diff --git a/docs/2020-08/index.html b/docs/2020-08/index.html index b8b0b4859..7dc4a6bf4 100644 --- a/docs/2020-08/index.html +++ b/docs/2020-08/index.html @@ -66,7 +66,7 @@ It is class based so I can easily add support for other vocabularies, and the te - + diff --git a/docs/2020-09/index.html b/docs/2020-09/index.html index 92c87099a..3e84d25cf 100644 --- a/docs/2020-09/index.html +++ b/docs/2020-09/index.html @@ -78,7 +78,7 @@ I filed an issue on OpenRXV to make some minor edits to the admin UI: https://gi - + diff --git a/docs/2020-10/index.html b/docs/2020-10/index.html index 8934dce55..b5d9bc25b 100644 --- a/docs/2020-10/index.html +++ b/docs/2020-10/index.html @@ -74,7 +74,7 @@ During the FlywayDB migration I got an error: - + diff --git a/docs/2020-11/index.html b/docs/2020-11/index.html index cf792838c..b290dd05d 100644 --- a/docs/2020-11/index.html +++ b/docs/2020-11/index.html @@ -62,7 +62,7 @@ So far we’ve spent at least fifty hours to process the statistics and stat - + diff --git a/docs/2020-12/index.html b/docs/2020-12/index.html index d496b460c..105d5ff9e 100644 --- a/docs/2020-12/index.html +++ b/docs/2020-12/index.html @@ -66,7 +66,7 @@ I started processing those (about 411,000 records): - + diff --git a/docs/2021-01/index.html b/docs/2021-01/index.html index 93c57410d..3d793b4de 100644 --- a/docs/2021-01/index.html +++ b/docs/2021-01/index.html @@ -80,7 +80,7 @@ For example, this item has 51 views on CGSpace, but 0 on AReS - + diff --git a/docs/2021-02/index.html b/docs/2021-02/index.html index 6ac6bf6e0..39d821f3d 100644 --- a/docs/2021-02/index.html +++ b/docs/2021-02/index.html @@ -90,7 +90,7 @@ $ curl -s 'http://localhost:9200/openrxv-items-temp/_count?q=*&pretty - + diff --git a/docs/2021-03/index.html b/docs/2021-03/index.html index 85f894dc1..07ef6e553 100644 --- a/docs/2021-03/index.html +++ b/docs/2021-03/index.html @@ -64,7 +64,7 @@ Also, we found some issues building and running OpenRXV currently due to ecosyst - + diff --git a/docs/2021-04/index.html b/docs/2021-04/index.html index 7dcbed45a..8c058d3ff 100644 --- a/docs/2021-04/index.html +++ b/docs/2021-04/index.html @@ -74,7 +74,7 @@ Perhaps one of the containers crashed, I should have looked closer but I was in - + diff --git a/docs/2021-05/index.html b/docs/2021-05/index.html index 495363f6e..9d663054b 100644 --- a/docs/2021-05/index.html +++ b/docs/2021-05/index.html @@ -66,7 +66,7 @@ I will add the RI/1.0 pattern to our DSpace agents overload and purge them from - + diff --git a/docs/2021-06/index.html b/docs/2021-06/index.html index 0a88834a3..69d2e6e22 100644 --- a/docs/2021-06/index.html +++ b/docs/2021-06/index.html @@ -66,7 +66,7 @@ I simply started it and AReS was running again: - + diff --git a/docs/2021-07/index.html b/docs/2021-07/index.html index 5518992ae..0c5435df1 100644 --- a/docs/2021-07/index.html +++ b/docs/2021-07/index.html @@ -60,7 +60,7 @@ COPY 20994 - + diff --git a/docs/2021-08/index.html b/docs/2021-08/index.html index c9b8df777..c414a65b9 100644 --- a/docs/2021-08/index.html +++ b/docs/2021-08/index.html @@ -62,7 +62,7 @@ I decided to upgrade linode20 from Ubuntu 18.04 to 20.04 - + diff --git a/docs/2021-09/index.html b/docs/2021-09/index.html index 448776b19..7a1d9b5cb 100644 --- a/docs/2021-09/index.html +++ b/docs/2021-09/index.html @@ -78,7 +78,7 @@ The syntax Moayad showed me last month doesn’t seem to honor the search qu - + diff --git a/docs/2021-10/index.html b/docs/2021-10/index.html index b2ecb38ee..962afbd7c 100644 --- a/docs/2021-10/index.html +++ b/docs/2021-10/index.html @@ -76,7 +76,7 @@ So we have 1879/7100 (26.46%) matching already - + diff --git a/docs/2021-11/index.html b/docs/2021-11/index.html index 015003b68..bd4ed9f8e 100644 --- a/docs/2021-11/index.html +++ b/docs/2021-11/index.html @@ -62,7 +62,7 @@ $ zstd statistics-2019.json - + diff --git a/docs/2021-12/index.html b/docs/2021-12/index.html index 646cfba60..3e401e191 100644 --- a/docs/2021-12/index.html +++ b/docs/2021-12/index.html @@ -70,7 +70,7 @@ Total number of bot hits purged: 3679 - + diff --git a/docs/2022-01/index.html b/docs/2022-01/index.html index 3ebcb9f7e..8fecca31e 100644 --- a/docs/2022-01/index.html +++ b/docs/2022-01/index.html @@ -25,7 +25,7 @@ I also fixed a few bugs and improved the region-matching logic - + @@ -56,9 +56,9 @@ I also fixed a few bugs and improved the region-matching logic "@type": "BlogPosting", "headline": "September, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-01/", - "wordCount": "778", + "wordCount": "844", "datePublished": "2022-01-01T09:41:36+03:00", - "dateModified": "2022-09-07T17:58:52+03:00", + "dateModified": "2022-09-08T17:47:25+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -76,7 +76,7 @@ I also fixed a few bugs and improved the region-matching logic - + @@ -269,6 +269,20 @@ I also fixed a few bugs and improved the region-matching logic +
dspace=# BEGIN;
+BEGIN
+dspace=# DELETE FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND text_value='';
+DELETE 70
+dspace=# COMMIT;
+COMMIT
+