From c7304e21fd76d1fd8cd6e046b5ee4109e76375f3 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 24 Apr 2019 16:50:24 +0300 Subject: [PATCH] Add notes for 2019-04-24 --- content/posts/2019-04.md | 20 ++++++++++++++++++++ docs/2019-04/index.html | 41 +++++++++++++++++++++++++++++++++++++--- docs/sitemap.xml | 10 +++++----- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/content/posts/2019-04.md b/content/posts/2019-04.md index 6072ec7d2..a2494ec61 100644 --- a/content/posts/2019-04.md +++ b/content/posts/2019-04.md @@ -860,4 +860,24 @@ dspace.log.2019-04-20:1515 - Add a privacy page to CGSpace - The work was mostly similar to the About page at `/page/about`, but in addition to adding i18n strings etc, I had to add the logic for the trail to `dspace-xmlui-mirage2/src/main/webapp/xsl/preprocess/general.xsl` +## 2019-04-24 + +- Linode migrated CGSpace (linode18) to a new host, but I am still getting poor performance when copying data to DSpace Test (linode19) + - I asked them if we can migrate DSpace Test to a new host + - They migrated DSpace Test to a new host and the rsync speed from Frankfurt was still capped at 20KiB/sec... + - I booted DSpace Test to a rescue CD and tried the rsync from CGSpace there too, but it was still capped at 20KiB/sec... +- Finally upload the 218 IITA items from March to CGSpace + - Abenet and I had to do a little bit more work to correct the metadata of one item that appeared to be a duplicate, but really just had the wrong DOI +- While I was uploading the IITA records I noticed that twenty of the records Sisay uploaded in 2018-09 had double Handles (`dc.identifier.uri`) + - According to my notes in 2018-09 I had noticed this when he uploaded the records and told him to remove them, but he didn't... + - I exported the IITA community as a CSV then used `csvcut` to extract the two URI columns and identify and fix the records: + +``` +$ csvcut -c id,dc.identifier.uri,'dc.identifier.uri[]' ~/Downloads/2019-04-24-IITA.csv > /tmp/iita.csv +``` + +- Carlos Tejo from the Land Portal had been emailing me this week to ask about the old REST API that Tsega was building in 2017 + - I told him we never finished it, and that he should try to use the `/items/find-by-metadata-field` endpoint, with the caveat that you need to match the language attribute exactly (ie "en", "en_US", null, etc) + - I asked him how many terms they are interested in, as we could probably make it easier by normalizing the language attributes of these fields (it would help us anyways) + diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html index 07a1d6267..d8ee4bf24 100644 --- a/docs/2019-04/index.html +++ b/docs/2019-04/index.html @@ -38,7 +38,7 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace - + @@ -81,9 +81,9 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace "@type": "BlogPosting", "headline": "April, 2019", "url": "https:\/\/alanorth.github.io\/cgspace-notes\/2019-04\/", - "wordCount": "5349", + "wordCount": "5633", "datePublished": "2019-04-01T09:00:43\x2b03:00", - "dateModified": "2019-04-22T16:09:58\x2b03:00", + "dateModified": "2019-04-23T13:04:37\x2b03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1217,6 +1217,41 @@ dspace.log.2019-04-20:1515 +

2019-04-24

+ + + +
$ csvcut -c id,dc.identifier.uri,'dc.identifier.uri[]' ~/Downloads/2019-04-24-IITA.csv > /tmp/iita.csv
+
+ + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 90091ea65..c9c0882c5 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,30 +4,30 @@ https://alanorth.github.io/cgspace-notes/2019-04/ - 2019-04-22T16:09:58+03:00 + 2019-04-23T13:04:37+03:00 https://alanorth.github.io/cgspace-notes/ - 2019-04-22T16:09:58+03:00 + 2019-04-23T13:04:37+03:00 0 https://alanorth.github.io/cgspace-notes/tags/notes/ - 2019-04-22T16:09:58+03:00 + 2019-04-23T13:04:37+03:00 0 https://alanorth.github.io/cgspace-notes/posts/ - 2019-04-22T16:09:58+03:00 + 2019-04-23T13:04:37+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2019-04-22T16:09:58+03:00 + 2019-04-23T13:04:37+03:00 0