From 1fe5b9b7f7d6451089d8add099e647ea5475fc6b Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Mon, 15 Apr 2019 17:42:53 +0300 Subject: [PATCH] Update notes for 2019-04-15 --- content/posts/2019-04.md | 25 +++++++++++++++++++++++++ docs/2019-04/index.html | 38 +++++++++++++++++++++++++++++++++++--- docs/sitemap.xml | 10 +++++----- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/content/posts/2019-04.md b/content/posts/2019-04.md index af022f295..92fc560d9 100644 --- a/content/posts/2019-04.md +++ b/content/posts/2019-04.md @@ -649,5 +649,30 @@ GC_TUNE="-XX:NewRatio=3 \ - Rework the dspace-statistics-api to use the vanilla Python requests library instead of Solr client - [Tag version 1.0.0](https://github.com/ilri/dspace-statistics-api/releases/tag/v1.0.0) and deploy it on DSpace Test +- Pretty annoying to see CGSpace (linode18) with 20–50% CPU steal according to `iostat 1 10`, though I haven't had any Linode alerts in a few days +- Abenet sent me a list of ILRI items that don't have CRPs added to them + - The spreadsheet only had Handles (no IDs), so I'm experimenting with using Python in OpenRefine to get the IDs + - I cloned the handle column and then did a transform to get the IDs from the CGSpace REST API: + +``` +import json +import re +import urllib +import urllib2 + +handle = re.findall('[0-9]+/[0-9]+', value) + +url = 'https://cgspace.cgiar.org/rest/handle/' + handle[0] +req = urllib2.Request(url) +req.add_header('User-agent', 'Alan Python bot') +res = urllib2.urlopen(req) +data = json.load(res) +item_id = data['id'] + +return item_id +``` + +- Luckily none of the items already had CRPs, so I didn't have to worry about them getting removed + - It would have been much trickier if I had to get the CRPs for the items first, then add the CRPs... 
diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html index 8bb7d2963..a88c25a83 100644 --- a/docs/2019-04/index.html +++ b/docs/2019-04/index.html @@ -38,7 +38,7 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace - + @@ -81,9 +81,9 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace "@type": "BlogPosting", "headline": "April, 2019", "url": "https:\/\/alanorth.github.io\/cgspace-notes\/2019-04\/", - "wordCount": "3748", + "wordCount": "3901", "datePublished": "2019-04-01T09:00:43\x2b03:00", - "dateModified": "2019-04-14T16:59:47\x2b03:00", + "dateModified": "2019-04-15T12:58:07\x2b03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -924,6 +924,38 @@ $ ./delete-metadata-values.py -i /tmp/2019-04-11-delete-6-subjects.csv -db dspac +
  • Pretty annoying to see CGSpace (linode18) with 20–50% CPU steal according to iostat 1 10, though I haven’t had any Linode alerts in a few days
  • +
  • Abenet sent me a list of ILRI items that don’t have CRPs added to them + +
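    • The spreadsheet only had Handles (no IDs), so I’m experimenting with using Python in OpenRefine to get the IDs +
    • I cloned the handle column and then did a transform to get the IDs from the CGSpace REST API: + +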
    import json
    +import re
    +import urllib
    +import urllib2
    +
    +handle = re.findall('[0-9]+/[0-9]+', value)
    +
    +url = 'https://cgspace.cgiar.org/rest/handle/' + handle[0]
    +req = urllib2.Request(url)
    +req.add_header('User-agent', 'Alan Python bot')
    +res = urllib2.urlopen(req)
    +data = json.load(res)
    +item_id = data['id']
    +
    +return item_id
    +
    + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 8bdd07656..3d231b232 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,30 +4,30 @@ https://alanorth.github.io/cgspace-notes/2019-04/ - 2019-04-14T16:59:47+03:00 + 2019-04-15T12:58:07+03:00 https://alanorth.github.io/cgspace-notes/ - 2019-04-14T16:59:47+03:00 + 2019-04-15T12:58:07+03:00 0 https://alanorth.github.io/cgspace-notes/tags/notes/ - 2019-04-14T16:59:47+03:00 + 2019-04-15T12:58:07+03:00 0 https://alanorth.github.io/cgspace-notes/posts/ - 2019-04-14T16:59:47+03:00 + 2019-04-15T12:58:07+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2019-04-14T16:59:47+03:00 + 2019-04-15T12:58:07+03:00 0