From 1eb62971a57baaec5f11801fea2d7181097f621f Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 30 May 2018 17:44:58 -0700 Subject: [PATCH] Update notes for 2018-05-30 --- content/posts/2018-05.md | 16 ++++++++++++++++ docs/2018-05/index.html | 22 +++++++++++++++++++--- docs/sitemap.xml | 10 +++++----- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/content/posts/2018-05.md b/content/posts/2018-05.md index aa08acfb0..9ae851ba1 100644 --- a/content/posts/2018-05.md +++ b/content/posts/2018-05.md @@ -365,3 +365,19 @@ $ sed 's/.*Item1.*/\n&/g' ~/cifor-duplicates.txt > ~/cifor-duplicates-cleaned.tx ``` - I told Vika to look through the list manually and indicate which ones are indeed duplicates that we should delete, and which ones to map to CIFOR's collection +- A few weeks ago Peter wanted a list of authors from the ILRI collections, so I need to find a way to get the handles of all those collections +- I can use the `/communities/{id}/collections` endpoint of the REST API but it only takes IDs (not handles) and doesn't seem to descend into sub communities +- Shit, so I need the IDs for the top-level ILRI community and all its sub communities (and their sub communities) +- There has got to be a better way to do this than going to each community and getting their handles and IDs manually +- Oh shit, I literally already wrote a script to get all collections in a community hierarchy from the REST API: [rest-find-collections.py](https://gist.github.com/alanorth/ddd7f555f0e487fe0e9d3eb4ff26ce50) +- The output isn't great, but all the handles and IDs are printed in debug mode: + +``` +$ ./rest-find-collections.py -u https://cgspace.cgiar.org/rest -d 10568/1 2> /tmp/ilri-collections.txt +``` + +- Then I format the list of handles and put it into this SQL query to export authors from items ONLY in those collections (too many to list here): + +``` +dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id = (select 
metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author') AND resource_type_id = 2 AND resource_id IN (select item_id from collection2item where collection_id IN (select resource_id from handle where handle in ('10568/67236','10568/67274',...))) group by text_value order by count desc) to /tmp/ilri-authors.csv with csv; +``` diff --git a/docs/2018-05/index.html b/docs/2018-05/index.html index c0b4b058a..2ea293a8b 100644 --- a/docs/2018-05/index.html +++ b/docs/2018-05/index.html @@ -27,7 +27,7 @@ Also, I switched it to use OpenJDK instead of Oracle Java, as well as re-worked - + @@ -65,9 +65,9 @@ Also, I switched it to use OpenJDK instead of Oracle Java, as well as re-worked "@type": "BlogPosting", "headline": "May, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-05/", - "wordCount": "3135", + "wordCount": "3361", "datePublished": "2018-05-01T16:43:54+03:00", - "dateModified": "2018-05-30T10:50:55-07:00", + "dateModified": "2018-05-30T14:48:10-07:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -565,8 +565,24 @@ $ sed 's/.*Item1.*/\n&/g' ~/cifor-duplicates.txt > ~/cifor-duplicates-cle +
$ ./rest-find-collections.py -u https://cgspace.cgiar.org/rest -d 10568/1 2> /tmp/ilri-collections.txt
+
+ + + +
dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author') AND resource_type_id = 2 AND resource_id IN (select item_id from collection2item where collection_id IN (select resource_id from handle where handle in ('10568/67236','10568/67274',...))) group by text_value order by count desc) to /tmp/ilri-authors.csv with csv;
+
+ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 44eb28bf8..4c81cae4b 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2018-05/ - 2018-05-30T10:50:55-07:00 + 2018-05-30T14:48:10-07:00 @@ -164,7 +164,7 @@ https://alanorth.github.io/cgspace-notes/ - 2018-05-30T10:50:55-07:00 + 2018-05-30T14:48:10-07:00 0 @@ -175,7 +175,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2018-05-30T10:50:55-07:00 + 2018-05-30T14:48:10-07:00 0 @@ -187,13 +187,13 @@ https://alanorth.github.io/cgspace-notes/posts/ - 2018-05-30T10:50:55-07:00 + 2018-05-30T14:48:10-07:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2018-05-30T10:50:55-07:00 + 2018-05-30T14:48:10-07:00 0