diff --git a/content/posts/2020-07.md b/content/posts/2020-07.md index df2923cb0..25cf21351 100644 --- a/content/posts/2020-07.md +++ b/content/posts/2020-07.md @@ -337,4 +337,23 @@ dc.contributor.author,correction ![Altmetric and Dimensions badges](/cgspace-notes/2020/07/altmetrics-dimensions-badges.png) +- I wrote a quick script to look up organizations (affiliations) in the Research Organization Registry (ROR) JSON data release v5 + - I want to use this to evaluate ROR as a controlled vocabulary for CGSpace and MELSpace + - I exported a list of affiliations from CGSpace: + +``` +dspace=# \COPY (SELECT DISTINCT text_value as "cg.contributor.affiliation", count(*) FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 211 GROUP BY text_value ORDER BY count DESC) to /tmp/2020-07-08-affiliations.csv WITH CSV HEADER; +``` + +- Then I stripped the header and quotes to make it a plain text file and ran `ror-lookup.py`: + +``` +$ ./ror-lookup.py -i /tmp/2020-07-08-affiliations.txt -r ror.json -o 2020-07-08-affiliations-ror.csv -d +$ ./ror-lookup.py -i /tmp/2020-07-08-affiliations.txt -r ror.json -o 2020-07-08-affiliations-ror.csv -d +$ csvgrep -c 2 -m true 2020-07-08-affiliations-ror.csv | wc -l +1378 +$ csvgrep -c 2 -m false 2020-07-08-affiliations-ror.csv | wc -l +4490 +``` + diff --git a/docs/2020-06/index.html b/docs/2020-06/index.html index 12f4ae01e..f12c713e7 100644 --- a/docs/2020-06/index.html +++ b/docs/2020-06/index.html @@ -19,7 +19,7 @@ I tried to build the OAI registry on the freshly migrated DSpace 6 on DSpace Tes - + @@ -45,7 +45,7 @@ I tried to build the OAI registry on the freshly migrated DSpace 6 on DSpace Tes "url": "https://alanorth.github.io/cgspace-notes/2020-06/", "wordCount": "4788", "datePublished": "2020-06-01T13:55:39+03:00", - "dateModified": "2020-06-30T19:21:50+03:00", + "dateModified": "2020-07-08T16:30:40+03:00", "author": { "@type": "Person", "name": "Alan Orth" diff --git a/docs/2020-07/index.html 
b/docs/2020-07/index.html index 6e6f92591..feb9a0c95 100644 --- a/docs/2020-07/index.html +++ b/docs/2020-07/index.html @@ -20,7 +20,7 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f - + @@ -45,9 +45,9 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f "@type": "BlogPosting", "headline": "July, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-07/", - "wordCount": "2116", + "wordCount": "2246", "datePublished": "2020-07-01T10:53:54+03:00", - "dateModified": "2020-07-07T16:14:49+03:00", + "dateModified": "2020-07-08T16:30:40+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -464,7 +464,25 @@ dc.contributor.author,correction

Altmetric and Dimensions badges

- + +
dspace=# \COPY (SELECT DISTINCT text_value as "cg.contributor.affiliation", count(*) FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 211 GROUP BY text_value ORDER BY count DESC) to /tmp/2020-07-08-affiliations.csv WITH CSV HEADER;
+
+
$ ./ror-lookup.py -i /tmp/2020-07-08-affiliations.txt -r ror.json -o 2020-07-08-affiliations-ror.csv -d
+$ ./ror-lookup.py -i /tmp/2020-07-08-affiliations.txt -r ror.json -o 2020-07-08-affiliations-ror.csv -d
+$ csvgrep -c 2 -m true 2020-07-08-affiliations-ror.csv | wc -l 
+1378
+$ csvgrep -c 2 -m false 2020-07-08-affiliations-ror.csv | wc -l
+4490
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index 25148e024..fcee507ee 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index cd0dc0fae..25be03d46 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index f1a696de3..59a5e0d0b 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 32b087c13..81712f775 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index dbbb5bd06..ce4dc4df1 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 8457b6fdf..9a84453ef 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 1a4a41a70..e7f3bc9d0 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 4c0b36e8e..01b09d621 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 47a1967a8..6f873b1e3 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index e3731472c..662eec260 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/6/index.html 
b/docs/page/6/index.html index 281969542..a2318b44f 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 4a5524498..cae6cd884 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index a576b519c..b51390c10 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index a5eee51a5..61ca08002 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 46216190d..c60fe8448 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 96fb70447..d18d521c0 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 6858b6f70..6570cc035 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index d5596621b..94253d775 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,32 +4,32 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-07-07T16:14:49+03:00 + 2020-07-08T16:30:40+03:00 https://alanorth.github.io/cgspace-notes/ - 2020-07-07T16:14:49+03:00 + 2020-07-08T16:30:40+03:00 https://alanorth.github.io/cgspace-notes/2020-07/ - 2020-07-07T16:14:49+03:00 + 2020-07-08T16:30:40+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-07-07T16:14:49+03:00 + 2020-07-08T16:30:40+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-07-07T16:14:49+03:00 + 2020-07-08T16:30:40+03:00 
https://alanorth.github.io/cgspace-notes/2020-06/ - 2020-06-30T19:21:50+03:00 + 2020-07-08T16:30:40+03:00