From 2ecafafc178bf1ddd6a6337d3f3ef792d47dc429 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 8 Dec 2023 16:32:48 +0300 Subject: [PATCH] Notes for 2023-12-08 --- content/posts/2023-12.md | 17 +++++++++++++++++ docs/2023-12/index.html | 24 ++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/content/posts/2023-12.md b/content/posts/2023-12.md index 1c0c0d55f..ecb42f2cd 100644 --- a/content/posts/2023-12.md +++ b/content/posts/2023-12.md @@ -109,4 +109,21 @@ $ for network in $(csvcut -c network /tmp/ips.csv | sed 1d | sort -u); do grepci - I will remove those for now so that Altmetric doesn't have any unexpected issues harvesting +## 2023-12-08 + +- Finalized the script to generate Solr statistics for Alliance researcher Mirjam + - The script is `ilri/generate_solr_statistics.py` + - I generated ~3,200 statistics based on her records of the download statistics of [that item](https://hdl.handle.net/10568/131997) and imported them on CGSpace +- Peter asked for lists of affiliations, investors, and publishers to do some cleanups + - I generated a list from a CSV export instead of doing it based on a SQL dump... 
+ +```console +$ csvcut -c 'cg.contributor.affiliation[en_US]' /tmp/initiatives.csv \ + | sed -e 1d -e 's/^"//' -e 's/"$//' -e 's/||/\n/g' -e '/^$/d' \ + | sort | uniq -c | sort -hr \ + | awk 'BEGIN { FS = "^[[:space:]]+[[:digit:]]+[[:space:]]+" } {print $2}'\ + | sed -e '1i cg.contributor.affiliation' -e 's/^\(.*\)$/"\1"/' \ + > /tmp/2023-12-08-initiatives-affiliations.csv +``` + diff --git a/docs/2023-12/index.html b/docs/2023-12/index.html index c376582f5..86831fe07 100644 --- a/docs/2023-12/index.html +++ b/docs/2023-12/index.html @@ -28,7 +28,7 @@ "@type": "BlogPosting", "headline": "December, 2023", "url": "https://alanorth.github.io/cgspace-notes/2023-12/", - "wordCount": "529", + "wordCount": "643", "datePublished": "2023-12-01T08:48:36+03:00", "dateModified": "2023-12-06T09:55:57+03:00", "author": { @@ -212,7 +212,27 @@ - +

2023-12-08

+ +
$ csvcut -c 'cg.contributor.affiliation[en_US]' /tmp/initiatives.csv       \
+  | sed -e 1d -e 's/^"//' -e 's/"$//' -e 's/||/\n/g' -e '/^$/d'            \
+  | sort | uniq -c | sort -hr                                              \
+  | awk 'BEGIN { FS = "^[[:space:]]+[[:digit:]]+[[:space:]]+" } {print $2}'\
+  | sed -e '1i cg.contributor.affiliation' -e 's/^\(.*\)$/"\1"/'           \
+  > /tmp/2023-12-08-initiatives-affiliations.csv
+