From 2ecafafc178bf1ddd6a6337d3f3ef792d47dc429 Mon Sep 17 00:00:00 2001
From: Alan Orth <alan.orth@gmail.com>
Date: Fri, 8 Dec 2023 16:32:48 +0300
Subject: [PATCH] Notes for 2023-12-08

---
 content/posts/2023-12.md | 17 +++++++++++++++++
 docs/2023-12/index.html  | 24 ++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/content/posts/2023-12.md b/content/posts/2023-12.md
index 1c0c0d55f..ecb42f2cd 100644
--- a/content/posts/2023-12.md
+++ b/content/posts/2023-12.md
@@ -109,4 +109,21 @@ $ for network in $(csvcut -c network /tmp/ips.csv | sed 1d | sort -u); do grepci
 
 - I will remove those for now so that Altmetric doesn't have any unexpected issues harvesting
 
+## 2023-12-08
+
+- Finalized the script to generate Solr statistics for Alliance researcher Mirjam
+  - The script is `ilri/generate_solr_statistics.py`
+  - I generated ~3,200 statistics based on her records of the download statistics of [that item](https://hdl.handle.net/10568/131997) and imported them on CGSpace
+- Peter asked for lists of affiliations, investors, and publishers to do some cleanups
+  - I generated a list from a CSV export instead of doing it based on a SQL dump...
+
+```console
+$ csvcut -c 'cg.contributor.affiliation[en_US]' /tmp/initiatives.csv       \
+  | sed -e 1d -e 's/^"//' -e 's/"$//' -e 's/||/\n/g' -e '/^$/d'            \
+  | sort | uniq -c | sort -hr                                              \
+  | awk 'BEGIN { FS = "^[[:space:]]+[[:digit:]]+[[:space:]]+" } {print $2}'\
+  | sed -e '1i cg.contributor.affiliation' -e 's/^\(.*\)$/"\1"/'           \
+  > /tmp/2023-12-08-initiatives-affiliations.csv
+```
+
 <!-- vim: set sw=2 ts=2: -->
diff --git a/docs/2023-12/index.html b/docs/2023-12/index.html
index c376582f5..86831fe07 100644
--- a/docs/2023-12/index.html
+++ b/docs/2023-12/index.html
@@ -28,7 +28,7 @@
   "@type": "BlogPosting",
   "headline": "December, 2023",
   "url": "https://alanorth.github.io/cgspace-notes/2023-12/",
-  "wordCount": "529",
+  "wordCount": "643",
   "datePublished": "2023-12-01T08:48:36+03:00",
   "dateModified": "2023-12-06T09:55:57+03:00",
   "author": {
@@ -212,7 +212,27 @@
 </span></span></code></pre></div><ul>
 <li>I will remove those for now so that Altmetric doesn&rsquo;t have any unexpected issues harvesting</li>
 </ul>
-<!-- raw HTML omitted -->
+<h2 id="2023-12-08">2023-12-08</h2>
+<ul>
+<li>Finalized the script to generate Solr statistics for Alliance researcher Mirjam
+<ul>
+<li>The script is <code>ilri/generate_solr_statistics.py</code></li>
+<li>I generated ~3,200 statistics based on her records of the download statistics of <a href="https://hdl.handle.net/10568/131997">that item</a> and imported them on CGSpace</li>
+</ul>
+</li>
+<li>Peter asked for lists of affiliations, investors, and publishers to do some cleanups
+<ul>
+<li>I generated a list from a CSV export instead of doing it based on a SQL dump&hellip;</li>
+</ul>
+</li>
+</ul>
+<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-console" data-lang="console"><span style="display:flex;"><span>$ csvcut -c <span style="color:#e6db74">&#39;cg.contributor.affiliation[en_US]&#39;</span> /tmp/initiatives.csv       <span style="color:#ae81ff">\
+</span></span></span><span style="display:flex;"><span><span style="color:#ae81ff"></span>  | sed -e 1d -e &#39;s/^&#34;//&#39; -e &#39;s/&#34;$//&#39; -e &#39;s/||/\n/g&#39; -e &#39;/^$/d&#39;            \
+</span></span><span style="display:flex;"><span>  | sort | uniq -c | sort -hr                                              \
+</span></span><span style="display:flex;"><span>  | awk &#39;BEGIN { FS = &#34;^[[:space:]]+[[:digit:]]+[[:space:]]+&#34; } {print $2}&#39;\
+</span></span><span style="display:flex;"><span>  | sed -e &#39;1i cg.contributor.affiliation&#39; -e &#39;s/^\(.*\)$/&#34;\1&#34;/&#39;           \
+</span></span><span style="display:flex;"><span>  &gt; /tmp/2023-12-08-initiatives-affiliations.csv
+</span></span></code></pre></div><!-- raw HTML omitted -->