mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-12-23 21:44:30 +01:00
Update notes for 2020-07-06
This commit is contained in:
parent
13cdfe2981
commit
9b443b200f
@ -261,4 +261,11 @@ $ csvcut -c1 /tmp/2020-07-05-subjects-upper.csv | head -n 6500 > 2020-07-05-cgsp
|
||||
$ ./agrovoc-lookup.py -i 2020-07-05-cgspace-subjects.txt -om 2020-07-05-cgspace-subjects-matched.txt -or 2020-07-05-cgspace-subjects-rejected.txt -d
|
||||
```
|
||||
|
||||
## 2020-07-06
|
||||
|
||||
- I made some optimizations to the suite of Python utility scripts in our DSpace directory as well as the [csv-metadata-quality](https://github.com/ilri/csv-metadata-quality) script
|
||||
- Mostly to make more efficient usage of the requests cache and to use parameterized requests instead of building the request URL by concatenating the URL with query parameters
|
||||
- I modified the `agrovoc-lookup.py` script to save its results as a CSV, with the subject, language, type of match (preferred, alternate, and total number of matches) rather than save two separate files
|
||||
- Note that I see `prefLabel`, `matchedPrefLabel`, and `altLabel` in the REST API responses and I'm not sure what the second one means
|
||||
|
||||
<!-- vim: set sw=2 ts=2: -->
|
||||
|
@ -20,7 +20,7 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f
|
||||
<meta property="og:type" content="article" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2020-07/" />
|
||||
<meta property="article:published_time" content="2020-07-01T10:53:54+03:00" />
|
||||
<meta property="article:modified_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="article:modified_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="July, 2020"/>
|
||||
@ -45,9 +45,9 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f
|
||||
"@type": "BlogPosting",
|
||||
"headline": "July, 2020",
|
||||
"url": "https://alanorth.github.io/cgspace-notes/2020-07/",
|
||||
"wordCount": "1435",
|
||||
"wordCount": "1539",
|
||||
"datePublished": "2020-07-01T10:53:54+03:00",
|
||||
"dateModified": "2020-07-05T16:29:04+03:00",
|
||||
"dateModified": "2020-07-05T21:52:01+03:00",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Alan Orth"
|
||||
@ -376,7 +376,20 @@ $ csvcut -c1 /tmp/2020-07-05-subjects-upper.csv | head -n 6500 > 2020-07-05-c
|
||||
<li>Then start looking them up using <code>agrovoc-lookup.py</code>:</li>
|
||||
</ul>
|
||||
<pre><code>$ ./agrovoc-lookup.py -i 2020-07-05-cgspace-subjects.txt -om 2020-07-05-cgspace-subjects-matched.txt -or 2020-07-05-cgspace-subjects-rejected.txt -d
|
||||
</code></pre><!-- raw HTML omitted -->
|
||||
</code></pre><h2 id="2020-07-06">2020-07-06</h2>
|
||||
<ul>
|
||||
<li>I made some optimizations to the suite of Python utility scripts in our DSpace directory as well as the <a href="https://github.com/ilri/csv-metadata-quality">csv-metadata-quality</a> script
|
||||
<ul>
|
||||
<li>Mostly to make more efficient usage of the requests cache and to use parameterized requests instead of building the request URL by concatenating the URL with query parameters</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>I modified the <code>agrovoc-lookup.py</code> script to save its results as a CSV, with the subject, language, type of match (preferred, alternate, and total number of matches) rather than save two separate files
|
||||
<ul>
|
||||
<li>Note that I see <code>prefLabel</code>, <code>matchedPrefLabel</code>, and <code>altLabel</code> in the REST API responses and I’m not sure what the second one means</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<!-- raw HTML omitted -->
|
||||
|
||||
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Categories"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="CGSpace Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="CGSpace Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="CGSpace Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="CGSpace Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="CGSpace Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="CGSpace Notes"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Posts"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Posts"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Posts"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Posts"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Posts"/>
|
||||
|
@ -9,7 +9,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2020-07-05T16:29:04+03:00" />
|
||||
<meta property="og:updated_time" content="2020-07-05T21:52:01+03:00" />
|
||||
|
||||
<meta name="twitter:card" content="summary"/>
|
||||
<meta name="twitter:title" content="Posts"/>
|
||||
|
@ -4,27 +4,27 @@
|
||||
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/</loc>
|
||||
<lastmod>2020-07-05T16:29:04+03:00</lastmod>
|
||||
<lastmod>2020-07-05T21:52:01+03:00</lastmod>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||
<lastmod>2020-07-05T16:29:04+03:00</lastmod>
|
||||
<lastmod>2020-07-05T21:52:01+03:00</lastmod>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2020-07/</loc>
|
||||
<lastmod>2020-07-05T16:29:04+03:00</lastmod>
|
||||
<lastmod>2020-07-05T21:52:01+03:00</lastmod>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||
<lastmod>2020-07-05T16:29:04+03:00</lastmod>
|
||||
<lastmod>2020-07-05T21:52:01+03:00</lastmod>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||
<lastmod>2020-07-05T16:29:04+03:00</lastmod>
|
||||
<lastmod>2020-07-05T21:52:01+03:00</lastmod>
|
||||
</url>
|
||||
|
||||
<url>
|
||||
|
Loading…
Reference in New Issue
Block a user