diff --git a/content/posts/2020-07.md b/content/posts/2020-07.md index 3ccf1b2e3..a6adc4e7a 100644 --- a/content/posts/2020-07.md +++ b/content/posts/2020-07.md @@ -261,4 +261,11 @@ $ csvcut -c1 /tmp/2020-07-05-subjects-upper.csv | head -n 6500 > 2020-07-05-cgsp $ ./agrovoc-lookup.py -i 2020-07-05-cgspace-subjects.txt -om 2020-07-05-cgspace-subjects-matched.txt -or 2020-07-05-cgspace-subjects-rejected.txt -d ``` +## 2020-07-06 + +- I made some optimizations to the suite of Python utility scripts in our DSpace directory as well as the [csv-metadata-quality](https://github.com/ilri/csv-metadata-quality) script + - Mostly to make more efficient usage of the requests cache and to use parameterized requests instead of building the request URL by concatenating the URL with query parameters +- I modified the `agrovoc-lookup.py` script to save its results as a CSV, with the subject, language, type of match (preferred, alternate, and total number of matches) rather than save two separate files + - Note that I see `prefLabel`, `matchedPrefLabel`, and `altLabel` in the REST API responses and I'm not sure what the second one means + diff --git a/docs/2020-07/index.html b/docs/2020-07/index.html index 576f0c062..803f9c349 100644 --- a/docs/2020-07/index.html +++ b/docs/2020-07/index.html @@ -20,7 +20,7 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f - + @@ -45,9 +45,9 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f "@type": "BlogPosting", "headline": "July, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-07/", - "wordCount": "1435", + "wordCount": "1539", "datePublished": "2020-07-01T10:53:54+03:00", - "dateModified": "2020-07-05T16:29:04+03:00", + "dateModified": "2020-07-05T21:52:01+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -376,7 +376,20 @@ $ csvcut -c1 /tmp/2020-07-05-subjects-upper.csv | head -n 6500 > 2020-07-05-c
  • Then start looking them up using agrovoc-lookup.py:
  • $ ./agrovoc-lookup.py -i 2020-07-05-cgspace-subjects.txt -om 2020-07-05-cgspace-subjects-matched.txt -or 2020-07-05-cgspace-subjects-rejected.txt -d
    -
    +

    2020-07-06

    + + diff --git a/docs/categories/index.html b/docs/categories/index.html index db955d225..8a986e40a 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index e0d1653c3..dadce7728 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 99013bf2c..217cc1dd0 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 0f06b9c63..5b572c585 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 7b80beed0..aef3c9cc8 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 977bae58c..a92c2c739 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index c09eee9b1..18fdf37a1 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 10881db1f..0fc9b667b 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index bdbd252d1..0a32ecc07 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 826011b12..e2b860e7e 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index bcc69b933..93efa0ba9 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 6c440d841..2cd64d32e 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 6ea9a5576..b24d7f3db 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index a174f96a4..347fc6ef0 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index c60523fed..9bfebfdc7 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 399436097..a727d2a96 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 59774b0a2..b16c1519c 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 9a42f468f..cf45e69b1 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-07-05T16:29:04+03:00 + 2020-07-05T21:52:01+03:00 https://alanorth.github.io/cgspace-notes/ - 2020-07-05T16:29:04+03:00 + 2020-07-05T21:52:01+03:00 https://alanorth.github.io/cgspace-notes/2020-07/ - 2020-07-05T16:29:04+03:00 + 2020-07-05T21:52:01+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-07-05T16:29:04+03:00 + 2020-07-05T21:52:01+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-07-05T16:29:04+03:00 + 2020-07-05T21:52:01+03:00