From 054d666fe0f2572151e0cec5f12bf8e4a2817da5 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Thu, 31 Mar 2022 16:09:14 +0300 Subject: [PATCH] Add notes for 2022-03-31 --- content/posts/2022-03.md | 27 ++++++++++++++++++ docs/2022-03/index.html | 38 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/sitemap.xml | 10 +++---- 26 files changed, 90 insertions(+), 31 deletions(-) diff --git a/content/posts/2022-03.md b/content/posts/2022-03.md index a801645c4..585094da7 100644 --- a/content/posts/2022-03.md +++ b/content/posts/2022-03.md @@ -271,4 +271,31 @@ $ chrt -b 0 dspace filter-media -p "ImageMagick PDF Thumbnail" -i 10947/50 - After that I did some normalization on the `cg.subject.system` metadata and extracted a few dozen countries to the country field - Start a harvest on AReS +## 2022-03-30 + +- Yesterday Rafael from CIAT asked me to re-create his approver account on DSpace Test as well + +```console +$ dspace user -a -m tip-approve@cgiar.org -g Rafael -s Rodriguez -p 'fuuuu' +``` + +- I started looking into the request regarding the CIAT Library PDFs + - There are over 4,000 links to PDFs hosted on that server in CGSpace metadata + - The links seem to be down though! I emailed Paola to ask + +## 2022-03-31 + +- Switch DSpace Test (linode26) back to CMS GC so I can do some monitoring and evaluation of GC before switching to G1GC +- Leroy from CIAT said that the CIAT Library server has security issues so was limited to internal traffic + - I extracted a list of URLs from CGSpace to send him: + +```console +localhost/dspacetest= ☘ \COPY (SELECT DISTINCT(text_value) FROM metadatavalue WHERE metadata_field_id=219 AND text_value ~ 'https?://ciat-library') to /tmp/2022-03-31-ciat-library-urls.csv WITH CSV HEADER; +COPY 4552 +``` + +- I did some checks and cleanups in OpenRefine because there are some values with "#page" etc + - Once I sorted them there were only ~2,700, which means there are going to be almost two thousand items with duplicate PDFs + - I suggested that we might want to handle those cases specially and extract the chapters or whatever page range since they are probably books + diff --git a/docs/2022-03/index.html b/docs/2022-03/index.html index 4422e3a5b..f66ff3660 100644 --- a/docs/2022-03/index.html +++ b/docs/2022-03/index.html @@ -19,7 +19,7 @@ $ csvjoin -c id /tmp/2022-03-01-tac-batch4-701-980.csv /tmp/tac4-filenames.csv & - + @@ -44,9 +44,9 @@ $ csvjoin -c id /tmp/2022-03-01-tac-batch4-701-980.csv /tmp/tac4-filenames.csv & "@type": "BlogPosting", "headline": "March, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-03/", - "wordCount": "1589", + "wordCount": "1789", "datePublished": "2022-03-01T16:46:54+03:00", - "dateModified": "2022-03-29T16:01:48+03:00", + "dateModified": "2022-03-29T21:26:07+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -406,6 +406,38 @@ isNotNull(value.match('889'))
  • After that I did some normalization on the cg.subject.system metadata and extracted a few dozen countries to the country field
  • Start a harvest on AReS
  • +

    2022-03-30

    + +
    $ dspace user -a -m tip-approve@cgiar.org -g Rafael -s Rodriguez -p 'fuuuu'
    +
    +

    2022-03-31

    + +
    localhost/dspacetest= ☘ \COPY (SELECT DISTINCT(text_value) FROM metadatavalue WHERE metadata_field_id=219 AND text_value ~ 'https?://ciat-library') to /tmp/2022-03-31-ciat-library-urls.csv WITH CSV HEADER;
    +COPY 4552
    +
    diff --git a/docs/categories/index.html b/docs/categories/index.html index c4164689d..d2eac9488 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 4cfc166a1..d64b35e17 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 61d616624..b991294e8 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 68af448a3..0adbcd37c 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 9224ab7d4..6051cecf9 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 3a88bc384..d06321b69 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index aa58e7b98..fb859024d 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 1236bb020..526c068f2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index d2f92f9a2..8f4a0edde 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 4634a189c..3bdd86d8e 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 4f6444213..794378d12 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 81647b38e..76518f2f3 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index af5614b13..f2540d2ce 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index aa58957d8..2f4e9699d 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index f7a7e0334..80f0f190e 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 569355a57..eeab13b4c 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index d5e6b588b..e1da02757 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index f4c7ab656..ec8d61149 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index b286b490d..5e5e00bdc 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 1efb8139a..b2d5d5299 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 07044e524..548201e27 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 967e555c3..1cf4c33a8 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index dce1236c6..833fe8b51 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 2bc0a20ae..44649781a 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-03-29T16:01:48+03:00 + 2022-03-29T21:26:07+03:00 https://alanorth.github.io/cgspace-notes/ - 2022-03-29T16:01:48+03:00 + 2022-03-29T21:26:07+03:00 https://alanorth.github.io/cgspace-notes/2022-03/ - 2022-03-29T16:01:48+03:00 + 2022-03-29T21:26:07+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-03-29T16:01:48+03:00 + 2022-03-29T21:26:07+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-03-29T16:01:48+03:00 + 2022-03-29T21:26:07+03:00 https://alanorth.github.io/cgspace-notes/2022-02/ 2022-03-01T17:17:27+03:00