From ed9fb3fe994b6162408be26b29f462c791f8156a Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Mon, 31 Jan 2022 09:00:59 +0300 Subject: [PATCH] Add notes for 2022-01-30 --- content/posts/2022-01.md | 32 ++++++++++++ docs/2022-01/index.html | 68 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/categories/notes/page/5/index.html | 2 +- docs/categories/notes/page/6/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/page/7/index.html | 2 +- docs/page/8/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/posts/page/7/index.html | 2 +- docs/posts/page/8/index.html | 2 +- docs/sitemap.xml | 10 ++-- 26 files changed, 124 insertions(+), 32 deletions(-) diff --git a/content/posts/2022-01.md b/content/posts/2022-01.md index 7ebe0871f..10645650b 100644 --- a/content/posts/2022-01.md +++ b/content/posts/2022-01.md @@ -188,5 +188,37 @@ $ grep -E '^2022-01*' /var/log/postgresql/postgresql-10-main.log | grep -c 'stil - I included the id because I will need a unique field to join the resulting list of non-duplicates with the original CSV where the rest of the metadata and filenames are - Since these items are not in DSpace yet, I generated simple numeric IDs in OpenRefine using this GREL transform: `row.index + 1` - Then I ran `check-duplicates.py` on items 1–200 and sent the resulting CSV to Gaia +- Delete one duplicate item I saw in IITA's Journal Articles that was uploaded earlier in WLE + - Also do some general cleanup on IITA's Journal Articles collection in 
OpenRefine +- Delete one duplicate item I saw in ILRI's Journal Articles collection + - Also do some general cleanup on ILRI's Journal Articles collection in OpenRefine and csv-metadata-quality + +## 2022-01-29 + +- I did some more cleanup on the ILRI Journal Articles + - I added missing journal titles for items that had ISSNs + - Then I added pages for items that had them in the citation + - First, I faceted the citation field based on whether or not the item had something like ": 232-234" present: + +```console +value.contains(/:\s?\d+(-|–)\d+/) +``` + +- Then I faceted by blank on `dcterms.extent` and did a transform to extract the page information for over 1,000 items! + +```console +'p. ' + +cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[0] + +'-' + +cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[2] +``` + +- Then I did something similar for `cg.volume` and `cg.issue`, also based on the citation, for example to extract the "16" from "Journal of Blah 16(1)", where "16" is the second capture group (index 1, because the array returned by `match()` is zero-based): + +```console +cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*( |;)(\d+)\((\d+)\).*/)[1] +``` + +- This was 3,000 items, so I imported the changes on CGSpace 1,000 at a time... 
diff --git a/docs/2022-01/index.html b/docs/2022-01/index.html index 15c775bbb..d2c1ba1e3 100644 --- a/docs/2022-01/index.html +++ b/docs/2022-01/index.html @@ -14,7 +14,7 @@ Start a full harvest on AReS - + @@ -34,9 +34,9 @@ Start a full harvest on AReS "@type": "BlogPosting", "headline": "January, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-01/", - "wordCount": "855", + "wordCount": "1223", "datePublished": "2022-01-01T15:20:54+02:00", - "dateModified": "2022-01-19T18:14:26+03:00", + "dateModified": "2022-01-28T16:59:40+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -297,7 +297,67 @@ UPDATE 9433 $ grep -E '^2022-01*' /var/log/postgresql/postgresql-10-main.log | grep -c 'still waiting for' 3 +

2022-01-28

+ +

2022-01-29

+ +
value.contains(/:\s?\d+(-|–)\d+/)
+
+
'p. ' +
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[0] +
+'-' +
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[2]
+
+
cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*( |;)(\d+)\((\d+)\).*/)[1]
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index e3eb4917c..bb9437049 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 351a7264d..8c368ee06 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index dd284517b..609405524 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index aaddb825e..17ef175c7 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 09b9245fe..508b4b27d 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 5e81354d4..ed2305aa1 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index a1b263f35..9ca21516d 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 2cbee5ca3..804ff75b2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index a94d58b59..ec43e4c73 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html 
b/docs/page/3/index.html index 679340690..8c690e9f3 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index a7e315887..9801670ca 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 439a149bc..e0639a4e7 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 606bea2e3..f629f147d 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 9f6b79bdf..a6f6bbea6 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 97b0432cb..dd05ed259 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 3f27688cd..b850afa39 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 645849975..3e4b1ee34 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 411530231..877d58c5e 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index e44db00dd..2c3afec89 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 3f5deafdb..4eb115485 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 4a7ee09af..8019f6302 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index d7b63b113..9642c0efe 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index 4aef71cb0..eab367dfa 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 53cba9374..5e17efc30 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-01-27T16:58:05+03:00 + 2022-01-28T16:59:40+03:00 https://alanorth.github.io/cgspace-notes/ - 2022-01-27T16:58:05+03:00 + 2022-01-28T16:59:40+03:00 https://alanorth.github.io/cgspace-notes/2022-01/ - 2022-01-27T16:58:05+03:00 + 2022-01-28T16:59:40+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-01-27T16:58:05+03:00 + 2022-01-28T16:59:40+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-01-27T16:58:05+03:00 + 2022-01-28T16:59:40+03:00 https://alanorth.github.io/cgspace-notes/2021-12/ 2022-01-09T10:39:51+02:00