diff --git a/content/posts/2022-01.md b/content/posts/2022-01.md
index 7ebe0871f..10645650b 100644
--- a/content/posts/2022-01.md
+++ b/content/posts/2022-01.md
@@ -188,5 +188,37 @@ $ grep -E '^2022-01*' /var/log/postgresql/postgresql-10-main.log | grep -c 'stil
- I included the id because I will need a unique field to join the resulting list of non-duplicates with the original CSV where the rest of the metadata and filenames are
- Since these items are not in DSpace yet, I generated simple numeric IDs in OpenRefine using this GREL transform: `row.index + 1`
- Then I ran `check-duplicates.py` on items 1–200 and sent the resulting CSV to Gaia
+- Delete one duplicate item I saw in IITA's Journal Articles that was uploaded earlier in WLE
+ - Also do some general cleanup on IITA's Journal Articles collection in OpenRefine
+- Delete one duplicate item I saw in ILRI's Journal Articles collection
+ - Also do some general cleanup on ILRI's Journal Articles collection in OpenRefine and csv-metadata-quality
+
+## 2022-01-29
+
+- I did some more cleanup on the ILRI Journal Articles
+ - I added missing journal titles for items that had ISSNs
+ - Then I added pages for items that had them in the citation
+ - First, I faceted the citation field based on whether or not the item had something like ": 232-234" present:
+
+```console
+value.contains(/:\s?\d+(-|–)\d+/)
+```
+
+- Then I faceted by blank on `dcterms.extent` and did a transform to extract the page information for over 1,000 items!
+
+```console
+'p. ' +
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[0] +
+'-' +
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[2]
+```
+
+- Then I did something similar for `cg.volume` and `cg.issue`, also based on the citation, for example to extract the "16" from "Journal of Blah 16(1)", where "16" is the second capture group in a zero-based match:
+
+```console
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*( |;)(\d+)\((\d+)\).*/)[1]
+```
+
+- This was 3,000 items so I imported the changes on CGSpace 1,000 at a time...
diff --git a/docs/2022-01/index.html b/docs/2022-01/index.html
index 15c775bbb..d2c1ba1e3 100644
--- a/docs/2022-01/index.html
+++ b/docs/2022-01/index.html
@@ -14,7 +14,7 @@ Start a full harvest on AReS
-
+
@@ -34,9 +34,9 @@ Start a full harvest on AReS
"@type": "BlogPosting",
"headline": "January, 2022",
"url": "https://alanorth.github.io/cgspace-notes/2022-01/",
- "wordCount": "855",
+ "wordCount": "1223",
"datePublished": "2022-01-01T15:20:54+02:00",
- "dateModified": "2022-01-19T18:14:26+03:00",
+ "dateModified": "2022-01-28T16:59:40+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -297,7 +297,67 @@ UPDATE 9433
$ grep -E '^2022-01*' /var/log/postgresql/postgresql-10-main.log | grep -c 'still waiting for'
3
-- I set a system alert on CGSpace and then restarted Tomcat and PostgreSQL
+- I set a system alert on CGSpace and then restarted Tomcat and PostgreSQL
+
+- The issue in Francesca’s case was actually that someone had taken the task, not that PostgreSQL transactions were locked!
+
+
+
+2022-01-28
+
+- Finalize the last ~100 WLE Journal Article items without licenses and DOIs
+
+- I did as many as I could, also updating http links to https for many journal links
+
+
+- Federica Bottamedi contacted us from the system office to say that she took over for Vini (Abhilasha Vaid)
+
+- She created an account on CGSpace and now we need to see which workflows she should belong to
+
+
+- Start a fresh harvest on AReS
+- I adjusted the
check-duplicates.py
script to write the output to a CSV file including the id, both titles, both dates, and the handle link
+
+- I included the id because I will need a unique field to join the resulting list of non-duplicates with the original CSV where the rest of the metadata and filenames are
+- Since these items are not in DSpace yet, I generated simple numeric IDs in OpenRefine using this GREL transform:
row.index + 1
+- Then I ran
check-duplicates.py
on items 1–200 and sent the resulting CSV to Gaia
+
+
+- Delete one duplicate item I saw in IITA’s Journal Articles that was uploaded earlier in WLE
+
+- Also do some general cleanup on IITA’s Journal Articles collection in OpenRefine
+
+
+- Delete one duplicate item I saw in ILRI’s Journal Articles collection
+
+- Also do some general cleanup on ILRI’s Journal Articles collection in OpenRefine and csv-metadata-quality
+
+
+
+2022-01-29
+
+- I did some more cleanup on the ILRI Journal Articles
+
+- I added missing journal titles for items that had ISSNs
+- Then I added pages for items that had them in the citation
+- First, I faceted the citation field based on whether or not the item had something like “: 232-234” present:
+
+
+
+value.contains(/:\s?\d+(-|–)\d+/)
+
+- Then I faceted by blank on
dcterms.extent
and did a transform to extract the page information for over 1,000 items!
+
+'p. ' +
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[0] +
+'-' +
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*:\s?(\d+)(-|–)(\d+).*/)[2]
+
+- Then I did something similar for
cg.volume
and cg.issue
, also based on the citation, for example to extract the “16” from “Journal of Blah 16(1)”, where “16” is the second capture group in a zero-based match:
+
+cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*( |;)(\d+)\((\d+)\).*/)[1]
+
+- This was 3,000 items so I imported the changes on CGSpace 1,000 at a time…
diff --git a/docs/categories/index.html b/docs/categories/index.html
index e3eb4917c..bb9437049 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index 351a7264d..8c368ee06 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index dd284517b..609405524 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index aaddb825e..17ef175c7 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index 09b9245fe..508b4b27d 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html
index 5e81354d4..ed2305aa1 100644
--- a/docs/categories/notes/page/5/index.html
+++ b/docs/categories/notes/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html
index a1b263f35..9ca21516d 100644
--- a/docs/categories/notes/page/6/index.html
+++ b/docs/categories/notes/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index 2cbee5ca3..804ff75b2 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index a94d58b59..ec43e4c73 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index 679340690..8c690e9f3 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index a7e315887..9801670ca 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index 439a149bc..e0639a4e7 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index 606bea2e3..f629f147d 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index 9f6b79bdf..a6f6bbea6 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/8/index.html b/docs/page/8/index.html
index 97b0432cb..dd05ed259 100644
--- a/docs/page/8/index.html
+++ b/docs/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index 3f27688cd..b850afa39 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index 645849975..3e4b1ee34 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index 411530231..877d58c5e 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index e44db00dd..2c3afec89 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index 3f5deafdb..4eb115485 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index 4a7ee09af..8019f6302 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index d7b63b113..9642c0efe 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html
index 4aef71cb0..eab367dfa 100644
--- a/docs/posts/page/8/index.html
+++ b/docs/posts/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 53cba9374..5e17efc30 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,19 +3,19 @@
xmlns:xhtml="http://www.w3.org/1999/xhtml">
https://alanorth.github.io/cgspace-notes/categories/
- 2022-01-27T16:58:05+03:00
+ 2022-01-28T16:59:40+03:00
https://alanorth.github.io/cgspace-notes/
- 2022-01-27T16:58:05+03:00
+ 2022-01-28T16:59:40+03:00
https://alanorth.github.io/cgspace-notes/2022-01/
- 2022-01-27T16:58:05+03:00
+ 2022-01-28T16:59:40+03:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2022-01-27T16:58:05+03:00
+ 2022-01-28T16:59:40+03:00
https://alanorth.github.io/cgspace-notes/posts/
- 2022-01-27T16:58:05+03:00
+ 2022-01-28T16:59:40+03:00
https://alanorth.github.io/cgspace-notes/2021-12/
2022-01-09T10:39:51+02:00