diff --git a/content/posts/2022-02.md b/content/posts/2022-02.md index 3535271ce..9d12f5cad 100644 --- a/content/posts/2022-02.md +++ b/content/posts/2022-02.md @@ -374,4 +374,42 @@ sys 3m2.459s - Start a full harvest on AReS +## 2022-02-14 + +- Last week Gaia sent me her notes on the second batch of TAC/ICW documents (items 201–400 in the spreadsheet) + - I created a filter in LibreOffice and selected the IDs for items with the action "delete", then I created a custom text facet in OpenRefine with this GREL: + +``` +or( +isNotNull(value.match('201')), +isNotNull(value.match('203')), +isNotNull(value.match('209')), +isNotNull(value.match('209')), +isNotNull(value.match('215')), +isNotNull(value.match('220')), +isNotNull(value.match('225')), +isNotNull(value.match('226')), +isNotNull(value.match('227')), +... +isNotNull(value.match('396'))) +``` + +- Then I flagged all matching records and exported a CSV to use with SAFBuilder + - Then I imported the SAF bundle on DSpace Test: + +```console +$ JAVA_OPTS="-Xmx1024m -Dfile.encoding=UTF-8" dspace import --add --eperson=fuuu@umm.com --source /tmp/SimpleArchiveFormat --mapfile=./2022-02-14-tac-batch2-201to400.map +``` + +- Export the next batch from OpenRefine (items with ID 401 to 700), check duplicates, and then join with the file names: + +```console +$ csvcut -c id,dc.title,dcterms.issued,dcterms.type ~/Downloads/2022-01-21-CGSpace-TAC-ICW-batch3-401to700.csv > /tmp/tac3.csv +$ ./ilri/check-duplicates.py -i /tmp/tac3.csv -db dspacetest -u dspacetest -p 'dom@in34sniper' -o /tmp/2022-02-14-tac-batch3-401-700.csv +$ csvcut -c id,filename ~/Downloads/2022-01-21-CGSpace-TAC-ICW-batch3-401to700.csv > /tmp/tac3-filenames.csv +$ csvjoin -c id /tmp/2022-02-14-tac-batch3-401-700.csv /tmp/tac3-filenames.csv > /tmp/2022-02-14-tac-batch3-401-700-filenames.csv +``` + +- I sent these 300 items to Gaia... 
+ diff --git a/docs/2022-02/index.html b/docs/2022-02/index.html index ddfaf7a9a..5f4736fa9 100644 --- a/docs/2022-02/index.html +++ b/docs/2022-02/index.html @@ -21,7 +21,7 @@ We agreed to try to do more alignment of affiliations/funders with ROR - + @@ -48,9 +48,9 @@ We agreed to try to do more alignment of affiliations/funders with ROR "@type": "BlogPosting", "headline": "February, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-02/", - "wordCount": "2039", + "wordCount": "2194", "datePublished": "2022-02-01T14:06:54+02:00", - "dateModified": "2022-02-11T09:41:05+03:00", + "dateModified": "2022-02-14T09:40:59+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -494,6 +494,44 @@ sys 3m2.459s +

2022-02-14

+ +
or(
+isNotNull(value.match('201')),
+isNotNull(value.match('203')),
+isNotNull(value.match('209')),
+isNotNull(value.match('209')),
+isNotNull(value.match('215')),
+isNotNull(value.match('220')),
+isNotNull(value.match('225')),
+isNotNull(value.match('226')),
+isNotNull(value.match('227')),
+...
+isNotNull(value.match('396')))
+
+
$ JAVA_OPTS="-Xmx1024m -Dfile.encoding=UTF-8" dspace import --add --eperson=fuuu@umm.com --source /tmp/SimpleArchiveFormat --mapfile=./2022-02-14-tac-batch2-201to400.map
+
+
$ csvcut -c id,dc.title,dcterms.issued,dcterms.type ~/Downloads/2022-01-21-CGSpace-TAC-ICW-batch3-401to700.csv > /tmp/tac3.csv
+$ ./ilri/check-duplicates.py -i /tmp/tac3.csv -db dspacetest -u dspacetest -p 'dom@in34sniper' -o /tmp/2022-02-14-tac-batch3-401-700.csv
+$ csvcut -c id,filename ~/Downloads/2022-01-21-CGSpace-TAC-ICW-batch3-401to700.csv > /tmp/tac3-filenames.csv
+$ csvjoin -c id /tmp/2022-02-14-tac-batch3-401-700.csv /tmp/tac3-filenames.csv > /tmp/2022-02-14-tac-batch3-401-700-filenames.csv
+
diff --git a/docs/categories/index.html b/docs/categories/index.html index bee9a145e..c64e90c8e 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 2bc168edc..bc05e63cb 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 05673f661..21c082527 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index a6f6c03fc..bf3867dd4 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 534c06124..81da010fc 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 9cc3473a8..1cd762e30 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 1dab3147a..3b5061e9b 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 96c0cf168..54bdb554c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index c0edba763..be516a771 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html 
b/docs/page/3/index.html index 0500171c7..1254c855d 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 9a304de47..b73ff6934 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index cd8f76f71..b2e7e83a1 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 45ef08abd..7c2ebbe77 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 8db482455..f96a8dff0 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 9a2d63da6..774058f2a 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 12ec7fecf..5510738c6 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 08bd8a442..8238af0a8 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index e602863e1..5aa45f575 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 9350a3cc9..5184a4c10 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 04b3e7651..8063b1537 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 6fb2883f7..7861727a5 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 2dd219247..c556f5a07 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index bfdf56a99..016182e41 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a1f4e3f86..5273e6776 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-02-11T09:41:05+03:00 + 2022-02-14T09:40:59+03:00 https://alanorth.github.io/cgspace-notes/ - 2022-02-11T09:41:05+03:00 + 2022-02-14T09:40:59+03:00 https://alanorth.github.io/cgspace-notes/2022-02/ - 2022-02-11T09:41:05+03:00 + 2022-02-14T09:40:59+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-02-11T09:41:05+03:00 + 2022-02-14T09:40:59+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-02-11T09:41:05+03:00 + 2022-02-14T09:40:59+03:00 https://alanorth.github.io/cgspace-notes/2022-01/ 2022-02-07T09:49:34+03:00