From 7016d8b3f14408c0ba771288788e578b0b9ea934 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 17 Dec 2017 11:22:21 +0200 Subject: [PATCH] Update notes for 2017-12-17 --- content/post/2017-12.md | 9 +++++++++ public/2017-12/index.html | 14 +++++++++++--- public/sitemap.xml | 10 +++++----- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/content/post/2017-12.md b/content/post/2017-12.md index d67da8ec9..1fcac9db0 100644 --- a/content/post/2017-12.md +++ b/content/post/2017-12.md @@ -202,4 +202,13 @@ UPDATE 1 - The dates are in super long ISO8601 format (from Excel?) like `2016-02-07T00:00:00Z` so I converted them to simpler forms in GREL: `value.toString("yyyy-MM-dd")` - I trimmed the whitespaces in a few fields but it wasn't many - Rename her thumbnail column to filename, and format it so SAFBuilder adds the files to the thumbnail bundle with this GREL in OpenRefine: `value + "__bundle:THUMBNAIL"` + - Rename dc.identifier.status and dc.identifier.url columns to cg.identifier.status and cg.identifier.url + - Item 4 has weird characters in citation, ie: Nagoya et de Trait + - Some author names need normalization, ie: `Aggarwal, Pramod` and `Aggarwal, Pramod K.` + - Something weird going on with duplicate authors that have the same text value, like `Berto, Jayson C.` and `Balmeo, Katherine P.` - I will send her feedback on some author names like UNEP and ICRISAT and ask her for the missing thumbnail11.jpg +- I did a test import of the data locally after building with SAFBuilder but for some reason I had to specify the collection (even though the collections were specified in the `collection` field) + +``` +$ JAVA_OPTS="-Xmx512m -Dfile.encoding=UTF-8" ~/dspace/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/89338 --source /Users/aorth/Downloads/2016\ bulk\ upload\ thumbnails/SimpleArchiveFormat --mapfile=/tmp/ccafs.map &> /tmp/ccafs.log +``` diff --git a/public/2017-12/index.html b/public/2017-12/index.html index 
9e5927dfc..b974d5211 100644 --- a/public/2017-12/index.html +++ b/public/2017-12/index.html @@ -23,7 +23,7 @@ The list of connections to XMLUI and REST API for today: - + @@ -56,9 +56,9 @@ The list of connections to XMLUI and REST API for today: "@type": "BlogPosting", "headline": "December, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-12/", - "wordCount": "1128", + "wordCount": "1231", "datePublished": "2017-12-01T13:53:54+03:00", - "dateModified": "2017-12-17T09:55:04+02:00", + "dateModified": "2017-12-17T10:56:56+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -347,10 +347,18 @@ UPDATE 1
  • The dates are in super long ISO8601 format (from Excel?) like 2016-02-07T00:00:00Z so I converted them to simpler forms in GREL: value.toString("yyyy-MM-dd")
  • I trimmed the whitespace in a few fields, but there weren’t many
  • Rename her thumbnail column to filename, and format it so SAFBuilder adds the files to the thumbnail bundle with this GREL in OpenRefine: value + "__bundle:THUMBNAIL"
  • +
  • Rename dc.identifier.status and dc.identifier.url columns to cg.identifier.status and cg.identifier.url
  • +
  • Item 4 has weird characters in its citation, e.g.: Nagoya et de Trait
  • +
  • Some author names need normalization, e.g.: Aggarwal, Pramod and Aggarwal, Pramod K.
  • +
  • Something weird is going on with duplicate authors that have the same text value, like Berto, Jayson C. and Balmeo, Katherine P.
  • I will send her feedback on some author names like UNEP and ICRISAT and ask her for the missing thumbnail11.jpg
  • +
  • I did a test import of the data locally after building with SAFBuilder, but for some reason I had to specify the collection explicitly (even though the collections were already specified in the collection field)
  • +
    $ JAVA_OPTS="-Xmx512m -Dfile.encoding=UTF-8" ~/dspace/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/89338 --source /Users/aorth/Downloads/2016\ bulk\ upload\ thumbnails/SimpleArchiveFormat --mapfile=/tmp/ccafs.map &> /tmp/ccafs.log
    +
    + diff --git a/public/sitemap.xml b/public/sitemap.xml index 06e265523..f1a4d7d94 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-12/ - 2017-12-17T09:55:04+02:00 + 2017-12-17T10:56:56+02:00 @@ -139,7 +139,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-12-17T09:55:04+02:00 + 2017-12-17T10:56:56+02:00 0 @@ -150,7 +150,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-12-17T09:55:04+02:00 + 2017-12-17T10:56:56+02:00 0 @@ -162,13 +162,13 @@ https://alanorth.github.io/cgspace-notes/post/ - 2017-12-17T09:55:04+02:00 + 2017-12-17T10:56:56+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-12-17T09:55:04+02:00 + 2017-12-17T10:56:56+02:00 0