diff --git a/content/posts/2020-10.md b/content/posts/2020-10.md index cca64ec84..4c02450c1 100644 --- a/content/posts/2020-10.md +++ b/content/posts/2020-10.md @@ -640,5 +640,20 @@ Authorization denied for action WORKFLOW_STEP_1 on COLLECTION:1072 by user 1759 - The submit step is defined, but has no users or groups - I added the IITA submitters there and told Bosede to try again - Add two new blocks to list the top communities and collections on AReS +- I want to extract all CRPs and affiliations from AReS to do some text processing and create some mappings... + - First extract 10,000 affiliations from Elasticsearch by only including the `affiliation` source: + +``` +$ http 'http://localhost:9200/openrxv-items-final/_search?_source_includes=affiliation&size=10000&q=*:*' > /tmp/affiliations.json +``` + +- Then I decided to try a different approach and I adjusted my `convert-mapping.py` script to re-consider some replacement patterns with acronyms from the original AReS `mapping.json` file to hopefully address some MEL to CGSpace mappings + - For example, to change this: + - find: International Livestock Research Institute + - replace: International Livestock Research Institute - ILRI + - ... 
into this: + - find: International Livestock Research Institute - ILRI + - replace: International Livestock Research Institute +- I re-uploaded the mappings to Elasticsearch like I did yesterday and restarted the harvesting diff --git a/docs/2020-10/index.html b/docs/2020-10/index.html index 84cfb443c..4ac638314 100644 --- a/docs/2020-10/index.html +++ b/docs/2020-10/index.html @@ -23,7 +23,7 @@ During the FlywayDB migration I got an error: - + @@ -51,9 +51,9 @@ During the FlywayDB migration I got an error: "@type": "BlogPosting", "headline": "October, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-10/", - "wordCount": "4233", + "wordCount": "4350", "datePublished": "2020-10-06T16:55:54+03:00", - "dateModified": "2020-10-21T15:36:31+03:00", + "dateModified": "2020-10-22T11:58:26+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -839,6 +839,31 @@ $ csvcut -c 'id,dc.subject[],dc.subject[en_US],cg.subject.ilri[],cg.subject.ilri
  • Add two new blocks to list the top communities and collections on AReS
  • +
  • I want to extract all CRPs and affiliations from AReS to do some text processing and create some mappings… + +
  • + +
    $ http 'http://localhost:9200/openrxv-items-final/_search?_source_includes=affiliation&size=10000&q=*:*' > /tmp/affiliations.json
    +
    diff --git a/docs/categories/index.html b/docs/categories/index.html index dd59d67b4..458565277 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index cfae15a3d..7bad869ed 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 822ff991f..cfe8a85b1 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 66cc1655d..8e8371fe9 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 05bac3c0a..7a2caecfa 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 104f47196..069f7a4fd 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index a4d912910..3f9b799f8 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 09da340f8..bd630378e 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index a11073b84..ea45cb01e 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index a55509895..5d8229acb 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git 
a/docs/page/6/index.html b/docs/page/6/index.html index 063bfb2cc..44a91df4c 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index da169bd53..4b8e4b103 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index bfe5d6a29..0a480f383 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 9d9e17bb8..4a00bd38c 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index d3d687a42..3655fcb9a 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index e49876808..6a0054343 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 9f0086a1f..2383b0b34 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 99da706d2..b10d18af3 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 26f5b5cfa..4b4c9ae38 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index d2f71b89e..6b72a91f8 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-10-21T15:36:31+03:00 + 2020-10-22T11:58:26+03:00 https://alanorth.github.io/cgspace-notes/ - 
2020-10-21T15:36:31+03:00 + 2020-10-22T11:58:26+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-10-21T15:36:31+03:00 + 2020-10-22T11:58:26+03:00 https://alanorth.github.io/cgspace-notes/2020-10/ - 2020-10-21T15:36:31+03:00 + 2020-10-22T11:58:26+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-10-21T15:36:31+03:00 + 2020-10-22T11:58:26+03:00