diff --git a/content/posts/2020-10.md b/content/posts/2020-10.md
index cca64ec84..4c02450c1 100644
--- a/content/posts/2020-10.md
+++ b/content/posts/2020-10.md
@@ -640,5 +640,20 @@ Authorization denied for action WORKFLOW_STEP_1 on COLLECTION:1072 by user 1759
- The submit step is defined, but has no users or groups
- I added the IITA submitters there and told Bosede to try again
- Add two new blocks to list the top communities and collections on AReS
+- I want to extract all CRPs and affiliations from AReS to do some text processing and create some mappings...
+ - First extract 10,000 affiliations from Elasticsearch by only including the `affiliation` source:
+
+```
+$ http 'http://localhost:9200/openrxv-items-final/_search?_source_includes=affiliation&size=10000&q=*:*' > /tmp/affiliations.json
+```
+
+- Then I decided to try a different approach and I adjusted my `convert-mapping.py` script to re-consider some replacement patterns with acronyms from the original AReS `mapping.json` file to hopefully address some MEL to CGSpace mappings
+ - For example, to change this:
+ - find: International Livestock Research Institute
+ - replace: International Livestock Research Institute - ILRI
+ - ... into this:
+ - find: International Livestock Research Institute - ILRI
+ - replace: International Livestock Research Institute
+- I re-uploaded the mappings to Elasticsearch like I did yesterday and restarted the harvesting
diff --git a/docs/2020-10/index.html b/docs/2020-10/index.html
index 84cfb443c..4ac638314 100644
--- a/docs/2020-10/index.html
+++ b/docs/2020-10/index.html
@@ -23,7 +23,7 @@ During the FlywayDB migration I got an error:
-
+
@@ -51,9 +51,9 @@ During the FlywayDB migration I got an error:
"@type": "BlogPosting",
"headline": "October, 2020",
"url": "https://alanorth.github.io/cgspace-notes/2020-10/",
- "wordCount": "4233",
+ "wordCount": "4350",
"datePublished": "2020-10-06T16:55:54+03:00",
- "dateModified": "2020-10-21T15:36:31+03:00",
+ "dateModified": "2020-10-22T11:58:26+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -839,6 +839,31 @@ $ csvcut -c 'id,dc.subject[],dc.subject[en_US],cg.subject.ilri[],cg.subject.ilri
Add two new blocks to list the top communities and collections on AReS
+I want to extract all CRPs and affiliations from AReS to do some text processing and create some mappings…
+
+- First extract 10,000 affiliations from Elasticsearch by only including the
affiliation
source:
+
+
+
+$ http 'http://localhost:9200/openrxv-items-final/_search?_source_includes=affiliation&size=10000&q=*:*' > /tmp/affiliations.json
+
+- Then I decided to try a different approach and I adjusted my
convert-mapping.py
script to re-consider some replacement patterns with acronyms from the original AReS mapping.json
file to hopefully address some MEL to CGSpace mappings
+
+- For example, to change this:
+
+- find: International Livestock Research Institute
+- replace: International Livestock Research Institute - ILRI
+
+
+- … into this:
+
+- find: International Livestock Research Institute - ILRI
+- replace: International Livestock Research Institute
+
+
+
+
+- I re-uploaded the mappings to Elasticsearch like I did yesterday and restarted the harvesting
diff --git a/docs/categories/index.html b/docs/categories/index.html
index dd59d67b4..458565277 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index cfae15a3d..7bad869ed 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index 822ff991f..cfe8a85b1 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index 66cc1655d..8e8371fe9 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index 05bac3c0a..7a2caecfa 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index 104f47196..069f7a4fd 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index a4d912910..3f9b799f8 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index 09da340f8..bd630378e 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index a11073b84..ea45cb01e 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index a55509895..5d8229acb 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index 063bfb2cc..44a91df4c 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index da169bd53..4b8e4b103 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index bfe5d6a29..0a480f383 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index 9d9e17bb8..4a00bd38c 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index d3d687a42..3655fcb9a 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index e49876808..6a0054343 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index 9f0086a1f..2383b0b34 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index 99da706d2..b10d18af3 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index 26f5b5cfa..4b4c9ae38 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -9,7 +9,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index d2f71b89e..6b72a91f8 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -4,27 +4,27 @@
https://alanorth.github.io/cgspace-notes/categories/
- 2020-10-21T15:36:31+03:00
+ 2020-10-22T11:58:26+03:00
https://alanorth.github.io/cgspace-notes/
- 2020-10-21T15:36:31+03:00
+ 2020-10-22T11:58:26+03:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2020-10-21T15:36:31+03:00
+ 2020-10-22T11:58:26+03:00
https://alanorth.github.io/cgspace-notes/2020-10/
- 2020-10-21T15:36:31+03:00
+ 2020-10-22T11:58:26+03:00
https://alanorth.github.io/cgspace-notes/posts/
- 2020-10-21T15:36:31+03:00
+ 2020-10-22T11:58:26+03:00