diff --git a/content/posts/2019-04.md b/content/posts/2019-04.md index 029fe758d..9af7ddd45 100644 --- a/content/posts/2019-04.md +++ b/content/posts/2019-04.md @@ -427,5 +427,34 @@ $ ./fix-metadata-values.py -i 2019-04-08-fix-13-affiliations.csv -db dspace -u d ``` - We should create a new list of affiliations to update our controlled vocabulary again +- I dumped a list of the top 1500 affiliations: + +``` +dspace=# \COPY (SELECT DISTINCT text_value, count(*) FROM metadatavalue WHERE metadata_field_id = 211 AND resource_type_id = 2 GROUP BY text_value ORDER BY count DESC LIMIT 1500) to /tmp/2019-04-08-top-1500-affiliations.csv WITH CSV HEADER; +COPY 1500 +``` + +- Fix a few more messed up affiliations that have return characters in them (use Ctrl-V Ctrl-M to re-create control character): + +``` +dspace=# UPDATE metadatavalue SET text_value='International Institute for Environment and Development' WHERE resource_type_id = 2 AND metadata_field_id = 211 AND text_value LIKE 'International Institute^M%'; +dspace=# UPDATE metadatavalue SET text_value='Kenya Agriculture and Livestock Research Organization' WHERE resource_type_id = 2 AND metadata_field_id = 211 AND text_value LIKE 'Kenya Agricultural and Livestock Research^M%'; +``` + +- I noticed a bunch of subjects and affiliations that use stylized apostrophes so I will export those and then batch update them: + +``` +dspace=# \COPY (SELECT DISTINCT text_value FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 211 AND text_value LIKE '%’%') to /tmp/2019-04-08-affiliations-apostrophes.csv WITH CSV HEADER; +COPY 60 +dspace=# \COPY (SELECT DISTINCT text_value FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 57 AND text_value LIKE '%’%') to /tmp/2019-04-08-subject-apostrophes.csv WITH CSV HEADER; +COPY 20 +``` + +- I cleaned them up in OpenRefine and then applied the fixes on CGSpace and DSpace Test: + +``` +$ ./fix-metadata-values.py -i /tmp/2019-04-08-fix-60-affiliations-apostrophes.csv -db dspace -u dspace -p 'fuuu' -f cg.contributor.affiliation -m 211 -t correct -d +$ ./fix-metadata-values.py -i /tmp/2019-04-08-fix-20-subject-apostrophes.csv -db dspace -u dspace -p 'fuuu' -f dc.subject -m 57 -t correct -d +``` diff --git a/docs/2019-04/index.html b/docs/2019-04/index.html index e0d720935..4c02bd271 100644 --- a/docs/2019-04/index.html +++ b/docs/2019-04/index.html @@ -38,7 +38,7 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace - + @@ -81,9 +81,9 @@ $ ./delete-metadata-values.py -i /tmp/2019-02-21-delete-1-region.csv -db dspace "@type": "BlogPosting", "headline": "April, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-04/", - "wordCount": "2397", + "wordCount": "2631", "datePublished": "2019-04-01T09:00:43+03:00", - "dateModified": "2019-04-07T21:17:16+03:00", + "dateModified": "2019-04-08T11:26:20+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -659,8 +659,39 @@ X-XSS-Protection: 1; mode=block
dspace=# \COPY (SELECT DISTINCT text_value, count(*) FROM metadatavalue WHERE metadata_field_id = 211 AND resource_type_id = 2 GROUP BY text_value ORDER BY count DESC LIMIT 1500) to /tmp/2019-04-08-top-1500-affiliations.csv WITH CSV HEADER;
+COPY 1500
+
+
+dspace=# UPDATE metadatavalue SET text_value='International Institute for Environment and Development' WHERE resource_type_id = 2 AND metadata_field_id = 211 AND text_value LIKE 'International Institute^M%';
+dspace=# UPDATE metadatavalue SET text_value='Kenya Agriculture and Livestock Research Organization' WHERE resource_type_id = 2 AND metadata_field_id = 211 AND text_value LIKE 'Kenya Agricultural and Livestock Research^M%';
+
+
+dspace=# \COPY (SELECT DISTINCT text_value FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 211 AND text_value LIKE '%’%') to /tmp/2019-04-08-affiliations-apostrophes.csv WITH CSV HEADER;
+COPY 60
+dspace=# \COPY (SELECT DISTINCT text_value FROM metadatavalue WHERE resource_type_id = 2 AND metadata_field_id = 57 AND text_value LIKE '%’%') to /tmp/2019-04-08-subject-apostrophes.csv WITH CSV HEADER;
+COPY 20
+
+
+$ ./fix-metadata-values.py -i /tmp/2019-04-08-fix-60-affiliations-apostrophes.csv -db dspace -u dspace -p 'fuuu' -f cg.contributor.affiliation -m 211 -t correct -d
+$ ./fix-metadata-values.py -i /tmp/2019-04-08-fix-20-subject-apostrophes.csv -db dspace -u dspace -p 'fuuu' -f dc.subject -m 57 -t correct -d
+
+
diff --git a/docs/robots.txt b/docs/robots.txt
index 9ea3f6b9d..fc9a8bbb4 100644
--- a/docs/robots.txt
+++ b/docs/robots.txt
@@ -46,7 +46,7 @@ Disallow: /cgspace-notes/2015-12/
Disallow: /cgspace-notes/2015-11/
Disallow: /cgspace-notes/
Disallow: /cgspace-notes/categories/
-Disallow: /cgspace-notes/tags/notes/
Disallow: /cgspace-notes/categories/notes/
+Disallow: /cgspace-notes/tags/notes/
Disallow: /cgspace-notes/posts/
Disallow: /cgspace-notes/tags/
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 571531f8e..f261b355d 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -4,7 +4,7 @@