diff --git a/content/post/2018-02.md b/content/post/2018-02.md index a893dc40c..a9e9bb615 100644 --- a/content/post/2018-02.md +++ b/content/post/2018-02.md @@ -65,3 +65,21 @@ real 0m23.839s user 0m27.225s sys 0m1.905s ``` + +## 2018-02-05 + +- Toying with correcting authors with trailing spaces via PostgreSQL: + +``` +dspace=# update metadatavalue set text_value=REGEXP_REPLACE(text_value, '\s+$' , '') where resource_type_id=2 and metadata_field_id=3 and text_value ~ '^.*?\s+$'; +UPDATE 20 +``` + +- I tried the `TRIM(TRAILING from text_value)` function and it said it changed 20 items but the spaces didn't go away +- This is on a fresh import of the CGSpace database, but when I tried to apply it on CGSpace there were no changes detected. Weird. +- Anyways, Peter wants a new list of authors to clean up, so I exported another CSV: + +``` +dspace=# \copy (select distinct text_value, count(*) as count from metadatavalue where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author') AND resource_type_id = 2 group by text_value order by count desc) to /tmp/authors-2018-02-05.csv with csv; +COPY 55630 +``` diff --git a/public/2018-02/index.html b/public/2018-02/index.html index 7b55de60c..8be460eeb 100644 --- a/public/2018-02/index.html +++ b/public/2018-02/index.html @@ -23,7 +23,7 @@ I copied the logic in the jmx_tomcat_dbpools provided by Ubuntu’s munin-pl - + @@ -57,9 +57,9 @@ I copied the logic in the jmx_tomcat_dbpools provided by Ubuntu’s munin-pl "@type": "BlogPosting", "headline": "February, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-02/", - "wordCount": "328", + "wordCount": "459", "datePublished": "2018-02-01T16:28:54+02:00", - "dateModified": "2018-02-04T00:00:51+02:00", + "dateModified": "2018-02-04T11:26:07+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -195,6 +195,26 @@ user 0m27.225s sys 0m1.905s +
dspace=# update metadatavalue set text_value=REGEXP_REPLACE(text_value, '\s+$' , '') where resource_type_id=2 and metadata_field_id=3 and text_value ~ '^.*?\s+$';
+UPDATE 20
+
+
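+I tried the TRIM(TRAILING from text_value) function and it said it changed 20 items but the spaces didn’t go away
+This is on a fresh import of the CGSpace database, but when I tried to apply it on CGSpace there were no changes detected. Weird.
+Anyways, Peter wants a new list of authors to clean up, so I exported another CSV:
+
+dspace=# \copy (select distinct text_value, count(*) as count from metadatavalue where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author') AND resource_type_id = 2 group by text_value order by count desc) to /tmp/authors-2018-02-05.csv with csv;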
+COPY 55630
+
+
diff --git a/public/sitemap.xml b/public/sitemap.xml
index 5348ee9fc..a5d4496ed 100644
--- a/public/sitemap.xml
+++ b/public/sitemap.xml
@@ -4,7 +4,7 @@