diff --git a/content/posts/2022-07.md b/content/posts/2022-07.md index c1c0b391c..534fa0466 100644 --- a/content/posts/2022-07.md +++ b/content/posts/2022-07.md @@ -118,4 +118,53 @@ UPDATE 104 - I will also have to remove "Academicians" from input-forms.xml - + +## 2022-07-07 + +- Finalize lists of non-AGROVOC subjects in CGSpace that I started last week + - I used the [SQL helper functions](https://wiki.lyrasis.org/display/DSPACE/Helper+SQL+functions+for+DSpace+6) to find the collections where each term was used: + +```console +localhost/dspace= ☘ SELECT DISTINCT(ds6_item2collectionhandle(dspace_object_id)) AS collection, COUNT(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND LOWER(text_value) = 'water demand' GROUP BY collection ORDER BY count DESC LIMIT 5; + collection │ count +─────────────┼─────── + 10568/36178 │ 56 + 10568/36185 │ 46 + 10568/36181 │ 35 + 10568/36188 │ 28 + 10568/36179 │ 21 +(5 rows) +``` + +- For now I only did terms from my list that had 100 or more occurrences in CGSpace + - This leaves us with thirty-six terms that I will send to Sara Jani and Elizabeth Arnaud for evaluating possible inclusion to AGROVOC +- Write to some submitters from CIAT, Bioversity, and CCAFS to ask if they are still uploading new items with their legacy subject fields on CGSpace + - We want to remove them from the submission form to create space for new fields +- Update one term I noticed people using that was close to AGROVOC: + +```console +dspace=# UPDATE metadatavalue SET text_value='development policies' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=187 AND text_value='development policy'; +UPDATE 108 +``` + +- After contacting some editors I removed some old metadata fields from the submission form and browse indexes: + - Bioversity subject (`cg.subject.bioversity`) + - CCAFS phase 1 project tag (`cg.identifier.ccafsproject`) + - CIAT project tag (`cg.identifier.ciatproject`) + - CIAT subject 
(`cg.subject.ciat`) +- Work on cleaning and proofing forty-six AfricaRice items for CGSpace + - Last week we identified some duplicates so I removed those + - The data is of mediocre quality + - I've been fixing citations (nitpick), adding licenses, adding volume/issue/extent, fixing DOIs, and adding some AGROVOC subjects + - I even found titles that have typos, which look like OCR errors... + +## 2022-07-08 + +- Finalize the cleaning and proofing of AfricaRice records + - I found two suspicious items that claim to have been published but that I can't find in the respective journals, so I removed those + - I uploaded the forty-four items to [DSpace Test](https://dspacetest.cgiar.org/handle/10568/119135) +- Margarita from CCAFS said they are no longer using the CCAFS subject or CCAFS phase 2 project tag + - I removed these from the input-forms.xml and Discovery facets: + - cg.identifier.ccafsprojectpii + - cg.subject.ccafs + - For now we will keep them in the search filters diff --git a/docs/2022-07/index.html b/docs/2022-07/index.html index 69f808265..777a7b6b6 100644 --- a/docs/2022-07/index.html +++ b/docs/2022-07/index.html @@ -19,7 +19,7 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens - + @@ -44,9 +44,9 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens "@type": "BlogPosting", "headline": "July, 2022", "url": "https://alanorth.github.io/cgspace-notes/2022-07/", - "wordCount": "739", + "wordCount": "1095", "datePublished": "2022-07-02T14:07:36+03:00", - "dateModified": "2022-07-04T22:10:02+03:00", + "dateModified": "2022-07-07T10:02:04+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -246,7 +246,76 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens - +

2022-07-07

+ +
localhost/dspace= ☘ SELECT DISTINCT(ds6_item2collectionhandle(dspace_object_id)) AS collection, COUNT(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND LOWER(text_value) = 'water demand' GROUP BY collection ORDER BY count DESC LIMIT 5;
+ collection  │ count 
+─────────────┼───────
+ 10568/36178 │    56
+ 10568/36185 │    46
+ 10568/36181 │    35
+ 10568/36188 │    28
+ 10568/36179 │    21
+(5 rows)
+
+
dspace=# UPDATE metadatavalue SET text_value='development policies' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=187 AND text_value='development policy';
+UPDATE 108
+
+

2022-07-08

+ diff --git a/docs/categories/index.html b/docs/categories/index.html index ae2e983da..208fbfacd 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index c3803b567..f628d8200 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index cc27d02ac..74c333e60 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index 916de1585..c67852a5f 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 6475a5951..2a9178788 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 454f20c04..ced4f95e4 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 42a77601d..cc1054246 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index 934f5d07b..3fad6c16b 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 8c47206f4..42c9ba3eb 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 
+10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 4fce30841..90c7115d0 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 420e5d61d..534f7e4fd 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 74acd354a..0184cd9ea 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 7cde28874..e3fcbbed1 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index 2dddc557a..260fa9785 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index 931acb758..409b709ba 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index d46da45a9..3bd709463 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index 8bf78a895..437accfee 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index b7f586dd9..70eaea271 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index 02ceab965..fcf9f844a 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index faa0306bd..058713aab 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git 
a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 6c25befd8..a71f52d0c 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index a3063c5c5..0cc7b80a8 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 0f68729bf..e2ecc926d 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 55f7751b3..1bad7c985 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index c17fbf9dc..52ac16c60 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 7ab398b3c..2731176dc 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index f7a49eba9..af42045ea 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2022-07-04T22:10:02+03:00 + 2022-07-07T10:02:04+03:00 https://alanorth.github.io/cgspace-notes/ - 2022-07-04T22:10:02+03:00 + 2022-07-07T10:02:04+03:00 https://alanorth.github.io/cgspace-notes/2022-07/ - 2022-07-04T22:10:02+03:00 + 2022-07-07T10:02:04+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2022-07-04T22:10:02+03:00 + 2022-07-07T10:02:04+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2022-07-04T22:10:02+03:00 + 2022-07-07T10:02:04+03:00 https://alanorth.github.io/cgspace-notes/2022-06/ 2022-07-04T09:25:14+03:00