diff --git a/content/posts/2022-07.md b/content/posts/2022-07.md
index c1c0b391c..534fa0466 100644
--- a/content/posts/2022-07.md
+++ b/content/posts/2022-07.md
@@ -118,4 +118,53 @@ UPDATE 104
- I will also have to remove "Academicians" from input-forms.xml
-
+
+## 2022-07-07
+
+- Finalize lists of non-AGROVOC subjects in CGSpace that I started last week
+ - I used the [SQL helper functions](https://wiki.lyrasis.org/display/DSPACE/Helper+SQL+functions+for+DSpace+6) to find the collections where each term was used:
+
+```console
+localhost/dspace= ☘ SELECT DISTINCT(ds6_item2collectionhandle(dspace_object_id)) AS collection, COUNT(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND LOWER(text_value) = 'water demand' GROUP BY collection ORDER BY count DESC LIMIT 5;
+ collection │ count
+─────────────┼───────
+ 10568/36178 │ 56
+ 10568/36185 │ 46
+ 10568/36181 │ 35
+ 10568/36188 │ 28
+ 10568/36179 │ 21
+(5 rows)
+```
+
+- For now I only did terms from my list that had 100 or more occurrences in CGSpace
+ - This leaves us with thirty-six terms that I will send to Sara Jani and Elizabeth Arnaud to evaluate for possible inclusion in AGROVOC
+- Write to some submitters from CIAT, Bioversity, and CCAFS to ask if they are still uploading new items with their legacy subject fields on CGSpace
+ - We want to remove them from the submission form to create space for new fields
+- Update one term I noticed people using that was close to AGROVOC:
+
+```console
+dspace=# UPDATE metadatavalue SET text_value='development policies' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=187 AND text_value='development policy';
+UPDATE 108
+```
+
+- After contacting some editors I removed some old metadata fields from the submission form and browse indexes:
+ - Bioversity subject (`cg.subject.bioversity`)
+ - CCAFS phase 1 project tag (`cg.identifier.ccafsproject`)
+ - CIAT project tag (`cg.identifier.ciatproject`)
+ - CIAT subject (`cg.subject.ciat`)
+- Work on cleaning and proofing forty-six AfricaRice items for CGSpace
+ - Last week we identified some duplicates so I removed those
+ - The data is of mediocre quality
+ - I've been fixing citations (nitpick), adding licenses, adding volume/issue/extent, fixing DOIs, and adding some AGROVOC subjects
+ - I even found titles that have typos, looking something like OCR errors...
+
+## 2022-07-08
+
+- Finalize the cleaning and proofing of AfricaRice records
+ - I found two suspicious items that claim to have been published but that I can't find in their respective journals, so I removed those
+ - I uploaded the forty-four items to [DSpace Test](https://dspacetest.cgiar.org/handle/10568/119135)
+- Margarita from CCAFS said they are no longer using the CCAFS subject or CCAFS phase 2 project tag
+ - I removed these from the input-forms.xml and Discovery facets:
+ - cg.identifier.ccafsprojectpii
+ - cg.subject.ccafs
+ - For now we will keep them in the search filters
diff --git a/docs/2022-07/index.html b/docs/2022-07/index.html
index 69f808265..777a7b6b6 100644
--- a/docs/2022-07/index.html
+++ b/docs/2022-07/index.html
@@ -19,7 +19,7 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
-
+
@@ -44,9 +44,9 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
"@type": "BlogPosting",
"headline": "July, 2022",
"url": "https://alanorth.github.io/cgspace-notes/2022-07/",
- "wordCount": "739",
+ "wordCount": "1095",
"datePublished": "2022-07-02T14:07:36+03:00",
- "dateModified": "2022-07-04T22:10:02+03:00",
+ "dateModified": "2022-07-07T10:02:04+03:00",
"author": {
"@type": "Person",
"name": "Alan Orth"
@@ -246,7 +246,76 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
- I will also have to remove “Academicians” from input-forms.xml
-
+2022-07-07
+
+- Finalize lists of non-AGROVOC subjects in CGSpace that I started last week
+
+
+
+localhost/dspace= ☘ SELECT DISTINCT(ds6_item2collectionhandle(dspace_object_id)) AS collection, COUNT(*) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND LOWER(text_value) = 'water demand' GROUP BY collection ORDER BY count DESC LIMIT 5;
+ collection │ count
+─────────────┼───────
+ 10568/36178 │ 56
+ 10568/36185 │ 46
+ 10568/36181 │ 35
+ 10568/36188 │ 28
+ 10568/36179 │ 21
+(5 rows)
+
+- For now I only did terms from my list that had 100 or more occurrences in CGSpace
+
+- This leaves us with thirty-six terms that I will send to Sara Jani and Elizabeth Arnaud to evaluate for possible inclusion in AGROVOC
+
+
+- Write to some submitters from CIAT, Bioversity, and CCAFS to ask if they are still uploading new items with their legacy subject fields on CGSpace
+
+- We want to remove them from the submission form to create space for new fields
+
+
+- Update one term I noticed people using that was close to AGROVOC:
+
+dspace=# UPDATE metadatavalue SET text_value='development policies' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=187 AND text_value='development policy';
+UPDATE 108
+
+- After contacting some editors I removed some old metadata fields from the submission form and browse indexes:
+
+- Bioversity subject (
cg.subject.bioversity
)
+- CCAFS phase 1 project tag (
cg.identifier.ccafsproject
)
+- CIAT project tag (
cg.identifier.ciatproject
)
+- CIAT subject (
cg.subject.ciat
)
+
+
+- Work on cleaning and proofing forty-six AfricaRice items for CGSpace
+
+- Last week we identified some duplicates so I removed those
+- The data is of mediocre quality
+- I’ve been fixing citations (nitpick), adding licenses, adding volume/issue/extent, fixing DOIs, and adding some AGROVOC subjects
+- I even found titles that have typos, looking something like OCR errors…
+
+
+
+2022-07-08
+
+- Finalize the cleaning and proofing of AfricaRice records
+
+- I found two suspicious items that claim to have been published but that I can’t find in their respective journals, so I removed those
+- I uploaded the forty-four items to DSpace Test
+
+
+- Margarita from CCAFS said they are no longer using the CCAFS subject or CCAFS phase 2 project tag
+
+- I removed these from the input-forms.xml and Discovery facets:
+
+- cg.identifier.ccafsprojectpii
+- cg.subject.ccafs
+
+
+- For now we will keep them in the search filters
+
+
+
diff --git a/docs/categories/index.html b/docs/categories/index.html
index ae2e983da..208fbfacd 100644
--- a/docs/categories/index.html
+++ b/docs/categories/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html
index c3803b567..f628d8200 100644
--- a/docs/categories/notes/index.html
+++ b/docs/categories/notes/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html
index cc27d02ac..74c333e60 100644
--- a/docs/categories/notes/page/2/index.html
+++ b/docs/categories/notes/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html
index 916de1585..c67852a5f 100644
--- a/docs/categories/notes/page/3/index.html
+++ b/docs/categories/notes/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html
index 6475a5951..2a9178788 100644
--- a/docs/categories/notes/page/4/index.html
+++ b/docs/categories/notes/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html
index 454f20c04..ced4f95e4 100644
--- a/docs/categories/notes/page/5/index.html
+++ b/docs/categories/notes/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html
index 42a77601d..cc1054246 100644
--- a/docs/categories/notes/page/6/index.html
+++ b/docs/categories/notes/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html
index 934f5d07b..3fad6c16b 100644
--- a/docs/categories/notes/page/7/index.html
+++ b/docs/categories/notes/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/index.html b/docs/index.html
index 8c47206f4..42c9ba3eb 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/2/index.html b/docs/page/2/index.html
index 4fce30841..90c7115d0 100644
--- a/docs/page/2/index.html
+++ b/docs/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/3/index.html b/docs/page/3/index.html
index 420e5d61d..534f7e4fd 100644
--- a/docs/page/3/index.html
+++ b/docs/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/4/index.html b/docs/page/4/index.html
index 74acd354a..0184cd9ea 100644
--- a/docs/page/4/index.html
+++ b/docs/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/5/index.html b/docs/page/5/index.html
index 7cde28874..e3fcbbed1 100644
--- a/docs/page/5/index.html
+++ b/docs/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/6/index.html b/docs/page/6/index.html
index 2dddc557a..260fa9785 100644
--- a/docs/page/6/index.html
+++ b/docs/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/7/index.html b/docs/page/7/index.html
index 931acb758..409b709ba 100644
--- a/docs/page/7/index.html
+++ b/docs/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/8/index.html b/docs/page/8/index.html
index d46da45a9..3bd709463 100644
--- a/docs/page/8/index.html
+++ b/docs/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/page/9/index.html b/docs/page/9/index.html
index 8bf78a895..437accfee 100644
--- a/docs/page/9/index.html
+++ b/docs/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/index.html b/docs/posts/index.html
index b7f586dd9..70eaea271 100644
--- a/docs/posts/index.html
+++ b/docs/posts/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html
index 02ceab965..fcf9f844a 100644
--- a/docs/posts/page/2/index.html
+++ b/docs/posts/page/2/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html
index faa0306bd..058713aab 100644
--- a/docs/posts/page/3/index.html
+++ b/docs/posts/page/3/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html
index 6c25befd8..a71f52d0c 100644
--- a/docs/posts/page/4/index.html
+++ b/docs/posts/page/4/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html
index a3063c5c5..0cc7b80a8 100644
--- a/docs/posts/page/5/index.html
+++ b/docs/posts/page/5/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html
index 0f68729bf..e2ecc926d 100644
--- a/docs/posts/page/6/index.html
+++ b/docs/posts/page/6/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html
index 55f7751b3..1bad7c985 100644
--- a/docs/posts/page/7/index.html
+++ b/docs/posts/page/7/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html
index c17fbf9dc..52ac16c60 100644
--- a/docs/posts/page/8/index.html
+++ b/docs/posts/page/8/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html
index 7ab398b3c..2731176dc 100644
--- a/docs/posts/page/9/index.html
+++ b/docs/posts/page/9/index.html
@@ -10,7 +10,7 @@
-
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index f7a49eba9..af42045ea 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -3,19 +3,19 @@
xmlns:xhtml="http://www.w3.org/1999/xhtml">
https://alanorth.github.io/cgspace-notes/categories/
- 2022-07-04T22:10:02+03:00
+ 2022-07-07T10:02:04+03:00
https://alanorth.github.io/cgspace-notes/
- 2022-07-04T22:10:02+03:00
+ 2022-07-07T10:02:04+03:00
https://alanorth.github.io/cgspace-notes/2022-07/
- 2022-07-04T22:10:02+03:00
+ 2022-07-07T10:02:04+03:00
https://alanorth.github.io/cgspace-notes/categories/notes/
- 2022-07-04T22:10:02+03:00
+ 2022-07-07T10:02:04+03:00
https://alanorth.github.io/cgspace-notes/posts/
- 2022-07-04T22:10:02+03:00
+ 2022-07-07T10:02:04+03:00
https://alanorth.github.io/cgspace-notes/2022-06/
2022-07-04T09:25:14+03:00