diff --git a/content/posts/2024-02.md b/content/posts/2024-02.md index 65393d4e4..4f4f207e1 100644 --- a/content/posts/2024-02.md +++ b/content/posts/2024-02.md @@ -111,4 +111,8 @@ $ cat /tmp/authors /tmp/ifpri-authors | sort -u > /tmp/new-authors - I figured out a way to add a new Angular component to handle all our relation fields +## 2024-02-29 + +- Clean up a bunch of metadata on CGSpace + diff --git a/docs/2024-01/index.html b/docs/2024-01/index.html index e4ef19708..7520322df 100644 --- a/docs/2024-01/index.html +++ b/docs/2024-01/index.html @@ -15,7 +15,7 @@ Lower case all the AGROVOC subjects on CGSpace - + @@ -36,9 +36,9 @@ Lower case all the AGROVOC subjects on CGSpace "@type": "BlogPosting", "headline": "February, 2024", "url": "https://alanorth.github.io/cgspace-notes/2024-01/", - "wordCount": "551", + "wordCount": "560", "datePublished": "2024-01-05T11:10:00+03:00", - "dateModified": "2024-02-27T17:18:35+03:00", + "dateModified": "2024-02-29T09:41:44+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -222,6 +222,405 @@ Lower case all the AGROVOC subjects on CGSpace +

2024-02-29

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +tadata values: + + + +
$ csvcut -c dcterms.publisher ~/Downloads/2024-01-09-publishers4.csv | sed -e 1d -e 's/"//g' > /tmp/top-publishers.txt
+
+
localhost/dspace7= ☘ \COPY (SELECT DISTINCT(text_value) FROM metadatavalue WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=247) to /tmp/2024-01-09-orcid-identifiers.txt;
+localhost/dspace7= ☘ \q
+$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml /tmp/2024-01-09-orcid-identifiers.txt | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u > /tmp/2024-01-09-orcids.txt
+$ ./ilri/resolve_orcids.py -i /tmp/2024-01-09-orcids.txt -o /tmp/2024-01-09-orcids-names.txt -d
+
+
$ ./ilri/update_orcids.py -i /tmp/2024-01-09-orcids-names.txt -db dspace -u dspace -p bahhhh
+
+
2024-01-09 06:23:35,893 ERROR unknown unknown org.dspace.authorize.AuthorizeServiceImpl @ Failed getting getting community/collection admin status for bahhhhh@cgiar.org The search error is: Error from server at http://localhost:8983/solr/search: org.apache.solr.search.SyntaxError: Cannot parse 'search.resourcetype:Community AND (admin:eef481147-daf3-4fd2-bb8d-e18af8131d8c OR admin:g80199ef9-bcd6-4961-9512-501dea076607 OR admin:g4ac29263-cf0c-48d0-8be7-7f09317d50ec OR admin:g0e594148-a0f6-4f00-970d-6b7812f89540 OR admin:g0265b87a-2183-4357-a971-7a5b0c7add3a OR admin:g371ae807-f014-4305-b4ec-f2a8f6f0dcfa OR admin:gdc5cb27c-4a5a-45c2-b656-a399fded70de OR admin:ge36d0ece-7a52-4925-afeb-6641d6a348cc OR admin:g15dc1173-7ddf-43cf-a89a-77a7f81c4cfc OR admin:gc3a599d3-c758-46cd-9855-c98f6ab58ae4 OR admin:g3d648c3e-58c3-4342-b500-07cba10ba52d OR admin:g82bf5168-65c1-4627-8eb4-724fa0ea51a7 OR admin:ge751e973-697d-419c-b59b-5a5644702874 OR admin:g44dd0a80-c1e6-4274-9be4-9f342d74928c OR admin:g4842f9c2-73ed-476a-a81a-7167d8aa7946 OR admin:g5f279b3f-c2ce-4c75-b151-1de52c1a540e OR admin:ga6df8adc-2e1d-40f2-8f1e-f77796d0eecd OR admin:gfdfc1621-382e-437a-8674-c9007627565c OR admin:g15cd114a-0b89-442b-a1b4-1febb6959571 OR admin:g12aede99-d018-4c00-b4d4-a732541d0017 OR admin:gc59529d7-002a-4216-b2e1-d909afd2d4a9 OR admin:gd0806714-bc13-460d-bedd-121bdd5436a4 OR admin:gce70739a-8820-4d56-b19c-f191855479e4 OR admin:g7d3409eb-81e3-4156-afb1-7f02de22065f OR admin:g54bc009e-2954-4dad-8c30-be6a09dc5093 OR admin:gc5e1d6b7-4603-40d7-852f-6654c159dec9 OR admin:g0046214d-c85b-4f12-a5e6-2f57a2c3abb0 OR admin:g4c7b4fd0-938f-40e9-ab3e-447c317296c1 OR admin:gcfae9b69-d8dd-4cf3-9a4e-d6e31ff68731 OR ... admin:g20f366c0-96c0-4416-ad0b-46884010925f)': too many boolean clauses The search resourceType filter was: search.resourcetype:Community
+
+
$ dspace dsrun org.dspace.eperson.Groomer -a -b 01/09/2018 -d
+
+
$ dspace user -L > /tmp/users-before.txt
+$ wc -l /tmp/users-before.txt
+8943 /tmp/users-before.txt
+
+

2024-01-10

+ +
localhost/dspace7= ☘ SELECT DISTINCT text_value AS "cg.identifier.ciatproject", count(*) FROM metadatavalue WHERE dspace_object_id in (SELECT dspace_object_id FROM item) AND metadata
+_field_id = 232 GROUP BY "cg.identifier.ciatproject" ORDER BY count DESC;
+ cg.identifier.ciatproject │ count
+───────────────────────────┼───────
+ D145                      │     4
+ LAM_LivestockPlus         │     2
+ A215                      │     1
+ A217                      │     1
+ A220                      │     1
+ A223                      │     1
+ A224                      │     1
+ A227                      │     1
+ A229                      │     1
+ A230                      │     1
+ CLIMATE CHANGE MITIGATION │     1
+ LIVESTOCK                 │     1
+(12 rows)
+
+Time: 240.041 ms
+
+

2024-01-12

+ +
localhost/dspace7= ☘ \COPY (SELECT DISTINCT text_value AS "cg.contributor.affiliation", count(*) FROM metadatavalue WHERE dspace_object_id in (SELECT dspace_object_id FROM item) AND metadata_field_id = 211 GROUP BY "cg.contributor.affiliation" ORDER BY count DESC) to /tmp/2024-01-affiliations.csv WITH CSV HEADER;
+COPY 11719
+
+
$ curl 'http://localhost:8983/solr/statistics/select?q=-id%3A%2F.\{36\}%2F&rows=0'
+{
+  "responseHeader":{
+    "status":0,
+    "QTime":0,
+    "params":{
+      "q":"-id:/.{36}/",
+      "rows":"0"}},
+  "response":{"numFound":800167,"start":0,"numFoundExact":true,"docs":[]
+  }}
+
+
$ curl 'http://localhost:8983/solr/statistics/select?q=-id%3A%2F.\{36\}%2F&facet.range=time&facet=true&facet.range.start=2010-01-01T00:00:00Z&facet.range.end=NOW&facet.range.gap=%2B1YEAR&rows=0'
+{
+  "responseHeader":{
+    "status":0,
+    "QTime":13,
+    "params":{
+      "facet.range":"time",
+      "q":"-id:/.{36}/",
+      "facet.range.gap":"+1YEAR",
+      "rows":"0",
+      "facet":"true",
+      "facet.range.start":"2010-01-01T00:00:00Z",
+      "facet.range.end":"NOW"}},
+  "response":{"numFound":800168,"start":0,"numFoundExact":true,"docs":[]
+  },
+  "facet_counts":{
+    "facet_queries":{},
+    "facet_fields":{},
+    "facet_ranges":{
+      "time":{
+        "counts":[
+          "2010-01-01T00:00:00Z",0,
+          "2011-01-01T00:00:00Z",0,
+          "2012-01-01T00:00:00Z",0,
+          "2013-01-01T00:00:00Z",0,
+          "2014-01-01T00:00:00Z",0,
+          "2015-01-01T00:00:00Z",89,
+          "2016-01-01T00:00:00Z",11,
+          "2017-01-01T00:00:00Z",0,
+          "2018-01-01T00:00:00Z",0,
+          "2019-01-01T00:00:00Z",0,
+          "2020-01-01T00:00:00Z",1339,
+          "2021-01-01T00:00:00Z",0,
+          "2022-01-01T00:00:00Z",0,
+          "2023-01-01T00:00:00Z",653736,
+          "2024-01-01T00:00:00Z",144993],
+        "gap":"+1YEAR",
+        "start":"2010-01-01T00:00:00Z",
+        "end":"2025-01-01T00:00:00Z"}},
+    "facet_intervals":{},
+    "facet_heatmaps":{}}}
+
+
$ curl 'http://localhost:8983/solr/statistics/select?q=-id%3A%2F.\{36\}%2F&facet.range=time&facet=true&facet.range.start=2023-01-01T00:00:00Z&facet.range.end=NOW&facet.range.gap=%2B1MONTH&rows=0'
+{
+  "responseHeader":{
+    "status":0,
+    "QTime":196,
+    "params":{
+      "facet.range":"time",
+      "q":"-id:/.{36}/",
+      "facet.range.gap":"+1MONTH",
+      "rows":"0",
+      "facet":"true",
+      "facet.range.start":"2023-01-01T00:00:00Z",
+      "facet.range.end":"NOW"}},
+  "response":{"numFound":800168,"start":0,"numFoundExact":true,"docs":[]
+  },
+  "facet_counts":{
+    "facet_queries":{},
+    "facet_fields":{},
+    "facet_ranges":{
+      "time":{
+        "counts":[
+          "2023-01-01T00:00:00Z",1,
+          "2023-02-01T00:00:00Z",0,
+          "2023-03-01T00:00:00Z",0,
+          "2023-04-01T00:00:00Z",0,
+          "2023-05-01T00:00:00Z",0,
+          "2023-06-01T00:00:00Z",0,
+          "2023-07-01T00:00:00Z",0,
+          "2023-08-01T00:00:00Z",27621,
+          "2023-09-01T00:00:00Z",59165,
+          "2023-10-01T00:00:00Z",115338,
+          "2023-11-01T00:00:00Z",96147,
+          "2023-12-01T00:00:00Z",355464,
+          "2024-01-01T00:00:00Z",125429],
+        "gap":"+1MONTH",
+        "start":"2023-01-01T00:00:00Z",
+        "end":"2024-02-01T00:00:00Z"}},
+    "facet_intervals":{},
+    "facet_heatmaps":{}}}
+
+

2024-01-13

+ +

2024-01-15

+ +
0|dspace-ui  | 1 rules skipped due to selector errors:
+0|dspace-ui  |   .custom-file-input:lang(en)~.custom-file-label -> unmatched pseudo-class :lang
+
+
# zcat -f /var/log/nginx/*access.log  /var/log/nginx/*access.log.1 /var/log/nginx/*access.log.2.gz /var/log/nginx/*access.log.3.gz /var/log/nginx/*access.log.4.gz /var/log/nginx/*access.log.5.gz /var/log/nginx/*access.log.6.gz | awk '{print $1}' | sort -u |
+tee /tmp/ips.txt | wc -l
+196493
+
+

2024-01-17

+ +

2024-01-18

+ +
$ curl http://localhost:8983/solr/statistics/update -H "Content-type: text/xml" --data-binary '<delete><query>uid:3b4eefba-a302-4172-a286-dcb25d70129e</query></delete>'
+
+

2024-01-22

+ +

2024-01-23

+ +
$ cat ~/src/git/DSpace/dspace/config/controlled-vocabularies/cg-creator-identifier.xml ~/Downloads/IFPRI\ ORCiD\ All.csv | grep -oE '[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}' | sort -u > /tmp/2024-01-23-orcids.txt
+$ ./ilri/resolve_orcids.py -i /tmp/2024-01-23-orcids.txt -o /tmp/2024-01-23-orcids-names.txt -d
+$ ./ilri/update_orcids.py -i /tmp/2024-01-23-orcids-names.txt -db dspace -u dspace -p fuuu
+
+

2024-01-26

+ +

2024-01-29

+ diff --git a/docs/categories/index.html b/docs/categories/index.html index 058e6777d..ccb082d0c 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/index.xml b/docs/categories/index.xml index b47670c2d..523f5c8fb 100644 --- a/docs/categories/index.xml +++ b/docs/categories/index.xml @@ -6,7 +6,7 @@ Recent content in Categories on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 27 Feb 2024 17:18:35 +0300 + Thu, 29 Feb 2024 09:41:44 +0300 Notes diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 721f01782..ca7cfec4a 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/index.xml b/docs/categories/notes/index.xml index 2a55a2afe..337fb2818 100644 --- a/docs/categories/notes/index.xml +++ b/docs/categories/notes/index.xml @@ -6,7 +6,7 @@ Recent content in Notes on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 27 Feb 2024 17:18:35 +0300 + Thu, 29 Feb 2024 09:41:44 +0300 February, 2024 diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index e76f4398c..2f5410dee 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index b102e079b..d8cfcfffe 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 918350dd1..32f5e95f7 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/5/index.html b/docs/categories/notes/page/5/index.html index 9f0f96b04..951b60a00 100644 --- a/docs/categories/notes/page/5/index.html +++ b/docs/categories/notes/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/6/index.html b/docs/categories/notes/page/6/index.html index 2c0b81f59..495f6daba 100644 --- a/docs/categories/notes/page/6/index.html +++ b/docs/categories/notes/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/7/index.html b/docs/categories/notes/page/7/index.html index c40236330..fbfb884ee 100644 --- a/docs/categories/notes/page/7/index.html +++ b/docs/categories/notes/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/categories/notes/page/8/index.html b/docs/categories/notes/page/8/index.html index 3b3b6590b..66da05d54 100644 --- a/docs/categories/notes/page/8/index.html +++ b/docs/categories/notes/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 2a23c1d42..4ab76f7a1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/index.xml b/docs/index.xml index 48338569a..7debd4331 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -6,7 +6,7 @@ Recent content on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 27 Feb 2024 17:18:35 +0300 + Thu, 29 Feb 2024 09:41:44 +0300 February, 2024 diff --git a/docs/page/10/index.html b/docs/page/10/index.html index 1c204a773..269187a03 100644 --- a/docs/page/10/index.html +++ b/docs/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/11/index.html b/docs/page/11/index.html index 3c729cb91..6b63e61cd 100644 --- a/docs/page/11/index.html +++ b/docs/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index a199bee76..f6cda545f 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index c7357cfab..9d17b150b 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index baa47dd2a..4d00e9b5e 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 00b788d8a..b2a103d99 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index fd0999426..961da7ba8 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/7/index.html b/docs/page/7/index.html index b857995e8..9636e84e4 100644 --- a/docs/page/7/index.html +++ b/docs/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/8/index.html b/docs/page/8/index.html index 29a95ace4..1796bf9a5 100644 --- a/docs/page/8/index.html +++ b/docs/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/page/9/index.html b/docs/page/9/index.html index bd91590c0..a3e0b68dd 100644 --- a/docs/page/9/index.html +++ b/docs/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 8e06c81c9..8b893de56 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/index.xml b/docs/posts/index.xml index f237cd156..133bba6e7 100644 --- a/docs/posts/index.xml +++ b/docs/posts/index.xml @@ -6,7 +6,7 @@ Recent content in Posts on CGSpace Notes Hugo -- gohugo.io en-us - Tue, 27 Feb 2024 17:18:35 +0300 + Thu, 29 Feb 2024 09:41:44 +0300 February, 2024 diff --git a/docs/posts/page/10/index.html b/docs/posts/page/10/index.html index 137ac06e9..0c11e26b4 100644 --- a/docs/posts/page/10/index.html +++ b/docs/posts/page/10/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/11/index.html b/docs/posts/page/11/index.html index a551c86ba..9e0d70640 100644 --- a/docs/posts/page/11/index.html +++ b/docs/posts/page/11/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index a52b20968..83b3f06a1 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 8c85af902..4d7ead47a 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 380740327..35924902f 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 8c1359b0e..d7a3eb395 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index a1539413f..1e138116a 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/7/index.html b/docs/posts/page/7/index.html index 00fe975bc..6344d11ad 100644 --- a/docs/posts/page/7/index.html +++ b/docs/posts/page/7/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/8/index.html b/docs/posts/page/8/index.html index d49bfa28a..ce8ae89aa 100644 --- a/docs/posts/page/8/index.html +++ b/docs/posts/page/8/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/posts/page/9/index.html b/docs/posts/page/9/index.html index 7e40e05dd..e9b9baf77 100644 --- a/docs/posts/page/9/index.html +++ b/docs/posts/page/9/index.html @@ -10,7 +10,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 16a2e804c..84b560c01 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,19 +3,19 @@ xmlns:xhtml="http://www.w3.org/1999/xhtml"> https://alanorth.github.io/cgspace-notes/categories/ - 2024-02-27T17:18:35+03:00 + 2024-02-29T09:41:44+03:00 https://alanorth.github.io/cgspace-notes/ - 2024-02-27T17:18:35+03:00 + 2024-02-29T09:41:44+03:00 https://alanorth.github.io/cgspace-notes/2024-01/ - 2024-02-27T17:18:35+03:00 + 2024-02-29T09:41:44+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2024-02-27T17:18:35+03:00 + 2024-02-29T09:41:44+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2024-02-27T17:18:35+03:00 + 2024-02-29T09:41:44+03:00 https://alanorth.github.io/cgspace-notes/2024-01/ 2024-02-05T11:09:40+03:00