diff --git a/content/posts/2018-11.md b/content/posts/2018-11.md index 73185d776..812bcae0d 100644 --- a/content/posts/2018-11.md +++ b/content/posts/2018-11.md @@ -332,4 +332,50 @@ $ time schedtool -D -e ionice -c2 -n7 nice -n19 dspace index-discovery -b dspace=# \COPY (SELECT DISTINCT text_value, count(*) FROM metadatavalue WHERE metadata_field_id = 57 AND resource_type_id = 2 GROUP BY text_value ORDER BY count DESC LIMIT 1500) to /tmp/2018-11-19-top-1500-subject.csv WITH CSV HEADER; ``` +## 2018-11-20 + +- The Discovery re-indexing on CGSpace never finished yesterday... the command died after six minutes +- The `dspace.log.2018-11-19` shows this at the time: + +``` +2018-11-19 15:23:04,221 ERROR com.atmire.dspace.discovery.AtmireSolrService @ DSpace kernel cannot be null +java.lang.IllegalStateException: DSpace kernel cannot be null + at org.dspace.utils.DSpace.getServiceManager(DSpace.java:63) + at org.dspace.utils.DSpace.getSingletonService(DSpace.java:87) + at com.atmire.dspace.discovery.AtmireSolrService.buildDocument(AtmireSolrService.java:102) + at com.atmire.dspace.discovery.AtmireSolrService.indexContent(AtmireSolrService.java:815) + at com.atmire.dspace.discovery.AtmireSolrService.updateIndex(AtmireSolrService.java:884) + at org.dspace.discovery.SolrServiceImpl.createIndex(SolrServiceImpl.java:370) + at org.dspace.discovery.IndexClient.main(IndexClient.java:117) + at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.lang.reflect.Method.invoke(Method.java:498) + at org.dspace.app.launcher.ScriptLauncher.runOneCommand(ScriptLauncher.java:226) + at org.dspace.app.launcher.ScriptLauncher.main(ScriptLauncher.java:78) +2018-11-19 15:23:04,223 INFO com.atmire.dspace.discovery.AtmireSolrService @ Processing (4629 of 76007): 72731 +``` + +- I looked in the Solr log around 
that time and I don't see anything... +- Working on Udana's WLE records from last month, first the sixteen records in [2018-11-20 RDL Temp](https://dspacetest.cgiar.org/handle/10568/108254) + - these items will go to the [Restoring Degraded Landscapes collection](https://dspacetest.cgiar.org/handle/10568/81592) + - a few items missing DOIs, but they are easily available on the publication page + - clean up DOIs to use "https://doi.org" format + - clean up some cg.identifier.url to remove unnecessary query strings + - remove columns with no metadata (river basin, place, target audience, isbn, uri, publisher, ispartofseries, subject) + - fix column with invalid spaces in metadata field name (cg. subject. wle) + - trim and collapse whitespace in all fields + - remove some weird Unicode characters (0xfffd) from abstracts, citations, and titles using Open Refine: `value.replace('�','')` + - add dc.rights to some fields that I noticed while checking DOIs +- Then the 24 records in [2018-11-20 VRC Temp](https://dspacetest.cgiar.org/handle/10568/108271) + - these items will go to the [Variability, Risks and Competing Uses collection](https://dspacetest.cgiar.org/handle/10568/81589) + - trim and collapse whitespace in all fields (lots in WLE subject!) + - clean up some cg.identifier.url fields that had unnecessary anchors in their links + - clean up DOIs to use "https://doi.org" format + - fix column with invalid spaces in metadata field name (cg. subject. wle) + - remove columns with no metadata (place, target audience, isbn, uri, publisher, ispartofseries, subject) + - remove some weird Unicode characters (0xfffd) from abstracts, citations, and titles using Open Refine: `value.replace('�','')` + - I notice a few items using DOIs pointing at ICARDA's DSpace like: https://doi.org/20.500.11766/8178, which then points at the "real" DOI on the publisher's site... 
these should be using the real DOI instead of ICARDA's "fake" Handle DOI + - Some items missing DOIs, but they clearly have them if you look at the publisher's site + diff --git a/docs/2018-11/index.html b/docs/2018-11/index.html index 575ca9be2..fdecfaad7 100644 --- a/docs/2018-11/index.html +++ b/docs/2018-11/index.html @@ -21,7 +21,7 @@ Today these are the top 10 IPs: " /> - + @@ -48,9 +48,9 @@ Today these are the top 10 IPs: "@type": "BlogPosting", "headline": "November, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-11/", - "wordCount": "1774", + "wordCount": "2122", "datePublished": "2018-11-01T16:41:30+02:00", - "dateModified": "2018-11-19T17:17:04+02:00", + "dateModified": "2018-11-19T17:25:08+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -490,6 +490,61 @@ $ ./delete-metadata-values.py -i 2018-11-19-delete-agrovoc.csv -f dc.subject -m
dspace=# \COPY (SELECT DISTINCT text_value, count(*) FROM metadatavalue WHERE metadata_field_id = 57 AND resource_type_id = 2 GROUP BY text_value ORDER BY count DESC LIMIT 1500) to /tmp/2018-11-19-top-1500-subject.csv WITH CSV HEADER;
 
+

2018-11-20

+ + + +
2018-11-19 15:23:04,221 ERROR com.atmire.dspace.discovery.AtmireSolrService @ DSpace kernel cannot be null
+java.lang.IllegalStateException: DSpace kernel cannot be null
+        at org.dspace.utils.DSpace.getServiceManager(DSpace.java:63)
+        at org.dspace.utils.DSpace.getSingletonService(DSpace.java:87)
+        at com.atmire.dspace.discovery.AtmireSolrService.buildDocument(AtmireSolrService.java:102)
+        at com.atmire.dspace.discovery.AtmireSolrService.indexContent(AtmireSolrService.java:815)
+        at com.atmire.dspace.discovery.AtmireSolrService.updateIndex(AtmireSolrService.java:884)
+        at org.dspace.discovery.SolrServiceImpl.createIndex(SolrServiceImpl.java:370)
+        at org.dspace.discovery.IndexClient.main(IndexClient.java:117)
+        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
+        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+        at java.lang.reflect.Method.invoke(Method.java:498)
+        at org.dspace.app.launcher.ScriptLauncher.runOneCommand(ScriptLauncher.java:226)
+        at org.dspace.app.launcher.ScriptLauncher.main(ScriptLauncher.java:78)
+2018-11-19 15:23:04,223 INFO  com.atmire.dspace.discovery.AtmireSolrService @ Processing (4629 of 76007): 72731
+
+ + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index ee168c7b9..09373c2f7 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2018-11/ - 2018-11-19T17:17:04+02:00 + 2018-11-19T17:25:08+02:00 @@ -194,7 +194,7 @@ https://alanorth.github.io/cgspace-notes/ - 2018-11-19T17:17:04+02:00 + 2018-11-19T17:25:08+02:00 0 @@ -205,7 +205,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2018-11-19T17:17:04+02:00 + 2018-11-19T17:25:08+02:00 0 @@ -217,13 +217,13 @@ https://alanorth.github.io/cgspace-notes/posts/ - 2018-11-19T17:17:04+02:00 + 2018-11-19T17:25:08+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2018-11-19T17:17:04+02:00 + 2018-11-19T17:25:08+02:00 0