org.apache.jasper.JasperException: /home.jsp (line: [214], column: [1]) /discovery/static-tagcloud-facet.jsp (line: [57], column: [8]) No tag [tagcloud] defined in tag library imported with prefix [dspace]
at org.apache.jasper.compiler.DefaultErrorHandler.jspError(DefaultErrorHandler.java:41)
at org.apache.jasper.compiler.ErrorDispatcher.dispatch(ErrorDispatcher.java:291)
at org.apache.jasper.compiler.ErrorDispatcher.jspError(ErrorDispatcher.java:97)
at org.apache.jasper.compiler.Parser.processIncludeDirective(Parser.java:347)
at org.apache.jasper.compiler.Parser.parseIncludeDirective(Parser.java:380)
at org.apache.jasper.compiler.Parser.parseDirective(Parser.java:481)
at org.apache.jasper.compiler.Parser.parseElements(Parser.java:1445)
at org.apache.jasper.compiler.Parser.parseBody(Parser.java:1683)
at org.apache.jasper.compiler.Parser.parseOptionalBody(Parser.java:1016)
at org.apache.jasper.compiler.Parser.parseCustomTag(Parser.java:1291)
at org.apache.jasper.compiler.Parser.parseElements(Parser.java:1470)
at org.apache.jasper.compiler.Parser.parse(Parser.java:144)
at org.apache.jasper.compiler.ParserController.doParse(ParserController.java:244)
at org.apache.jasper.compiler.ParserController.parse(ParserController.java:105)
at org.apache.jasper.compiler.Compiler.generateJava(Compiler.java:202)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:373)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:350)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:334)
at org.apache.jasper.JspCompilationContext.compile(JspCompilationContext.java:595)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:399)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:386)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:330)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.ApplicationDispatcher.invoke(ApplicationDispatcher.java:728)
at org.apache.catalina.core.ApplicationDispatcher.processRequest(ApplicationDispatcher.java:470)
at org.apache.catalina.core.ApplicationDispatcher.doForward(ApplicationDispatcher.java:395)
at org.apache.catalina.core.ApplicationDispatcher.forward(ApplicationDispatcher.java:316)
at org.dspace.app.webui.util.JSPManager.showJSP(JSPManager.java:60)
at org.apache.jsp.index_jsp._jspService(index_jsp.java:191)
at org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:70)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:476)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:386)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:330)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.dspace.utils.servlet.DSpaceWebappServletFilter.doFilter(DSpaceWebappServletFilter.java:78)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:198)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:96)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:493)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:140)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:81)
at org.apache.catalina.valves.CrawlerSessionManagerValve.invoke(CrawlerSessionManagerValve.java:234)
at org.apache.catalina.valves.AbstractAccessLogValve.invoke(AbstractAccessLogValve.java:650)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:87)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:342)
at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:800)
at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)
at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:806)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1498)
at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Thread.java:748)
```
- I notice that I get different JSESSIONID cookies for `/` (XMLUI) and `/jspui` (JSPUI) on Tomcat 8.5.37, I wonder if it's the same on Tomcat 7.0.92... yes I do.
- Hmm, on Tomcat 7.0.92 I see that I get a `dspace.current.user.id` session cookie after logging into XMLUI, and then when I browse to JSPUI I am still logged in...
- I didn't see that cookie being set on Tomcat 8.5.37
- I sent a message to the dspace-tech mailing list to ask
## 2019-01-04
- Linode sent a message last night that CGSpace (linode18) had high CPU usage, but I don't see anything around that time in the web server logs:
- I'm thinking about trying to validate our `dc.subject` terms against [AGROVOC webservices](http://aims.fao.org/agrovoc/webservices)
- There seem to be a few APIs and the documentation is kinda confusing, but I found this REST endpoint that does work well, for example searching for `SOIL`:
- I built a clean DSpace 5.8 installation from the upstream `dspace-5.8` tag and the issue with the XMLUI/JSPUI login is still there with Tomcat 8.5.37
- If I log into XMLUI and then navigate to JSPUI I need to log in again
- XMLUI does not set the `dspace.current.user.id` session cookie in Tomcat 8.5.37 for some reason
- I sent an update to the dspace-tech mailing list to ask for more help troubleshooting
- I built a clean DSpace 6.3 installation from the upstream `dspace-6.3` tag and the issue with the XMLUI/JSPUI login is still there with Tomcat 8.5.37
- If I log into XMLUI and then navigate to JSPUI I need to log in again
- XMLUI does not set the `dspace.current.user.id` session cookie in Tomcat 8.5.37 for some reason
- I sent an update to the dspace-tech mailing list to ask for more help troubleshooting
- Tim Donohue responded to my thread about the cookies on the dspace-tech mailing list
- He suspects it's a change of behavior in Tomcat 8.5, and indeed I see a mention of new cookie processing in the [Tomcat 8.5 migration guide](https://tomcat.apache.org/migration-85.html#Cookies)
- I tried to switch my XMLUI and JSPUI contexts to use the `LegacyCookieProcessor`, but it didn't seem to help
- I [filed DS-4140 on the DSpace issue tracker](https://jira.duraspace.org/browse/DS-4140)
- Discuss possibly extending the [dspace-statistics-api](https://github.com/ilri/dspace-statistics-api) to make community and collection statistics available
- Discuss new "final" CG Core document and some changes that we'll need to do on CGSpace and other repositories
- We agreed to try to stick to pure Dublin Core where possible, then use fields that exist in standard DSpace, and use "cg" namespace for everything else
- Major changes are to move `dc.contributor.author` to `dc.creator` (which MELSpace and WorldFish are already using in their DSpace repositories)
- I am testing the speed of the WorldFish DSpace repository's REST API and it's five to ten times faster than CGSpace as I tested in [2018-10]({{< relref "2018-10.md" >}}):
```
$ time http --print h 'https://digitalarchive.worldfishcenter.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0'
- In other news, Linode sent a mail last night that the CPU load on CGSpace (linode18) was high, here are the top IPs in the logs around those few hours:
- And what is the relationship between DC and DCTERMS?
- DSpace uses DCTERMS in the metadata it embeds in XMLUI item views!
- We really need to look at this more carefully and see the impacts that might be made from switching core fields like languages, abstract, authors, etc
- We can check WorldFish and MELSpace repositories to see what effects these changes have had on theirs because they have already adopted some of these changes...
- I think I finally understand the difference between DC and DCTERMS: DC is the original set of fifteen elements and DCTERMS is the newer version that was supposed to address many of the drawbacks of the original with regard to digital content
- We might be able to use some proper fields for citation, abstract, etc that are part of DCTERMS
- To make matters more confusing, there is also "qualified Dublin Core" that uses the original fifteen elements of legacy DC and qualifies them, like `dc.date.accessioned`
- According to Wikipedia [Qualified Dublin Core was superseded by DCTERMS in 2008](https://en.wikipedia.org/wiki/Dublin_Core)!
- So we should be trying to use DCTERMS where possible, unless it is some internal thing that might mess up DSpace (like dates)
- There's no official set of Dublin Core qualifiers so I can't tell if things like `dc.contributor.author` that are used by DSpace are official
- I found a great [presentation from 2015 by the Digital Repository of Ireland](https://www.dri.ie/sites/default/files/files/qualified-dublin-core-metadata-guidelines.pdf) that discusses using MARC Relator Terms with Dublin Core elements
- It seems that `dc.contributor.author` would be a supported term according to this [Library of Congress list](https://memory.loc.gov/diglib/loc.terms/relators/dc-contributor.html) linked from the [Dublin Core website](http://dublincore.org/usage/documents/relators/)
- The Library of Congress document specifically says:
These terms conform with the DCMI Abstract Model and may be used in DCMI application profiles. DCMI endorses their use with Dublin Core elements as indicated.
## 2019-01-20
- That's weird, I logged into DSpace Test (linode19) and it says it has been up for 213 days:
- I've definitely rebooted it several times in the past few months... according to `journalctl -b` it was a few weeks ago on 2019-01-02
- I re-ran the Ansible DSpace tag, ran all system updates, and rebooted the host
- After rebooting I notice that the Linode kernel went down from 4.19.8 to 4.18.16...
- Atmire sent a quote on our [ticket about purchasing the Metadata Quality Module (MQM) for DSpace 5.8](https://tracker.atmire.com/tickets-cgiar-ilri/view-ticket?id=657)
- Abenet asked me for an [OpenSearch query that could generate an RSS feed for items in the Livestock CRP](https://cgspace.cgiar.org/open-search/discover?query=crpsubject:Livestock&sort_by=3&order=DESC)
- According to my notes, `sort_by=3` is accession date (as configured in `dspace.cfg`)
- The query currently shows 3023 items, but a [Discovery search for Livestock CRP only returns 858 items](https://cgspace.cgiar.org/discover?filtertype_1=crpsubject&filter_relational_operator_1=equals&filter_1=Livestock&submit_apply_filter=&query=)
- That query seems to return items tagged with `Livestock and Fish` CRP as well... hmm.
- Investigating running Tomcat 7 on Ubuntu 18.04 with the tarball and a custom systemd package instead of waiting for our DSpace to get compatible with Ubuntu 18.04's Tomcat 8.5
- I could either run with a simple `tomcat7.service` like this:
```
[Unit]
Description=Apache Tomcat 7 Web Application Container
- I see that `jsvc` and `libcommons-daemon-java` are both available on Ubuntu so that should be easy to port
- We probably don't need Eclipse Java Bytecode Compiler (ecj)
- I tested Tomcat 7.0.92 on Arch Linux using the `tomcat7.service` with `jsvc` and it works... nice!
- I think I might manage this the same way I do the restic releases in the [Ansible infrastructure scripts](https://github.com/ilri/rmg-ansible-public), where I download a specific version and symlink to some generic location without the version number
- I verified that there is indeed an issue with sharded Solr statistics cores on DSpace, which will cause inaccurate results in the dspace-statistics-api:
- I opened an issue on the GitHub issue tracker ([#10](https://github.com/ilri/dspace-statistics-api/issues/10))
- I don't think the [SolrClient library](https://solrclient.readthedocs.io/en/latest/) we are currently using supports these types of queries so we might have to just do raw queries with requests
- The [pysolr](https://github.com/django-haystack/pysolr) library says it supports multicore indexes, but I am not sure it does (or at least not with our setup):
- I should be able to modify the dspace-statistics-api to check the shards via the Solr core status, then add the `shards` parameter to each query to make the search distributed among the cores
- I implemented a proof of concept to query the Solr STATUS for active cores and to add them with a `shards` query string
- A few things I noticed:
- Solr doesn't mind if you use an empty `shards` parameter
- Solr doesn't mind if you have an extra comma at the end of the `shards` parameter
- If you are searching multiple cores, you need to include the base core in the `shards` parameter as well
- For example, compare the following two queries, first including the base core and the shard in the `shards` parameter, and then only including the shard:
- Release [version 0.9.0 of the dspace-statistics-api](https://github.com/ilri/dspace-statistics-api/releases/tag/v0.9.0) to address the issue of querying multiple Solr statistics shards
- I deployed it on DSpace Test (linode19) and restarted the indexer and now it shows all the stats from 2018 as well (756 pages of views, instead of 6)
- Peter noticed that some goo.gl links in our tweets from Feedburner are broken, for example this one from last week:
{{< tweet 1086330519904673793 >}}
- The shortened link is [goo.gl/fb/VRj9Gq](https://goo.gl/fb/VRj9Gq) and it shows a "Dynamic Link not found" error from Firebase:
![Dynamic Link not found](/cgspace-notes/2019/01/firebase-link-not-found.png)
- Apparently Google announced last year that they plan to [discontinue the shortener and transition to Firebase Dynamic Links in March, 2019](https://developers.googleblog.com/2018/03/transitioning-google-url-shortener.html), so maybe this is related...
- Create accounts for Bosun from IITA and Valerio from ICARDA / CGMEL on DSpace Test
- Maria Garruccio asked me for a list of author affiliations from all of their submitted items so she can clean them up
- I got a list of their collections from the CGSpace XMLUI and then used an SQL query to dump the unique values to CSV:
```
dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'affiliation') AND resource_type_id = 2 AND resource_id IN (select item_id from collection2item where collection_id IN (select resource_id from handle where handle in ('10568/35501', '10568/41728', '10568/49622', '10568/56589', '10568/56592', '10568/65064', '10568/65718', '10568/65719', '10568/67373', '10568/67731', '10568/68235', '10568/68546', '10568/69089', '10568/69160', '10568/69419', '10568/69556', '10568/70131', '10568/70252', '10568/70978'))) group by text_value order by count desc) to /tmp/bioversity-affiliations.csv with csv;
COPY 1109
```
- Send a mail to the dspace-tech mailing list about the OpenSearch issue we had with the Livestock CRP
- Linode sent an alert that CGSpace (linode18) had a high load this morning, here are the top ten IPs during that time:
- I noticed Ubuntu's Ghostscript 9.26 works on some troublesome PDFs where Arch's Ghostscript 9.26 doesn't, so the fix for the first/last page crash is not the patch I found yesterday
- Ubuntu's Ghostscript uses another [patch from Ghostscript git](http://git.ghostscript.com/?p=ghostpdl.git;h=fae21f1668d2b44b18b84cf0923a1d5f3008a696) ([upstream bug report](https://bugs.ghostscript.com/show_bug.cgi?id=700315))
- I re-compiled Arch's ghostscript with the patch and then I was able to generate a thumbnail from one of the [troublesome PDFs](https://cgspace.cgiar.org/handle/10568/98390)
- I reported it to the Arch Linux bug tracker ([61513](https://bugs.archlinux.org/task/61513))
- I told Atmire to go ahead with the Metadata Quality Module addition based on our `5_x-dev` branch ([657](https://tracker.atmire.com/tickets-cgiar-ilri/view-ticket?id=657))
- Linode sent alerts last night to say that CGSpace (linode18) was using high CPU last night, here are the top ten IPs from the nginx logs around that time:
- The full [list of MARC Relators on the Library of Congress website](http://id.loc.gov/vocabulary/relators.html) linked from the [DCMI relators page](http://dublincore.org/usage/documents/relators/) is very confusing
- Looking at the default DSpace XMLUI crosswalk in [xhtml-head-item.properties](https://github.com/DSpace/DSpace/blob/dspace-5_x/dspace/config/crosswalks/xhtml-head-item.properties) I see a very complete mapping of DSpace DC and QDC fields to DCTERMS
- This is good for standards-compliant web crawlers, but what about for those harvesting via REST or OAI APIs?
- I sent a message titled "[DC, QDC, and DCTERMS: reviewing our metadata practices](https://groups.google.com/forum/#!topic/dspace-tech/phV_t51TGuE)" to the dspace-tech mailing list to ask about some of this
- I refined the tasks so much that I was confident enough to deploy them on DSpace Test and it went very well
- Basically I just stopped tomcat7, created a dspace user, removed tomcat7, chown'd everything to the dspace user, then ran the playbook
- So now DSpace Test (linode19) is running Tomcat 7.0.92... w00t
- Now we need to monitor it for a few weeks to see if there is anything we missed, and then I can change CGSpace (linode18) as well, and we're ready for Ubuntu 18.04 too!
- Udana from WLE asked me about the interaction between their publication website and their items on CGSpace
- There is an item that is mapped into their collection from IWMI and is missing their `cg.identifier.wletheme` metadata
- I told him that, as far as I remember, when WLE introduced Phase II research themes in 2017 we decided to infer theme ownership from the collection hierarchy and we created a [WLE Phase II Research Themes](https://cgspace.cgiar.org/handle/10568/81268) subCommunity
- Perhaps they need to ask Macaroni Bros about the mapping
- Linode alerted that CGSpace (linode18) was using too much CPU again this morning, here are the active IPs from the web server log at the time:
- There seems to be a pattern with `70.32.83.92` and `205.186.128.185` lately!
- Every morning at 8AM they are the top users... I should tell them to stagger their requests...
- I signed up for a [VisualPing](https://visualping.io/) of the [PostgreSQL JDBC driver download page](https://jdbc.postgresql.org/download.html) to my CGIAR email address
- Hopefully this will one day alert me that a new driver is released!
- Last night Linode sent an alert that CGSpace (linode18) was using high CPU, here are the most active IPs in the hours just before, during, and after the alert:
- Linode sent an alert about CGSpace (linode18) CPU usage this morning, here are the top IPs in the web server logs just before, during, and after the alert: