Add notes for 2019-01-04

This commit is contained in:
Alan Orth 2019-01-04 20:38:11 +02:00
parent 8ae63a1bcb
commit eb5c011367
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
4 changed files with 481 additions and 14 deletions

View File

@ -68,4 +68,230 @@ $ sudo docker rm dspacedb
$ sudo docker run --name dspacedb -v /home/aorth/.local/lib/containers/volumes/dspacedb_data:/var/lib/postgresql/data -e POSTGRES_PASSWORD=postgres -p 5432:5432 -d postgres:9.6-alpine $ sudo docker run --name dspacedb -v /home/aorth/.local/lib/containers/volumes/dspacedb_data:/var/lib/postgresql/data -e POSTGRES_PASSWORD=postgres -p 5432:5432 -d postgres:9.6-alpine
``` ```
- Testing DSpace 5.9 with Tomcat 8.5.37 on my local machine and I see that Atmire's Listings and Reports still doesn't work
- After logging in via XMLUI and clicking the Listings and Reports link from the sidebar it redirects me to a JSPUI login page
- If I log in again there, Listings and Reports works... hmm.
- The JSPUI application—which Listings and Reports depends upon—also does not load, though the error is perhaps unrelated:
```
2019-01-03 14:45:21,727 INFO org.dspace.browse.BrowseEngine @ anonymous:session_id=9471D72242DAA05BCC87734FE3C66EA6:ip_addr=127.0.0.1:browse_mini:
2019-01-03 14:45:21,971 INFO org.dspace.app.webui.discovery.DiscoverUtility @ facets for scope, null: 23
2019-01-03 14:45:22,115 WARN org.dspace.app.webui.servlet.InternalErrorServlet @ :session_id=9471D72242DAA05BCC87734FE3C66EA6:internal_error:-- URL Was: http://localhost:8080/jspui/internal-error
-- Method: GET
-- Parameters were:
org.apache.jasper.JasperException: /home.jsp (line: [214], column: [1]) /discovery/static-tagcloud-facet.jsp (line: [57], column: [8]) No tag [tagcloud] defined in tag library imported with prefix [dspace]
at org.apache.jasper.compiler.DefaultErrorHandler.jspError(DefaultErrorHandler.java:41)
at org.apache.jasper.compiler.ErrorDispatcher.dispatch(ErrorDispatcher.java:291)
at org.apache.jasper.compiler.ErrorDispatcher.jspError(ErrorDispatcher.java:97)
at org.apache.jasper.compiler.Parser.processIncludeDirective(Parser.java:347)
at org.apache.jasper.compiler.Parser.parseIncludeDirective(Parser.java:380)
at org.apache.jasper.compiler.Parser.parseDirective(Parser.java:481)
at org.apache.jasper.compiler.Parser.parseElements(Parser.java:1445)
at org.apache.jasper.compiler.Parser.parseBody(Parser.java:1683)
at org.apache.jasper.compiler.Parser.parseOptionalBody(Parser.java:1016)
at org.apache.jasper.compiler.Parser.parseCustomTag(Parser.java:1291)
at org.apache.jasper.compiler.Parser.parseElements(Parser.java:1470)
at org.apache.jasper.compiler.Parser.parse(Parser.java:144)
at org.apache.jasper.compiler.ParserController.doParse(ParserController.java:244)
at org.apache.jasper.compiler.ParserController.parse(ParserController.java:105)
at org.apache.jasper.compiler.Compiler.generateJava(Compiler.java:202)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:373)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:350)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:334)
at org.apache.jasper.JspCompilationContext.compile(JspCompilationContext.java:595)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:399)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:386)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:330)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.ApplicationDispatcher.invoke(ApplicationDispatcher.java:728)
at org.apache.catalina.core.ApplicationDispatcher.processRequest(ApplicationDispatcher.java:470)
at org.apache.catalina.core.ApplicationDispatcher.doForward(ApplicationDispatcher.java:395)
at org.apache.catalina.core.ApplicationDispatcher.forward(ApplicationDispatcher.java:316)
at org.dspace.app.webui.util.JSPManager.showJSP(JSPManager.java:60)
at org.apache.jsp.index_jsp._jspService(index_jsp.java:191)
at org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:70)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:476)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:386)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:330)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.dspace.utils.servlet.DSpaceWebappServletFilter.doFilter(DSpaceWebappServletFilter.java:78)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:198)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:96)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:493)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:140)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:81)
at org.apache.catalina.valves.CrawlerSessionManagerValve.invoke(CrawlerSessionManagerValve.java:234)
at org.apache.catalina.valves.AbstractAccessLogValve.invoke(AbstractAccessLogValve.java:650)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:87)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:342)
at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:800)
at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)
at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:806)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1498)
at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Thread.java:748)
```
- I notice that I get different JSESSIONID cookies for `/` (XMLUI) and `/jspui` (JSPUI) on Tomcat 8.5.37; I wonder if it's the same on Tomcat 7.0.92... yes, it is.
- Hmm, on Tomcat 7.0.92 I see that I get a `dspace.current.user.id` session cookie after logging into XMLUI, and then when I browse to JSPUI I am still logged in...
- I didn't see that cookie being set on Tomcat 8.5.37
- I sent a message to the dspace-tech mailing list to ask
## 2019-01-04
- Linode sent a message last night that CGSpace (linode18) had high CPU usage, but I don't see anything around that time in the web server logs:
```
# zcat --force /var/log/nginx/*.log /var/log/nginx/*.log.1 | grep -E "03/Jan/2019:1(7|8|9)" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10
189 207.46.13.192
217 31.6.77.23
340 66.249.70.29
349 40.77.167.86
417 34.218.226.147
630 207.46.13.173
710 35.237.175.180
790 40.77.167.87
1776 66.249.70.27
2099 54.70.40.11
```
- I'm thinking about trying to validate our `dc.subject` terms against [AGROVOC webservices](http://aims.fao.org/agrovoc/webservices)
- There seem to be a few APIs and the documentation is kinda confusing, but I found this REST endpoint that does work well, for example searching for `SOIL`:
```
$ http 'http://agrovoc.uniroma2.it/agrovoc/rest/v1/search?query=SOIL&lang=en'
HTTP/1.1 200 OK
Access-Control-Allow-Origin: *
Connection: Keep-Alive
Content-Length: 493
Content-Type: application/json; charset=utf-8
Date: Fri, 04 Jan 2019 13:44:27 GMT
Keep-Alive: timeout=5, max=100
Server: Apache
Strict-Transport-Security: max-age=63072000; includeSubdomains
Vary: Accept
X-Content-Type-Options: nosniff
X-Frame-Options: ALLOW-FROM http://aims.fao.org
{
"@context": {
"@language": "en",
"altLabel": "skos:altLabel",
"hiddenLabel": "skos:hiddenLabel",
"isothes": "http://purl.org/iso25964/skos-thes#",
"onki": "http://schema.onki.fi/onki#",
"prefLabel": "skos:prefLabel",
"results": {
"@container": "@list",
"@id": "onki:results"
},
"skos": "http://www.w3.org/2004/02/skos/core#",
"type": "@type",
"uri": "@id"
},
"results": [
{
"lang": "en",
"prefLabel": "soil",
"type": [
"skos:Concept"
],
"uri": "http://aims.fao.org/aos/agrovoc/c_7156",
"vocab": "agrovoc"
}
],
"uri": ""
}
```
- The API does not appear to be case sensitive (searches for `SOIL` and `soil` return the same thing)
- I'm a bit confused that there's no obvious return code or status when a term is not found, for example `SOILS`:
```
HTTP/1.1 200 OK
Access-Control-Allow-Origin: *
Connection: Keep-Alive
Content-Length: 367
Content-Type: application/json; charset=utf-8
Date: Fri, 04 Jan 2019 13:48:31 GMT
Keep-Alive: timeout=5, max=100
Server: Apache
Strict-Transport-Security: max-age=63072000; includeSubdomains
Vary: Accept
X-Content-Type-Options: nosniff
X-Frame-Options: ALLOW-FROM http://aims.fao.org
{
"@context": {
"@language": "en",
"altLabel": "skos:altLabel",
"hiddenLabel": "skos:hiddenLabel",
"isothes": "http://purl.org/iso25964/skos-thes#",
"onki": "http://schema.onki.fi/onki#",
"prefLabel": "skos:prefLabel",
"results": {
"@container": "@list",
"@id": "onki:results"
},
"skos": "http://www.w3.org/2004/02/skos/core#",
"type": "@type",
"uri": "@id"
},
"results": [],
"uri": ""
}
```
- I guess the `results` array will just be empty...
- Another way would be to try with SPARQL, perhaps using the Python 2.7 [sparql-client](https://pypi.org/project/sparql-client/):
```
$ python2.7 -m virtualenv /tmp/sparql
$ . /tmp/sparql/bin/activate
$ pip install sparql-client ipython
$ ipython
In [10]: import sparql
In [11]: s = sparql.Service("http://agrovoc.uniroma2.it:3030/agrovoc/sparql", "utf-8", "GET")
In [12]: statement=('PREFIX skos: <http://www.w3.org/2004/02/skos/core#> '
...: 'SELECT '
...: '?label '
...: 'WHERE { '
...: '{ ?concept skos:altLabel ?label . } UNION { ?concept skos:prefLabel ?label . } '
...: 'FILTER regex(str(?label), "^fish", "i") . '
...: '} LIMIT 10')
In [13]: result = s.query(statement)
In [14]: for row in result.fetchone():
...: print(row)
...:
(<Literal "fish catching"@en>,)
(<Literal "fish harvesting"@en>,)
(<Literal "fish meat"@en>,)
(<Literal "fish roe"@en>,)
(<Literal "fish conversion"@en>,)
(<Literal "fisheries catches (composition)"@en>,)
(<Literal "fishtail palm"@en>,)
(<Literal "fishflies"@en>,)
(<Literal "fishery biology"@en>,)
(<Literal "fish production"@en>,)
```
- The SPARQL query comes from my notes in [2017-08]({{< relref "2017-08.md" >}})
<!-- vim: set sw=2 ts=2: --> <!-- vim: set sw=2 ts=2: -->

View File

@ -27,7 +27,7 @@ I don&rsquo;t see anything interesting in the web server logs around that time t
" /> " />
<meta property="og:type" content="article" /> <meta property="og:type" content="article" />
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2019-01/" /><meta property="article:published_time" content="2019-01-02T09:48:30&#43;02:00"/> <meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2019-01/" /><meta property="article:published_time" content="2019-01-02T09:48:30&#43;02:00"/>
<meta property="article:modified_time" content="2019-01-02T20:52:39&#43;02:00"/> <meta property="article:modified_time" content="2019-01-03T11:52:26&#43;02:00"/>
<meta name="twitter:card" content="summary"/> <meta name="twitter:card" content="summary"/>
<meta name="twitter:title" content="January, 2019"/> <meta name="twitter:title" content="January, 2019"/>
@ -60,9 +60,9 @@ I don&rsquo;t see anything interesting in the web server logs around that time t
"@type": "BlogPosting", "@type": "BlogPosting",
"headline": "January, 2019", "headline": "January, 2019",
"url": "https://alanorth.github.io/cgspace-notes/2019-01/", "url": "https://alanorth.github.io/cgspace-notes/2019-01/",
"wordCount": "312", "wordCount": "1098",
"datePublished": "2019-01-02T09:48:30&#43;02:00", "datePublished": "2019-01-02T09:48:30&#43;02:00",
"dateModified": "2019-01-02T20:52:39&#43;02:00", "dateModified": "2019-01-03T11:52:26&#43;02:00",
"author": { "author": {
"@type": "Person", "@type": "Person",
"name": "Alan Orth" "name": "Alan Orth"
@ -197,6 +197,247 @@ $ sudo docker rm dspacedb
$ sudo docker run --name dspacedb -v /home/aorth/.local/lib/containers/volumes/dspacedb_data:/var/lib/postgresql/data -e POSTGRES_PASSWORD=postgres -p 5432:5432 -d postgres:9.6-alpine $ sudo docker run --name dspacedb -v /home/aorth/.local/lib/containers/volumes/dspacedb_data:/var/lib/postgresql/data -e POSTGRES_PASSWORD=postgres -p 5432:5432 -d postgres:9.6-alpine
</code></pre> </code></pre>
<ul>
<li>Testing DSpace 5.9 with Tomcat 8.5.37 on my local machine and I see that Atmire&rsquo;s Listings and Reports still doesn&rsquo;t work
<ul>
<li>After logging in via XMLUI and clicking the Listings and Reports link from the sidebar it redirects me to a JSPUI login page</li>
<li>If I log in again there the Listings and Reports work&hellip; hmm.</li>
</ul></li>
<li>The JSPUI application—which Listings and Reports depends upon—also does not load, though the error is perhaps unrelated:</li>
</ul>
<pre><code>2019-01-03 14:45:21,727 INFO org.dspace.browse.BrowseEngine @ anonymous:session_id=9471D72242DAA05BCC87734FE3C66EA6:ip_addr=127.0.0.1:browse_mini:
2019-01-03 14:45:21,971 INFO org.dspace.app.webui.discovery.DiscoverUtility @ facets for scope, null: 23
2019-01-03 14:45:22,115 WARN org.dspace.app.webui.servlet.InternalErrorServlet @ :session_id=9471D72242DAA05BCC87734FE3C66EA6:internal_error:-- URL Was: http://localhost:8080/jspui/internal-error
-- Method: GET
-- Parameters were:
org.apache.jasper.JasperException: /home.jsp (line: [214], column: [1]) /discovery/static-tagcloud-facet.jsp (line: [57], column: [8]) No tag [tagcloud] defined in tag library imported with prefix [dspace]
at org.apache.jasper.compiler.DefaultErrorHandler.jspError(DefaultErrorHandler.java:41)
at org.apache.jasper.compiler.ErrorDispatcher.dispatch(ErrorDispatcher.java:291)
at org.apache.jasper.compiler.ErrorDispatcher.jspError(ErrorDispatcher.java:97)
at org.apache.jasper.compiler.Parser.processIncludeDirective(Parser.java:347)
at org.apache.jasper.compiler.Parser.parseIncludeDirective(Parser.java:380)
at org.apache.jasper.compiler.Parser.parseDirective(Parser.java:481)
at org.apache.jasper.compiler.Parser.parseElements(Parser.java:1445)
at org.apache.jasper.compiler.Parser.parseBody(Parser.java:1683)
at org.apache.jasper.compiler.Parser.parseOptionalBody(Parser.java:1016)
at org.apache.jasper.compiler.Parser.parseCustomTag(Parser.java:1291)
at org.apache.jasper.compiler.Parser.parseElements(Parser.java:1470)
at org.apache.jasper.compiler.Parser.parse(Parser.java:144)
at org.apache.jasper.compiler.ParserController.doParse(ParserController.java:244)
at org.apache.jasper.compiler.ParserController.parse(ParserController.java:105)
at org.apache.jasper.compiler.Compiler.generateJava(Compiler.java:202)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:373)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:350)
at org.apache.jasper.compiler.Compiler.compile(Compiler.java:334)
at org.apache.jasper.JspCompilationContext.compile(JspCompilationContext.java:595)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:399)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:386)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:330)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.ApplicationDispatcher.invoke(ApplicationDispatcher.java:728)
at org.apache.catalina.core.ApplicationDispatcher.processRequest(ApplicationDispatcher.java:470)
at org.apache.catalina.core.ApplicationDispatcher.doForward(ApplicationDispatcher.java:395)
at org.apache.catalina.core.ApplicationDispatcher.forward(ApplicationDispatcher.java:316)
at org.dspace.app.webui.util.JSPManager.showJSP(JSPManager.java:60)
at org.apache.jsp.index_jsp._jspService(index_jsp.java:191)
at org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:70)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:476)
at org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:386)
at org.apache.jasper.servlet.JspServlet.service(JspServlet.java:330)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:742)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.dspace.utils.servlet.DSpaceWebappServletFilter.doFilter(DSpaceWebappServletFilter.java:78)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:198)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:96)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:493)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:140)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:81)
at org.apache.catalina.valves.CrawlerSessionManagerValve.invoke(CrawlerSessionManagerValve.java:234)
at org.apache.catalina.valves.AbstractAccessLogValve.invoke(AbstractAccessLogValve.java:650)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:87)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:342)
at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:800)
at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)
at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:806)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1498)
at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Thread.java:748)
</code></pre>
<ul>
<li>I notice that I get different JSESSIONID cookies for <code>/</code> (XMLUI) and <code>/jspui</code> (JSPUI) on Tomcat 8.5.37, I wonder if it&rsquo;s the same on Tomcat 7.0.92&hellip; yes I do.</li>
<li>Hmm, on Tomcat 7.0.92 I see that I get a <code>dspace.current.user.id</code> session cookie after logging into XMLUI, and then when I browse to JSPUI I am still logged in&hellip;
<ul>
<li>I didn&rsquo;t see that cookie being set on Tomcat 8.5.37</li>
</ul></li>
<li>I sent a message to the dspace-tech mailing list to ask</li>
</ul>
<h2 id="2019-01-04">2019-01-04</h2>
<ul>
<li>Linode sent a message last night that CGSpace (linode18) had high CPU usage, but I don&rsquo;t see anything around that time in the web server logs:</li>
</ul>
<pre><code># zcat --force /var/log/nginx/*.log /var/log/nginx/*.log.1 | grep -E &quot;03/Jan/2019:1(7|8|9)&quot; | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10
189 207.46.13.192
217 31.6.77.23
340 66.249.70.29
349 40.77.167.86
417 34.218.226.147
630 207.46.13.173
710 35.237.175.180
790 40.77.167.87
1776 66.249.70.27
2099 54.70.40.11
</code></pre>
<ul>
<li>I&rsquo;m thinking about trying to validate our <code>dc.subject</code> terms against <a href="http://aims.fao.org/agrovoc/webservices">AGROVOC webservices</a></li>
<li>There seem to be a few APIs and the documentation is kinda confusing, but I found this REST endpoint that does work well, for example searching for <code>SOIL</code>:</li>
</ul>
<pre><code>$ http http://agrovoc.uniroma2.it/agrovoc/rest/v1/search?query=SOIL&amp;lang=en
HTTP/1.1 200 OK
Access-Control-Allow-Origin: *
Connection: Keep-Alive
Content-Length: 493
Content-Type: application/json; charset=utf-8
Date: Fri, 04 Jan 2019 13:44:27 GMT
Keep-Alive: timeout=5, max=100
Server: Apache
Strict-Transport-Security: max-age=63072000; includeSubdomains
Vary: Accept
X-Content-Type-Options: nosniff
X-Frame-Options: ALLOW-FROM http://aims.fao.org
{
&quot;@context&quot;: {
&quot;@language&quot;: &quot;en&quot;,
&quot;altLabel&quot;: &quot;skos:altLabel&quot;,
&quot;hiddenLabel&quot;: &quot;skos:hiddenLabel&quot;,
&quot;isothes&quot;: &quot;http://purl.org/iso25964/skos-thes#&quot;,
&quot;onki&quot;: &quot;http://schema.onki.fi/onki#&quot;,
&quot;prefLabel&quot;: &quot;skos:prefLabel&quot;,
&quot;results&quot;: {
&quot;@container&quot;: &quot;@list&quot;,
&quot;@id&quot;: &quot;onki:results&quot;
},
&quot;skos&quot;: &quot;http://www.w3.org/2004/02/skos/core#&quot;,
&quot;type&quot;: &quot;@type&quot;,
&quot;uri&quot;: &quot;@id&quot;
},
&quot;results&quot;: [
{
&quot;lang&quot;: &quot;en&quot;,
&quot;prefLabel&quot;: &quot;soil&quot;,
&quot;type&quot;: [
&quot;skos:Concept&quot;
],
&quot;uri&quot;: &quot;http://aims.fao.org/aos/agrovoc/c_7156&quot;,
&quot;vocab&quot;: &quot;agrovoc&quot;
}
],
&quot;uri&quot;: &quot;&quot;
}
</code></pre>
<ul>
<li>The API does not appear to be case sensitive (searches for <code>SOIL</code> and <code>soil</code> return the same thing)</li>
<li>I&rsquo;m a bit confused that there&rsquo;s no obvious return code or status when a term is not found, for example <code>SOILS</code>:</li>
</ul>
<pre><code>HTTP/1.1 200 OK
Access-Control-Allow-Origin: *
Connection: Keep-Alive
Content-Length: 367
Content-Type: application/json; charset=utf-8
Date: Fri, 04 Jan 2019 13:48:31 GMT
Keep-Alive: timeout=5, max=100
Server: Apache
Strict-Transport-Security: max-age=63072000; includeSubdomains
Vary: Accept
X-Content-Type-Options: nosniff
X-Frame-Options: ALLOW-FROM http://aims.fao.org
{
&quot;@context&quot;: {
&quot;@language&quot;: &quot;en&quot;,
&quot;altLabel&quot;: &quot;skos:altLabel&quot;,
&quot;hiddenLabel&quot;: &quot;skos:hiddenLabel&quot;,
&quot;isothes&quot;: &quot;http://purl.org/iso25964/skos-thes#&quot;,
&quot;onki&quot;: &quot;http://schema.onki.fi/onki#&quot;,
&quot;prefLabel&quot;: &quot;skos:prefLabel&quot;,
&quot;results&quot;: {
&quot;@container&quot;: &quot;@list&quot;,
&quot;@id&quot;: &quot;onki:results&quot;
},
&quot;skos&quot;: &quot;http://www.w3.org/2004/02/skos/core#&quot;,
&quot;type&quot;: &quot;@type&quot;,
&quot;uri&quot;: &quot;@id&quot;
},
&quot;results&quot;: [],
&quot;uri&quot;: &quot;&quot;
}
</code></pre>
<ul>
<li>I guess the <code>results</code> object will just be empty&hellip;</li>
<li>Another way would be to try with SPARQL, perhaps using the Python 2.7 <a href="https://pypi.org/project/sparql-client/">sparql-client</a>:</li>
</ul>
<pre><code>$ python2.7 -m virtualenv /tmp/sparql
$ . /tmp/sparql/bin/activate
$ pip install sparql-client ipython
$ ipython
In [10]: import sparql
In [11]: s = sparql.Service(&quot;http://agrovoc.uniroma2.it:3030/agrovoc/sparql&quot;, &quot;utf-8&quot;, &quot;GET&quot;)
In [12]: statement=('PREFIX skos: &lt;http://www.w3.org/2004/02/skos/core#&gt; '
...: 'SELECT '
...: '?label '
...: 'WHERE { '
...: '{ ?concept skos:altLabel ?label . } UNION { ?concept skos:prefLabel ?label . } '
...: 'FILTER regex(str(?label), &quot;^fish&quot;, &quot;i&quot;) . '
...: '} LIMIT 10')
In [13]: result = s.query(statement)
In [14]: for row in result.fetchone():
...: print(row)
...:
(&lt;Literal &quot;fish catching&quot;@en&gt;,)
(&lt;Literal &quot;fish harvesting&quot;@en&gt;,)
(&lt;Literal &quot;fish meat&quot;@en&gt;,)
(&lt;Literal &quot;fish roe&quot;@en&gt;,)
(&lt;Literal &quot;fish conversion&quot;@en&gt;,)
(&lt;Literal &quot;fisheries catches (composition)&quot;@en&gt;,)
(&lt;Literal &quot;fishtail palm&quot;@en&gt;,)
(&lt;Literal &quot;fishflies&quot;@en&gt;,)
(&lt;Literal &quot;fishery biology&quot;@en&gt;,)
(&lt;Literal &quot;fish production&quot;@en&gt;,)
</code></pre>
<ul>
<li>The SPARQL query comes from my notes in <a href="/cgspace-notes/2017-08/">2017-08</a></li>
</ul>
<!-- vim: set sw=2 ts=2: --> <!-- vim: set sw=2 ts=2: -->

View File

@ -43,7 +43,7 @@ Disallow: /cgspace-notes/2015-12/
Disallow: /cgspace-notes/2015-11/ Disallow: /cgspace-notes/2015-11/
Disallow: /cgspace-notes/ Disallow: /cgspace-notes/
Disallow: /cgspace-notes/categories/ Disallow: /cgspace-notes/categories/
Disallow: /cgspace-notes/categories/notes/
Disallow: /cgspace-notes/tags/notes/ Disallow: /cgspace-notes/tags/notes/
Disallow: /cgspace-notes/categories/notes/
Disallow: /cgspace-notes/posts/ Disallow: /cgspace-notes/posts/
Disallow: /cgspace-notes/tags/ Disallow: /cgspace-notes/tags/

View File

@ -4,7 +4,7 @@
<url> <url>
<loc>https://alanorth.github.io/cgspace-notes/2019-01/</loc> <loc>https://alanorth.github.io/cgspace-notes/2019-01/</loc>
<lastmod>2019-01-02T20:52:39+02:00</lastmod> <lastmod>2019-01-03T11:52:26+02:00</lastmod>
</url> </url>
<url> <url>
@ -204,7 +204,7 @@
<url> <url>
<loc>https://alanorth.github.io/cgspace-notes/</loc> <loc>https://alanorth.github.io/cgspace-notes/</loc>
<lastmod>2019-01-02T20:52:39+02:00</lastmod> <lastmod>2019-01-03T11:52:26+02:00</lastmod>
<priority>0</priority> <priority>0</priority>
</url> </url>
@ -213,27 +213,27 @@
<priority>0</priority> <priority>0</priority>
</url> </url>
<url>
<loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
<lastmod>2019-01-03T11:52:26+02:00</lastmod>
<priority>0</priority>
</url>
<url> <url>
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc> <loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
<lastmod>2018-03-09T22:10:33+02:00</lastmod> <lastmod>2018-03-09T22:10:33+02:00</lastmod>
<priority>0</priority> <priority>0</priority>
</url> </url>
<url>
<loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
<lastmod>2019-01-02T20:52:39+02:00</lastmod>
<priority>0</priority>
</url>
<url> <url>
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc> <loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
<lastmod>2019-01-02T20:52:39+02:00</lastmod> <lastmod>2019-01-03T11:52:26+02:00</lastmod>
<priority>0</priority> <priority>0</priority>
</url> </url>
<url> <url>
<loc>https://alanorth.github.io/cgspace-notes/tags/</loc> <loc>https://alanorth.github.io/cgspace-notes/tags/</loc>
<lastmod>2019-01-02T20:52:39+02:00</lastmod> <lastmod>2019-01-03T11:52:26+02:00</lastmod>
<priority>0</priority> <priority>0</priority>
</url> </url>