mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-11-22 06:35:03 +01:00
Add notes for 2021-06-17
This commit is contained in:
parent
fb2bd040a7
commit
a6d606ca0e
@ -85,4 +85,35 @@ elasticdump --input=/home/aorth/openrxv-items_data.json --output=http://localhos
|
||||
$ podman unshare chown 1000:1000 /home/aorth/.local/share/containers/storage/volumes/docker_esData_7/_data
|
||||
```
|
||||
|
||||
- The new OpenRXV harvesting method by Moayad uses pages of 10 items instead of 100 and it's much faster
|
||||
- I harvested 90,000+ items from DSpace Test in ~3 hours
|
||||
- There seem to be some issues with the health check step though
|
||||
|
||||
## 2021-06-17
|
||||
|
||||
- I ported my ilri/resolve-addresses.py script, which uses IPAPI.co, to use the local GeoIP2 databases instead
|
||||
- The new script is ilri/resolve-addresses-geoip2.py and it is much faster and works offline with no API rate limits
|
||||
- Teams meeting with the CGIAR Metadata Working Group to discuss CGSpace and open repositories and the way forward
|
||||
- More work with Moayad on OpenRXV harvesting issues
|
||||
- Using a JSON export from elasticdump we debugged the duplicate checker plugin and found that there are indeed duplicates:
|
||||
|
||||
```console
|
||||
$ grep -oE '"handle":"[[:digit:]]+/[[:digit:]]+"' openrxv-items_data.json | awk -F: '{print $2}' | wc -l
|
||||
90459
|
||||
$ grep -oE '"handle":"[[:digit:]]+/[[:digit:]]+"' openrxv-items_data.json | awk -F: '{print $2}' | sort | uniq | wc -l
|
||||
90380
|
||||
$ grep -oE '"handle":"[[:digit:]]+/[[:digit:]]+"' openrxv-items_data.json | awk -F: '{print $2}' | sort | uniq -c | sort -h
|
||||
...
|
||||
2 "10568/99409"
|
||||
2 "10568/99410"
|
||||
2 "10568/99411"
|
||||
2 "10568/99516"
|
||||
3 "10568/102093"
|
||||
3 "10568/103524"
|
||||
3 "10568/106664"
|
||||
3 "10568/106940"
|
||||
3 "10568/107195"
|
||||
3 "10568/96546"
|
||||
```
|
||||
|
||||
<!-- vim: set sw=2 ts=2: -->
|
||||
|
@ -20,7 +20,7 @@ I simply started it and AReS was running again:
|
||||
<meta property="og:type" content="article" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2021-06/" />
|
||||
<meta property="article:published_time" content="2021-06-01T10:51:07+03:00" />
|
||||
<meta property="article:modified_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="article:modified_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
@ -46,9 +46,9 @@ I simply started it and AReS was running again:
|
||||
"@type": "BlogPosting",
|
||||
"headline": "June, 2021",
|
||||
"url": "https://alanorth.github.io/cgspace-notes/2021-06/",
|
||||
"wordCount": "627",
|
||||
"wordCount": "817",
|
||||
"datePublished": "2021-06-01T10:51:07+03:00",
|
||||
"dateModified": "2021-06-14T15:09:07+03:00",
|
||||
"dateModified": "2021-06-16T18:31:15+03:00",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Alan Orth"
|
||||
@ -209,6 +209,44 @@ elasticdump --input=/home/aorth/openrxv-items_data.json --output=http://localhos
|
||||
</li>
|
||||
</ul>
|
||||
<pre><code class="language-console" data-lang="console">$ podman unshare chown 1000:1000 /home/aorth/.local/share/containers/storage/volumes/docker_esData_7/_data
|
||||
</code></pre><ul>
|
||||
<li>The new OpenRXV harvesting method by Moayad uses pages of 10 items instead of 100 and it’s much faster
|
||||
<ul>
|
||||
<li>I harvested 90,000+ items from DSpace Test in ~3 hours</li>
|
||||
<li>There seem to be some issues with the health check step though</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<h2 id="2021-06-17">2021-06-17</h2>
|
||||
<ul>
|
||||
<li>I ported my ilri/resolve-addresses.py script, which uses IPAPI.co, to use the local GeoIP2 databases instead
|
||||
<ul>
|
||||
<li>The new script is ilri/resolve-addresses-geoip2.py and it is much faster and works offline with no API rate limits</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Teams meeting with the CGIAR Metadata Working Group to discuss CGSpace and open repositories and the way forward</li>
|
||||
<li>More work with Moayad on OpenRXV harvesting issues
|
||||
<ul>
|
||||
<li>Using a JSON export from elasticdump we debugged the duplicate checker plugin and found that there are indeed duplicates:</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<pre><code class="language-console" data-lang="console">$ grep -oE '"handle":"[[:digit:]]+/[[:digit:]]+"' openrxv-items_data.json | awk -F: '{print $2}' | wc -l
|
||||
90459
|
||||
$ grep -oE '"handle":"[[:digit:]]+/[[:digit:]]+"' openrxv-items_data.json | awk -F: '{print $2}' | sort | uniq | wc -l
|
||||
90380
|
||||
$ grep -oE '"handle":"[[:digit:]]+/[[:digit:]]+"' openrxv-items_data.json | awk -F: '{print $2}' | sort | uniq -c | sort -h
|
||||
...
|
||||
2 "10568/99409"
|
||||
2 "10568/99410"
|
||||
2 "10568/99411"
|
||||
2 "10568/99516"
|
||||
3 "10568/102093"
|
||||
3 "10568/103524"
|
||||
3 "10568/106664"
|
||||
3 "10568/106940"
|
||||
3 "10568/107195"
|
||||
3 "10568/96546"
|
||||
</code></pre><!-- raw HTML omitted -->
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-06-14T15:09:07+03:00" />
|
||||
<meta property="og:updated_time" content="2021-06-16T18:31:15+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -3,19 +3,19 @@
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/</loc>
|
||||
<lastmod>2021-06-14T15:09:07+03:00</lastmod>
|
||||
<lastmod>2021-06-16T18:31:15+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||
<lastmod>2021-06-14T15:09:07+03:00</lastmod>
|
||||
<lastmod>2021-06-16T18:31:15+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2021-06/</loc>
|
||||
<lastmod>2021-06-14T15:09:07+03:00</lastmod>
|
||||
<lastmod>2021-06-16T18:31:15+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||
<lastmod>2021-06-14T15:09:07+03:00</lastmod>
|
||||
<lastmod>2021-06-16T18:31:15+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||
<lastmod>2021-06-14T15:09:07+03:00</lastmod>
|
||||
<lastmod>2021-06-16T18:31:15+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2021-05/</loc>
|
||||
<lastmod>2021-05-30T22:09:06+03:00</lastmod>
|
||||
|
Loading…
Reference in New Issue
Block a user