mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-12-22 05:02:19 +01:00
Add notes for 2023-08-22
This commit is contained in:
parent
f38ecfb75e
commit
d2ad21bde1
@ -152,4 +152,25 @@ $ ./run.sh -s http://localhost:8081/solr/statistics -a import -o /tmp/statistics
|
||||
|
||||
- Export CGSpace to check for missing Initiative collection mappings
|
||||
|
||||
## 2023-08-19
|
||||
|
||||
- Start a harvest on AReS
|
||||
|
||||
## 2023-08-21
|
||||
|
||||
- Experiment with the DSpace 7 REST API
|
||||
- I wrote a Python script to benchmark harvesting all 100,000+ items using the `/api/discover/search/objects` endpoint 100 items at a time
|
||||
- I was able to harvest the entire 106,000 items in fifty-two minutes, which seems slow, but that's about ten times faster than with the legacy REST API...
|
||||
- Still, I need to benchmark a bit more, as the item response doesn't include collection mappings or thumbnails
|
||||
- Reading the [API docs](https://github.com/DSpace/RestContract/blob/main/README.md#etags--conditional-headers) it seems that we should be able to use the standard `If-Modified-Since` header for some endpoints
|
||||
- I tried it on the `/api/discover/search/objects` and `/api/core/items` endpoints, but apparently those don't support this header because I don't see a `Last-Modified` header in the response
|
||||
- According to the docs, the missing `Last-Modified` header means that these endpoints indeed don't support it...
|
||||
|
||||
## 2023-08-22
|
||||
|
||||
- I was experimenting with the DSpace 7 REST API again
|
||||
- This time looking at the thumbnail responses in item endpoints
|
||||
- According to [the documentation](https://github.com/DSpace/RestContract/blob/main/items.md#main-thumbnail) the API will respond with HTTP 200 if there is a thumbnail, and HTTP 204 (No Content) if there is no thumbnail
|
||||
- That means we need to make the request before we can even find out!
|
||||
|
||||
<!-- vim: set sw=2 ts=2: -->
|
||||
|
@ -19,7 +19,7 @@ Start working on some batch uploads for IFPRI
|
||||
<meta property="og:type" content="article" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2023-08/" />
|
||||
<meta property="article:published_time" content="2023-08-03T11:18:36+03:00" />
|
||||
<meta property="article:modified_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="article:modified_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
@ -44,9 +44,9 @@ Start working on some batch uploads for IFPRI
|
||||
"@type": "BlogPosting",
|
||||
"headline": "August, 2023",
|
||||
"url": "https://alanorth.github.io/cgspace-notes/2023-08/",
|
||||
"wordCount": "1057",
|
||||
"wordCount": "1254",
|
||||
"datePublished": "2023-08-03T11:18:36+03:00",
|
||||
"dateModified": "2023-08-14T18:38:03+02:00",
|
||||
"dateModified": "2023-08-18T23:54:07+03:00",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Alan Orth"
|
||||
@ -293,6 +293,36 @@ Start working on some batch uploads for IFPRI
|
||||
<ul>
|
||||
<li>Export CGSpace to check for missing Initiative collection mappings</li>
|
||||
</ul>
|
||||
<h2 id="2023-08-19">2023-08-19</h2>
|
||||
<ul>
|
||||
<li>Start a harvest on AReS</li>
|
||||
</ul>
|
||||
<h2 id="2023-08-21">2023-08-21</h2>
|
||||
<ul>
|
||||
<li>Experiment with the DSpace 7 REST API
|
||||
<ul>
|
||||
<li>I wrote a Python script to benchmark harvesting all 100,000+ items using the <code>/api/discover/search/objects</code> endpoint 100 items at a time</li>
|
||||
<li>I was able to harvest the entire 106,000 items in fifty-two minutes, which seems slow, but that’s about ten times faster than with the legacy REST API…</li>
|
||||
<li>Still, I need to benchmark a bit more, as the item response doesn’t include collection mappings or thumbnails</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Reading the <a href="https://github.com/DSpace/RestContract/blob/main/README.md#etags--conditional-headers">API docs</a> it seems that we should be able to use the standard <code>If-Modified-Since</code> header for some endpoints
|
||||
<ul>
|
||||
<li>I tried it on the <code>/api/discover/search/objects</code> and <code>/api/core/items</code> endpoints, but apparently those don’t support this header because I don’t see a <code>Last-Modified</code> header in the response</li>
|
||||
<li>According to the docs, the missing <code>Last-Modified</code> header means that these endpoints indeed don’t support it…</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<h2 id="2023-08-22">2023-08-22</h2>
|
||||
<ul>
|
||||
<li>I was experimenting with the DSpace 7 REST API again
|
||||
<ul>
|
||||
<li>This time looking at the thumbnail responses in item endpoints</li>
|
||||
<li>According to <a href="https://github.com/DSpace/RestContract/blob/main/items.md#main-thumbnail">the documentation</a> the API will respond with HTTP 200 if there is a thumbnail, and HTTP 204 (No Content) if there is no thumbnail</li>
|
||||
<li>That means we need to make the request before we can even find out!</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<!-- raw HTML omitted -->
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-08-14T18:38:03+02:00" />
|
||||
<meta property="og:updated_time" content="2023-08-18T23:54:07+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -3,19 +3,19 @@
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2023-08/</loc>
|
||||
<lastmod>2023-08-14T18:38:03+02:00</lastmod>
|
||||
<lastmod>2023-08-18T23:54:07+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/</loc>
|
||||
<lastmod>2023-08-14T18:38:03+02:00</lastmod>
|
||||
<lastmod>2023-08-18T23:54:07+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||
<lastmod>2023-08-14T18:38:03+02:00</lastmod>
|
||||
<lastmod>2023-08-18T23:54:07+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||
<lastmod>2023-08-14T18:38:03+02:00</lastmod>
|
||||
<lastmod>2023-08-18T23:54:07+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||
<lastmod>2023-08-14T18:38:03+02:00</lastmod>
|
||||
<lastmod>2023-08-18T23:54:07+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2023-07/</loc>
|
||||
<lastmod>2023-08-02T23:04:11+03:00</lastmod>
|
||||
|
Loading…
Reference in New Issue
Block a user