mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-12-21 12:42:18 +01:00
Add notes for 2023-06-08
This commit is contained in:
parent
bda3cb4cd1
commit
363dbb4505
@ -50,4 +50,15 @@ $ ./ilri/update_orcids.py -i /tmp/2023-06-06-orcids-names.txt -db dspacetest -u
|
||||
|
||||
- Start working on updating the MODS schema in CGSpace from 3.1 to 3.8 based on Stefano and Salem's work last year
|
||||
|
||||
## 2023-06-08
|
||||
|
||||
- Continue working on the MODS schema mapping
|
||||
- Export CGSpace to check and update `dcterms.extent` fields
|
||||
- I normalized about 1,500 to use either "p. 1-6" or "5 p." format
|
||||
- Also, I used this GREL expression to extract missing pages from the citation field: `cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*(pp?\.\s?\d+[-–]\d+).*/)[0]`
|
||||
- This was over 4,000 items with a format like "p. 1-6" and "pp. 1-6" in the citation
|
||||
- I used another GREL expression to extract another 5,000: `cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*?(\d+\s+?[Pp]+\.).*/)[0]`
|
||||
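- This was for the format like "1 p." (note that we had to make the leading `.*` lazy, as `.*?`, because a greedy `.*` would swallow all but the last digit of the page count, e.g. capturing "2 p." from "12 p.")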
|
||||
- I also did some work to capture a handful of missing DOIs and ISSNs, but it was only about 100 items and I will have to wait until the 10,000+ above finish importing
|
||||
|
||||
<!-- vim: set sw=2 ts=2: -->
|
||||
|
@ -24,7 +24,7 @@ From what I can see we need to upgrade the MODS schema from 3.1 to 3.7 and then
|
||||
<meta property="og:type" content="article" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2023-06/" />
|
||||
<meta property="article:published_time" content="2023-06-02T10:29:36+03:00" />
|
||||
<meta property="article:modified_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="article:modified_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
@ -54,9 +54,9 @@ From what I can see we need to upgrade the MODS schema from 3.1 to 3.7 and then
|
||||
"@type": "BlogPosting",
|
||||
"headline": "June, 2023",
|
||||
"url": "https://alanorth.github.io/cgspace-notes/2023-06/",
|
||||
"wordCount": "327",
|
||||
"wordCount": "451",
|
||||
"datePublished": "2023-06-02T10:29:36+03:00",
|
||||
"dateModified": "2023-06-04T11:00:30+03:00",
|
||||
"dateModified": "2023-06-06T16:54:25+03:00",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Alan Orth"
|
||||
@ -179,6 +179,20 @@ From what I can see we need to upgrade the MODS schema from 3.1 to 3.7 and then
|
||||
</span></span></code></pre></div><ul>
|
||||
<li>Start working on updating the MODS schema in CGSpace from 3.1 to 3.8 based on Stefano and Salem’s work last year</li>
|
||||
</ul>
|
||||
<h2 id="2023-06-08">2023-06-08</h2>
|
||||
<ul>
|
||||
<li>Continue working on the MODS schema mapping</li>
|
||||
<li>Export CGSpace to check and update <code>dcterms.extent</code> fields
|
||||
<ul>
|
||||
<li>I normalized about 1,500 to use either “p. 1-6” or “5 p.” format</li>
|
||||
<li>Also, I used this GREL expression to extract missing pages from the citation field: <code>cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*(pp?\.\s?\d+[-–]\d+).*/)[0]</code></li>
|
||||
<li>This was over 4,000 items with a format like “p. 1-6” and “pp. 1-6” in the citation</li>
|
||||
<li>I used another GREL expression to extract another 5,000: <code>cells['dcterms.bibliographicCitation[en_US]'].value.match(/.*?(\d+\s+?[Pp]+\.).*/)[0]</code></li>
|
||||
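<li>This was for the format like “1 p.” (note that we had to make the leading <code>.*</code> lazy, as <code>.*?</code>, because a greedy <code>.*</code> would swallow all but the last digit of the page count, e.g. capturing “2 p.” from “12 p.”)</li>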
|
||||
</ul>
|
||||
</li>
|
||||
<li>I also did some work to capture a handful of missing DOIs and ISSNs, but it was only about 100 items and I will have to wait until the 10,000+ above finish importing</li>
|
||||
</ul>
|
||||
<!-- raw HTML omitted -->
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2023-06-04T11:00:30+03:00" />
|
||||
<meta property="og:updated_time" content="2023-06-06T16:54:25+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -3,19 +3,19 @@
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/</loc>
|
||||
<lastmod>2023-06-04T11:00:30+03:00</lastmod>
|
||||
<lastmod>2023-06-06T16:54:25+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||
<lastmod>2023-06-04T11:00:30+03:00</lastmod>
|
||||
<lastmod>2023-06-06T16:54:25+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2023-06/</loc>
|
||||
<lastmod>2023-06-04T11:00:30+03:00</lastmod>
|
||||
<lastmod>2023-06-06T16:54:25+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||
<lastmod>2023-06-04T11:00:30+03:00</lastmod>
|
||||
<lastmod>2023-06-06T16:54:25+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||
<lastmod>2023-06-04T11:00:30+03:00</lastmod>
|
||||
<lastmod>2023-06-06T16:54:25+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2023-05/</loc>
|
||||
<lastmod>2023-05-30T20:19:17+03:00</lastmod>
|
||||
|
Loading…
Reference in New Issue
Block a user