mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-11-22 06:35:03 +01:00
Add notes for 2021-10-09
This commit is contained in:
parent
23d6a808fc
commit
ab8cb272ea
@ -248,5 +248,32 @@ if(cells['dcterms.subject[en_US]'].value == cells['dcterms.subject[en_Fu]'].valu
|
||||
|
||||
- For these rows I starred them and then blanked out the original field so DSpace would see it as a removal, and add the new column
|
||||
- After these are uploaded I will normalize the `text_lang` fields in PostgreSQL again
|
||||
- I did the same for CIAT but there were over 7,000 duplicate metadata values! Hard to believe:
|
||||
|
||||
```console
|
||||
$ grep -c 'Removing duplicate value' /tmp/out.log
|
||||
7720
|
||||
```
|
||||
|
||||
- I applied these to the CIAT community, so in total that's over 8,000 duplicate metadata values removed in a handful of fields...
|
||||
|
||||
## 2021-10-09
|
||||
|
||||
- I did similar metadata cleanups for CCAFS and IITA too, but there were only a few hundred duplicates there
|
||||
- Also of note, there are some other fixes too, for example in IITA's community:
|
||||
|
||||
```console
|
||||
$ grep -c -E '(Fixing|Removing) (duplicate|excessive|invalid)' /tmp/out.log
|
||||
249
|
||||
```
|
||||
|
||||
- I ran a full Discovery re-indexing on CGSpace
|
||||
- Then I exported all of CGSpace and extracted the ISSNs and ISBNs:
|
||||
|
||||
```console
|
||||
$ csvcut -c 'id,cg.issn[en_US],dc.identifier.issn[en_US],cg.isbn[en_US],dc.identifier.isbn[en_US]' /tmp/cgspace.csv > /tmp/cgspace-issn-isbn.csv
|
||||
```
|
||||
|
||||
- I did cleanups on about seventy items with invalid and mixed ISSNs/ISBNs
|
||||
|
||||
<!-- vim: set sw=2 ts=2: -->
|
||||
|
@ -25,7 +25,7 @@ So we have 1879/7100 (26.46%) matching already
|
||||
<meta property="og:type" content="article" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2021-10/" />
|
||||
<meta property="article:published_time" content="2021-10-01T11:14:07+03:00" />
|
||||
<meta property="article:modified_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="article:modified_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
@ -56,9 +56,9 @@ So we have 1879/7100 (26.46%) matching already
|
||||
"@type": "BlogPosting",
|
||||
"headline": "October, 2021",
|
||||
"url": "https://alanorth.github.io/cgspace-notes/2021-10/",
|
||||
"wordCount": "1754",
|
||||
"wordCount": "1882",
|
||||
"datePublished": "2021-10-01T11:14:07+03:00",
|
||||
"dateModified": "2021-10-07T08:27:39+03:00",
|
||||
"dateModified": "2021-10-08T17:15:17+03:00",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Alan Orth"
|
||||
@ -390,6 +390,27 @@ $ csvjoin -c id /tmp/ilri-deduplicated-items.csv /tmp/ilri-deduplicated-items-cl
|
||||
<li>After these are uploaded I will normalize the <code>text_lang</code> fields in PostgreSQL again</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>I did the same for CIAT but there were over 7,000 duplicate metadata values! Hard to believe:</li>
|
||||
</ul>
|
||||
<pre tabindex="0"><code class="language-console" data-lang="console">$ grep -c 'Removing duplicate value' /tmp/out.log
|
||||
7720
|
||||
</code></pre><ul>
|
||||
<li>I applied these to the CIAT community, so in total that’s over 8,000 duplicate metadata values removed in a handful of fields…</li>
|
||||
</ul>
|
||||
<h2 id="2021-10-09">2021-10-09</h2>
|
||||
<ul>
|
||||
<li>I did similar metadata cleanups for CCAFS and IITA too, but there were only a few hundred duplicates there</li>
|
||||
<li>Also of note, there are some other fixes too, for example in IITA’s community:</li>
|
||||
</ul>
|
||||
<pre tabindex="0"><code class="language-console" data-lang="console">$ grep -c -E '(Fixing|Removing) (duplicate|excessive|invalid)' /tmp/out.log
|
||||
249
|
||||
</code></pre><ul>
|
||||
<li>I ran a full Discovery re-indexing on CGSpace</li>
|
||||
<li>Then I exported all of CGSpace and extracted the ISSNs and ISBNs:</li>
|
||||
</ul>
|
||||
<pre tabindex="0"><code class="language-console" data-lang="console">$ csvcut -c 'id,cg.issn[en_US],dc.identifier.issn[en_US],cg.isbn[en_US],dc.identifier.isbn[en_US]' /tmp/cgspace.csv > /tmp/cgspace-issn-isbn.csv
|
||||
</code></pre><ul>
|
||||
<li>I did cleanups on about seventy items with invalid and mixed ISSNs/ISBNs</li>
|
||||
</ul>
|
||||
<!-- raw HTML omitted -->
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2021-10-07T08:27:39+03:00" />
|
||||
<meta property="og:updated_time" content="2021-10-08T17:15:17+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -3,19 +3,19 @@
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/</loc>
|
||||
<lastmod>2021-10-07T08:27:39+03:00</lastmod>
|
||||
<lastmod>2021-10-08T17:15:17+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||
<lastmod>2021-10-07T08:27:39+03:00</lastmod>
|
||||
<lastmod>2021-10-08T17:15:17+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||
<lastmod>2021-10-07T08:27:39+03:00</lastmod>
|
||||
<lastmod>2021-10-08T17:15:17+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2021-10/</loc>
|
||||
<lastmod>2021-10-07T08:27:39+03:00</lastmod>
|
||||
<lastmod>2021-10-08T17:15:17+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||
<lastmod>2021-10-07T08:27:39+03:00</lastmod>
|
||||
<lastmod>2021-10-08T17:15:17+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2021-09/</loc>
|
||||
<lastmod>2021-10-04T11:10:54+03:00</lastmod>
|
||||
|
Loading…
Reference in New Issue
Block a user