From 54bd802b0dad79b5da53f47a0a092d4f8af84b4c Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 30 Oct 2016 14:38:01 +0200 Subject: [PATCH] Add notes for 2016-10-30 --- content/post/2016-10.md | 34 +++++++++++++++++++++++++++++++++ public/2016-10/index.html | 38 +++++++++++++++++++++++++++++++++++++ public/index.xml | 38 +++++++++++++++++++++++++++++++++++++ public/post/index.xml | 38 +++++++++++++++++++++++++++++++++++++ public/tags/notes/index.xml | 38 +++++++++++++++++++++++++++++++++++++ 5 files changed, 186 insertions(+) diff --git a/content/post/2016-10.md b/content/post/2016-10.md index bf9cbe91f..d3b84934b 100644 --- a/content/post/2016-10.md +++ b/content/post/2016-10.md @@ -209,3 +209,37 @@ UPDATE 0 ![DSpace Test with Font Awesome icons](2016/10/dspacetest-fontawesome-icons.png) - Run the same replacements on CGSpace + +## 2016-10-30 + +- Fix some messed up authors on CGSpace: + +``` +dspace=# update metadatavalue set authority='799da1d8-22f3-43f5-8233-3d2ef5ebf8a8', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Charleston, B.%'; +UPDATE 10 +dspace=# update metadatavalue set authority='e936f5c5-343d-4c46-aa91-7a1fff6277ed', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Knight-Jones%'; +UPDATE 36 +``` + +- I updated the authority index but nothing seemed to change, so I'll wait and do it again after I update Discovery below +- Skype chat with Tsega about the [IFPRI contentdm bridge](https://github.com/ilri/ckm-cgspace-contentdm-bridge) +- We tested harvesting OAI in an example collection to see how it works +- Talk to Carlos Quiros about CG Core metadata in CGSpace +- Get a list of countries from CGSpace so I can do some batch corrections: + +``` +dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=228 group by text_value order by count desc) to /tmp/countries.csv with csv; +``` + +- Fix a bunch of countries in Open Refine and run the corrections on CGSpace: + +``` +$ ./fix-metadata-values.py -i countries-fix-18.csv -f dc.coverage.country -t 'correct' -m 228 -d dspace -u dspace -p fuuu +$ ./delete-metadata-values.py -i countries-delete-2.csv -f dc.coverage.country -m 228 -d dspace -u dspace -p fuuu +``` + +- Run a shit ton of author fixes from Peter Ballantyne that we've been cleaning up for two months: + +``` +$ ./fix-metadata-values.py -i /tmp/authors-fix-pb2.csv -f dc.contributor.author -t correct -m 3 -u dspace -d dspace -p fuuu +``` diff --git a/public/2016-10/index.html b/public/2016-10/index.html index 96a96a6c0..1a8cd92e3 100644 --- a/public/2016-10/index.html +++ b/public/2016-10/index.html @@ -349,6 +349,44 @@ UPDATE 0
  • Run the same replacements on CGSpace
  • +

    2016-10-30

    + + + +
    dspace=# update metadatavalue set authority='799da1d8-22f3-43f5-8233-3d2ef5ebf8a8', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Charleston, B.%';
    +UPDATE 10
    +dspace=# update metadatavalue set authority='e936f5c5-343d-4c46-aa91-7a1fff6277ed', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Knight-Jones%';
    +UPDATE 36
    +
    + + + +
    dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=228 group by text_value order by count desc) to /tmp/countries.csv with csv;
    +
    + + + +
    $ ./fix-metadata-values.py -i countries-fix-18.csv -f dc.coverage.country -t 'correct' -m 228 -d dspace -u dspace -p fuuu
    +$ ./delete-metadata-values.py -i countries-delete-2.csv -f dc.coverage.country -m 228 -d dspace -u dspace -p fuuu
    +
    + + + +
    $ ./fix-metadata-values.py -i /tmp/authors-fix-pb2.csv -f dc.contributor.author -t correct -m 3 -u dspace -d dspace -p fuuu
    +
    + diff --git a/public/index.xml b/public/index.xml index dd266704c..0c280ddba 100644 --- a/public/index.xml +++ b/public/index.xml @@ -275,6 +275,44 @@ UPDATE 0 <ul> <li>Run the same replacements on CGSpace</li> </ul> + +<h2 id="2016-10-30">2016-10-30</h2> + +<ul> +<li>Fix some messed up authors on CGSpace:</li> +</ul> + +<pre><code>dspace=# update metadatavalue set authority='799da1d8-22f3-43f5-8233-3d2ef5ebf8a8', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Charleston, B.%'; +UPDATE 10 +dspace=# update metadatavalue set authority='e936f5c5-343d-4c46-aa91-7a1fff6277ed', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Knight-Jones%'; +UPDATE 36 +</code></pre> + +<ul> +<li>I updated the authority index but nothing seemed to change, so I&rsquo;ll wait and do it again after I update Discovery below</li> +<li>Skype chat with Tsega about the <a href="https://github.com/ilri/ckm-cgspace-contentdm-bridge">IFPRI contentdm bridge</a></li> +<li>We tested harvesting OAI in an example collection to see how it works</li> +<li>Talk to Carlos Quiros about CG Core metadata in CGSpace</li> +<li>Get a list of countries from CGSpace so I can do some batch corrections:</li> +</ul> + +<pre><code>dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=228 group by text_value order by count desc) to /tmp/countries.csv with csv; +</code></pre> + +<ul> +<li>Fix a bunch of countries in Open Refine and run the corrections on CGSpace:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i countries-fix-18.csv -f dc.coverage.country -t 'correct' -m 228 -d dspace -u dspace -p fuuu +$ ./delete-metadata-values.py -i countries-delete-2.csv -f dc.coverage.country -m 228 -d dspace -u dspace -p fuuu +</code></pre> + +<ul> +<li>Run a shit ton of author fixes from Peter Ballantyne that we&rsquo;ve been cleaning up for two months:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i /tmp/authors-fix-pb2.csv -f dc.contributor.author -t correct -m 3 -u dspace -d dspace -p fuuu +</code></pre> diff --git a/public/post/index.xml b/public/post/index.xml index f39eafaf7..71539369c 100644 --- a/public/post/index.xml +++ b/public/post/index.xml @@ -275,6 +275,44 @@ UPDATE 0 <ul> <li>Run the same replacements on CGSpace</li> </ul> + +<h2 id="2016-10-30">2016-10-30</h2> + +<ul> +<li>Fix some messed up authors on CGSpace:</li> +</ul> + +<pre><code>dspace=# update metadatavalue set authority='799da1d8-22f3-43f5-8233-3d2ef5ebf8a8', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Charleston, B.%'; +UPDATE 10 +dspace=# update metadatavalue set authority='e936f5c5-343d-4c46-aa91-7a1fff6277ed', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Knight-Jones%'; +UPDATE 36 +</code></pre> + +<ul> +<li>I updated the authority index but nothing seemed to change, so I&rsquo;ll wait and do it again after I update Discovery below</li> +<li>Skype chat with Tsega about the <a href="https://github.com/ilri/ckm-cgspace-contentdm-bridge">IFPRI contentdm bridge</a></li> +<li>We tested harvesting OAI in an example collection to see how it works</li> +<li>Talk to Carlos Quiros about CG Core metadata in CGSpace</li> +<li>Get a list of countries from CGSpace so I can do some batch corrections:</li> +</ul> + +<pre><code>dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=228 group by text_value order by count desc) to /tmp/countries.csv with csv; +</code></pre> + +<ul> +<li>Fix a bunch of countries in Open Refine and run the corrections on CGSpace:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i countries-fix-18.csv -f dc.coverage.country -t 'correct' -m 228 -d dspace -u dspace -p fuuu +$ ./delete-metadata-values.py -i countries-delete-2.csv -f dc.coverage.country -m 228 -d dspace -u dspace -p fuuu +</code></pre> + +<ul> +<li>Run a shit ton of author fixes from Peter Ballantyne that we&rsquo;ve been cleaning up for two months:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i /tmp/authors-fix-pb2.csv -f dc.contributor.author -t correct -m 3 -u dspace -d dspace -p fuuu +</code></pre> diff --git a/public/tags/notes/index.xml b/public/tags/notes/index.xml index a99e96d99..e6e8c8607 100644 --- a/public/tags/notes/index.xml +++ b/public/tags/notes/index.xml @@ -274,6 +274,44 @@ UPDATE 0 <ul> <li>Run the same replacements on CGSpace</li> </ul> + +<h2 id="2016-10-30">2016-10-30</h2> + +<ul> +<li>Fix some messed up authors on CGSpace:</li> +</ul> + +<pre><code>dspace=# update metadatavalue set authority='799da1d8-22f3-43f5-8233-3d2ef5ebf8a8', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Charleston, B.%'; +UPDATE 10 +dspace=# update metadatavalue set authority='e936f5c5-343d-4c46-aa91-7a1fff6277ed', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Knight-Jones%'; +UPDATE 36 +</code></pre> + +<ul> +<li>I updated the authority index but nothing seemed to change, so I&rsquo;ll wait and do it again after I update Discovery below</li> +<li>Skype chat with Tsega about the <a href="https://github.com/ilri/ckm-cgspace-contentdm-bridge">IFPRI contentdm bridge</a></li> +<li>We tested harvesting OAI in an example collection to see how it works</li> +<li>Talk to Carlos Quiros about CG Core metadata in CGSpace</li> +<li>Get a list of countries from CGSpace so I can do some batch corrections:</li> +</ul> + +<pre><code>dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=228 group by text_value order by count desc) to /tmp/countries.csv with csv; +</code></pre> + +<ul> +<li>Fix a bunch of countries in Open Refine and run the corrections on CGSpace:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i countries-fix-18.csv -f dc.coverage.country -t 'correct' -m 228 -d dspace -u dspace -p fuuu +$ ./delete-metadata-values.py -i countries-delete-2.csv -f dc.coverage.country -m 228 -d dspace -u dspace -p fuuu +</code></pre> + +<ul> +<li>Run a shit ton of author fixes from Peter Ballantyne that we&rsquo;ve been cleaning up for two months:</li> +</ul> + +<pre><code>$ ./fix-metadata-values.py -i /tmp/authors-fix-pb2.csv -f dc.contributor.author -t correct -m 3 -u dspace -d dspace -p fuuu +</code></pre>