From cfe5796b3a1c41ceadacbbcb9122ef9ee67242ea Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 8 Nov 2016 12:44:29 +0200 Subject: [PATCH] Update notes for 2016-11-08 --- content/post/2016-11.md | 10 ++++++++++ public/2016-11/index.html | 9 +++++++++ public/index.xml | 9 +++++++++ public/post/index.xml | 9 +++++++++ public/tags/notes/index.xml | 9 +++++++++ 5 files changed, 46 insertions(+) diff --git a/content/post/2016-11.md b/content/post/2016-11.md index 1c5ec006e..0166f1171 100644 --- a/content/post/2016-11.md +++ b/content/post/2016-11.md @@ -85,3 +85,13 @@ $ ./fix-metadata-values.py -i /tmp/CRPs.csv -f cg.contributor.crp -t correct -m ![Listings and Reports broken in DSpace 5.5](2016/11/listings-and-reports-55.png) - I've filed a ticket with Atmire +- Thinking about batch updates for ORCIDs and authors +- Playing with [SolrClient](https://github.com/moonlitesolutions/SolrClient) in Python to query Solr +- All records in the authority core are either `authority_type:orcid` or `authority_type:person` +- There is a `deleted` field and all items seem to be `false`, but it might be an important sanity check to remember +- The way to go is probably to have a CSV of author names and authority IDs, then to batch update them in PostgreSQL +- Dump of the top ~200 authors in CGSpace: + +``` +dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=3 group by text_value order by count desc limit 210) to /tmp/210-authors.csv with csv; +``` diff --git a/public/2016-11/index.html b/public/2016-11/index.html index 4930a8667..6406f4d3d 100644 --- a/public/2016-11/index.html +++ b/public/2016-11/index.html @@ -186,8 +186,17 @@ COPY 22 +
dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=3 group by text_value order by count desc limit 210) to /tmp/210-authors.csv with csv;
+
+ diff --git a/public/index.xml b/public/index.xml index 4574c3169..5f8cb8b8d 100644 --- a/public/index.xml +++ b/public/index.xml @@ -113,7 +113,16 @@ COPY 22 <ul> <li>I&rsquo;ve filed a ticket with Atmire</li> +<li>Thinking about batch updates for ORCIDs and authors</li> +<li>Playing with <a href="https://github.com/moonlitesolutions/SolrClient">SolrClient</a> in Python to query Solr</li> +<li>All records in the authority core are either <code>authority_type:orcid</code> or <code>authority_type:person</code></li> +<li>There is a <code>deleted</code> field and all items seem to be <code>false</code>, but it might be an important sanity check to remember</li> +<li>The way to go is probably to have a CSV of author names and authority IDs, then to batch update them in PostgreSQL</li> +<li>Dump of the top ~200 authors in CGSpace:</li> </ul> + +<pre><code>dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=3 group by text_value order by count desc limit 210) to /tmp/210-authors.csv with csv; +</code></pre> diff --git a/public/post/index.xml b/public/post/index.xml index 16ca209b5..47ffe6d2f 100644 --- a/public/post/index.xml +++ b/public/post/index.xml @@ -113,7 +113,16 @@ COPY 22 <ul> <li>I&rsquo;ve filed a ticket with Atmire</li> +<li>Thinking about batch updates for ORCIDs and authors</li> +<li>Playing with <a href="https://github.com/moonlitesolutions/SolrClient">SolrClient</a> in Python to query Solr</li> +<li>All records in the authority core are either <code>authority_type:orcid</code> or <code>authority_type:person</code></li> +<li>There is a <code>deleted</code> field and all items seem to be <code>false</code>, but it might be an important sanity check to remember</li> +<li>The way to go is probably to have a CSV of author names and authority IDs, then to batch update them in PostgreSQL</li> +<li>Dump of the top ~200 authors in CGSpace:</li> </ul> + +<pre><code>dspace=# \copy (select distinct text_value, count(*) from 
metadatavalue where metadata_field_id=3 group by text_value order by count desc limit 210) to /tmp/210-authors.csv with csv; +</code></pre> diff --git a/public/tags/notes/index.xml b/public/tags/notes/index.xml index 78fd45096..61618bba7 100644 --- a/public/tags/notes/index.xml +++ b/public/tags/notes/index.xml @@ -112,7 +112,16 @@ COPY 22 <ul> <li>I&rsquo;ve filed a ticket with Atmire</li> +<li>Thinking about batch updates for ORCIDs and authors</li> +<li>Playing with <a href="https://github.com/moonlitesolutions/SolrClient">SolrClient</a> in Python to query Solr</li> +<li>All records in the authority core are either <code>authority_type:orcid</code> or <code>authority_type:person</code></li> +<li>There is a <code>deleted</code> field and all items seem to be <code>false</code>, but it might be an important sanity check to remember</li> +<li>The way to go is probably to have a CSV of author names and authority IDs, then to batch update them in PostgreSQL</li> +<li>Dump of the top ~200 authors in CGSpace:</li> </ul> + +<pre><code>dspace=# \copy (select distinct text_value, count(*) from metadatavalue where metadata_field_id=3 group by text_value order by count desc limit 210) to /tmp/210-authors.csv with csv; +</code></pre>