diff --git a/content/post/2016-12.md b/content/post/2016-12.md index 9a6da3574..ff0c5b4e8 100644 --- a/content/post/2016-12.md +++ b/content/post/2016-12.md @@ -439,3 +439,43 @@ dspace=# update metadatavalue set authority='2df8136e-d8f4-4142-b58c-562337cab76 ``` - The authority IDs were different now than when I was looking a few days ago so I had to adjust them here + +## 2016-12-11 + +- After enabling a sizable `shared_buffers` for CGSpace's PostgreSQL configuration the number of connections to the database dropped significantly + +![postgres_bgwriter-week](2016/12/postgres_bgwriter-week.png) +![postgres_connections_ALL-week](2016/12/postgres_connections_ALL-week.png) + +- Looking at CIAT records from last week again, they have a lot of double authors like: + +``` +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::600 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::500 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::0 +``` + +- Some in the same `dc.contributor.author` field, and some in others like `dc.contributor.author[en_US]` etc +- Removing the duplicates in OpenRefine and uploading a CSV to DSpace says "no changes detected" +- Seems like the only way to sort of clean these up would be to start in SQL: + +``` +dspace=# select distinct text_value, authority, confidence from metadatavalue where resource_type_id=2 and metadata_field_id=3 and text_value like 'International Center for Tropical Agriculture'; + text_value | authority | confidence +-----------------------------------------------+--------------------------------------+------------ + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | -1 + International Center for Tropical Agriculture | | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 500 + International Center for Tropical Agriculture | 
cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 600 + International Center for Tropical Agriculture | | -1 + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 500 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | -1 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 0 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value = 'International Center for Tropical Agriculture'; +UPDATE 1693 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', text_value='International Center for Tropical Agriculture', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value like '%CIAT%'; +UPDATE 35 +``` + +- Work on article for KM4Dev journal diff --git a/public/2016-12/index.html b/public/2016-12/index.html index c4a81a4ca..05efb6e93 100644 --- a/public/2016-12/index.html +++ b/public/2016-12/index.html @@ -30,7 +30,7 @@ - + @@ -579,6 +579,52 @@ dspace=# update metadatavalue set authority='2df8136e-d8f4-4142-b58c-562337cab76
  • The authority IDs were different now than when I was looking a few days ago so I had to adjust them here
  • +

    2016-12-11

    + + + +

    postgres_bgwriter-week +postgres_connections_ALL-week

    + + + +
    International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::600
    +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::500
    +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::0
    +
    + + + +
    dspace=# select distinct text_value, authority, confidence from metadatavalue where resource_type_id=2 and metadata_field_id=3 and text_value like 'International Center for Tropical Agriculture';
    +                  text_value                   |              authority               | confidence
    +-----------------------------------------------+--------------------------------------+------------
    + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 |         -1
    + International Center for Tropical Agriculture |                                      |        600
    + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 |        500
    + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 |        600
    + International Center for Tropical Agriculture |                                      |         -1
    + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 |        500
    + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 |        600
    + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 |         -1
    + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 |          0
    +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value = 'International Center for Tropical Agriculture';
    +UPDATE 1693
    +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', text_value='International Center for Tropical Agriculture', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value like '%CIAT%';
    +UPDATE 35
    +
    + + + diff --git a/public/2016/12/postgres_bgwriter-week.png b/public/2016/12/postgres_bgwriter-week.png new file mode 100644 index 000000000..2abcbcaf0 Binary files /dev/null and b/public/2016/12/postgres_bgwriter-week.png differ diff --git a/public/2016/12/postgres_connections_ALL-week.png b/public/2016/12/postgres_connections_ALL-week.png new file mode 100644 index 000000000..fc9cd3276 Binary files /dev/null and b/public/2016/12/postgres_connections_ALL-week.png differ diff --git a/public/index.xml b/public/index.xml index a8d7f61c4..19d58662d 100644 --- a/public/index.xml +++ b/public/index.xml @@ -482,6 +482,52 @@ dspace=# update metadatavalue set authority='2df8136e-d8f4-4142-b58c-562337c <ul> <li>The authority IDs were different now than when I was looking a few days ago so I had to adjust them here</li> </ul> + +<h2 id="2016-12-11">2016-12-11</h2> + +<ul> +<li>After enabling a sizable <code>shared_buffers</code> for CGSpace&rsquo;s PostgreSQL configuration the number of connections to the database dropped significantly</li> +</ul> + +<p><img src="2016/12/postgres_bgwriter-week.png" alt="postgres_bgwriter-week" /> +<img src="2016/12/postgres_connections_ALL-week.png" alt="postgres_connections_ALL-week" /></p> + +<ul> +<li>Looking at CIAT records from last week again, they have a lot of double authors like:</li> +</ul> + +<pre><code>International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::600 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::500 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::0 +</code></pre> + +<ul> +<li>Some in the same <code>dc.contributor.author</code> field, and some in others like <code>dc.contributor.author[en_US]</code> etc</li> +<li>Removing the duplicates in OpenRefine and uploading a CSV to DSpace says &ldquo;no changes detected&rdquo;</li> +<li>Seems like the only way to sortof clean these up would be to start in SQL:</li> 
+</ul> + +<pre><code>dspace=# select distinct text_value, authority, confidence from metadatavalue where resource_type_id=2 and metadata_field_id=3 and text_value like 'International Center for Tropical Agriculture'; + text_value | authority | confidence +-----------------------------------------------+--------------------------------------+------------ + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | -1 + International Center for Tropical Agriculture | | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 500 + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 600 + International Center for Tropical Agriculture | | -1 + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 500 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | -1 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 0 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value = 'International Center for Tropical Agriculture'; +UPDATE 1693 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', text_value='International Center for Tropical Agriculture', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value like '%CIAT%'; +UPDATE 35 +</code></pre> + +<ul> +<li>Work on article for KM4Dev journal</li> +</ul> diff --git a/public/post/index.xml b/public/post/index.xml index cf5f5efa1..49cddf1c8 100644 --- a/public/post/index.xml +++ b/public/post/index.xml @@ -482,6 +482,52 @@ dspace=# update metadatavalue set authority='2df8136e-d8f4-4142-b58c-562337c <ul> <li>The authority IDs were different now than when I was looking a few days ago 
so I had to adjust them here</li> </ul> + +<h2 id="2016-12-11">2016-12-11</h2> + +<ul> +<li>After enabling a sizable <code>shared_buffers</code> for CGSpace&rsquo;s PostgreSQL configuration the number of connections to the database dropped significantly</li> +</ul> + +<p><img src="2016/12/postgres_bgwriter-week.png" alt="postgres_bgwriter-week" /> +<img src="2016/12/postgres_connections_ALL-week.png" alt="postgres_connections_ALL-week" /></p> + +<ul> +<li>Looking at CIAT records from last week again, they have a lot of double authors like:</li> +</ul> + +<pre><code>International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::600 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::500 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::0 +</code></pre> + +<ul> +<li>Some in the same <code>dc.contributor.author</code> field, and some in others like <code>dc.contributor.author[en_US]</code> etc</li> +<li>Removing the duplicates in OpenRefine and uploading a CSV to DSpace says &ldquo;no changes detected&rdquo;</li> +<li>Seems like the only way to sortof clean these up would be to start in SQL:</li> +</ul> + +<pre><code>dspace=# select distinct text_value, authority, confidence from metadatavalue where resource_type_id=2 and metadata_field_id=3 and text_value like 'International Center for Tropical Agriculture'; + text_value | authority | confidence +-----------------------------------------------+--------------------------------------+------------ + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | -1 + International Center for Tropical Agriculture | | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 500 + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 600 + International Center for Tropical Agriculture | | -1 + International Center for Tropical Agriculture | 
cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 500 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | -1 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 0 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value = 'International Center for Tropical Agriculture'; +UPDATE 1693 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', text_value='International Center for Tropical Agriculture', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value like '%CIAT%'; +UPDATE 35 +</code></pre> + +<ul> +<li>Work on article for KM4Dev journal</li> +</ul> diff --git a/public/tags/notes/index.xml b/public/tags/notes/index.xml index 3b4191b4f..a355b44c8 100644 --- a/public/tags/notes/index.xml +++ b/public/tags/notes/index.xml @@ -481,6 +481,52 @@ dspace=# update metadatavalue set authority='2df8136e-d8f4-4142-b58c-562337c <ul> <li>The authority IDs were different now than when I was looking a few days ago so I had to adjust them here</li> </ul> + +<h2 id="2016-12-11">2016-12-11</h2> + +<ul> +<li>After enabling a sizable <code>shared_buffers</code> for CGSpace&rsquo;s PostgreSQL configuration the number of connections to the database dropped significantly</li> +</ul> + +<p><img src="2016/12/postgres_bgwriter-week.png" alt="postgres_bgwriter-week" /> +<img src="2016/12/postgres_connections_ALL-week.png" alt="postgres_connections_ALL-week" /></p> + +<ul> +<li>Looking at CIAT records from last week again, they have a lot of double authors like:</li> +</ul> + +<pre><code>International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::600 +International Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::500 +International 
Center for Tropical Agriculture::3026b1de-9302-4f3e-85ab-ef48da024eb2::0 +</code></pre> + +<ul> +<li>Some in the same <code>dc.contributor.author</code> field, and some in others like <code>dc.contributor.author[en_US]</code> etc</li> +<li>Removing the duplicates in OpenRefine and uploading a CSV to DSpace says &ldquo;no changes detected&rdquo;</li> +<li>Seems like the only way to sortof clean these up would be to start in SQL:</li> +</ul> + +<pre><code>dspace=# select distinct text_value, authority, confidence from metadatavalue where resource_type_id=2 and metadata_field_id=3 and text_value like 'International Center for Tropical Agriculture'; + text_value | authority | confidence +-----------------------------------------------+--------------------------------------+------------ + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | -1 + International Center for Tropical Agriculture | | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 500 + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 600 + International Center for Tropical Agriculture | | -1 + International Center for Tropical Agriculture | cc726b78-a2f4-4ee9-af98-855c2ea31c36 | 500 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 600 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | -1 + International Center for Tropical Agriculture | 3026b1de-9302-4f3e-85ab-ef48da024eb2 | 0 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', confidence=600 where resource_type_id=2 and metadata_field_id=3 and text_value = 'International Center for Tropical Agriculture'; +UPDATE 1693 +dspace=# update metadatavalue set authority='3026b1de-9302-4f3e-85ab-ef48da024eb2', text_value='International Center for Tropical Agriculture', confidence=600 where resource_type_id=2 and metadata_field_id=3 and 
text_value like '%CIAT%'; +UPDATE 35 +</code></pre> + +<ul> +<li>Work on article for KM4Dev journal</li> +</ul> diff --git a/static/2016/12/postgres_bgwriter-week.png b/static/2016/12/postgres_bgwriter-week.png new file mode 100644 index 000000000..2abcbcaf0 Binary files /dev/null and b/static/2016/12/postgres_bgwriter-week.png differ diff --git a/static/2016/12/postgres_connections_ALL-week.png b/static/2016/12/postgres_connections_ALL-week.png new file mode 100644 index 000000000..fc9cd3276 Binary files /dev/null and b/static/2016/12/postgres_connections_ALL-week.png differ