From 56a24bf456512bd59a021bac495b73965df255f9 Mon Sep 17 00:00:00 2001
From: Alan Orth
Date: Tue, 28 Feb 2017 22:58:29 +0200
Subject: [PATCH] Update notes for 2017-02-28

---
 content/post/2017-02.md     | 12 +++++++++++-
 public/2017-02/index.html   | 14 ++++++++++++--
 public/index.xml            | 14 ++++++++++++--
 public/post/index.xml       | 14 ++++++++++++--
 public/tags/notes/index.xml | 14 ++++++++++++--
 5 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/content/post/2017-02.md b/content/post/2017-02.md
index 94e09b030..e856e011d 100644
--- a/content/post/2017-02.md
+++ b/content/post/2017-02.md
@@ -310,4 +310,14 @@ dspace=# \copy (select resource_id, metadata_value_id from metadatavalue where r
 COPY 1968
 ```
-- And then using awk or uniq to either remove or print the lines that have a duplicate `resource_id` (meaning they belong to the same item in DSpace and are therefore duplicates), and then using the `metadata_value_id` to delete them
+- And then use awk to print the duplicate lines to a separate file:
+
+```
+$ awk -F',' 'seen[$1]++' /tmp/ciat.csv > /tmp/ciat-dupes.csv
+```
+
+- From that file I can create a list of 279 deletes and put them in a batch script like:
+
+```
+delete from metadatavalue where resource_type_id=2 and metadata_field_id=3 and metadata_value_id=2742061;
+```
diff --git a/public/2017-02/index.html b/public/2017-02/index.html
index 6a8140698..6db9663cd 100644
--- a/public/2017-02/index.html
+++ b/public/2017-02/index.html
@@ -90,7 +90,7 @@ Looks like we’ll be using cg.identifier.ccafsprojectpii as the field name
   "headline": "February, 2017",
   "url": "https://alanorth.github.io/cgspace-notes/2017-02/",
-  "wordCount": "2019",
+  "wordCount": "2028",
   "datePublished": "2017-02-07T07:04:52-08:00",
@@ -522,9 +522,19 @@ COPY 1968
 </code></pre>

 <ul>
-<li>And then using awk or uniq to either remove or print the lines that have a duplicate <code>resource_id</code> (meaning they belong to the same item in DSpace and are therefore duplicates), and then using the <code>metadata_value_id</code> to delete them</li>
-</ul>
+<li>And then use awk to print the duplicate lines to a separate file:</li>
+</ul>
+
+<pre><code>$ awk -F',' 'seen[$1]++' /tmp/ciat.csv &gt; /tmp/ciat-dupes.csv
+</code></pre>
+
+<ul>
+<li>From that file I can create a list of 279 deletes and put them in a batch script like:</li>
+</ul>
+
+<pre><code>delete from metadatavalue where resource_type_id=2 and metadata_field_id=3 and metadata_value_id=2742061;
+</code></pre>
diff --git a/public/index.xml b/public/index.xml
index e24c7fd70..252ee3ea3 100644
--- a/public/index.xml
+++ b/public/index.xml
@@ -372,8 +372,18 @@ COPY 1968
 </code></pre>

 <ul>
-<li>And then using awk or uniq to either remove or print the lines that have a duplicate <code>resource_id</code> (meaning they belong to the same item in DSpace and are therefore duplicates), and then using the <code>metadata_value_id</code> to delete them</li>
-</ul>
+<li>And then use awk to print the duplicate lines to a separate file:</li>
+</ul>
+
+<pre><code>$ awk -F',' 'seen[$1]++' /tmp/ciat.csv &gt; /tmp/ciat-dupes.csv
+</code></pre>
+
+<ul>
+<li>From that file I can create a list of 279 deletes and put them in a batch script like:</li>
+</ul>
+
+<pre><code>delete from metadatavalue where resource_type_id=2 and metadata_field_id=3 and metadata_value_id=2742061;
+</code></pre>
diff --git a/public/post/index.xml b/public/post/index.xml
index baccecf8e..7d348dda0 100644
--- a/public/post/index.xml
+++ b/public/post/index.xml
@@ -372,8 +372,18 @@ COPY 1968
 </code></pre>

 <ul>
-<li>And then using awk or uniq to either remove or print the lines that have a duplicate <code>resource_id</code> (meaning they belong to the same item in DSpace and are therefore duplicates), and then using the <code>metadata_value_id</code> to delete them</li>
-</ul>
+<li>And then use awk to print the duplicate lines to a separate file:</li>
+</ul>
+
+<pre><code>$ awk -F',' 'seen[$1]++' /tmp/ciat.csv &gt; /tmp/ciat-dupes.csv
+</code></pre>
+
+<ul>
+<li>From that file I can create a list of 279 deletes and put them in a batch script like:</li>
+</ul>
+
+<pre><code>delete from metadatavalue where resource_type_id=2 and metadata_field_id=3 and metadata_value_id=2742061;
+</code></pre>
diff --git a/public/tags/notes/index.xml b/public/tags/notes/index.xml
index c15cf2a02..d2a7350ce 100644
--- a/public/tags/notes/index.xml
+++ b/public/tags/notes/index.xml
@@ -371,8 +371,18 @@ COPY 1968
 </code></pre>

 <ul>
-<li>And then using awk or uniq to either remove or print the lines that have a duplicate <code>resource_id</code> (meaning they belong to the same item in DSpace and are therefore duplicates), and then using the <code>metadata_value_id</code> to delete them</li>
-</ul>
+<li>And then use awk to print the duplicate lines to a separate file:</li>
+</ul>
+
+<pre><code>$ awk -F',' 'seen[$1]++' /tmp/ciat.csv &gt; /tmp/ciat-dupes.csv
+</code></pre>
+
+<ul>
+<li>From that file I can create a list of 279 deletes and put them in a batch script like:</li>
+</ul>
+
+<pre><code>delete from metadatavalue where resource_type_id=2 and metadata_field_id=3 and metadata_value_id=2742061;
+</code></pre>
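Editor's note: the patch shows the `awk 'seen[$1]++'` one-liner and one sample `delete` statement, but not how the batch of 279 deletes was produced. A minimal sketch of both steps, using made-up sample data and file names (the real inputs were `/tmp/ciat.csv` and `/tmp/ciat-dupes.csv`), might look like:

```shell
# Hypothetical sample of the exported CSV: resource_id,metadata_value_id
cat > /tmp/sample.csv <<'EOF'
100,2742060
100,2742061
101,2742062
102,2742063
102,2742064
EOF

# seen[$1]++ evaluates to 0 (false) the first time a resource_id appears
# and non-zero afterwards, so only second-and-later rows per id are printed.
awk -F',' 'seen[$1]++' /tmp/sample.csv > /tmp/sample-dupes.csv

# One delete statement per duplicate, keyed on metadata_value_id (column 2),
# mirroring the statement shown in the patch.
awk -F',' '{print "delete from metadatavalue where resource_type_id=2 and metadata_field_id=3 and metadata_value_id=" $2 ";"}' \
    /tmp/sample-dupes.csv > /tmp/sample-deletes.sql

cat /tmp/sample-deletes.sql
```

With the sample above, the dupes file holds the second row of each repeated `resource_id` (`100,2742061` and `102,2742064`), and the SQL file holds one delete per dupe.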