From bd24b388364987f618caae04b18a2cc3fb72d921 Mon Sep 17 00:00:00 2001
From: Alan Orth <alan.orth@gmail.com>
Date: Wed, 17 Feb 2016 23:17:11 +0200
Subject: [PATCH] Update public html for 2016-02

---
 public/2016-02/index.html   | 27 +++++++++++++++++++++++++++
 public/index.xml            | 27 +++++++++++++++++++++++++++
 public/tags/notes/index.xml | 27 +++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)
diff --git a/public/2016-02/index.html b/public/2016-02/index.html
index 077d4b18b..b04801b97 100644
--- a/public/2016-02/index.html
+++ b/public/2016-02/index.html
@@ -308,6 +308,33 @@ CIAT_COLOMBIA_000169_Técnicas_para_el_aislamiento_y_cultivo_de_protoplastos_de_
 <ul>
 <li>Merge pull requests for submission form theming (<a href="https://github.com/ilri/DSpace/pull/178">#178</a>) and missing center subjects in XMLUI item views (<a href="https://github.com/ilri/DSpace/pull/176">#176</a>)</li>
 <li>They will be deployed on CGSpace the next time I re-deploy</li>
+</ul>
+
+<h2 id="2016-02-16:124a59adbaa8ef13e1518d003fc03981">2016-02-16</h2>
+
+<ul>
+<li>Turns out OpenRefine has an unescape function!</li>
+</ul>
+
+<pre><code>value.unescape(&quot;url&quot;)
+</code></pre>
+
+<ul>
+<li>This turns the URLs into human-readable versions that we can use as proper filenames</li>
+<li>Run web server and system updates on DSpace Test and reboot</li>
+<li>To merge <code>dc.identifier.url</code> and <code>dc.identifier.url[]</code>, rename the second column so it doesn&rsquo;t have the brackets, like <code>dc.identifier.url2</code></li>
+<li>Then you create a facet for blank values on each column, show the rows that have values for one and not the other, then transform each independently to have the contents of the other, with &ldquo;||&rdquo; in between</li>
+<li>Work on Python script for parsing and downloading PDF records from <code>dc.identifier.url</code></li>
+<li>To turn <code>dc.identifier.url</code> into filenames, create a new column based on it, using the transform in the next item</li>
+<li>To get filenames from <code>dc.identifier.url</code>, create a new column based on this transform: <code>forEach(value.split('||'), v, v.split('/')[-1]).join('||')</code></li>
+<li>This also works for records that have multiple URLs (separated by &ldquo;||&rdquo;)</li>
+</ul>
+
+<h2 id="2016-02-17:124a59adbaa8ef13e1518d003fc03981">2016-02-17</h2>
+
+<ul>
+<li>Re-deploy CGSpace, run all system updates, and reboot</li>
+<li>More work on CIAT data, cleaning and doing a last metadata-only import into DSpace Test</li>
 </ul>
 
   </section>
diff --git a/public/index.xml b/public/index.xml
index 4b6964298..c41ac3077 100644
--- a/public/index.xml
+++ b/public/index.xml
@@ -247,6 +247,33 @@ CIAT_COLOMBIA_000169_Técnicas_para_el_aislamiento_y_cultivo_de_protoplastos_de_
 &lt;li&gt;Merge pull requests for submission form theming (&lt;a href=&#34;https://github.com/ilri/DSpace/pull/178&#34;&gt;#178&lt;/a&gt;) and missing center subjects in XMLUI item views (&lt;a href=&#34;https://github.com/ilri/DSpace/pull/176&#34;&gt;#176&lt;/a&gt;)&lt;/li&gt;
 &lt;li&gt;They will be deployed on CGSpace the next time I re-deploy&lt;/li&gt;
 &lt;/ul&gt;
+
+&lt;h2 id=&#34;2016-02-16:124a59adbaa8ef13e1518d003fc03981&#34;&gt;2016-02-16&lt;/h2&gt;
+
+&lt;ul&gt;
+&lt;li&gt;Turns out OpenRefine has an unescape function!&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;pre&gt;&lt;code&gt;value.unescape(&amp;quot;url&amp;quot;)
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;ul&gt;
+&lt;li&gt;This turns the URLs into human-readable versions that we can use as proper filenames&lt;/li&gt;
+&lt;li&gt;Run web server and system updates on DSpace Test and reboot&lt;/li&gt;
+&lt;li&gt;To merge &lt;code&gt;dc.identifier.url&lt;/code&gt; and &lt;code&gt;dc.identifier.url[]&lt;/code&gt;, rename the second column so it doesn&amp;rsquo;t have the brackets, like &lt;code&gt;dc.identifier.url2&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;Then you create a facet for blank values on each column, show the rows that have values for one and not the other, then transform each independently to have the contents of the other, with &amp;ldquo;||&amp;rdquo; in between&lt;/li&gt;
+&lt;li&gt;Work on Python script for parsing and downloading PDF records from &lt;code&gt;dc.identifier.url&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;To turn &lt;code&gt;dc.identifier.url&lt;/code&gt; into filenames, create a new column based on it, using the transform in the next item&lt;/li&gt;
+&lt;li&gt;To get filenames from &lt;code&gt;dc.identifier.url&lt;/code&gt;, create a new column based on this transform: &lt;code&gt;forEach(value.split(&#39;||&#39;), v, v.split(&#39;/&#39;)[-1]).join(&#39;||&#39;)&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;This also works for records that have multiple URLs (separated by &amp;ldquo;||&amp;rdquo;)&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;h2 id=&#34;2016-02-17:124a59adbaa8ef13e1518d003fc03981&#34;&gt;2016-02-17&lt;/h2&gt;
+
+&lt;ul&gt;
+&lt;li&gt;Re-deploy CGSpace, run all system updates, and reboot&lt;/li&gt;
+&lt;li&gt;More work on CIAT data, cleaning and doing a last metadata-only import into DSpace Test&lt;/li&gt;
+&lt;/ul&gt;
 </description>
     </item>
     
diff --git a/public/tags/notes/index.xml b/public/tags/notes/index.xml
index cf1cc6abe..1b5cde904 100644
--- a/public/tags/notes/index.xml
+++ b/public/tags/notes/index.xml
@@ -247,6 +247,33 @@ CIAT_COLOMBIA_000169_Técnicas_para_el_aislamiento_y_cultivo_de_protoplastos_de_
 &lt;li&gt;Merge pull requests for submission form theming (&lt;a href=&#34;https://github.com/ilri/DSpace/pull/178&#34;&gt;#178&lt;/a&gt;) and missing center subjects in XMLUI item views (&lt;a href=&#34;https://github.com/ilri/DSpace/pull/176&#34;&gt;#176&lt;/a&gt;)&lt;/li&gt;
 &lt;li&gt;They will be deployed on CGSpace the next time I re-deploy&lt;/li&gt;
 &lt;/ul&gt;
+
+&lt;h2 id=&#34;2016-02-16:124a59adbaa8ef13e1518d003fc03981&#34;&gt;2016-02-16&lt;/h2&gt;
+
+&lt;ul&gt;
+&lt;li&gt;Turns out OpenRefine has an unescape function!&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;pre&gt;&lt;code&gt;value.unescape(&amp;quot;url&amp;quot;)
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;ul&gt;
+&lt;li&gt;This turns the URLs into human-readable versions that we can use as proper filenames&lt;/li&gt;
+&lt;li&gt;Run web server and system updates on DSpace Test and reboot&lt;/li&gt;
+&lt;li&gt;To merge &lt;code&gt;dc.identifier.url&lt;/code&gt; and &lt;code&gt;dc.identifier.url[]&lt;/code&gt;, rename the second column so it doesn&amp;rsquo;t have the brackets, like &lt;code&gt;dc.identifier.url2&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;Then you create a facet for blank values on each column, show the rows that have values for one and not the other, then transform each independently to have the contents of the other, with &amp;ldquo;||&amp;rdquo; in between&lt;/li&gt;
+&lt;li&gt;Work on Python script for parsing and downloading PDF records from &lt;code&gt;dc.identifier.url&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;To turn &lt;code&gt;dc.identifier.url&lt;/code&gt; into filenames, create a new column based on it, using the transform in the next item&lt;/li&gt;
+&lt;li&gt;To get filenames from &lt;code&gt;dc.identifier.url&lt;/code&gt;, create a new column based on this transform: &lt;code&gt;forEach(value.split(&#39;||&#39;), v, v.split(&#39;/&#39;)[-1]).join(&#39;||&#39;)&lt;/code&gt;&lt;/li&gt;
+&lt;li&gt;This also works for records that have multiple URLs (separated by &amp;ldquo;||&amp;rdquo;)&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;h2 id=&#34;2016-02-17:124a59adbaa8ef13e1518d003fc03981&#34;&gt;2016-02-17&lt;/h2&gt;
+
+&lt;ul&gt;
+&lt;li&gt;Re-deploy CGSpace, run all system updates, and reboot&lt;/li&gt;
+&lt;li&gt;More work on CIAT data, cleaning and doing a last metadata-only import into DSpace Test&lt;/li&gt;
+&lt;/ul&gt;
 </description>
     </item>