mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-11-22 14:45:03 +01:00
Update notes for 2017-01-24
This commit is contained in:
parent
dad9c406f6
commit
54c60de7d1
@ -194,8 +194,7 @@ value + "__description:" + cells["dc.type"].value
|
|||||||
- Test importing of the new CIAT records (actually there are 232, not 234):
|
- Test importing of the new CIAT records (actually there are 232, not 234):
|
||||||
|
|
||||||
```
|
```
|
||||||
$ JAVA_OPTS="-Xmx512m -Dfile.encoding=UTF-8" /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568
|
$ JAVA_OPTS="-Xmx512m -Dfile.encoding=UTF-8" /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &> /tmp/ciat.log
|
||||||
/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &> /tmp/ciat.log
|
|
||||||
```
|
```
|
||||||
|
|
||||||
- Many of the PDFs are 20, 30, 40, 50+ MB, which makes a total of 4GB
|
- Many of the PDFs are 20, 30, 40, 50+ MB, which makes a total of 4GB
|
||||||
@ -246,3 +245,12 @@ $ for community in 10568/171 10568/27868 10568/231 10568/27869 10568/150 10568/2
|
|||||||
```
|
```
|
||||||
$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- Create a new list of the top 500 journal titles from the database:
|
||||||
|
|
||||||
|
```
|
||||||
|
dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
|
```
|
||||||
|
|
||||||
|
- Then sort them in OpenRefine and create a controlled vocabulary by manually adding the XML markup, pull request ([#298](https://github.com/ilri/DSpace/pull/298))
|
||||||
|
- This would be the last issue remaining to close the meta issue about switching to controlled vocabularies ([#69](https://github.com/ilri/DSpace/pull/69))
|
||||||
|
@ -59,7 +59,7 @@ I asked on the dspace-tech mailing list because it seems to be broken, and actua
|
|||||||
|
|
||||||
"headline": "January, 2017",
|
"headline": "January, 2017",
|
||||||
"url": "https://alanorth.github.io/cgspace-notes/2017-01/",
|
"url": "https://alanorth.github.io/cgspace-notes/2017-01/",
|
||||||
"wordCount": "1327",
|
"wordCount": "1400",
|
||||||
|
|
||||||
|
|
||||||
"datePublished": "2017-01-02T10:43:00+03:00",
|
"datePublished": "2017-01-02T10:43:00+03:00",
|
||||||
@ -299,8 +299,7 @@ UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4' in position 15:
|
|||||||
<li>Now get the top 500 journal titles:</li>
|
<li>Now get the top 500 journal titles:</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by cou
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
nt desc limit 500) to /tmp/journal-titles.csv with csv;
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -351,8 +350,7 @@ delete from collection2item where id = '91082';
|
|||||||
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ JAVA_OPTS="-Xmx512m -Dfile.encoding=UTF-8" /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568
|
<pre><code>$ JAVA_OPTS="-Xmx512m -Dfile.encoding=UTF-8" /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &> /tmp/ciat.log
|
||||||
/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &> /tmp/ciat.log
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -413,6 +411,18 @@ $ gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -
|
|||||||
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Create a new list of the top 500 journal titles from the database:</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Then sort them in OpenRefine and create a controlled vocabulary by manually adding the XML markup, pull request (<a href="https://github.com/ilri/DSpace/pull/298">#298</a>)</li>
|
||||||
|
<li>This would be the last issue remaining to close the meta issue about switching to controlled vocabularies (<a href="https://github.com/ilri/DSpace/pull/69">#69</a>)</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -180,8 +180,7 @@ UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4&
|
|||||||
<li>Now get the top 500 journal titles:</li>
|
<li>Now get the top 500 journal titles:</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by cou
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
nt desc limit 500) to /tmp/journal-titles.csv with csv;
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -232,8 +231,7 @@ delete from collection2item where id = '91082';
|
|||||||
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ JAVA_OPTS=&quot;-Xmx512m -Dfile.encoding=UTF-8&quot; /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568
|
<pre><code>$ JAVA_OPTS=&quot;-Xmx512m -Dfile.encoding=UTF-8&quot; /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &amp;&gt; /tmp/ciat.log
|
||||||
/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &amp;&gt; /tmp/ciat.log
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -292,7 +290,19 @@ $ gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -
|
|||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
||||||
</code></pre></description>
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Create a new list of the top 500 journal titles from the database:</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Then sort them in OpenRefine and create a controlled vocabulary by manually adding the XML markup, pull request (<a href="https://github.com/ilri/DSpace/pull/298">#298</a>)</li>
|
||||||
|
<li>This would be the last issue remaining to close the meta issue about switching to controlled vocabularies (<a href="https://github.com/ilri/DSpace/pull/69">#69</a>)</li>
|
||||||
|
</ul></description>
|
||||||
</item>
|
</item>
|
||||||
|
|
||||||
<item>
|
<item>
|
||||||
|
@ -180,8 +180,7 @@ UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4&
|
|||||||
<li>Now get the top 500 journal titles:</li>
|
<li>Now get the top 500 journal titles:</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by cou
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
nt desc limit 500) to /tmp/journal-titles.csv with csv;
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -232,8 +231,7 @@ delete from collection2item where id = '91082';
|
|||||||
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ JAVA_OPTS=&quot;-Xmx512m -Dfile.encoding=UTF-8&quot; /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568
|
<pre><code>$ JAVA_OPTS=&quot;-Xmx512m -Dfile.encoding=UTF-8&quot; /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &amp;&gt; /tmp/ciat.log
|
||||||
/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &amp;&gt; /tmp/ciat.log
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -292,7 +290,19 @@ $ gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -
|
|||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
||||||
</code></pre></description>
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Create a new list of the top 500 journal titles from the database:</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Then sort them in OpenRefine and create a controlled vocabulary by manually adding the XML markup, pull request (<a href="https://github.com/ilri/DSpace/pull/298">#298</a>)</li>
|
||||||
|
<li>This would be the last issue remaining to close the meta issue about switching to controlled vocabularies (<a href="https://github.com/ilri/DSpace/pull/69">#69</a>)</li>
|
||||||
|
</ul></description>
|
||||||
</item>
|
</item>
|
||||||
|
|
||||||
<item>
|
<item>
|
||||||
|
@ -179,8 +179,7 @@ UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4&
|
|||||||
<li>Now get the top 500 journal titles:</li>
|
<li>Now get the top 500 journal titles:</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by cou
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
nt desc limit 500) to /tmp/journal-titles.csv with csv;
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -231,8 +230,7 @@ delete from collection2item where id = '91082';
|
|||||||
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
<li>Test importing of the new CIAT records (actually there are 232, not 234):</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ JAVA_OPTS=&quot;-Xmx512m -Dfile.encoding=UTF-8&quot; /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568
|
<pre><code>$ JAVA_OPTS=&quot;-Xmx512m -Dfile.encoding=UTF-8&quot; /home/dspacetest.cgiar.org/bin/dspace import --add --eperson=aorth@mjanja.ch --collection=10568/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &amp;&gt; /tmp/ciat.log
|
||||||
/79042 --source /home/aorth/CIAT_234/SimpleArchiveFormat/ --mapfile=/tmp/ciat.map &amp;&gt; /tmp/ciat.log
|
|
||||||
</code></pre>
|
</code></pre>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
@ -291,7 +289,19 @@ $ gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -
|
|||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
<pre><code>$ ./fix-metadata-values.py -i /tmp/fix-49-journal-titles.csv -f dc.source -t correct -m 55 -d dspace -u dspace -p 'password'
|
||||||
</code></pre></description>
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Create a new list of the top 500 journal titles from the database:</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<pre><code>dspace-# \copy (select distinct text_value, count(*) from metadatavalue where resource_type_id=2 and metadata_field_id=55 group by text_value order by count desc limit 500) to /tmp/journal-titles.csv with csv;
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Then sort them in OpenRefine and create a controlled vocabulary by manually adding the XML markup, pull request (<a href="https://github.com/ilri/DSpace/pull/298">#298</a>)</li>
|
||||||
|
<li>This would be the last issue remaining to close the meta issue about switching to controlled vocabularies (<a href="https://github.com/ilri/DSpace/pull/69">#69</a>)</li>
|
||||||
|
</ul></description>
|
||||||
</item>
|
</item>
|
||||||
|
|
||||||
<item>
|
<item>
|
||||||
|
Loading…
Reference in New Issue
Block a user