From 3d73a51b1c00ce637c3b2053c528068d81cee9ea Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 14 Aug 2019 13:39:29 +0300 Subject: [PATCH] Add notes for 2019-08-14 --- content/posts/2019-08.md | 31 ++++++++++++++++++++++++++++++ docs/2019-08/index.html | 41 +++++++++++++++++++++++++++++++++++++--- docs/sitemap.xml | 10 +++++----- 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/content/posts/2019-08.md b/content/posts/2019-08.md index 19b0085df..559427b8f 100644 --- a/content/posts/2019-08.md +++ b/content/posts/2019-08.md @@ -159,5 +159,36 @@ $ dspace user -a -m blah@blah.com -g Mohammad -s Salem -p 'domoamaaa' - Create and merge a pull request ([#429](https://github.com/ilri/DSpace/pull/429)) to add eleven new CCAFS Phase II Project Tags to CGSpace - Atmire responded to the [Solr cores issue](https://tracker.atmire.com/tickets-cgiar-ilri/view-ticket?id=685) last week, but they could not reproduce the issue - I told them not to continue, and that we would keep an eye on it and keep troubleshooting it (if neccessary) in the public eye on dspace-tech and Solr mailing lists +- Testing an import of 1,429 Bioversity items (metadata only) on my local development machine and got an error with Java memory after about 1,000 items: + +``` +$ ~/dspace/bin/dspace metadata-import -f /tmp/bioversity.csv -e blah@blah.com +... 
+java.lang.OutOfMemoryError: GC overhead limit exceeded +``` + +- I increased the heap size to 1536m and tried again: + +``` +$ export JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx1536m" +$ ~/dspace/bin/dspace metadata-import -f /tmp/bioversity.csv -e blah@blah.com +``` + +- This time it succeeded, and using VisualVM I noticed that the import process used a maximum of 620MB of RAM + +## 2019-08-14 + +- I imported the 1429 Bioversity records into DSpace Test + - To make sure we didn't have memory issues I reduced Tomcat's JVM heap by 512m, increased the import process's heap to 512m, and split the input file into two parts with about 700 each + - Then I had to create a few new temporary collections on DSpace Test that had been created on CGSpace after our last sync + - After that the import succeeded: + +``` +$ export JAVA_OPTS='-Dfile.encoding=UTF-8 -Xmx512m' +$ dspace metadata-import -f /tmp/bioversity1.csv -e blah@blah.com +$ dspace metadata-import -f /tmp/bioversity2.csv -e blah@blah.com +``` + +- The next step is to check these items for duplicates diff --git a/docs/2019-08/index.html b/docs/2019-08/index.html index 2689aed97..0e43d655c 100644 --- a/docs/2019-08/index.html +++ b/docs/2019-08/index.html @@ -27,7 +27,7 @@ Run system updates on DSpace Test (linode19) and reboot it - + @@ -59,9 +59,9 @@ Run system updates on DSpace Test (linode19) and reboot it "@type": "BlogPosting", "headline": "August, 2019", "url": "https:\/\/alanorth.github.io\/cgspace-notes\/2019-08\/", - "wordCount": "1230", + "wordCount": "1409", "datePublished": "2019-08-03T12:39:51\x2b03:00", - "dateModified": "2019-08-13T15:33:29\x2b03:00", + "dateModified": "2019-08-13T16:54:35\x2b03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -337,6 +337,41 @@ $ ./generate-thumbnails.py -i /tmp/user-upload2.csv -w --url-field-name url -d | + +
  • Testing an import of 1,429 Bioversity items (metadata only) on my local development machine and got an error with Java memory after about 1,000 items:

    + +
    $ ~/dspace/bin/dspace metadata-import -f /tmp/bioversity.csv -e blah@blah.com
    +...
    +java.lang.OutOfMemoryError: GC overhead limit exceeded
    +
  • + +
  • I increased the heap size to 1536m and tried again:

    + +
    $ export JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx1536m"
    +$ ~/dspace/bin/dspace metadata-import -f /tmp/bioversity.csv -e blah@blah.com
    +
  • + +
  • This time it succeeded, and using VisualVM I noticed that the import process used a maximum of 620MB of RAM

  • + + +

    2019-08-14

    + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index f07304e98..55025312f 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,30 +4,30 @@ https://alanorth.github.io/cgspace-notes/2019-08/ - 2019-08-13T15:33:29+03:00 + 2019-08-13T16:54:35+03:00 https://alanorth.github.io/cgspace-notes/ - 2019-08-13T15:33:29+03:00 + 2019-08-13T16:54:35+03:00 0 https://alanorth.github.io/cgspace-notes/tags/notes/ - 2019-08-13T15:33:29+03:00 + 2019-08-13T16:54:35+03:00 0 https://alanorth.github.io/cgspace-notes/posts/ - 2019-08-13T15:33:29+03:00 + 2019-08-13T16:54:35+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2019-08-13T15:33:29+03:00 + 2019-08-13T16:54:35+03:00 0