From f8550d509e750e310dccea15f92911d16f0c6144 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 19 Sep 2017 12:53:00 +0300 Subject: [PATCH] Update notes for 2017-09-19 --- content/cgiar-library-migration.md | 143 +++++++++++----------- content/post/2017-09.md | 16 +++ public/2017-09/index.html | 20 ++- public/categories/notes/index.xml | 2 +- public/cgiar-library-migration/index.html | 111 ++++++++--------- public/index.xml | 2 +- public/sitemap.xml | 6 +- 7 files changed, 161 insertions(+), 139 deletions(-) diff --git a/content/cgiar-library-migration.md b/content/cgiar-library-migration.md index 7aa4955d2..88f287c79 100644 --- a/content/cgiar-library-migration.md +++ b/content/cgiar-library-migration.md @@ -6,7 +6,7 @@ categories = ["Notes"] slug = "cgiar-library-migration" +++ -_Temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in `config.toml`_ +_Note: I'm temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in `config.toml`_ Rough notes for importing the CGIAR Library content. It was decided that this content would go to a new top-level community called _CGIAR System Organization_. @@ -19,15 +19,14 @@ Things that need to happen before the migration: - Set up nginx redirects for URLs like: - [x] https://library.cgiar.org/bitstream/handle/10947/2699/CGIAR_Branding_Guidelines_and_Toolkit.pdf - [x] https://library.cgiar.org/handle/10947/4258 -- [ ] Merge [#339](https://github.com/ilri/DSpace/pull/339) to `5_x-prod` branch and rebuild DSpace +- [x] Merge [#339](https://github.com/ilri/DSpace/pull/339) to `5_x-prod` branch and rebuild DSpace - [x] Increase `max_connections` in `/etc/postgresql/9.5/main/postgresql.conf` by ~10 - `SELECT * FROM pg_stat_activity;` seems to show ~6 extra connections used by the command line tools during import - [x] Temporarily disable nightly `index-discovery` cron job because the import process will be taking place during some of this time and I don't want them to be competing to update the Solr index -## Migration -Process for the actual migration: +## Migration Process -- Export all top-level communities and collections from DSpace Test: +**Export all top-level communities and collections from DSpace Test:** ``` $ export PATH=$PATH:/home/dspacetest.cgiar.org/bin @@ -45,106 +44,106 @@ $ dspace packager -d -a -t AIP -e aorth@mjanja.ch -i 10568/93760 10568-93760/105 $ dspace packager -d -a -t AIP -e aorth@mjanja.ch -i 10947/1 10947-1/10947-1.zip ``` -- Import to CGSpace (also see [notes from 2017-05-10](http://alanorth.github.io/cgspace-notes/2017-05/#2017-05-10)) - - [x] Copy all exports from DSpace Test - - [x] Add ingestion overrides to `dspace.cfg` before import: +**Import to CGSpace (also see [notes from 2017-05-10](http://alanorth.github.io/cgspace-notes/2017-05/#2017-05-10)):** - ``` - mets.dspaceAIP.ingest.crosswalk.METSRIGHTS = NIL - mets.dspaceAIP.ingest.crosswalk.DSPACE-ROLES = NIL - ``` +- [x] Copy all exports from DSpace Test +- [x] Add ingestion overrides to `dspace.cfg` before import: - - [x] Import communities and collections, paying attention to options to skip missing parents and ignore handles: +``` +mets.dspaceAIP.ingest.crosswalk.METSRIGHTS = NIL +mets.dspaceAIP.ingest.crosswalk.DSPACE-ROLES = NIL +``` - ``` - $ export JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx3072m -XX:-UseGCOverheadLimit -XX:+TieredCompilation -XX:TieredStopAtLevel=1" - $ export PATH=$PATH:/home/cgspace.cgiar.org/bin - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2515/10947-2515.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2516/10947-2516.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2517/10947-2517.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2518/10947-2518.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2519/10947-2519.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2708/10947-2708.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2526/10947-2526.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2871/10947-2871.zip - $ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-4467/10947-4467.zip - $ dspace packager -s -u -t AIP -o ignoreHandle=false -e aorth@mjanja.ch -p 10568/83389 10947-2527/10947-2527.zip - $ for item in 10947-2527/ITEM@10947-*; do dspace packager -r -f -u -t AIP -e aorth@mjanja.ch $item; done - $ dspace packager -s -t AIP -o ignoreHandle=false -e aorth@mjanja.ch -p 10568/83389 10947-1/10947-1.zip - $ for collection in 10947-1/COLLECTION@10947-*; do dspace packager -s -o ignoreHandle=false -t AIP -e aorth@mjanja.ch -p 10947/1 $collection; done - $ for item in 10947-1/ITEM@10947-*; do dspace packager -r -f -u -t AIP -e aorth@mjanja.ch $item; done - ``` +- [x] Import communities and collections, paying attention to options to skip missing parents and ignore handles: - - This submits AIP hierarchies recursively (-r) and suppresses errors when an item's parent collection hasn't been created yet—for example, if the item is mapped - - The large historic archive (10947/1) is created in several steps because it requires a lot of memory and often crashes -- Create new subcommunities and collections for content we reorganized into new hierarchies from the original: - - [x] Create _CGIAR System Management Board_ sub-community: 10568/83536 - - [x] Content from _CGIAR System Management Board documents_ collection (10947/4561) goes here - - Import collection hierarchy first and then the items: +``` +$ export JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx3072m -XX:-UseGCOverheadLimit -XX:+TieredCompilation -XX:TieredStopAtLevel=1" +$ export PATH=$PATH:/home/cgspace.cgiar.org/bin +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2515/10947-2515.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2516/10947-2516.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2517/10947-2517.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2518/10947-2518.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2519/10947-2519.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2708/10947-2708.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2526/10947-2526.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-2871/10947-2871.zip +$ dspace packager -r -u -a -t AIP -o skipIfParentMissing=true -e aorth@mjanja.ch -p 10568/83389 10947-4467/10947-4467.zip +$ dspace packager -s -u -t AIP -o ignoreHandle=false -e aorth@mjanja.ch -p 10568/83389 10947-2527/10947-2527.zip +$ for item in 10947-2527/ITEM@10947-*; do dspace packager -r -f -u -t AIP -e aorth@mjanja.ch $item; done +$ dspace packager -s -t AIP -o ignoreHandle=false -e aorth@mjanja.ch -p 10568/83389 10947-1/10947-1.zip +$ for collection in 10947-1/COLLECTION@10947-*; do dspace packager -s -o ignoreHandle=false -t AIP -e aorth@mjanja.ch -p 10947/1 $collection; done +$ for item in 10947-1/ITEM@10947-*; do dspace packager -r -f -u -t AIP -e aorth@mjanja.ch $item; done +``` - ``` - $ dspace packager -r -t AIP -o ignoreHandle=false -e aorth@mjanja.ch -p 10568/83536 10568-93760/COLLECTION@10947-4651.zip - $ for item in 10568-93760/ITEM@10947-465*; do dspace packager -r -f -u -t AIP -e aorth@mjanja.ch $item; done - ``` +This submits AIP hierarchies recursively (-r) and suppresses errors when an item's parent collection hasn't been created yet—for example, if the item is mapped. The large historic archive (10947/1) is created in several steps because it requires a lot of memory and often crashes. - - [x] Create _CGIAR System Management Office_ sub-community: 10568/83537 - - [x] Create _CGIAR System Management Office documents_ collection: 10568/83538 - - Import items to collection individually in replace mode (-r) while explicitly preserving handles and ignoring parents: +**Create new subcommunities and collections for content we reorganized into new hierarchies from the original:** - ``` - $ for item in 10568-93759/ITEM@10947-46*; do dspace packager -r -t AIP -o ignoreHandle=false -o ignoreParent=true -e aorth@mjanja.ch -p 10568/83538 $item; done - ``` +- [x] Create _CGIAR System Management Board_ sub-community: 10568/83536 + - [x] Content from _CGIAR System Management Board documents_ collection (10947/4561) goes here + - Import collection hierarchy first and then the items: - - Get the handles for the last few items from CGIAR Library that were created since we did the migration to DSpace Test in May: +``` +$ dspace packager -r -t AIP -o ignoreHandle=false -e aorth@mjanja.ch -p 10568/83536 10568-93760/COLLECTION@10947-4651.zip +$ for item in 10568-93760/ITEM@10947-465*; do dspace packager -r -f -u -t AIP -e aorth@mjanja.ch $item; done +``` - ``` - dspace=# select handle from item, handle where handle.resource_id = item.item_id AND item.item_id in (select item_id from metadatavalue where metadata_field_id=11 and date(text_value) > '2017-05-01T00:00:00Z'); - ``` +- [x] Create _CGIAR System Management Office_ sub-community: 10568/83537 + - [x] Create _CGIAR System Management Office documents_ collection: 10568/83538 + - Import items to collection individually in replace mode (-r) while explicitly preserving handles and ignoring parents: - - Export them from the CGIAR Library: +``` +$ for item in 10568-93759/ITEM@10947-46*; do dspace packager -r -t AIP -o ignoreHandle=false -o ignoreParent=true -e aorth@mjanja.ch -p 10568/83538 $item; done +``` - ``` - # for handle in 10947/4658 10947/4659 10947/4660 10947/4661 10947/4665 10947/4664 10947/4666 10947/4669; do /usr/local/dspace/bin/dspace packager -d -a -t AIP -e m.marus@cgiar.org -i $handle ${handle}.zip; done - ``` +**Get the handles for the last few items from CGIAR Library that were created since we did the migration to DSpace Test in May:** - - Import on CGSpace: +``` +dspace=# select handle from item, handle where handle.resource_id = item.item_id AND item.item_id in (select item_id from metadatavalue where metadata_field_id=11 and date(text_value) > '2017-05-01T00:00:00Z'); +``` - ``` - $ for item in 10947-latest/*.zip; do dspace packager -r -u -t AIP -e aorth@mjanja.ch $item; done - ``` +- Export them from the CGIAR Library: - - [ ] Shut down Tomcat and run `update-sequences.sql` as the system's `postgres` user +``` +# for handle in 10947/4658 10947/4659 10947/4660 10947/4661 10947/4665 10947/4664 10947/4666 10947/4669; do /usr/local/dspace/bin/dspace packager -d -a -t AIP -e m.marus@cgiar.org -i $handle ${handle}.zip; done +``` + +- Import on CGSpace: + +``` +$ for item in 10947-latest/*.zip; do dspace packager -r -u -t AIP -e aorth@mjanja.ch $item; done +``` ## Post Migration +- [ ] Shut down Tomcat and run `update-sequences.sql` as the system's `postgres` user - [x] Remove ingestion overrides from `dspace.cfg` -- [ ] Reset PostgreSQL `max_connections` to 183 +- [x] Reset PostgreSQL `max_connections` to 183 - [x] Enable nightly `index-discovery` cron job - HTTPS certificates: - [x] Install current certificates from their Tomcat keystore - ``` - $ keytool -list -keystore tomcat.keystore - $ keytool -importkeystore -srckeystore tomcat.keystore -destkeystore library.cgiar.org.p12 -deststoretype PKCS12 -srcalias tomcat - $ openssl pkcs12 -in library.cgiar.org.p12 -nokeys -out library.cgiar.org.crt.pem - $ openssl pkcs12 -in library.cgiar.org.p12 -nodes -nocerts -out library.cgiar.org.key.pem - $ wget https://certs.godaddy.com/repository/gdroot-g2.crt https://certs.godaddy.com/repository/gdig2.crt.pem - $ cat library.cgiar.org.crt.pem gdig2.crt.pem > library.cgiar.org-chained.pem - ``` +``` +$ keytool -list -keystore tomcat.keystore +$ keytool -importkeystore -srckeystore tomcat.keystore -destkeystore library.cgiar.org.p12 -deststoretype PKCS12 -srcalias tomcat +$ openssl pkcs12 -in library.cgiar.org.p12 -nokeys -out library.cgiar.org.crt.pem +$ openssl pkcs12 -in library.cgiar.org.p12 -nodes -nocerts -out library.cgiar.org.key.pem +$ wget https://certs.godaddy.com/repository/gdroot-g2.crt https://certs.godaddy.com/repository/gdig2.crt.pem +$ cat library.cgiar.org.crt.pem gdig2.crt.pem > library.cgiar.org-chained.pem +``` - [ ] Update DNS records: - CNAME: cgspace.cgiar.org - [ ] Re-deploy DSpace from freshly built `5_x-prod` branch +- [ ] Merge `cgiar-library` branch to `master` and re-run ansible nginx templates - [ ] Run system updates and reboot server -- [ ] Switch to Let's Encrypt HTTPS certificates (after DNS is updated and server isn't busy) +- [ ] Switch to Let's Encrypt HTTPS certificates (after DNS is updated and server isn't busy): ``` $ sudo systemctl stop tomcat7 $ ./letsencrypt-auto certonly --standalone -d library.cgiar.org ``` -- [ ] Merge `cgiar-library` branch to `master` and re-run ansible nginx templates - ## Troubleshooting ### Foreign Key Error in `dspace cleanup` diff --git a/content/post/2017-09.md b/content/post/2017-09.md index 1fca1fd6f..ee43d5668 100644 --- a/content/post/2017-09.md +++ b/content/post/2017-09.md @@ -398,3 +398,19 @@ $ for item in 10568-93759/ITEM@10947-46*; do ~/dspace/bin/dspace packager -r -t ![After DSpace 5.5](/cgspace-notes/2017/09/10947-2919-after.jpg) - Moved the CGIAR Library Migration notes to a page — [cgiar-library-migration]({{< relref "cgiar-library-migration.md" >}}) — as there seems to be a bug with post slugs defined in frontmatter when you have a permalink scheme defined in `config.toml` (happens currently in Hugo 0.27.1 at least) + +## 2017-09-19 + +- Nightly Solr indexing is working again, and it appears to be pretty quick actually: + +``` +2017-09-19 00:00:14,953 INFO com.atmire.dspace.discovery.AtmireSolrService @ Processing (0 of 65808): 17607 +... +2017-09-19 00:04:18,017 INFO com.atmire.dspace.discovery.AtmireSolrService @ Processing (65807 of 65808): 83753 +``` + +- Sisay asked if he could import 50 items for IITA that have already been checked by Bosede and Bizuwork +- I had a look at the collection and noticed a bunch of issues with item types and donors, so I asked him to fix those and import it to DSpace Test again first +- Abenet wants to be able to filter by ISI Journal in advanced search on queries like this: https://cgspace.cgiar.org/discover?filtertype_0=dateIssued&filtertype_1=dateIssued&filter_relational_operator_1=equals&filter_relational_operator_0=equals&filter_1=%5B2010+TO+2017%5D&filter_0=2017&filtertype=type&filter_relational_operator=equals&filter=Journal+Article +- I opened an issue to track this ([#340](https://github.com/ilri/DSpace/issues/340)) and will test it on DSpace Test soon + diff --git a/public/2017-09/index.html b/public/2017-09/index.html index c39c3bfc2..06b81f087 100644 --- a/public/2017-09/index.html +++ b/public/2017-09/index.html @@ -61,7 +61,7 @@ Ask Sisay to clean up the WLE approvers a bit, as Marianne’s user account "@type": "BlogPosting", "headline": "September, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-09/", - "wordCount": "2764", + "wordCount": "2886", "datePublished": "2017-09-07T16:54:52+07:00", "dateModified": "2017-09-18T18:18:09+03:00", "author": { @@ -569,6 +569,24 @@ DELETE 207
  • Moved the CGIAR Library Migration notes to a page — cgiar-library-migration — as there seems to be a bug with post slugs defined in frontmatter when you have a permalink scheme defined in config.toml (happens currently in Hugo 0.27.1 at least)
  • +

    2017-09-19

    + + + +
    2017-09-19 00:00:14,953 INFO  com.atmire.dspace.discovery.AtmireSolrService @ Processing (0 of 65808): 17607
    +...
    +2017-09-19 00:04:18,017 INFO  com.atmire.dspace.discovery.AtmireSolrService @ Processing (65807 of 65808): 83753
    +
    + + + diff --git a/public/categories/notes/index.xml b/public/categories/notes/index.xml index dd426e55c..099a592fb 100644 --- a/public/categories/notes/index.xml +++ b/public/categories/notes/index.xml @@ -17,7 +17,7 @@ Mon, 18 Sep 2017 16:38:35 +0300 https://alanorth.github.io/cgspace-notes/cgiar-library-migration/ - Temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in config.toml + Note: I&rsquo;m temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in config.toml Rough notes for importing the CGIAR Library content. It was decided that this content would go to a new top-level community called CGIAR System Organization. Pre-migration Technical TODOs Things that need to happen before the migration: Create top-level community on CGSpace to hold the CGIAR Library content: 10568&frasl;83389 Update nginx redirects in ansible templates Update handle in DSpace XMLUI config Set up nginx redirects for URLs like: https://library. diff --git a/public/cgiar-library-migration/index.html b/public/cgiar-library-migration/index.html index 2b3b6e43d..f485a7487 100644 --- a/public/cgiar-library-migration/index.html +++ b/public/cgiar-library-migration/index.html @@ -13,7 +13,7 @@ - + @@ -37,9 +37,9 @@ "@type": "BlogPosting", "headline": "CGIAR Library Migration", "url": "https://alanorth.github.io/cgspace-notes/cgiar-library-migration/", - "wordCount": "1169", + "wordCount": "1167", "datePublished": "2017-09-18T16:38:35+03:00", - "dateModified": "2017-09-18T18:05:57+03:00", + "dateModified": "2017-09-18T21:24:27+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -108,7 +108,7 @@ -

    Temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in config.toml

    +

    Note: I’m temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in config.toml

    Rough notes for importing the CGIAR Library content. It was decided that this content would go to a new top-level community called CGIAR System Organization.

    @@ -129,7 +129,7 @@
  • -
  • +
  • -
      $ keytool -list -keystore tomcat.keystore
    -  $ keytool -importkeystore -srckeystore tomcat.keystore -destkeystore library.cgiar.org.p12 -deststoretype PKCS12 -srcalias tomcat
    -  $ openssl pkcs12 -in library.cgiar.org.p12 -nokeys -out library.cgiar.org.crt.pem
    -  $ openssl pkcs12 -in library.cgiar.org.p12 -nodes -nocerts -out library.cgiar.org.key.pem
    -  $ wget https://certs.godaddy.com/repository/gdroot-g2.crt https://certs.godaddy.com/repository/gdig2.crt.pem
    -  $ cat library.cgiar.org.crt.pem gdig2.crt.pem > library.cgiar.org-chained.pem
    +
    $ keytool -list -keystore tomcat.keystore
    +$ keytool -importkeystore -srckeystore tomcat.keystore -destkeystore library.cgiar.org.p12 -deststoretype PKCS12 -srcalias tomcat
    +$ openssl pkcs12 -in library.cgiar.org.p12 -nokeys -out library.cgiar.org.crt.pem
    +$ openssl pkcs12 -in library.cgiar.org.p12 -nodes -nocerts -out library.cgiar.org.key.pem
    +$ wget https://certs.godaddy.com/repository/gdroot-g2.crt https://certs.godaddy.com/repository/gdig2.crt.pem
    +$ cat library.cgiar.org.crt.pem gdig2.crt.pem > library.cgiar.org-chained.pem
     
      @@ -275,18 +267,15 @@ $ for item in 10568-93760/ITEM@10947-465*; do dspace packager -r -f -u -t AIP -e
    • CNAME: cgspace.cgiar.org
  • +
  • -
  • +
  • $ sudo systemctl stop tomcat7
     $ ./letsencrypt-auto certonly --standalone -d library.cgiar.org
     
    -
      -
    • -
    -

    Troubleshooting

    Foreign Key Error in dspace cleanup

    diff --git a/public/index.xml b/public/index.xml index 0df43c59b..1142a58fd 100644 --- a/public/index.xml +++ b/public/index.xml @@ -17,7 +17,7 @@ Mon, 18 Sep 2017 16:38:35 +0300 https://alanorth.github.io/cgspace-notes/cgiar-library-migration/ - Temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in config.toml + Note: I&rsquo;m temporarily making this a page because it seems Hugo (currently 0.27.1) cannot use a custom slug for a post when there is a permalink defined in config.toml Rough notes for importing the CGIAR Library content. It was decided that this content would go to a new top-level community called CGIAR System Organization. Pre-migration Technical TODOs Things that need to happen before the migration: Create top-level community on CGSpace to hold the CGIAR Library content: 10568&frasl;83389 Update nginx redirects in ansible templates Update handle in DSpace XMLUI config Set up nginx redirects for URLs like: https://library. diff --git a/public/sitemap.xml b/public/sitemap.xml index 33d7393de..9170c439b 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/cgiar-library-migration/ - 2017-09-18T18:05:57+03:00 + 2017-09-18T21:24:27+03:00 @@ -124,7 +124,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-09-18T18:05:57+03:00 + 2017-09-18T21:24:27+03:00 0 @@ -141,7 +141,7 @@ https://alanorth.github.io/cgspace-notes/categories/notes/ - 2017-09-18T18:05:57+03:00 + 2017-09-18T21:24:27+03:00 0