From e3e602881eb0d9690adbce0e71a0db7cbeccdad6 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 1 Aug 2017 11:57:37 +0300 Subject: [PATCH] Add notes for 2017-08-01 --- content/post/2017-08.md | 20 +++ public/2015-11/index.html | 8 +- public/2015-12/index.html | 8 +- public/2016-01/index.html | 8 +- public/2016-02/index.html | 8 +- public/2016-03/index.html | 8 +- public/2016-04/index.html | 8 +- public/2016-05/index.html | 8 +- public/2016-06/index.html | 8 +- public/2016-07/index.html | 8 +- public/2016-08/index.html | 8 +- public/2016-09/index.html | 8 +- public/2016-10/index.html | 8 +- public/2016-11/index.html | 8 +- public/2016-12/index.html | 8 +- public/2017-01/index.html | 8 +- public/2017-02/index.html | 8 +- public/2017-03/index.html | 8 +- public/2017-04/index.html | 8 +- public/2017-05/index.html | 8 +- public/2017-06/index.html | 8 +- public/2017-07/index.html | 8 +- public/2017-08/index.html | 233 ++++++++++++++++++++++++++++ public/index.html | 77 ++++----- public/index.xml | 28 +++- public/page/2/index.html | 73 ++++----- public/page/3/index.html | 39 ++++- public/post/index.html | 77 ++++----- public/post/index.xml | 28 +++- public/post/page/2/index.html | 73 ++++----- public/post/page/3/index.html | 39 ++++- public/robots.txt | 1 + public/sitemap.xml | 55 ++++--- public/tags/index.xml | 4 +- public/tags/notes/index.html | 77 ++++----- public/tags/notes/index.xml | 28 +++- public/tags/notes/page/2/index.html | 73 ++++----- public/tags/notes/page/3/index.html | 39 ++++- 38 files changed, 787 insertions(+), 345 deletions(-) create mode 100644 content/post/2017-08.md create mode 100644 public/2017-08/index.html diff --git a/content/post/2017-08.md b/content/post/2017-08.md new file mode 100644 index 000000000..f569ef86a --- /dev/null +++ b/content/post/2017-08.md @@ -0,0 +1,20 @@ ++++ +date = "2017-08-01T11:51:52+03:00" +author = "Alan Orth" +title = "August, 2017" +tags = ["Notes"] + ++++ +## 2017-08-01 + +- Linode sent an alert that CGSpace (linode18) was using 350% CPU for the past two hours +- I looked in the Activity pane of the Admin Control Panel and it seems that Google, Baidu, Yahoo, and Bing are all crawling with massive numbers of bots concurrently (~100 total, mostly Baidu and Google) +- The good thing is that, according to `dspace.log.2017-08-01`, they are all using the same Tomcat session +- This means our Tomcat Crawler Session Valve is working +- But many of the bots are browsing dynamic URLs like: + - /handle/10568/3353/discover + - /handle/10568/16510/browse +- The `robots.txt` only blocks the top-level `/discover` and `/browse` URLs... we will need to find a way to forbid them from accessing these! +- Relevant issue from DSpace Jira (semi resolved in DSpace 6.0): https://jira.duraspace.org/browse/DS-2962 + + diff --git a/public/2015-11/index.html b/public/2015-11/index.html index bbfedc586..3807393ac 100644 --- a/public/2015-11/index.html +++ b/public/2015-11/index.html @@ -25,7 +25,7 @@ $ psql -c 'SELECT * from pg_stat_activity;' | grep idle | grep -c cgspac - + @@ -71,7 +71,7 @@ $ psql -c 'SELECT * from pg_stat_activity;' | grep idle | grep -c cgspac "url": "https://alanorth.github.io/cgspace-notes/2015-11/", "wordCount": "798", "datePublished": "2015-11-23T17:00:57+03:00", - "dateModified": "2016-09-28T17:02:30+03:00", + "dateModified": "2015-11-23T17:00:57+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -300,6 +300,8 @@ db.statementpool = true
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -308,8 +310,6 @@ db.statementpool = true
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2015-12/index.html b/public/2015-12/index.html index edd18bfb6..f0c34062b 100644 --- a/public/2015-12/index.html +++ b/public/2015-12/index.html @@ -26,7 +26,7 @@ Replace lzop with xz in log compression cron jobs on DSpace Test—it uses less - + @@ -73,7 +73,7 @@ Replace lzop with xz in log compression cron jobs on DSpace Test—it uses less "url": "https://alanorth.github.io/cgspace-notes/2015-12/", "wordCount": "753", "datePublished": "2015-12-02T13:18:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2015-12-02T13:18:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -317,6 +317,8 @@ $ curl -o /dev/null -s -w %{time_total}\\n https://cgspace.cgiar.org/rest/handle
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -325,8 +327,6 @@ $ curl -o /dev/null -s -w %{time_total}\\n https://cgspace.cgiar.org/rest/handle
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-01/index.html b/public/2016-01/index.html index ffdab08f9..473bfed55 100644 --- a/public/2016-01/index.html +++ b/public/2016-01/index.html @@ -21,7 +21,7 @@ Update GitHub wiki for documentation of maintenance tasks. - + @@ -63,7 +63,7 @@ Update GitHub wiki for documentation of maintenance tasks. "url": "https://alanorth.github.io/cgspace-notes/2016-01/", "wordCount": "466", "datePublished": "2016-01-13T13:18:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-01-13T13:18:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -232,6 +232,8 @@ $ find SimpleArchiveForBio/ -iname “*.pdf” -exec basename {} \; | so
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -240,8 +242,6 @@ $ find SimpleArchiveForBio/ -iname “*.pdf” -exec basename {} \; | so
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-02/index.html b/public/2016-02/index.html index 4c30c48e2..da2e33386 100644 --- a/public/2016-02/index.html +++ b/public/2016-02/index.html @@ -28,7 +28,7 @@ Also, lots of things like “COTE D`LVOIRE” and “COTE D IVOIRE&r - + @@ -77,7 +77,7 @@ Also, lots of things like “COTE D`LVOIRE” and “COTE D IVOIRE&r "url": "https://alanorth.github.io/cgspace-notes/2016-02/", "wordCount": "1657", "datePublished": "2016-02-05T13:18:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-02-05T13:18:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -485,6 +485,8 @@ Bitstream: tést señora alimentación.pdf
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -493,8 +495,6 @@ Bitstream: tést señora alimentación.pdf
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-03/index.html b/public/2016-03/index.html index c0f37fb0e..b92e5fcf6 100644 --- a/public/2016-03/index.html +++ b/public/2016-03/index.html @@ -21,7 +21,7 @@ Reinstall my local (Mac OS X) DSpace stack with Tomcat 7, PostgreSQL 9.3, and Ja - + @@ -63,7 +63,7 @@ Reinstall my local (Mac OS X) DSpace stack with Tomcat 7, PostgreSQL 9.3, and Ja "url": "https://alanorth.github.io/cgspace-notes/2016-03/", "wordCount": "1581", "datePublished": "2016-03-02T16:50:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-03-02T16:50:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -385,6 +385,8 @@ Reinstall my local (Mac OS X) DSpace stack with Tomcat 7, PostgreSQL 9.3, and Ja
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -393,8 +395,6 @@ Reinstall my local (Mac OS X) DSpace stack with Tomcat 7, PostgreSQL 9.3, and Ja
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-04/index.html b/public/2016-04/index.html index 47e84309e..5bf1dceb3 100644 --- a/public/2016-04/index.html +++ b/public/2016-04/index.html @@ -23,7 +23,7 @@ Also, I noticed the checker log has some errors we should pay attention to: - + @@ -67,7 +67,7 @@ Also, I noticed the checker log has some errors we should pay attention to: "url": "https://alanorth.github.io/cgspace-notes/2016-04/", "wordCount": "2006", "datePublished": "2016-04-04T11:06:00+03:00", - "dateModified": "2016-09-28T17:02:30+03:00", + "dateModified": "2016-04-04T11:06:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -602,6 +602,8 @@ dspace.log.2016-04-27:7271
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -610,8 +612,6 @@ dspace.log.2016-04-27:7271
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-05/index.html b/public/2016-05/index.html index 57901d36e..c4b1676d9 100644 --- a/public/2016-05/index.html +++ b/public/2016-05/index.html @@ -25,7 +25,7 @@ There are 3,000 IPs accessing the REST API in a 24-hour period! - + @@ -71,7 +71,7 @@ There are 3,000 IPs accessing the REST API in a 24-hour period! "url": "https://alanorth.github.io/cgspace-notes/2016-05/", "wordCount": "1349", "datePublished": "2016-05-01T23:06:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-05-01T23:06:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -452,6 +452,8 @@ sys 0m20.540s
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -460,8 +462,6 @@ sys 0m20.540s
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-06/index.html b/public/2016-06/index.html index 62b4c881c..dff8863c1 100644 --- a/public/2016-06/index.html +++ b/public/2016-06/index.html @@ -24,7 +24,7 @@ Working on second phase of metadata migration, looks like this will work for mov - + @@ -69,7 +69,7 @@ Working on second phase of metadata migration, looks like this will work for mov "url": "https://alanorth.github.io/cgspace-notes/2016-06/", "wordCount": "1549", "datePublished": "2016-06-01T10:53:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-06-01T10:53:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -497,6 +497,8 @@ $ ./delete-metadata-values.py -f dc.contributor.corporate -i Corporate-Authors-D
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -505,8 +507,6 @@ $ ./delete-metadata-values.py -f dc.contributor.corporate -i Corporate-Authors-D
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-07/index.html b/public/2016-07/index.html index 726a9232e..4796aea88 100644 --- a/public/2016-07/index.html +++ b/public/2016-07/index.html @@ -32,7 +32,7 @@ In this case the select query was showing 95 results before the update - + @@ -85,7 +85,7 @@ In this case the select query was showing 95 results before the update "url": "https://alanorth.github.io/cgspace-notes/2016-07/", "wordCount": "866", "datePublished": "2016-07-01T10:53:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-07-01T10:53:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -415,6 +415,8 @@ discovery.index.authority.ignore-variants=true
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -423,8 +425,6 @@ discovery.index.authority.ignore-variants=true
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-08/index.html b/public/2016-08/index.html index 2ad6b48d2..e6945c028 100644 --- a/public/2016-08/index.html +++ b/public/2016-08/index.html @@ -29,7 +29,7 @@ $ git rebase -i dspace-5.5 - + @@ -79,7 +79,7 @@ $ git rebase -i dspace-5.5 "url": "https://alanorth.github.io/cgspace-notes/2016-08/", "wordCount": "1514", "datePublished": "2016-08-01T15:53:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-08-01T15:53:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -488,6 +488,8 @@ $ JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx512m" /home/cgspace.cgiar.org/b
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -496,8 +498,6 @@ $ JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx512m" /home/cgspace.cgiar.org/b
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-09/index.html b/public/2016-09/index.html index 48a161a50..463a497db 100644 --- a/public/2016-09/index.html +++ b/public/2016-09/index.html @@ -25,7 +25,7 @@ $ ldapsearch -x -H ldaps://svcgroot2.cgiarad.org:3269/ -b "dc=cgiarad,dc=or - + @@ -71,7 +71,7 @@ $ ldapsearch -x -H ldaps://svcgroot2.cgiarad.org:3269/ -b "dc=cgiarad,dc=or "url": "https://alanorth.github.io/cgspace-notes/2016-09/", "wordCount": "3298", "datePublished": "2016-09-01T15:53:00+03:00", - "dateModified": "2017-01-09T16:18:07+02:00", + "dateModified": "2016-09-01T15:53:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -788,6 +788,8 @@ $ ./delete-metadata-values.py -i ilrisubjects-delete-13.csv -f cg.subject.ilri -
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -796,8 +798,6 @@ $ ./delete-metadata-values.py -i ilrisubjects-delete-13.csv -f cg.subject.ilri -
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-10/index.html b/public/2016-10/index.html index 848851b9c..667de7713 100644 --- a/public/2016-10/index.html +++ b/public/2016-10/index.html @@ -29,7 +29,7 @@ I exported a random item’s metadata as CSV, deleted all columns except id - + @@ -79,7 +79,7 @@ I exported a random item’s metadata as CSV, deleted all columns except id "url": "https://alanorth.github.io/cgspace-notes/2016-10/", "wordCount": "1828", "datePublished": "2016-10-03T15:53:00+03:00", - "dateModified": "2017-01-10T16:21:47+02:00", + "dateModified": "2016-10-03T15:53:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -474,6 +474,8 @@ dspace=# update metadatavalue set text_value = regexp_replace(text_value, 'http:
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -482,8 +484,6 @@ dspace=# update metadatavalue set text_value = regexp_replace(text_value, 'http:
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-11/index.html b/public/2016-11/index.html index ace01deb8..8e229abd6 100644 --- a/public/2016-11/index.html +++ b/public/2016-11/index.html @@ -21,7 +21,7 @@ Add dc.type to the output options for Atmire’s Listings and Reports module - + @@ -63,7 +63,7 @@ Add dc.type to the output options for Atmire’s Listings and Reports module "url": "https://alanorth.github.io/cgspace-notes/2016-11/", "wordCount": "2825", "datePublished": "2016-11-01T09:21:00+03:00", - "dateModified": "2017-01-10T16:21:47+02:00", + "dateModified": "2016-11-01T09:21:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -690,6 +690,8 @@ org.dspace.discovery.SearchServiceException: Error executing query
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -698,8 +700,6 @@ org.dspace.discovery.SearchServiceException: Error executing query
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2016-12/index.html b/public/2016-12/index.html index a335e2043..f35c43d03 100644 --- a/public/2016-12/index.html +++ b/public/2016-12/index.html @@ -33,7 +33,7 @@ Another worrying error from dspace.log is: - + @@ -87,7 +87,7 @@ Another worrying error from dspace.log is: "url": "https://alanorth.github.io/cgspace-notes/2016-12/", "wordCount": "4078", "datePublished": "2016-12-02T10:43:00+03:00", - "dateModified": "2017-01-10T16:21:47+02:00", + "dateModified": "2016-12-02T10:43:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -926,6 +926,8 @@ $ exit
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -934,8 +936,6 @@ $ exit
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-01/index.html b/public/2017-01/index.html index 078330a8b..17da54221 100644 --- a/public/2017-01/index.html +++ b/public/2017-01/index.html @@ -21,7 +21,7 @@ I asked on the dspace-tech mailing list because it seems to be broken, and actua - + @@ -63,7 +63,7 @@ I asked on the dspace-tech mailing list because it seems to be broken, and actua "url": "https://alanorth.github.io/cgspace-notes/2017-01/", "wordCount": "1594", "datePublished": "2017-01-02T10:43:00+03:00", - "dateModified": "2017-01-29T13:18:32+02:00", + "dateModified": "2017-01-02T10:43:00+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -471,6 +471,8 @@ $ gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -479,8 +481,6 @@ $ gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-02/index.html b/public/2017-02/index.html index 1d790318a..e3433514a 100644 --- a/public/2017-02/index.html +++ b/public/2017-02/index.html @@ -35,7 +35,7 @@ Looks like we’ll be using cg.identifier.ccafsprojectpii as the field name - + @@ -91,7 +91,7 @@ Looks like we’ll be using cg.identifier.ccafsprojectpii as the field name "url": "https://alanorth.github.io/cgspace-notes/2017-02/", "wordCount": "2028", "datePublished": "2017-02-07T07:04:52-08:00", - "dateModified": "2017-02-28T22:58:29+02:00", + "dateModified": "2017-02-07T07:04:52-08:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -543,6 +543,8 @@ COPY 1968
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -551,8 +553,6 @@ COPY 1968
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-03/index.html b/public/2017-03/index.html index c91623471..f5f2c6e0d 100644 --- a/public/2017-03/index.html +++ b/public/2017-03/index.html @@ -37,7 +37,7 @@ $ identify ~/Desktop/alc_contrastes_desafios.jpg - + @@ -95,7 +95,7 @@ $ identify ~/Desktop/alc_contrastes_desafios.jpg "url": "https://alanorth.github.io/cgspace-notes/2017-03/", "wordCount": "1538", "datePublished": "2017-03-01T17:08:52+02:00", - "dateModified": "2017-03-31T05:36:10+03:00", + "dateModified": "2017-03-01T17:08:52+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -457,6 +457,8 @@ $ ./delete-metadata-values.py -i Investors-Delete-121.csv -f dc.description.spon
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -465,8 +467,6 @@ $ ./delete-metadata-values.py -i Investors-Delete-121.csv -f dc.description.spon
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-04/index.html b/public/2017-04/index.html index fe8dd05bd..52581a1e0 100644 --- a/public/2017-04/index.html +++ b/public/2017-04/index.html @@ -30,7 +30,7 @@ $ [dspace]/bin/dspace filter-media -f -i 10568/16498 -p "ImageMagick PDF Th - + @@ -81,7 +81,7 @@ $ [dspace]/bin/dspace filter-media -f -i 10568/16498 -p "ImageMagick PDF Th "url": "https://alanorth.github.io/cgspace-notes/2017-04/", "wordCount": "2917", "datePublished": "2017-04-02T17:08:52+02:00", - "dateModified": "2017-04-26T13:35:10+03:00", + "dateModified": "2017-04-02T17:08:52+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -736,6 +736,8 @@ $ gem install compass -v 1.0.3
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -744,8 +746,6 @@ $ gem install compass -v 1.0.3
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-05/index.html b/public/2017-05/index.html index ae7bae08b..2c0e58f67 100644 --- a/public/2017-05/index.html +++ b/public/2017-05/index.html @@ -13,7 +13,7 @@ - + @@ -47,7 +47,7 @@ "url": "https://alanorth.github.io/cgspace-notes/2017-05/", "wordCount": "2412", "datePublished": "2017-05-01T16:21:52+02:00", - "dateModified": "2017-05-29T13:15:22+03:00", + "dateModified": "2017-05-01T16:21:52+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -491,6 +491,8 @@ UPDATE 187
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -499,8 +501,6 @@ UPDATE 187
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-06/index.html b/public/2017-06/index.html index 623e5bf0f..019e22eae 100644 --- a/public/2017-06/index.html +++ b/public/2017-06/index.html @@ -13,7 +13,7 @@ - + @@ -47,7 +47,7 @@ "url": "https://alanorth.github.io/cgspace-notes/2017-06/", "wordCount": "1261", "datePublished": "2017-06-01T10:14:52+03:00", - "dateModified": "2017-06-30T18:34:51+03:00", + "dateModified": "2017-06-01T10:14:52+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -308,6 +308,8 @@ $ JAVA_OPTS="-Xmx1024m -Dfile.encoding=UTF-8" [dspace]/bin/dspace impo
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -316,8 +318,6 @@ $ JAVA_OPTS="-Xmx1024m -Dfile.encoding=UTF-8" [dspace]/bin/dspace impo
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-07/index.html b/public/2017-07/index.html index a0b590c6b..0ec36b23b 100644 --- a/public/2017-07/index.html +++ b/public/2017-07/index.html @@ -27,7 +27,7 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the - + @@ -75,7 +75,7 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the "url": "https://alanorth.github.io/cgspace-notes/2017-07/", "wordCount": "1151", "datePublished": "2017-07-01T18:03:52+03:00", - "dateModified": "2017-07-31T12:06:21+03:00", + "dateModified": "2017-07-01T18:03:52+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -343,6 +343,8 @@ delete from metadatavalue where resource_type_id=2 and metadata_field_id=235 and
    +
  1. August, 2017
  2. +
  3. July, 2017
  4. June, 2017
  5. @@ -351,8 +353,6 @@ delete from metadatavalue where resource_type_id=2 and metadata_field_id=235 and
  6. April, 2017
  7. -
  8. March, 2017
  9. -
diff --git a/public/2017-08/index.html b/public/2017-08/index.html new file mode 100644 index 000000000..ebd4fa3ea --- /dev/null +++ b/public/2017-08/index.html @@ -0,0 +1,233 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + August, 2017 | CGSpace Notes + + + + + + + + + + + + + + + + +
+
+ +
+
+ +
+
+

CGSpace Notes

+

Documenting day-to-day work on the CGSpace repository.

+
+
+ +
+
+
+ + + + +
+
+

August, 2017

+ +
+

2017-08-01

+ +
    +
  • Linode sent an alert that CGSpace (linode18) was using 350% CPU for the past two hours
  • +
  • I looked in the Activity pane of the Admin Control Panel and it seems that Google, Baidu, Yahoo, and Bing are all crawling with massive numbers of bots concurrently (~100 total, mostly Baidu and Google)
  • +
  • The good thing is that, according to dspace.log.2017-08-01, they are all using the same Tomcat session
  • +
  • This means our Tomcat Crawler Session Valve is working
  • +
  • But many of the bots are browsing dynamic URLs like: + +
      +
    • /handle/10568/3353/discover
    • +
    • /handle/10568/16510/browse
    • +
  • +
  • The robots.txt only blocks the top-level /discover and /browse URLs… we will need to find a way to forbid them from accessing these!
  • +
  • Relevant issue from DSpace Jira (semi resolved in DSpace 6.0): https://jira.duraspace.org/browse/DS-2962
  • +
+ +

+ + + + + +
+ + + +
+ + + + +
+
+ + + + + + diff --git a/public/index.html b/public/index.html index 73a97c01e..9f28bb520 100644 --- a/public/index.html +++ b/public/index.html @@ -12,7 +12,7 @@ - + @@ -37,7 +37,7 @@ "@type": "Person", "name": "Alan Orth" }, - "dateModified": "2017-07-01T18:03:52+03:00", + "dateModified": "2017-08-01T11:51:52+03:00", "keywords": "notes,", "description": "Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." } @@ -95,6 +95,41 @@ +
+
+

August, 2017

+ +
+

2017-08-01

+ + + +

+ Read more → +
+ + + + + +

July, 2017

@@ -369,40 +404,6 @@ DELETE 1 - -
-
-

October, 2016

- -
-

2016-10-03

- -
    -
  • Testing adding ORCIDs to a CSV file for a single item to see if the author orders get messed up
  • -
  • Need to test the following scenarios to see how author order is affected: - -
      -
    • ORCIDs only
    • -
    • ORCIDs plus normal authors
    • -
  • -
  • I exported a random item’s metadata as CSV, deleted all columns except id and collection, and made a new coloum called ORCID:dc.contributor.author with the following random ORCIDs from the ORCID registry:
  • -
- -
0000-0002-6115-0956||0000-0002-3812-8793||0000-0001-7462-405X
-
- -

- Read more → -
- - - - -