From bb1b29170c137d3dc9a5677a88ec4f1f0c79b645 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 22 Jul 2020 11:00:40 +0300 Subject: [PATCH] Add notes for 2020-07-22 --- content/posts/2020-07.md | 55 ++++++++++++++++++++++ docs/2020-07/index.html | 61 +++++++++++++++++++++++-- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/sitemap.xml | 10 ++-- 20 files changed, 135 insertions(+), 25 deletions(-) diff --git a/content/posts/2020-07.md b/content/posts/2020-07.md index 99118961e..c17812ba9 100644 --- a/content/posts/2020-07.md +++ b/content/posts/2020-07.md @@ -593,5 +593,60 @@ Removing unnecessary Unicode (U+200B): Agencia de Servicios a la Comercializaci - I had previously thought these were fixed by setting the `font-family` on the elements, but it doesn't appear to be working now - I filed a ticket with Atmire to ask them to use the HTML elements instead, as their code already uses those elsewhere - I don't want to go back to using the large webfonts with CSS because the SVG + JS method saves us ~140KiB and causes at least three fewer network requests +- I started processing the 2019 stats in a batch of 1 million on DSpace Test: + +``` +$ export JAVA_OPTS='-Dfile.encoding=UTF-8 -Xmx2048m' +$ chrt -b 0 dspace solr-upgrade-statistics-6x -n 1000000 -i statistics-2019 +... + *** Statistics Records with Legacy Id *** + + 6,359,966 Bistream View + 2,204,775 Item View + 139,266 Community View + 131,234 Collection View + 948,529 Community Search + 593,974 Collection Search + 1,682,818 Unexpected Type & Full Site + -------------------------------------- + 12,060,562 TOTAL +``` + +- The statistics-2019 finished processing after about 9 hours so I started the 2018 ones: + +``` +$ export JAVA_OPTS='-Dfile.encoding=UTF-8 -Xmx2048m' +$ chrt -b 0 dspace solr-upgrade-statistics-6x -n 1000000 -i statistics-2018 + *** Statistics Records with Legacy Id *** + + 3,684,394 Bistream View + 2,183,032 Item View + 131,222 Community View + 79,348 Collection View + 345,529 Collection Search + 322,223 Community Search + 874,107 Unexpected Type & Full Site + -------------------------------------- + 7,619,855 TOTAL +``` + +- Moayad finally made OpenRXV use a unique user agent: + +``` +OpenRXV harvesting bot; https://github.com/ilri/OpenRXV +``` + +- I see nearly 200,000 hits in Solr from the IP address, though, so I need to make sure those are old ones from before today + - I purged the hits for 178.62.93.141 as well as any from the old `axios/0.19.2` user agent + - I made some requests with and without the new user agent and only the ones without showed up in Solr + +## 2020-07-22 + +- Atmire merged my latest bot suggestions to the COUNTER-Robots project: + - [Add new bots](https://github.com/atmire/COUNTER-Robots/pull/36) + - [COUNTER_Robots_list.json: Escape literal dots](https://github.com/atmire/COUNTER-Robots/pull/35) + - [COUNTER_Robots_list.json: Remove anchors from okhttp](https://github.com/atmire/COUNTER-Robots/pull/33) +- I will update the agent patterns on the CGSpace `5_x-prod` and 6.x branches +- Make some changes to the Bootstrap CSS and HTML configuration to improve readability and style on the CG Core v2 metadata reference guide and send a pull request to Marie ([#29](https://github.com/AgriculturalSemantics/cg-core/pull/29)) diff --git a/docs/2020-07/index.html b/docs/2020-07/index.html index 3f86b685f..75857aa12 100644 --- a/docs/2020-07/index.html +++ b/docs/2020-07/index.html @@ -20,7 +20,7 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f - + @@ -45,9 +45,9 @@ Since I was restarting Tomcat anyways I decided to redeploy the latest changes f "@type": "BlogPosting", "headline": "July, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-07/", - "wordCount": "3928", + "wordCount": "4195", "datePublished": "2020-07-01T10:53:54+03:00", - "dateModified": "2020-07-20T22:14:45+03:00", + "dateModified": "2020-07-21T13:44:52+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -727,6 +727,61 @@ Removing unnecessary Unicode (U+200B): Agencia de Servicios a la Comercializaci
  • I don’t want to go back to using the large webfonts with CSS because the SVG + JS method saves us ~140KiB and causes at least three fewer network requests
  • +
  • I started processing the 2019 stats in a batch of 1 million on DSpace Test:
  • + +
    $ export JAVA_OPTS='-Dfile.encoding=UTF-8 -Xmx2048m'
    +$ chrt -b 0 dspace solr-upgrade-statistics-6x -n 1000000 -i statistics-2019
    +...
    +        *** Statistics Records with Legacy Id ***
    +
    +           6,359,966    Bistream View
    +           2,204,775    Item View
    +             139,266    Community View
    +             131,234    Collection View
    +             948,529    Community Search
    +             593,974    Collection Search
    +           1,682,818    Unexpected Type & Full Site
    +        --------------------------------------
    +          12,060,562    TOTAL
    +
    +
    $ export JAVA_OPTS='-Dfile.encoding=UTF-8 -Xmx2048m'
    +$ chrt -b 0 dspace solr-upgrade-statistics-6x -n 1000000 -i statistics-2018
    +        *** Statistics Records with Legacy Id ***
    +
    +           3,684,394    Bistream View
    +           2,183,032    Item View
    +             131,222    Community View
    +              79,348    Collection View
    +             345,529    Collection Search
    +             322,223    Community Search
    +             874,107    Unexpected Type & Full Site
    +        --------------------------------------
    +           7,619,855    TOTAL
    +
    +
    OpenRXV harvesting bot; https://github.com/ilri/OpenRXV
    +
    +

    2020-07-22

    + diff --git a/docs/categories/index.html b/docs/categories/index.html index d0fc0344d..303182b3b 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 157401900..51a1bad50 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index 251e03880..c17323b22 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index bbc16d7d4..3e36722cc 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 78286a889..943b91383 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 9b3ae1baf..5102f828c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index ec815c596..b7a7c2fc0 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 337d6af9a..1d4e07f69 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index c0a9f6982..3104c1a85 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 1fb17cd6b..c8e73f88b 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/6/index.html b/docs/page/6/index.html index ac35d73d2..8393f58c0 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index 84ef724a5..5bf31541c 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index fc675106f..79e92274b 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index 5af143da6..95560a4e8 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 714fcf4b3..edc965d4c 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index abacf7514..1fc3c8e9d 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index 817d9bd73..26f179c86 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index fbd8ad732..f1b25b710 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/categories/ - 2020-07-20T22:14:45+03:00 + 2020-07-21T13:44:52+03:00 https://alanorth.github.io/cgspace-notes/ - 2020-07-20T22:14:45+03:00 + 2020-07-21T13:44:52+03:00 https://alanorth.github.io/cgspace-notes/2020-07/ - 2020-07-20T22:14:45+03:00 + 2020-07-21T13:44:52+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-07-20T22:14:45+03:00 + 2020-07-21T13:44:52+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-07-20T22:14:45+03:00 + 2020-07-21T13:44:52+03:00