From ccecd63eb0ad65789812d9bb9507a8972d109cc4 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 11 Aug 2020 11:35:05 +0300 Subject: [PATCH] Add notes for 2020-08-11 --- content/posts/2020-08.md | 17 ++++++++++++++++ docs/2020-08/index.html | 26 ++++++++++++++++++++++--- docs/categories/index.html | 2 +- docs/categories/notes/index.html | 2 +- docs/categories/notes/page/2/index.html | 2 +- docs/categories/notes/page/3/index.html | 2 +- docs/categories/notes/page/4/index.html | 2 +- docs/index.html | 2 +- docs/page/2/index.html | 2 +- docs/page/3/index.html | 2 +- docs/page/4/index.html | 2 +- docs/page/5/index.html | 2 +- docs/page/6/index.html | 2 +- docs/posts/index.html | 2 +- docs/posts/page/2/index.html | 2 +- docs/posts/page/3/index.html | 2 +- docs/posts/page/4/index.html | 2 +- docs/posts/page/5/index.html | 2 +- docs/posts/page/6/index.html | 2 +- docs/sitemap.xml | 10 +++++----- 20 files changed, 62 insertions(+), 25 deletions(-) diff --git a/content/posts/2020-08.md b/content/posts/2020-08.md index 22f2070b7..4ab970c88 100644 --- a/content/posts/2020-08.md +++ b/content/posts/2020-08.md @@ -367,5 +367,22 @@ $ curl -s "http://localhost:8081/solr/statistics-2010/update?softCommit=true" -H - In Twitter's case they were also getting lumped in with the bad bots too, but really they only make ~50 or so requests a day when someone posts a CGSpace link on Twitter - I tagged the ISO 3166-1 Alpha2 country codes on all items on CGSpace using my [CountryCodeTagger](https://github.com/ilri/cgspace-java-helpers) curation task - I still need to set up a cron job for it... + - This tagged 50,000 countries! 
+ +``` +dspace=# SELECT count(text_value) FROM metadatavalue WHERE metadata_field_id = 243 AND resource_type_id = 2; + count +------- + 50812 +(1 row) +``` + +## 2020-08-11 + +- I noticed some more hits from Macaroni's WordPress harvester that I hadn't caught last week + - 104.198.13.34 made many requests without a user agent, with a "WordPress" user agent, and with their new "RTB website BOT" user agent, about 100,000 in total in 2020, and maybe another 70,000 in the other years + - I will purge them and add them to the Tomcat Crawler Session Manager and the DSpace bots list so they don't get logged in Solr +- I noticed a bunch of user agents with "Crawl" in the Solr stats, which is strange because the DSpace spider agents file has had "crawl" for a long time (and it is case insensitive) + - In any case I will purge them and add them to the Tomcat Crawler Session Manager Valve so that at least their sessions get re-used diff --git a/docs/2020-08/index.html b/docs/2020-08/index.html index 91c1cb480..586c2a7af 100644 --- a/docs/2020-08/index.html +++ b/docs/2020-08/index.html @@ -19,7 +19,7 @@ It is class based so I can easily add support for other vocabularies, and the te - + @@ -43,9 +43,9 @@ It is class based so I can easily add support for other vocabularies, and the te "@type": "BlogPosting", "headline": "August, 2020", "url": "https://alanorth.github.io/cgspace-notes/2020-08/", - "wordCount": "2285", + "wordCount": "2443", "datePublished": "2020-08-02T15:35:54+03:00", - "dateModified": "2020-08-10T09:27:50+03:00", + "dateModified": "2020-08-10T15:59:22+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -527,6 +527,26 @@ $ curl -s "http://localhost:8081/solr/statistics-2010/update?softCommit=tru
  • I tagged the ISO 3166-1 Alpha2 country codes on all items on CGSpace using my CountryCodeTagger curation task +
  • + +
    dspace=# SELECT count(text_value) FROM metadatavalue WHERE metadata_field_id = 243 AND resource_type_id = 2;
    + count
    +-------
    + 50812
    +(1 row)
    +

    2020-08-11

    + diff --git a/docs/categories/index.html b/docs/categories/index.html index 365604a97..decf36a9f 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/index.html b/docs/categories/notes/index.html index 4be8b4c3a..1f1405fc4 100644 --- a/docs/categories/notes/index.html +++ b/docs/categories/notes/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/2/index.html b/docs/categories/notes/page/2/index.html index f66e47184..2d24dd186 100644 --- a/docs/categories/notes/page/2/index.html +++ b/docs/categories/notes/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/3/index.html b/docs/categories/notes/page/3/index.html index ad3a45e2c..0406ae1cd 100644 --- a/docs/categories/notes/page/3/index.html +++ b/docs/categories/notes/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/categories/notes/page/4/index.html b/docs/categories/notes/page/4/index.html index 2b272b345..4663436b0 100644 --- a/docs/categories/notes/page/4/index.html +++ b/docs/categories/notes/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/index.html b/docs/index.html index 1b4717aae..ffbab9252 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index aee8e375e..747f7a589 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/3/index.html b/docs/page/3/index.html index 08b4f5f2b..020248691 100644 --- a/docs/page/3/index.html +++ b/docs/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/4/index.html b/docs/page/4/index.html index 388186efd..f3c9a2dbf 100644 --- a/docs/page/4/index.html +++ b/docs/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/page/5/index.html b/docs/page/5/index.html index 0dc7e51f1..2f8f257de 100644 --- a/docs/page/5/index.html +++ b/docs/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git 
a/docs/page/6/index.html b/docs/page/6/index.html index f56a5a9a6..8c2aa8256 100644 --- a/docs/page/6/index.html +++ b/docs/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/index.html b/docs/posts/index.html index e59ea6e73..d22281c61 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/2/index.html b/docs/posts/page/2/index.html index a31e95ee1..7612d58a1 100644 --- a/docs/posts/page/2/index.html +++ b/docs/posts/page/2/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/3/index.html b/docs/posts/page/3/index.html index c8f26e90b..724115d01 100644 --- a/docs/posts/page/3/index.html +++ b/docs/posts/page/3/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/4/index.html b/docs/posts/page/4/index.html index 0cebf0bb5..c46c98a6d 100644 --- a/docs/posts/page/4/index.html +++ b/docs/posts/page/4/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/5/index.html b/docs/posts/page/5/index.html index 1ddc9d8b4..8558a5d8c 100644 --- a/docs/posts/page/5/index.html +++ b/docs/posts/page/5/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/posts/page/6/index.html b/docs/posts/page/6/index.html index e7cfe20b0..36ca45b93 100644 --- a/docs/posts/page/6/index.html +++ b/docs/posts/page/6/index.html @@ -9,7 +9,7 @@ - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0c1225f61..ecfee172a 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,27 +4,27 @@ https://alanorth.github.io/cgspace-notes/2020-08/ - 2020-08-10T09:27:50+03:00 + 2020-08-10T15:59:22+03:00 https://alanorth.github.io/cgspace-notes/categories/ - 2020-08-10T09:27:50+03:00 + 2020-08-10T15:59:22+03:00 https://alanorth.github.io/cgspace-notes/ - 2020-08-10T09:27:50+03:00 + 2020-08-10T15:59:22+03:00 https://alanorth.github.io/cgspace-notes/categories/notes/ - 2020-08-10T09:27:50+03:00 + 2020-08-10T15:59:22+03:00 https://alanorth.github.io/cgspace-notes/posts/ - 2020-08-10T09:27:50+03:00 + 2020-08-10T15:59:22+03:00