From 41adbab750a88a2fd7a0455dbbf74f7b20511798 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Tue, 26 Mar 2019 08:55:54 +0200 Subject: [PATCH] Add notes for 2019-03-26 --- content/posts/2019-03.md | 55 ++++++++++++++++++++++++++++++++++ docs/2019-03/index.html | 65 ++++++++++++++++++++++++++++++++++++++-- docs/robots.txt | 2 +- docs/sitemap.xml | 20 ++++++------- 4 files changed, 128 insertions(+), 14 deletions(-) diff --git a/content/posts/2019-03.md b/content/posts/2019-03.md index 418543e6a..5b99c198c 100644 --- a/content/posts/2019-03.md +++ b/content/posts/2019-03.md @@ -800,4 +800,59 @@ $ psql -c 'select * from pg_stat_activity' | grep -o -E '(dspaceWeb|dspaceApi|ds - I need to watch this carefully though because I've read some places that Tomcat's DBCP doesn't track statements and might create memory leaks if an application doesn't close statements before a connection gets returned back to the pool - According the Uptime Robot the server was up and down a few more times over the next hour so I restarted Tomcat again +## 2019-03-26 + +- UptimeRobot says CGSpace went down again and I see the load is again at 14.0! 
+- Here are the top IPs in nginx logs in the last hour: + +``` +# zcat --force /var/log/nginx/{oai,rest,statistics}.log /var/log/nginx/{oai,rest,statistics}.log.1 | grep -E "26/Mar/2019:(06|07)" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10 + 3 35.174.184.209 + 3 66.249.66.81 + 4 104.198.9.108 + 4 154.77.98.122 + 4 2.50.152.13 + 10 196.188.12.245 + 14 66.249.66.80 + 414 45.5.184.72 + 535 45.5.186.2 + 2014 205.186.128.185 +# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep -E "26/Mar/2019:(06|07)" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10 + 157 41.204.190.40 + 160 18.194.46.84 + 160 54.70.40.11 + 168 31.6.77.23 + 188 66.249.66.81 + 284 3.91.79.74 + 405 2a01:4f8:140:3192::2 + 471 66.249.66.80 + 712 35.174.184.209 + 784 2a01:4f8:13b:1296::2 +``` + +- The two IPv6 addresses are something called BLEXBot, which seems to check the robots.txt file and then completely ignore it by making thousands of requests to dynamic pages like Browse and Discovery +- Then `35.174.184.209` is MauiBot, which does the same thing +- Also `3.91.79.74`, which appears to be CCBot, does the same thing +- I will add these three to the "bad bot" rate limiting that I originally used for Baidu +- Going further, these are the IPs making requests to Discovery and Browse pages so far today: + +``` +# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep -E "(discover|browse)" | grep -E "26/Mar/2019:" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10 + 120 34.207.146.166 + 128 3.91.79.74 + 132 108.179.57.67 + 143 34.228.42.25 + 185 216.244.66.198 + 430 54.70.40.11 + 1033 93.179.69.74 + 1206 2a01:4f8:140:3192::2 + 2678 2a01:4f8:13b:1296::2 + 3790 35.174.184.209 +``` + +- `54.70.40.11` is SemanticScholarBot +- `216.244.66.198` is DotBot +- `93.179.69.74` is some IP in Ukraine, which I will add to the list of bot IPs in nginx +- I can only hope that 
this helps the load go down because all this traffic is disrupting the service for normal users and well-behaved bots (and interrupting my dinner and breakfast) + diff --git a/docs/2019-03/index.html b/docs/2019-03/index.html index 039929eff..9ffa09c34 100644 --- a/docs/2019-03/index.html +++ b/docs/2019-03/index.html @@ -25,7 +25,7 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca - + @@ -55,9 +55,9 @@ I think I will need to ask Udana to re-copy and paste the abstracts with more ca "@type": "BlogPosting", "headline": "March, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-03/", - "wordCount": "5067", + "wordCount": "5371", "datePublished": "2019-03-01T12:16:30+01:00", - "dateModified": "2019-03-25T12:59:24+02:00", + "dateModified": "2019-03-25T23:47:00+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1076,6 +1076,65 @@ $ grep -o -E 'session_id=[A-Z0-9]{32}' dspace.log.2019-03-22 | sort -u | wc -l
  • According the Uptime Robot the server was up and down a few more times over the next hour so I restarted Tomcat again
  • +

    2019-03-26

    + + + +
    # zcat --force /var/log/nginx/{oai,rest,statistics}.log /var/log/nginx/{oai,rest,statistics}.log.1 | grep -E "26/Mar/2019:(06|07)" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10 
    +      3 35.174.184.209
    +      3 66.249.66.81
    +      4 104.198.9.108
    +      4 154.77.98.122
    +      4 2.50.152.13
    +     10 196.188.12.245
    +     14 66.249.66.80
    +    414 45.5.184.72
    +    535 45.5.186.2
    +   2014 205.186.128.185
    +# zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep -E "26/Mar/2019:(06|07)" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10
    +    157 41.204.190.40
    +    160 18.194.46.84
    +    160 54.70.40.11
    +    168 31.6.77.23
    +    188 66.249.66.81
    +    284 3.91.79.74
    +    405 2a01:4f8:140:3192::2
    +    471 66.249.66.80
    +    712 35.174.184.209
    +    784 2a01:4f8:13b:1296::2
    +
    + + + +
    # zcat --force /var/log/nginx/{access,error,library-access}.log /var/log/nginx/{access,error,library-access}.log.1 | grep -E "(discover|browse)" | grep -E "26/Mar/2019:" | awk '{print $1}' | sort | uniq -c | sort -n | tail -n 10
    +    120 34.207.146.166
    +    128 3.91.79.74
    +    132 108.179.57.67
    +    143 34.228.42.25
    +    185 216.244.66.198
    +    430 54.70.40.11
    +   1033 93.179.69.74
    +   1206 2a01:4f8:140:3192::2
    +   2678 2a01:4f8:13b:1296::2
    +   3790 35.174.184.209
    +
    + + + diff --git a/docs/robots.txt b/docs/robots.txt index 9c8f41429..2b88ae317 100644 --- a/docs/robots.txt +++ b/docs/robots.txt @@ -45,7 +45,7 @@ Disallow: /cgspace-notes/2015-12/ Disallow: /cgspace-notes/2015-11/ Disallow: /cgspace-notes/ Disallow: /cgspace-notes/categories/ -Disallow: /cgspace-notes/tags/notes/ Disallow: /cgspace-notes/categories/notes/ +Disallow: /cgspace-notes/tags/notes/ Disallow: /cgspace-notes/posts/ Disallow: /cgspace-notes/tags/ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index a45eb37d0..ba2ada140 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2019-03/ - 2019-03-25T12:59:24+02:00 + 2019-03-25T23:47:00+02:00 @@ -214,7 +214,7 @@ https://alanorth.github.io/cgspace-notes/ - 2019-03-25T12:59:24+02:00 + 2019-03-25T23:47:00+02:00 0 @@ -223,27 +223,27 @@ 0 - - https://alanorth.github.io/cgspace-notes/tags/notes/ - 2019-03-25T12:59:24+02:00 - 0 - - https://alanorth.github.io/cgspace-notes/categories/notes/ 2018-03-09T22:10:33+02:00 0 + + https://alanorth.github.io/cgspace-notes/tags/notes/ + 2019-03-25T23:47:00+02:00 + 0 + + https://alanorth.github.io/cgspace-notes/posts/ - 2019-03-25T12:59:24+02:00 + 2019-03-25T23:47:00+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2019-03-25T12:59:24+02:00 + 2019-03-25T23:47:00+02:00 0