From 0f8c0fda83c4be2f5c6b462a53ea5ccd6f728153 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 19 Nov 2017 16:11:12 +0200 Subject: [PATCH] Add notes for 2017-11-19 --- content/post/2017-11.md | 43 +++++++++++++++++++++++++++++++ public/2017-11/index.html | 53 ++++++++++++++++++++++++++++++++++++--- public/sitemap.xml | 10 ++++---- 3 files changed, 98 insertions(+), 8 deletions(-) diff --git a/content/post/2017-11.md b/content/post/2017-11.md index 073c45d57..777dfbc95 100644 --- a/content/post/2017-11.md +++ b/content/post/2017-11.md @@ -693,3 +693,46 @@ $ jconsole -J-DsocksProxyHost=localhost -J-DsocksProxyPort=7777 service:jmx:rmi: - Here is the Jconsole screen after looping `http --print Hh https://dspacetest.cgiar.org/handle/10568/1` for a few minutes: ![Jconsole sessions for XMLUI](/cgspace-notes/2017/11/jconsole-sessions.png) + +## 2017-11-19 + +- Linode sent an alert that CGSpace was using a lot of CPU around 4–6 AM +- Looking in the nginx access logs I see the most active XMLUI users between 4 and 6 AM: + +``` +# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep -E "19/Nov/2017:0[456]" | awk '{print $1}' | sort -n | uniq -c | sort -h | tail + 111 66.249.66.155 + 171 5.9.6.51 + 188 54.162.241.40 + 229 207.46.13.23 + 233 207.46.13.137 + 247 40.77.167.6 + 251 207.46.13.36 + 275 68.180.229.254 + 325 104.196.152.243 + 1610 66.249.66.153 +``` + +- 66.249.66.153 appears to be Googlebot: + +``` +66.249.66.153 - - [19/Nov/2017:06:26:01 +0000] "GET /handle/10568/2203 HTTP/1.1" 200 6309 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" +``` + +- We know Googlebot is persistent but behaves well, so I guess it was just a coincidence that it came at a time when we had other traffic and server activity +- In related news, I see an Atmire update process going for many hours and responsible for hundreds of thousands of log entries (two thirds of all log entries) + +``` +$ wc -l dspace.log.2017-11-19 +388472 dspace.log.2017-11-19 +$ grep -c com.atmire.utils.UpdateSolrStatsMetadata dspace.log.2017-11-19 +267494 +``` + +- WTF is this process doing every day, and for so many hours? +- In unrelated news, when I was looking at the DSpace logs I saw a bunch of errors like this: + +``` +2017-11-19 03:00:32,806 INFO org.apache.pdfbox.pdfparser.PDFParser @ Document is encrypted +2017-11-19 03:00:32,807 ERROR org.apache.pdfbox.filter.FlateFilter @ FlateFilter: stop reading corrupt stream due to a DataFormatException +``` diff --git a/public/2017-11/index.html b/public/2017-11/index.html index 2456e5ccf..43197c2a3 100644 --- a/public/2017-11/index.html +++ b/public/2017-11/index.html @@ -38,7 +38,7 @@ COPY 54701 - + @@ -86,9 +86,9 @@ COPY 54701 "@type": "BlogPosting", "headline": "November, 2017", "url": "https://alanorth.github.io/cgspace-notes/2017-11/", - "wordCount": "4062", + "wordCount": "4282", "datePublished": "2017-11-02T09:37:54+02:00", - "dateModified": "2017-11-16T10:15:33+02:00", + "dateModified": "2017-11-17T12:35:53+02:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -924,6 +924,53 @@ dspace6=# CREATE EXTENSION pgcrypto;

Jconsole sessions for XMLUI

+

2017-11-19

+ + + +
# cat /var/log/nginx/access.log /var/log/nginx/access.log.1 | grep -E "19/Nov/2017:0[456]" | awk '{print $1}' | sort -n | uniq -c | sort -h | tail
+    111 66.249.66.155
+    171 5.9.6.51
+    188 54.162.241.40
+    229 207.46.13.23
+    233 207.46.13.137
+    247 40.77.167.6
+    251 207.46.13.36
+    275 68.180.229.254
+    325 104.196.152.243
+   1610 66.249.66.153
+
+ + + +
66.249.66.153 - - [19/Nov/2017:06:26:01 +0000] "GET /handle/10568/2203 HTTP/1.1" 200 6309 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+
+ + + +
$ wc -l dspace.log.2017-11-19 
+388472 dspace.log.2017-11-19
+$ grep -c com.atmire.utils.UpdateSolrStatsMetadata dspace.log.2017-11-19 
+267494
+
+ + + +
2017-11-19 03:00:32,806 INFO  org.apache.pdfbox.pdfparser.PDFParser @ Document is encrypted
+2017-11-19 03:00:32,807 ERROR org.apache.pdfbox.filter.FlateFilter @ FlateFilter: stop reading corrupt stream due to a DataFormatException
+
+ diff --git a/public/sitemap.xml b/public/sitemap.xml index 4b8094e80..698ae374e 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2017-11/ - 2017-11-16T10:15:33+02:00 + 2017-11-17T12:35:53+02:00 @@ -134,7 +134,7 @@ https://alanorth.github.io/cgspace-notes/ - 2017-11-16T10:15:33+02:00 + 2017-11-17T12:35:53+02:00 0 @@ -145,7 +145,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2017-11-16T10:15:33+02:00 + 2017-11-17T12:35:53+02:00 0 @@ -157,13 +157,13 @@ https://alanorth.github.io/cgspace-notes/post/ - 2017-11-16T10:15:33+02:00 + 2017-11-17T12:35:53+02:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2017-11-16T10:15:33+02:00 + 2017-11-17T12:35:53+02:00 0