mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-11-22 06:35:03 +01:00
Add notes for 2022-07-06
This commit is contained in:
parent
fc1e83e76d
commit
19715c3295
@ -82,4 +82,40 @@ Time: 399.751 ms
|
||||
- Perhaps we need to update our list of languages to include all of them instead of only the most common ones
|
||||
- I wrote a script `ilri/iso-639-value-pairs.py` to extract the names and Alpha 2 codes for all ISO 639-1 languages from pycountry and added them to `input-forms.xml`
|
||||
|
||||
## 2022-07-06
|
||||
|
||||
- CGSpace went down and up a few times due to high load
|
||||
  - I found one host in Romania making very high speed requests with a normal user agent (`Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.2; WOW64; Trident/7.0; .NET4.0E; .NET4.0C)`):
|
||||
|
||||
```console
|
||||
# awk '{print $1}' /var/log/nginx/{access,library-access,oai,rest}.log | sort | uniq -c | sort -h | tail -n 10
|
||||
516 142.132.248.90
|
||||
525 157.55.39.234
|
||||
587 66.249.66.21
|
||||
593 95.108.213.59
|
||||
1372 137.184.159.211
|
||||
4776 54.195.118.125
|
||||
5441 205.186.128.185
|
||||
6267 45.5.186.2
|
||||
15839 2a01:7e00::f03c:91ff:fe9a:3a37
|
||||
36114 146.19.75.141
|
||||
```
|
||||
|
||||
- I added 146.19.75.141 to the list of bot networks in nginx
|
||||
- While looking at the logs I started thinking about Bing again
|
||||
- They apparently [publish a list of all their networks](https://www.bing.com/toolbox/bingbot.json)
|
||||
- I wrote a script to use `prips` to [print the IPs for each network](https://stackoverflow.com/a/52501093/1996540)
|
||||
- The script is `bing-networks-to-ips.sh`
|
||||
- From Bing's IPs alone I purged 145,403 hits... sheesh
|
||||
- Delete two items on CGSpace for Margarita because she was getting the "Authorization denied for action OBSOLETE (DELETE) on BITSTREAM:0b26875a-..." error
|
||||
- This is the same DSpace 6 bug I noticed in 2021-03, 2021-04, and 2021-05
|
||||
- Update some `cg.audience` metadata to use "Academics" instead of "Academicians":
|
||||
|
||||
```console
|
||||
dspace=# UPDATE metadatavalue SET text_value='Academics' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=144 AND text_value='Academicians';
|
||||
UPDATE 104
|
||||
```
|
||||
|
||||
- I will also have to remove "Academicians" from input-forms.xml
|
||||
|
||||
<!-- vim: set sw=2 ts=2: -->
|
||||
|
@ -19,7 +19,7 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
|
||||
<meta property="og:type" content="article" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/2022-07/" />
|
||||
<meta property="article:published_time" content="2022-07-02T14:07:36+03:00" />
|
||||
<meta property="article:modified_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="article:modified_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
@ -44,9 +44,9 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
|
||||
"@type": "BlogPosting",
|
||||
"headline": "July, 2022",
|
||||
"url": "https://alanorth.github.io/cgspace-notes/2022-07/",
|
||||
"wordCount": "532",
|
||||
"wordCount": "739",
|
||||
"datePublished": "2022-07-02T14:07:36+03:00",
|
||||
"dateModified": "2022-07-04T17:20:01+03:00",
|
||||
"dateModified": "2022-07-04T22:10:02+03:00",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Alan Orth"
|
||||
@ -205,6 +205,47 @@ Also, the trgm functions I’ve used before are case insensitive, but Levens
|
||||
</li>
|
||||
<li>I wrote a script <code>ilri/iso-639-value-pairs.py</code> to extract the names and Alpha 2 codes for all ISO 639-1 languages from pycountry and added them to <code>input-forms.xml</code></li>
|
||||
</ul>
|
||||
<h2 id="2022-07-06">2022-07-06</h2>
|
||||
<ul>
|
||||
<li>CGSpace went down and up a few times due to high load
|
||||
<ul>
|
||||
<li>I found one host in Romania making very high speed requests with a normal user agent (<code>Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.2; WOW64; Trident/7.0; .NET4.0E; .NET4.0C)</code>):</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-console" data-lang="console"><span style="display:flex;"><span># awk <span style="color:#e6db74">'{print $1}'</span> /var/log/nginx/<span style="color:#f92672">{</span>access,library-access,oai,rest<span style="color:#f92672">}</span>.log | sort | uniq -c | sort -h | tail -n <span style="color:#ae81ff">10</span>
|
||||
</span></span><span style="display:flex;"><span> 516 142.132.248.90
|
||||
</span></span><span style="display:flex;"><span> 525 157.55.39.234
|
||||
</span></span><span style="display:flex;"><span> 587 66.249.66.21
|
||||
</span></span><span style="display:flex;"><span> 593 95.108.213.59
|
||||
</span></span><span style="display:flex;"><span> 1372 137.184.159.211
|
||||
</span></span><span style="display:flex;"><span> 4776 54.195.118.125
|
||||
</span></span><span style="display:flex;"><span> 5441 205.186.128.185
|
||||
</span></span><span style="display:flex;"><span> 6267 45.5.186.2
|
||||
</span></span><span style="display:flex;"><span> 15839 2a01:7e00::f03c:91ff:fe9a:3a37
|
||||
</span></span><span style="display:flex;"><span> 36114 146.19.75.141
|
||||
</span></span></code></pre></div><ul>
|
||||
<li>I added 146.19.75.141 to the list of bot networks in nginx</li>
|
||||
<li>While looking at the logs I started thinking about Bing again
|
||||
<ul>
|
||||
<li>They apparently <a href="https://www.bing.com/toolbox/bingbot.json">publish a list of all their networks</a></li>
|
||||
<li>I wrote a script to use <code>prips</code> to <a href="https://stackoverflow.com/a/52501093/1996540">print the IPs for each network</a></li>
|
||||
<li>The script is <code>bing-networks-to-ips.sh</code></li>
|
||||
<li>From Bing’s IPs alone I purged 145,403 hits… sheesh</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Delete two items on CGSpace for Margarita because she was getting the “Authorization denied for action OBSOLETE (DELETE) on BITSTREAM:0b26875a-…” error
|
||||
<ul>
|
||||
<li>This is the same DSpace 6 bug I noticed in 2021-03, 2021-04, and 2021-05</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Update some <code>cg.audience</code> metadata to use “Academics” instead of “Academicians”:</li>
|
||||
</ul>
|
||||
<div class="highlight"><pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-console" data-lang="console"><span style="display:flex;"><span>dspace=# UPDATE metadatavalue SET text_value='Academics' WHERE dspace_object_id IN (SELECT uuid FROM item) AND metadata_field_id=144 AND text_value='Academicians';
|
||||
</span></span><span style="display:flex;"><span>UPDATE 104
|
||||
</span></span></code></pre></div><ul>
|
||||
<li>I will also have to remove “Academicians” from input-forms.xml</li>
|
||||
</ul>
|
||||
<!-- raw HTML omitted -->
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/categories/notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
<meta property="og:description" content="Documenting day-to-day work on the [CGSpace](https://cgspace.cgiar.org) repository." />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://alanorth.github.io/cgspace-notes/posts/" />
|
||||
<meta property="og:updated_time" content="2022-07-04T17:20:01+03:00" />
|
||||
<meta property="og:updated_time" content="2022-07-04T22:10:02+03:00" />
|
||||
|
||||
|
||||
|
||||
|
@ -3,19 +3,19 @@
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
<url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/</loc>
|
||||
<lastmod>2022-07-04T17:20:01+03:00</lastmod>
|
||||
<lastmod>2022-07-04T22:10:02+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||
<lastmod>2022-07-04T17:20:01+03:00</lastmod>
|
||||
<lastmod>2022-07-04T22:10:02+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2022-07/</loc>
|
||||
<lastmod>2022-07-04T17:20:01+03:00</lastmod>
|
||||
<lastmod>2022-07-04T22:10:02+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||
<lastmod>2022-07-04T17:20:01+03:00</lastmod>
|
||||
<lastmod>2022-07-04T22:10:02+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||
<lastmod>2022-07-04T17:20:01+03:00</lastmod>
|
||||
<lastmod>2022-07-04T22:10:02+03:00</lastmod>
|
||||
</url><url>
|
||||
<loc>https://alanorth.github.io/cgspace-notes/2022-06/</loc>
|
||||
<lastmod>2022-07-04T09:25:14+03:00</lastmod>
|
||||
|
Loading…
Reference in New Issue
Block a user