mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-12-24 14:04:29 +01:00
Add notes for 2017-07-31
This commit is contained in:
parent
2efbe26be9
commit
ff336ce2ba
@ -139,3 +139,11 @@ delete from metadatavalue where resource_type_id=2 and metadata_field_id=235 and
|
|||||||
|
|
||||||
- Now just waiting to run them on CGSpace, and then apply the modified input forms after Macaroni Bros give me an updated list
|
- Now just waiting to run them on CGSpace, and then apply the modified input forms after Macaroni Bros give me an updated list
|
||||||
- Temporarily increase the nginx upload limit to 200MB for Sisay to upload the CIAT presentations
|
- Temporarily increase the nginx upload limit to 200MB for Sisay to upload the CIAT presentations
|
||||||
|
- Looking at the CGSpace activity page, there are 52 Baidu bots concurrently crawling our website (I copied the activity page to a text file and grepped it)!
|
||||||
|
|
||||||
|
```
|
||||||
|
$ grep 180.76. /tmp/status | awk '{print $5}' | sort | uniq | wc -l
|
||||||
|
52
|
||||||
|
```
|
||||||
|
|
||||||
|
- From looking at the `dspace.log` I see they are all using the same session, which means our Crawler Session Manager Valve is working
|
||||||
|
@ -27,7 +27,7 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the
|
|||||||
|
|
||||||
|
|
||||||
<meta property="article:published_time" content="2017-07-01T18:03:52+03:00"/>
|
<meta property="article:published_time" content="2017-07-01T18:03:52+03:00"/>
|
||||||
<meta property="article:modified_time" content="2017-07-30T14:18:23+03:00"/>
|
<meta property="article:modified_time" content="2017-07-31T12:06:21+03:00"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -73,9 +73,9 @@ We can use PostgreSQL’s extended output format (-x) plus sed to format the
|
|||||||
"@type": "BlogPosting",
|
"@type": "BlogPosting",
|
||||||
"headline": "July, 2017",
|
"headline": "July, 2017",
|
||||||
"url": "https://alanorth.github.io/cgspace-notes/2017-07/",
|
"url": "https://alanorth.github.io/cgspace-notes/2017-07/",
|
||||||
"wordCount": "1086",
|
"wordCount": "1151",
|
||||||
"datePublished": "2017-07-01T18:03:52+03:00",
|
"datePublished": "2017-07-01T18:03:52+03:00",
|
||||||
"dateModified": "2017-07-30T14:18:23+03:00",
|
"dateModified": "2017-07-31T12:06:21+03:00",
|
||||||
"author": {
|
"author": {
|
||||||
"@type": "Person",
|
"@type": "Person",
|
||||||
"name": "Alan Orth"
|
"name": "Alan Orth"
|
||||||
@ -313,6 +313,15 @@ delete from metadatavalue where resource_type_id=2 and metadata_field_id=235 and
|
|||||||
<ul>
|
<ul>
|
||||||
<li>Now just waiting to run them on CGSpace, and then apply the modified input forms after Macaroni Bros give me an updated list</li>
|
<li>Now just waiting to run them on CGSpace, and then apply the modified input forms after Macaroni Bros give me an updated list</li>
|
||||||
<li>Temporarily increase the nginx upload limit to 200MB for Sisay to upload the CIAT presentations</li>
|
<li>Temporarily increase the nginx upload limit to 200MB for Sisay to upload the CIAT presentations</li>
|
||||||
|
<li>Looking at the CGSpace activity page, there are 52 Baidu bots concurrently crawling our website (I copied the activity page to a text file and grepped it)!</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<pre><code>$ grep 180.76. /tmp/status | awk '{print $5}' | sort | uniq | wc -l
|
||||||
|
52
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>From looking at the <code>dspace.log</code> I see they are all using the same session, which means our Crawler Session Manager Valve is working</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/2017-07/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/2017-07/</loc>
|
||||||
<lastmod>2017-07-30T14:18:23+03:00</lastmod>
|
<lastmod>2017-07-31T12:06:21+03:00</lastmod>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
@ -109,7 +109,7 @@
|
|||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||||
<lastmod>2017-07-30T14:18:23+03:00</lastmod>
|
<lastmod>2017-07-31T12:06:21+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
@ -120,19 +120,19 @@
|
|||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
|
||||||
<lastmod>2017-07-30T14:18:23+03:00</lastmod>
|
<lastmod>2017-07-31T12:06:21+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/post/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/post/</loc>
|
||||||
<lastmod>2017-07-30T14:18:23+03:00</lastmod>
|
<lastmod>2017-07-31T12:06:21+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/tags/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/tags/</loc>
|
||||||
<lastmod>2017-07-30T14:18:23+03:00</lastmod>
|
<lastmod>2017-07-31T12:06:21+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user