mirror of
https://github.com/alanorth/cgspace-notes.git
synced 2024-11-26 08:28:18 +01:00
Update notes for 2018-07-12
This commit is contained in:
parent
4e85415cca
commit
e1515963ea
@ -314,5 +314,15 @@ $ grep -c -E 'session_id=[A-Z0-9]{32}:ip_addr=95.108.181.88' dspace.log.2018-07-
|
|||||||
|
|
||||||
- So this bot is just like Baiduspider, and I need to add it to the nginx rate limiting
|
- So this bot is just like Baiduspider, and I need to add it to the nginx rate limiting
|
||||||
- I'll also add it to Tomcat's Crawler Session Manager Valve to force the re-use of a common Tomcat session for all crawlers just in case
|
- I'll also add it to Tomcat's Crawler Session Manager Valve to force the re-use of a common Tomcat session for all crawlers just in case
|
||||||
|
- Generate a list of all affiliations in CGSpace to send to Mohamed Salem to compare with the list on MEL (sorting the list by most occurrences):
|
||||||
|
|
||||||
|
```
|
||||||
|
dspace=# \copy (select distinct text_value, count(*) as count from metadatavalue where resource_type_id=2 and metadata_field_id=211 group by text_value order by count desc) to /tmp/affiliations.csv with csv header
|
||||||
|
COPY 4518
|
||||||
|
dspace=# \q
|
||||||
|
$ csvcut -c 1 < /tmp/affiliations.csv > /tmp/affiliations-1.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
- We also need to discuss standardizing our countries and comparing our ORCID iDs
|
||||||
|
|
||||||
<!-- vim: set sw=2 ts=2: -->
|
<!-- vim: set sw=2 ts=2: -->
|
||||||
|
@ -30,7 +30,7 @@ There is insufficient memory for the Java Runtime Environment to continue.
|
|||||||
|
|
||||||
<meta property="article:published_time" content="2018-07-01T12:56:54+03:00"/>
|
<meta property="article:published_time" content="2018-07-01T12:56:54+03:00"/>
|
||||||
|
|
||||||
<meta property="article:modified_time" content="2018-07-12T08:35:39+03:00"/>
|
<meta property="article:modified_time" content="2018-07-12T09:00:08+03:00"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -71,9 +71,9 @@ There is insufficient memory for the Java Runtime Environment to continue.
|
|||||||
"@type": "BlogPosting",
|
"@type": "BlogPosting",
|
||||||
"headline": "July, 2018",
|
"headline": "July, 2018",
|
||||||
"url": "https://alanorth.github.io/cgspace-notes/2018-07/",
|
"url": "https://alanorth.github.io/cgspace-notes/2018-07/",
|
||||||
"wordCount": "2079",
|
"wordCount": "2156",
|
||||||
"datePublished": "2018-07-01T12:56:54+03:00",
|
"datePublished": "2018-07-01T12:56:54+03:00",
|
||||||
"dateModified": "2018-07-12T08:35:39+03:00",
|
"dateModified": "2018-07-12T09:00:08+03:00",
|
||||||
"author": {
|
"author": {
|
||||||
"@type": "Person",
|
"@type": "Person",
|
||||||
"name": "Alan Orth"
|
"name": "Alan Orth"
|
||||||
@ -493,6 +493,17 @@ org.apache.solr.client.solrj.SolrServerException: IOException occured when talki
|
|||||||
<ul>
|
<ul>
|
||||||
<li>So this bot is just like Baiduspider, and I need to add it to the nginx rate limiting</li>
|
<li>So this bot is just like Baiduspider, and I need to add it to the nginx rate limiting</li>
|
||||||
<li>I’ll also add it to Tomcat’s Crawler Session Manager Valve to force the re-use of a common Tomcat session for all crawlers just in case</li>
|
<li>I’ll also add it to Tomcat’s Crawler Session Manager Valve to force the re-use of a common Tomcat session for all crawlers just in case</li>
|
||||||
|
<li>Generate a list of all affiliations in CGSpace to send to Mohamed Salem to compare with the list on MEL (sorting the list by most occurrences):</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<pre><code>dspace=# \copy (select distinct text_value, count(*) as count from metadatavalue where resource_type_id=2 and metadata_field_id=211 group by text_value order by count desc) to /tmp/affiliations.csv with csv header
|
||||||
|
COPY 4518
|
||||||
|
dspace=# \q
|
||||||
|
$ csvcut -c 1 < /tmp/affiliations.csv > /tmp/affiliations-1.csv
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>We also need to discuss standardizing our countries and comparing our ORCID iDs</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<!-- vim: set sw=2 ts=2: -->
|
<!-- vim: set sw=2 ts=2: -->
|
||||||
|
@ -37,7 +37,7 @@ Disallow: /cgspace-notes/2015-12/
|
|||||||
Disallow: /cgspace-notes/2015-11/
|
Disallow: /cgspace-notes/2015-11/
|
||||||
Disallow: /cgspace-notes/
|
Disallow: /cgspace-notes/
|
||||||
Disallow: /cgspace-notes/categories/
|
Disallow: /cgspace-notes/categories/
|
||||||
Disallow: /cgspace-notes/categories/notes/
|
|
||||||
Disallow: /cgspace-notes/tags/notes/
|
Disallow: /cgspace-notes/tags/notes/
|
||||||
|
Disallow: /cgspace-notes/categories/notes/
|
||||||
Disallow: /cgspace-notes/posts/
|
Disallow: /cgspace-notes/posts/
|
||||||
Disallow: /cgspace-notes/tags/
|
Disallow: /cgspace-notes/tags/
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/2018-07/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/2018-07/</loc>
|
||||||
<lastmod>2018-07-12T08:35:39+03:00</lastmod>
|
<lastmod>2018-07-12T09:00:08+03:00</lastmod>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
@ -174,7 +174,7 @@
|
|||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/</loc>
|
||||||
<lastmod>2018-07-12T08:35:39+03:00</lastmod>
|
<lastmod>2018-07-12T09:00:08+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
@ -183,27 +183,27 @@
|
|||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
|
<url>
|
||||||
|
<loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
|
||||||
|
<lastmod>2018-07-12T09:00:08+03:00</lastmod>
|
||||||
|
<priority>0</priority>
|
||||||
|
</url>
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/categories/notes/</loc>
|
||||||
<lastmod>2018-03-09T22:10:33+02:00</lastmod>
|
<lastmod>2018-03-09T22:10:33+02:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
<url>
|
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
|
|
||||||
<lastmod>2018-07-12T08:35:39+03:00</lastmod>
|
|
||||||
<priority>0</priority>
|
|
||||||
</url>
|
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
|
||||||
<lastmod>2018-07-12T08:35:39+03:00</lastmod>
|
<lastmod>2018-07-12T09:00:08+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
<url>
|
<url>
|
||||||
<loc>https://alanorth.github.io/cgspace-notes/tags/</loc>
|
<loc>https://alanorth.github.io/cgspace-notes/tags/</loc>
|
||||||
<lastmod>2018-07-12T08:35:39+03:00</lastmod>
|
<lastmod>2018-07-12T09:00:08+03:00</lastmod>
|
||||||
<priority>0</priority>
|
<priority>0</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user