From 0afdffa34f46e8df6ab06227cfed7204f43773ed Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Wed, 17 Oct 2018 00:33:01 +0300 Subject: [PATCH] Update notes for 2018-10-17 --- content/posts/2018-10.md | 48 ++++++++++++++++++++++++++++++++ docs/2018-10/index.html | 59 ++++++++++++++++++++++++++++++++++++++-- docs/sitemap.xml | 10 +++---- 3 files changed, 109 insertions(+), 8 deletions(-) diff --git a/content/posts/2018-10.md b/content/posts/2018-10.md index b2c33346c..cf8f8dcde 100644 --- a/content/posts/2018-10.md +++ b/content/posts/2018-10.md @@ -298,4 +298,52 @@ $ psql -h localhost -U postgres dspacetest -c 'alter user dspacetest nosuperuser dspace=# \copy (SELECT (CASE when metadata_schema_id=1 THEN 'dc' WHEN metadata_schema_id=2 THEN 'cg' END) AS schema, element, qualifier, scope_note FROM metadatafieldregistry where metadata_schema_id IN (1,2)) TO /tmp/cgspace-schema.csv WITH CSV HEADER; ``` +- Talking to the CodeObia guys about the REST API I started to wonder why it's so slow and how I can quantify it in order to ask the dspace-tech mailing list for help profiling it +- Interestingly, the speed doesn't get better after you request the same thing multiple times–it's consistently bad on both CGSpace and DSpace Test! + +``` +$ time http --print h 'https://cgspace.cgiar.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0' +... +0.35s user 0.06s system 1% cpu 25.133 total +0.31s user 0.04s system 1% cpu 25.223 total +0.27s user 0.06s system 1% cpu 27.858 total +0.20s user 0.05s system 1% cpu 23.838 total +0.30s user 0.05s system 1% cpu 24.301 total + +$ time http --print h 'https://dspacetest.cgiar.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0' +... +0.22s user 0.03s system 1% cpu 17.248 total +0.23s user 0.02s system 1% cpu 16.856 total +0.23s user 0.04s system 1% cpu 16.460 total +0.24s user 0.04s system 1% cpu 21.043 total +0.22s user 0.04s system 1% cpu 17.132 total +``` + +- I should note that at this time CGSpace is using Oracle Java and DSpace Test is using OpenJDK (both version 8) +- I wonder if the Java garbage collector is important here, or if there are missing indexes in PostgreSQL? +- I switched DSpace Test to the G1GC garbage collector and tried again and now the results are worse! + +``` +$ time http --print h 'https://dspacetest.cgiar.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0' +... +0.20s user 0.03s system 0% cpu 25.017 total +0.23s user 0.02s system 1% cpu 23.299 total +0.24s user 0.02s system 1% cpu 22.496 total +0.22s user 0.03s system 1% cpu 22.720 total +0.23s user 0.03s system 1% cpu 22.632 total +``` + +- If I make a request without the expands it is ten time faster: + +``` +$ time http --print h 'https://dspacetest.cgiar.org/rest/items?limit=100&offset=0' +... +0.20s user 0.03s system 7% cpu 3.098 total +0.22s user 0.03s system 8% cpu 2.896 total +0.21s user 0.05s system 9% cpu 2.787 total +0.23s user 0.02s system 8% cpu 2.896 total +``` + +- I sent a mail to dspace-tech to ask how to profile this... + diff --git a/docs/2018-10/index.html b/docs/2018-10/index.html index 145ea83af..ffeefec06 100644 --- a/docs/2018-10/index.html +++ b/docs/2018-10/index.html @@ -9,7 +9,7 @@ - + @@ -24,9 +24,9 @@ "@type": "BlogPosting", "headline": "October, 2018", "url": "https://alanorth.github.io/cgspace-notes/2018-10/", - "wordCount": "2132", + "wordCount": "2451", "datePublished": "2018-10-01T22:31:54+03:00", - "dateModified": "2018-10-15T17:26:03+03:00", + "dateModified": "2018-10-16T17:26:18+03:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -435,6 +435,59 @@ $ psql -h localhost -U postgres dspacetest -c 'alter user dspacetest nosuperuser
dspace=# \copy (SELECT (CASE when metadata_schema_id=1 THEN 'dc' WHEN metadata_schema_id=2 THEN 'cg' END) AS schema, element, qualifier, scope_note FROM metadatafieldregistry where metadata_schema_id IN (1,2)) TO /tmp/cgspace-schema.csv WITH CSV HEADER;
 
+ + +
$ time http --print h 'https://cgspace.cgiar.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0'
+...
+0.35s user 0.06s system 1% cpu 25.133 total
+0.31s user 0.04s system 1% cpu 25.223 total
+0.27s user 0.06s system 1% cpu 27.858 total
+0.20s user 0.05s system 1% cpu 23.838 total
+0.30s user 0.05s system 1% cpu 24.301 total
+
+$ time http --print h 'https://dspacetest.cgiar.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0'
+...
+0.22s user 0.03s system 1% cpu 17.248 total
+0.23s user 0.02s system 1% cpu 16.856 total
+0.23s user 0.04s system 1% cpu 16.460 total
+0.24s user 0.04s system 1% cpu 21.043 total
+0.22s user 0.04s system 1% cpu 17.132 total
+
+ + + +
$ time http --print h 'https://dspacetest.cgiar.org/rest/items?expand=metadata,bitstreams,parentCommunityList&limit=100&offset=0'
+...
+0.20s user 0.03s system 0% cpu 25.017 total
+0.23s user 0.02s system 1% cpu 23.299 total
+0.24s user 0.02s system 1% cpu 22.496 total
+0.22s user 0.03s system 1% cpu 22.720 total
+0.23s user 0.03s system 1% cpu 22.632 total
+
+ + + +
$ time http --print h 'https://dspacetest.cgiar.org/rest/items?limit=100&offset=0'
+...
+0.20s user 0.03s system 7% cpu 3.098 total
+0.22s user 0.03s system 8% cpu 2.896 total
+0.21s user 0.05s system 9% cpu 2.787 total
+0.23s user 0.02s system 8% cpu 2.896 total
+
+ + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index e471628ce..1922e2a6a 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -4,7 +4,7 @@ https://alanorth.github.io/cgspace-notes/2018-10/ - 2018-10-15T17:26:03+03:00 + 2018-10-16T17:26:18+03:00 @@ -189,7 +189,7 @@ https://alanorth.github.io/cgspace-notes/ - 2018-10-15T17:26:03+03:00 + 2018-10-16T17:26:18+03:00 0 @@ -200,7 +200,7 @@ https://alanorth.github.io/cgspace-notes/tags/notes/ - 2018-10-15T17:26:03+03:00 + 2018-10-16T17:26:18+03:00 0 @@ -212,13 +212,13 @@ https://alanorth.github.io/cgspace-notes/posts/ - 2018-10-15T17:26:03+03:00 + 2018-10-16T17:26:18+03:00 0 https://alanorth.github.io/cgspace-notes/tags/ - 2018-10-15T17:26:03+03:00 + 2018-10-16T17:26:18+03:00 0