From 00dc8241fc51df61d50491679e1e9514b5d56418 Mon Sep 17 00:00:00 2001
From: Alan Orth <alan.orth@gmail.com>
Date: Thu, 17 May 2018 13:14:29 +0300
Subject: [PATCH] Update notes for 2018-05-17

---
 content/posts/2018-05.md |  1 +
 docs/2018-05/index.html  |  7 ++++---
 docs/sitemap.xml         | 10 +++++-----
 3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/content/posts/2018-05.md b/content/posts/2018-05.md
index d884686a1..dba84c7d3 100644
--- a/content/posts/2018-05.md
+++ b/content/posts/2018-05.md
@@ -286,3 +286,4 @@ ga('send', 'pageview', {
 - I'm not sure which method is better, perhaps the `solr.ASCIIFoldingFilterFactory` filter because it doesn't require copying the `mapping-FoldToASCII.txt` file
 - And actually I'm not entirely sure about the order of filtering before tokenizing, etc...
 - Ah, I see that `charFilter` must be before the tokenizer because it works on a stream, whereas `filter` operates on tokenized input so it must come after the tokenizer
+- Regarding the use of the `charFilter` vs the `filter` class before and after the tokenizer, respectively, I think it's better to use the `charFilter` to normalize the input stream before tokenizing it, since I don't know what kind of input might get removed by the tokenizer
diff --git a/docs/2018-05/index.html b/docs/2018-05/index.html
index 834e1580b..54f530186 100644
--- a/docs/2018-05/index.html
+++ b/docs/2018-05/index.html
@@ -27,7 +27,7 @@ Also, I switched it to use OpenJDK instead of Oracle Java, as well as re-worked
 
 <meta property="article:published_time" content="2018-05-01T16:43:54&#43;03:00"/>
 
-<meta property="article:modified_time" content="2018-05-17T10:51:46&#43;03:00"/>
+<meta property="article:modified_time" content="2018-05-17T12:37:21&#43;03:00"/>
 
 
 
@@ -65,9 +65,9 @@ Also, I switched it to use OpenJDK instead of Oracle Java, as well as re-worked
   "@type": "BlogPosting",
   "headline": "May, 2018",
   "url": "https://alanorth.github.io/cgspace-notes/2018-05/",
-  "wordCount": "2267",
+  "wordCount": "2313",
   "datePublished": "2018-05-01T16:43:54&#43;03:00",
-  "dateModified": "2018-05-17T10:51:46&#43;03:00",
+  "dateModified": "2018-05-17T12:37:21&#43;03:00",
   "author": {
     "@type": "Person",
     "name": "Alan Orth"
@@ -469,6 +469,7 @@ $ ./bin/post -c countries ~/src/git/DSpace/2018-05-10-countries.csv
 <li>I&rsquo;m not sure which method is better, perhaps the <code>solr.ASCIIFoldingFilterFactory</code> filter because it doesn&rsquo;t require copying the <code>mapping-FoldToASCII.txt</code> file</li>
 <li>And actually I&rsquo;m not entirely sure about the order of filtering before tokenizing, etc&hellip;</li>
 <li>Ah, I see that <code>charFilter</code> must be before the tokenizer because it works on a stream, whereas <code>filter</code> operates on tokenized input so it must come after the tokenizer</li>
+<li>Regarding the use of the <code>charFilter</code> vs the <code>filter</code> class before and after the tokenizer, respectively, I think it&rsquo;s better to use the <code>charFilter</code> to normalize the input stream before tokenizing it, since I don&rsquo;t know what kind of input might get removed by the tokenizer</li>
 </ul>
 
   
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 5cee460f8..5011ef0cc 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -4,7 +4,7 @@
   
   <url>
     <loc>https://alanorth.github.io/cgspace-notes/2018-05/</loc>
-    <lastmod>2018-05-17T10:51:46+03:00</lastmod>
+    <lastmod>2018-05-17T12:37:21+03:00</lastmod>
   </url>
   
   <url>
@@ -164,7 +164,7 @@
   
   <url>
     <loc>https://alanorth.github.io/cgspace-notes/</loc>
-    <lastmod>2018-05-17T10:51:46+03:00</lastmod>
+    <lastmod>2018-05-17T12:37:21+03:00</lastmod>
     <priority>0</priority>
   </url>
   
@@ -175,7 +175,7 @@
   
   <url>
     <loc>https://alanorth.github.io/cgspace-notes/tags/notes/</loc>
-    <lastmod>2018-05-17T10:51:46+03:00</lastmod>
+    <lastmod>2018-05-17T12:37:21+03:00</lastmod>
     <priority>0</priority>
   </url>
   
@@ -187,13 +187,13 @@
   
   <url>
     <loc>https://alanorth.github.io/cgspace-notes/posts/</loc>
-    <lastmod>2018-05-17T10:51:46+03:00</lastmod>
+    <lastmod>2018-05-17T12:37:21+03:00</lastmod>
     <priority>0</priority>
   </url>
   
   <url>
     <loc>https://alanorth.github.io/cgspace-notes/tags/</loc>
-    <lastmod>2018-05-17T10:51:46+03:00</lastmod>
+    <lastmod>2018-05-17T12:37:21+03:00</lastmod>
     <priority>0</priority>
   </url>