diff --git a/content/posts/2019-02.md b/content/posts/2019-02.md index 485057536..720690c42 100644 --- a/content/posts/2019-02.md +++ b/content/posts/2019-02.md @@ -1020,11 +1020,12 @@ $ ./agrovoc-lookup.py -l fr -i /tmp/top-1500-subjects.txt -om /tmp/matched-subje $ cat /tmp/matched-subjects-* | sort | uniq > /tmp/2019-02-21-matched-subjects.txt ``` -- And then a list of all the unique *unmatched* terms using some utility I've never heard of before called `comm`: +- And then a list of all the unique *unmatched* terms using some utility I've never heard of before called `comm` or with `diff`: ``` $ sort /tmp/top-1500-subjects.txt > /tmp/subjects-sorted.txt $ comm -13 /tmp/2019-02-21-matched-subjects.txt /tmp/subjects-sorted.txt > /tmp/2019-02-21-unmatched-subjects.txt +$ diff --new-line-format="" --unchanged-line-format="" /tmp/subjects-sorted.txt /tmp/2019-02-21-matched-subjects.txt > /tmp/2019-02-21-unmatched-subjects.txt ``` - Generate a list of countries and regions from CGSpace for Sisay to look through: diff --git a/docs/2019-02/index.html b/docs/2019-02/index.html index fa7b56b33..e6155a19d 100644 --- a/docs/2019-02/index.html +++ b/docs/2019-02/index.html @@ -42,7 +42,7 @@ sys 0m1.979s - + @@ -89,9 +89,9 @@ sys 0m1.979s "@type": "BlogPosting", "headline": "February, 2019", "url": "https://alanorth.github.io/cgspace-notes/2019-02/", - "wordCount": "5947", + "wordCount": "5958", "datePublished": "2019-02-01T21:37:30+02:00", - "dateModified": "2019-02-20T18:20:09-08:00", + "dateModified": "2019-02-21T10:08:18-08:00", "author": { "@type": "Person", "name": "Alan Orth" @@ -1315,11 +1315,12 @@ $ ./agrovoc-lookup.py -l fr -i /tmp/top-1500-subjects.txt -om /tmp/matched-subje
$ sort /tmp/top-1500-subjects.txt > /tmp/subjects-sorted.txt
 $ comm -13 /tmp/2019-02-21-matched-subjects.txt /tmp/subjects-sorted.txt > /tmp/2019-02-21-unmatched-subjects.txt
+$ diff --new-line-format="" --unchanged-line-format="" /tmp/subjects-sorted.txt /tmp/2019-02-21-matched-subjects.txt > /tmp/2019-02-21-unmatched-subjects.txt