From 409eb3bd02b2c5776d49eab54d268867bb38b9f4 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sat, 1 Aug 2020 20:53:59 +0300 Subject: [PATCH] src/main/java: Refactor vocabularies classes We can't use the same class to map ISO 3166-1 and CGSpace country vocabularies because our Gson is old and lacks the support for the "alternate" value in its annotations (added in Gson 2.5). So it's better to create multiple classes that extend the base one instead of creating a custom deserializer. Each extended class then uses its own SerializedName. --- .../ctasks/CGSpaceCountriesVocabulary.java | 25 +++++++++++++++ .../cgspace/ctasks/CountriesVocabulary.java | 4 --- .../cgspace/ctasks/CountryCodeTagger.java | 32 +++++++++++++++++-- .../ctasks/ISO3166CountriesVocabulary.java | 27 ++++++++++++++++ 4 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java create mode 100644 src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java b/src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java new file mode 100644 index 0000000..f7b8d77 --- /dev/null +++ b/src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java @@ -0,0 +1,25 @@ +/* +DSpace Curation Tasks +Copyright (C) 2020 Alan Orth + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +package org.cgiar.cgspace.ctasks; + +import java.util.List; + +public class CGSpaceCountriesVocabulary extends CountriesVocabulary { + List countries; +} \ No newline at end of file diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java b/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java index bb4d72d..cb1afdf 100644 --- a/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java +++ b/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java @@ -19,12 +19,8 @@ package org.cgiar.cgspace.ctasks; import javax.annotation.Nullable; -import java.util.List; -import com.google.gson.annotations.SerializedName; public class CountriesVocabulary { - // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly - @SerializedName("3166-1") List countries; class Country { private String name; //required diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java b/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java index a9880ae..6787c73 100644 --- a/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java +++ b/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java @@ -76,11 +76,11 @@ public class CountryCodeTagger extends AbstractCurationTask // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath))); - CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, CountriesVocabulary.class); + ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class); reader.close(); reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath))); - CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class); + 
CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class); reader.close(); //System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging"); @@ -91,12 +91,40 @@ public class CountryCodeTagger extends AbstractCurationTask if (itemAlpha2CountryCodes.length == 0) { //System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries."); + // split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata() + String[] iso3166Alpha2FieldParts = iso3166Alpha2Field.split("\\."); + System.out.println("schema:" + iso3166Alpha2FieldParts[0]); + System.out.println("element:" + iso3166Alpha2FieldParts[1]); + System.out.println("qualifier:" + iso3166Alpha2FieldParts[2]); + Integer addedCodeCount = 0; for (Metadatum itemCountry : itemCountries) { + //check ISO 3166-1 countries for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) { if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) { System.out.println(itemHandle + ": adding country code " + country.getAlpha_2()); + try { + // we have the field as a string, so we need to split/tokenize it here actually + item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2()); + item.update(); + + addedCodeCount++; + + result = itemHandle + ": added " + addedCodeCount + " country code(s)"; + status = Curator.CURATE_SUCCESS; + } catch (SQLException | AuthorizeException sqle) { + log.debug(sqle.getMessage()); + result = itemHandle + ": error"; + status = Curator.CURATE_ERROR; + } + } + } + //check CGSpace countries + for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) { + if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) { + System.out.println(itemHandle + ": adding 
country code " + country.getAlpha_2()); + try { // we have the field as a string, so we need to split/tokenize it here actually item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2()); diff --git a/src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java b/src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java new file mode 100644 index 0000000..02508ad --- /dev/null +++ b/src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java @@ -0,0 +1,27 @@ +/* +DSpace Curation Tasks +Copyright (C) 2020 Alan Orth + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package org.cgiar.cgspace.ctasks; + +import com.google.gson.annotations.SerializedName; +import java.util.List; + +public class ISO3166CountriesVocabulary extends CountriesVocabulary { + // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly + @SerializedName("3166-1") List countries; +} \ No newline at end of file