Split the countries vocabulary into one class per JSON source.

* CountriesVocabulary no longer declares the `countries` list (nor the
  imports it needed); it keeps the nested `Country` class.
* ISO3166CountriesVocabulary (new) extends CountriesVocabulary and declares
  `countries` with @SerializedName("3166-1"), for iso_3166-1.json from
  Debian's iso-codes package.
* CGSpaceCountriesVocabulary (new) extends CountriesVocabulary and declares
  a plain `countries` list.
* CountryCodeTagger deserializes each JSON resource into the matching
  subclass and, besides the ISO 3166-1 name checks, now also compares each
  item country against `getCgspace_name()` from the CGSpace list.

NOTE(review): the alpha2 field is split into schema/element/qualifier and
printed, but both item.addMetadata() calls still pass the hard-coded
"cg", "coverage", "iso3166-alpha2" values.

diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java b/src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java
new file mode 100644
index 0000000..f7b8d77
--- /dev/null
+++ b/src/main/java/org/cgiar/cgspace/ctasks/CGSpaceCountriesVocabulary.java
@@ -0,0 +1,25 @@
+/*
+DSpace Curation Tasks
+Copyright (C) 2020 Alan Orth
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+package org.cgiar.cgspace.ctasks;
+
+import java.util.List;
+
+public class CGSpaceCountriesVocabulary extends CountriesVocabulary {
+    List<Country> countries;
+}
\ No newline at end of file
diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java b/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java
index bb4d72d..cb1afdf 100644
--- a/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java
+++ b/src/main/java/org/cgiar/cgspace/ctasks/CountriesVocabulary.java
@@ -19,12 +19,8 @@
 package org.cgiar.cgspace.ctasks;
 
 import javax.annotation.Nullable;
-import java.util.List;
-import com.google.gson.annotations.SerializedName;
 
 public class CountriesVocabulary {
-    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
-    @SerializedName("3166-1") List<Country> countries;
 
     class Country {
         private String name; //required
diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java b/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java
index a9880ae..6787c73 100644
--- a/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java
+++ b/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java
@@ -76,11 +76,11 @@ public class CountryCodeTagger extends AbstractCurationTask
 
             // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
             BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath)));
-            CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
+            ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
             reader.close();
 
             reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath)));
-            CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
+            CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
             reader.close();
 
             //System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
@@ -91,12 +91,40 @@ public class CountryCodeTagger extends AbstractCurationTask
             if (itemAlpha2CountryCodes.length == 0) {
                 //System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
 
+                // split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
+                String[] iso3166Alpha2FieldParts = iso3166Alpha2Field.split("\\.");
+                System.out.println("schema:" + iso3166Alpha2FieldParts[0]);
+                System.out.println("element:" + iso3166Alpha2FieldParts[1]);
+                System.out.println("qualifier:" + iso3166Alpha2FieldParts[2]);
+
                 Integer addedCodeCount = 0;
 
                 for (Metadatum itemCountry : itemCountries) {
+                    //check ISO 3166-1 countries
                     for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
                         if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
                             System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
+                            try {
+                                // we have the field as a string, so we need to split/tokenize it here actually
+                                item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
+                                item.update();
+
+                                addedCodeCount++;
+
+                                result = itemHandle + ": added " + addedCodeCount + " country code(s)";
+                                status = Curator.CURATE_SUCCESS;
+                            } catch (SQLException | AuthorizeException sqle) {
+                                log.debug(sqle.getMessage());
+                                result = itemHandle + ": error";
+                                status = Curator.CURATE_ERROR;
+                            }
+                        }
+                    }
+                    //check CGSpace countries
+                    for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
+                        if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
+                            System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
+
                             try {
                                 // we have the field as a string, so we need to split/tokenize it here actually
                                 item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
diff --git a/src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java b/src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java
new file mode 100644
index 0000000..02508ad
--- /dev/null
+++ b/src/main/java/org/cgiar/cgspace/ctasks/ISO3166CountriesVocabulary.java
@@ -0,0 +1,27 @@
+/*
+DSpace Curation Tasks
+Copyright (C) 2020 Alan Orth
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+package org.cgiar.cgspace.ctasks;
+
+import com.google.gson.annotations.SerializedName;
+import java.util.List;
+
+public class ISO3166CountriesVocabulary extends CountriesVocabulary {
+    // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
+    @SerializedName("3166-1") List<Country> countries;
+}
\ No newline at end of file