src/main/java: Refactor vocabularies classes

We can't use the same class to map the ISO 3166-1 and CGSpace country
vocabularies because our Gson is old and lacks support for the
"alternate" value in its annotations (added in Gson 2.5). So it's
better to create multiple classes that extend the base one instead
of creating a custom deserializer. Each extended class then uses
its own SerializedName.
This commit is contained in:
Alan Orth 2020-08-01 20:53:59 +03:00
parent 6891c93eeb
commit 409eb3bd02
Signed by: alanorth
GPG Key ID: 0FB860CC9C45B1B9
4 changed files with 82 additions and 6 deletions

View File

@ -0,0 +1,25 @@
/*
DSpace Curation Tasks
Copyright (C) 2020 Alan Orth
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.cgiar.cgspace.ctasks;
import java.util.List;
/**
 * Gson mapping class for the CGSpace countries vocabulary JSON.
 *
 * <p>Extends {@link CountriesVocabulary} so the nested {@code Country} type can be
 * reused. {@code CountryCodeTagger} deserializes the CGSpace countries resource
 * into this class and then iterates over {@link #countries}.
 */
public class CGSpaceCountriesVocabulary extends CountriesVocabulary {
// No @SerializedName here, so the field name itself is what Gson matches against
// the JSON (presumably a top-level "countries" key; confirm against the JSON file).
// Kept package-private because CountryCodeTagger accesses the field directly.
List<Country> countries;
}

View File

@ -19,12 +19,8 @@
package org.cgiar.cgspace.ctasks; package org.cgiar.cgspace.ctasks;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.List;
import com.google.gson.annotations.SerializedName;
public class CountriesVocabulary { public class CountriesVocabulary {
// support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
@SerializedName("3166-1") List<Country> countries;
class Country { class Country {
private String name; //required private String name; //required

View File

@ -76,11 +76,11 @@ public class CountryCodeTagger extends AbstractCurationTask
// TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath))); BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath)));
CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, CountriesVocabulary.class); ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
reader.close(); reader.close();
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath))); reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath)));
CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class); CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
reader.close(); reader.close();
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging"); //System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
@ -91,12 +91,40 @@ public class CountryCodeTagger extends AbstractCurationTask
if (itemAlpha2CountryCodes.length == 0) { if (itemAlpha2CountryCodes.length == 0) {
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries."); //System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
// split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
String[] iso3166Alpha2FieldParts = iso3166Alpha2Field.split("\\.");
System.out.println("schema:" + iso3166Alpha2FieldParts[0]);
System.out.println("element:" + iso3166Alpha2FieldParts[1]);
System.out.println("qualifier:" + iso3166Alpha2FieldParts[2]);
Integer addedCodeCount = 0; Integer addedCodeCount = 0;
for (Metadatum itemCountry : itemCountries) { for (Metadatum itemCountry : itemCountries) {
//check ISO 3166-1 countries
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) { for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) { if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2()); System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
try {
// we have the field as a string, so we need to split/tokenize it here actually
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
item.update();
addedCodeCount++;
result = itemHandle + ": added " + addedCodeCount + " country code(s)";
status = Curator.CURATE_SUCCESS;
} catch (SQLException | AuthorizeException sqle) {
log.debug(sqle.getMessage());
result = itemHandle + ": error";
status = Curator.CURATE_ERROR;
}
}
}
//check CGSpace countries
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
try { try {
// we have the field as a string, so we need to split/tokenize it here actually // we have the field as a string, so we need to split/tokenize it here actually
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2()); item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());

View File

@ -0,0 +1,27 @@
/*
DSpace Curation Tasks
Copyright (C) 2020 Alan Orth
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.cgiar.cgspace.ctasks;
import com.google.gson.annotations.SerializedName;
import java.util.List;
/**
 * Gson mapping class for the ISO 3166-1 countries JSON (iso_3166-1.json).
 *
 * <p>Extends {@link CountriesVocabulary} so the nested {@code Country} type can be
 * reused. {@code CountryCodeTagger} deserializes the iso-codes resource into this
 * class and then iterates over {@link #countries}.
 */
public class ISO3166CountriesVocabulary extends CountriesVocabulary {
// Support reading iso_3166-1.json from Debian's iso-codes package. The JSON key is
// "3166-1", which is not a legal Java identifier, so @SerializedName maps it onto
// this field; the mapping has to match the JSON exactly.
// Kept package-private because CountryCodeTagger accesses the field directly.
@SerializedName("3166-1") List<Country> countries;
}