mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2024-11-22 23:05:12 +01:00
src/main/java: Refactor vocabularies classes
We can't use the same class to map both the ISO 3166-1 and CGSpace country vocabularies because our Gson is old and lacks support for the "alternate" value in the @SerializedName annotation (added in Gson 2.5). So it's better to create multiple classes that extend the base one instead of writing a custom deserializer. Each extended class then uses its own @SerializedName.
This commit is contained in:
parent
6891c93eeb
commit
409eb3bd02
@ -0,0 +1,25 @@
|
|||||||
|
/*
|
||||||
|
DSpace Curation Tasks
|
||||||
|
Copyright (C) 2020 Alan Orth
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.cgiar.cgspace.ctasks;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class CGSpaceCountriesVocabulary extends CountriesVocabulary {
|
||||||
|
List<Country> countries;
|
||||||
|
}
|
@ -19,12 +19,8 @@
|
|||||||
package org.cgiar.cgspace.ctasks;
|
package org.cgiar.cgspace.ctasks;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.util.List;
|
|
||||||
import com.google.gson.annotations.SerializedName;
|
|
||||||
|
|
||||||
public class CountriesVocabulary {
|
public class CountriesVocabulary {
|
||||||
// support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
|
|
||||||
@SerializedName("3166-1") List<Country> countries;
|
|
||||||
|
|
||||||
class Country {
|
class Country {
|
||||||
private String name; //required
|
private String name; //required
|
||||||
|
@ -76,11 +76,11 @@ public class CountryCodeTagger extends AbstractCurationTask
|
|||||||
|
|
||||||
// TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
// TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
||||||
BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath)));
|
BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath)));
|
||||||
CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
|
ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath)));
|
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath)));
|
||||||
CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
|
CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
|
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
|
||||||
@ -91,12 +91,40 @@ public class CountryCodeTagger extends AbstractCurationTask
|
|||||||
if (itemAlpha2CountryCodes.length == 0) {
|
if (itemAlpha2CountryCodes.length == 0) {
|
||||||
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
||||||
|
|
||||||
|
// split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
|
||||||
|
String[] iso3166Alpha2FieldParts = iso3166Alpha2Field.split("\\.");
|
||||||
|
System.out.println("schema:" + iso3166Alpha2FieldParts[0]);
|
||||||
|
System.out.println("element:" + iso3166Alpha2FieldParts[1]);
|
||||||
|
System.out.println("qualifier:" + iso3166Alpha2FieldParts[2]);
|
||||||
|
|
||||||
Integer addedCodeCount = 0;
|
Integer addedCodeCount = 0;
|
||||||
for (Metadatum itemCountry : itemCountries) {
|
for (Metadatum itemCountry : itemCountries) {
|
||||||
|
//check ISO 3166-1 countries
|
||||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||||
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
|
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
|
||||||
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
||||||
|
|
||||||
|
try {
|
||||||
|
// we have the field as a string, so we need to split/tokenize it here actually
|
||||||
|
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
|
||||||
|
item.update();
|
||||||
|
|
||||||
|
addedCodeCount++;
|
||||||
|
|
||||||
|
result = itemHandle + ": added " + addedCodeCount + " country code(s)";
|
||||||
|
status = Curator.CURATE_SUCCESS;
|
||||||
|
} catch (SQLException | AuthorizeException sqle) {
|
||||||
|
log.debug(sqle.getMessage());
|
||||||
|
result = itemHandle + ": error";
|
||||||
|
status = Curator.CURATE_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//check CGSpace countries
|
||||||
|
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
|
||||||
|
if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
|
||||||
|
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// we have the field as a string, so we need to split/tokenize it here actually
|
// we have the field as a string, so we need to split/tokenize it here actually
|
||||||
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
|
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
|
||||||
|
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
DSpace Curation Tasks
|
||||||
|
Copyright (C) 2020 Alan Orth
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.cgiar.cgspace.ctasks;
|
||||||
|
|
||||||
|
import com.google.gson.annotations.SerializedName;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ISO3166CountriesVocabulary extends CountriesVocabulary {
|
||||||
|
// support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
|
||||||
|
@SerializedName("3166-1") List<Country> countries;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user