mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2024-11-23 07:10:19 +01:00
src/main/java: Refactor CountryCodeTagger.java
It is now much more modular and can easily and cleanly be extended to handle ISO 3166-1 alpha-3, numeric, etc.
This commit is contained in:
parent
a6d3653c9e
commit
e5d45e62be
@ -37,121 +37,149 @@ import java.util.List;
|
|||||||
|
|
||||||
public class CountryCodeTagger extends AbstractCurationTask
|
public class CountryCodeTagger extends AbstractCurationTask
|
||||||
{
|
{
|
||||||
private int status = Curator.CURATE_UNSET;
|
public class CountryCodeTaggerConfig {
|
||||||
private String result = null;
|
private final String isocodesJsonPath = "/org/cgiar/cgspace/ctasks/iso_3166-1.json";
|
||||||
|
private final String cgspaceCountriesJsonPath = "/org/cgiar/cgspace/ctasks/cgspace-countries.json";
|
||||||
|
private final String iso3166Field = taskProperty("iso3166.field");
|
||||||
|
private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
|
||||||
|
private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
|
||||||
|
|
||||||
private static String isocodesJsonPath;
|
private List<String> results = new ArrayList<String>();
|
||||||
private static String cgspaceCountriesJsonPath;
|
|
||||||
private static String iso3166Field;
|
|
||||||
private static String iso3166Alpha2Field;
|
|
||||||
private static boolean forceupdate;
|
|
||||||
|
|
||||||
private List<String> results = new ArrayList<String>();
|
private Logger log = Logger.getLogger(CountryCodeTagger.class);
|
||||||
|
}
|
||||||
|
|
||||||
private static Logger log = Logger.getLogger(CountryCodeTagger.class);
|
public class CountryCodeTaggerResult {
|
||||||
|
private int status = Curator.CURATE_UNSET;
|
||||||
|
private String result = null;
|
||||||
|
|
||||||
|
public int getStatus() {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStatus(int status) {
|
||||||
|
this.status = status;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResult() {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResult(String result) {
|
||||||
|
this.result = result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int perform(DSpaceObject dso) throws IOException
|
public int perform(DSpaceObject dso) throws IOException
|
||||||
{
|
{
|
||||||
// Load configuration
|
// gotta define this here so we can access it after the if context...
|
||||||
isocodesJsonPath = "/org/cgiar/cgspace/ctasks/iso_3166-1.json";
|
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
|
||||||
cgspaceCountriesJsonPath = "/org/cgiar/cgspace/ctasks/cgspace-countries.json";
|
|
||||||
iso3166Field = taskProperty("iso3166.field");
|
|
||||||
iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
|
|
||||||
forceupdate = taskBooleanProperty("forceupdate", false);
|
|
||||||
|
|
||||||
if (dso.getType() == Constants.ITEM)
|
if (dso.getType() == Constants.ITEM)
|
||||||
{
|
{
|
||||||
|
// Load configuration
|
||||||
|
CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
|
||||||
|
|
||||||
Item item = (Item)dso;
|
Item item = (Item)dso;
|
||||||
String itemHandle = item.getHandle();
|
|
||||||
|
|
||||||
Metadatum[] itemCountries = item.getMetadataByMetadataString(iso3166Field);
|
alpha2Result = performAlpha2(item, config);
|
||||||
|
|
||||||
// skip items that don't have country metadata
|
setResult(alpha2Result.getResult());
|
||||||
if (itemCountries.length == 0) {
|
report(alpha2Result.getResult());
|
||||||
result = itemHandle + ": no countries, skipping.";
|
}
|
||||||
status = Curator.CURATE_SKIP;
|
|
||||||
} else {
|
|
||||||
Gson gson = new Gson();
|
|
||||||
|
|
||||||
// TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
return alpha2Result.getStatus();
|
||||||
BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(isocodesJsonPath)));
|
}
|
||||||
ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
|
|
||||||
reader.close();
|
|
||||||
|
|
||||||
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath)));
|
public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException
|
||||||
CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
|
{
|
||||||
reader.close();
|
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
|
||||||
|
String itemHandle = item.getHandle();
|
||||||
|
|
||||||
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
|
Metadatum[] itemCountries = item.getMetadataByMetadataString(config.iso3166Field);
|
||||||
|
|
||||||
// split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
|
// skip items that don't have country metadata
|
||||||
String[] iso3166Alpha2FieldParts = iso3166Alpha2Field.split("\\.");
|
if (itemCountries.length == 0) {
|
||||||
|
alpha2Result.setResult(itemHandle + ": no countries, skipping.");
|
||||||
|
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||||
|
} else {
|
||||||
|
Gson gson = new Gson();
|
||||||
|
|
||||||
if (forceupdate) {
|
// TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
||||||
item.clearMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
|
BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.isocodesJsonPath)));
|
||||||
}
|
ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
|
||||||
|
reader.close();
|
||||||
|
|
||||||
// check the item's country codes, if any
|
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.cgspaceCountriesJsonPath)));
|
||||||
Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(iso3166Alpha2Field);
|
CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
|
||||||
|
reader.close();
|
||||||
|
|
||||||
if (itemAlpha2CountryCodes.length == 0) {
|
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
|
||||||
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
|
||||||
|
|
||||||
Integer addedCodeCount = 0;
|
// split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
|
||||||
for (Metadatum itemCountry : itemCountries) {
|
String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
|
||||||
//check ISO 3166-1 countries
|
|
||||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
|
||||||
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
|
|
||||||
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
|
||||||
|
|
||||||
try {
|
if (config.forceupdate) {
|
||||||
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2());
|
item.clearMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
|
||||||
item.update();
|
}
|
||||||
|
|
||||||
addedCodeCount++;
|
// check the item's country codes, if any
|
||||||
|
Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field);
|
||||||
|
|
||||||
result = itemHandle + ": added " + addedCodeCount + " country code(s)";
|
if (itemAlpha2CountryCodes.length == 0) {
|
||||||
status = Curator.CURATE_SUCCESS;
|
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
||||||
} catch (SQLException | AuthorizeException sqle) {
|
|
||||||
log.debug(sqle.getMessage());
|
|
||||||
result = itemHandle + ": error";
|
|
||||||
status = Curator.CURATE_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//check CGSpace countries
|
|
||||||
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
|
|
||||||
if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
|
|
||||||
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
|
||||||
|
|
||||||
try {
|
int addedCodeCount = 0;
|
||||||
// we have the field as a string, so we need to split/tokenize it here actually
|
for (Metadatum itemCountry : itemCountries) {
|
||||||
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2());
|
//check ISO 3166-1 countries
|
||||||
item.update();
|
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||||
|
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
|
||||||
|
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
||||||
|
|
||||||
addedCodeCount++;
|
try {
|
||||||
|
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2());
|
||||||
|
item.update();
|
||||||
|
|
||||||
result = itemHandle + ": added " + addedCodeCount + " country code(s)";
|
addedCodeCount++;
|
||||||
status = Curator.CURATE_SUCCESS;
|
|
||||||
} catch (SQLException | AuthorizeException sqle) {
|
alpha2Result.setResult(itemHandle + ": added " + addedCodeCount + " country code(s)");
|
||||||
log.debug(sqle.getMessage());
|
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||||
result = itemHandle + ": error";
|
} catch (SQLException | AuthorizeException sqle) {
|
||||||
status = Curator.CURATE_ERROR;
|
config.log.debug(sqle.getMessage());
|
||||||
}
|
alpha2Result.setResult(itemHandle + ": error");
|
||||||
|
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//check CGSpace countries
|
||||||
|
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
|
||||||
|
if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
|
||||||
|
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
||||||
|
|
||||||
|
try {
|
||||||
|
// we have the field as a string, so we need to split/tokenize it here actually
|
||||||
|
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2());
|
||||||
|
item.update();
|
||||||
|
|
||||||
|
addedCodeCount++;
|
||||||
|
|
||||||
|
alpha2Result.setResult(itemHandle + ": added " + addedCodeCount + " country code(s)");
|
||||||
|
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||||
|
} catch (SQLException | AuthorizeException sqle) {
|
||||||
|
config.log.debug(sqle.getMessage());
|
||||||
|
alpha2Result.setResult(itemHandle + ": error");
|
||||||
|
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
result = itemHandle + ": item has country codes, skipping";
|
|
||||||
status = Curator.CURATE_SKIP;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
alpha2Result.setResult(itemHandle + ": item has country codes, skipping");
|
||||||
|
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
setResult(result);
|
return alpha2Result;
|
||||||
report(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
return status;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user