mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2025-01-08 21:24:57 +01:00
Add working tagging of ISO 3166-1 countries
If an item has country metadata (cg.coverage.country) and no alpha codes we check for name matches in ISO 3166 and add alpha_2 codes. The name matching checks for a case-insensitive match on either an ISO 3166-1 name, official name, or common name.
This commit is contained in:
parent
6995d7a864
commit
6477b923b6
@ -19,6 +19,7 @@
|
||||
package org.cgiar.cgspace.ctasks;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import org.dspace.authorize.AuthorizeException;
|
||||
import org.dspace.content.DSpaceObject;
|
||||
import org.dspace.content.Item;
|
||||
import org.dspace.content.Metadatum;
|
||||
@ -30,8 +31,10 @@ import org.dspace.curate.Curator;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
public class CountryCodeTagger extends AbstractCurationTask
|
||||
{
|
||||
@ -46,6 +49,8 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
|
||||
private List<String> results = new ArrayList<String>();
|
||||
|
||||
private static Logger log = Logger.getLogger(CountryCodeTagger.class);
|
||||
|
||||
@Override
|
||||
public int perform(DSpaceObject dso) throws IOException
|
||||
{
|
||||
@ -60,14 +65,12 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
Item item = (Item)dso;
|
||||
String itemHandle = item.getHandle();
|
||||
|
||||
// Always succeed?
|
||||
status = Curator.CURATE_SUCCESS;
|
||||
|
||||
Metadatum[] itemCountries = item.getMetadataByMetadataString(iso3166Field);
|
||||
|
||||
// skip items that don't have country metadata
|
||||
if (itemCountries.length == 0) {
|
||||
result = itemHandle + ": no countries, skipping.";
|
||||
status = Curator.CURATE_SKIP;
|
||||
} else {
|
||||
Gson gson = new Gson();
|
||||
|
||||
@ -79,21 +82,41 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(cgspaceCountriesJsonPath)));
|
||||
CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
|
||||
reader.close();
|
||||
|
||||
System.out.println(isocodesCountriesJson.getClass());
|
||||
System.out.println(cgspaceCountriesJson.getClass());
|
||||
|
||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||
System.out.println(country.getName());
|
||||
}
|
||||
|
||||
result = itemHandle + ": " + itemCountries.length + " countries possibly need tagging";
|
||||
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
|
||||
|
||||
// check the item's country codes, if any
|
||||
Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(iso3166Alpha2Field);
|
||||
|
||||
if (itemAlpha2CountryCodes.length == 0) {
|
||||
System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
||||
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
||||
|
||||
Integer addedCodeCount = 0;
|
||||
for (Metadatum itemCountry : itemCountries) {
|
||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.getOfficialName()) || itemCountry.value.equalsIgnoreCase(country.getCommonName())) {
|
||||
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
||||
|
||||
try {
|
||||
// we have the field as a string, so we need to split/tokenize it here actually
|
||||
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
|
||||
item.update();
|
||||
|
||||
addedCodeCount++;
|
||||
|
||||
result = itemHandle + ": added " + addedCodeCount + " country code(s)";
|
||||
status = Curator.CURATE_SUCCESS;
|
||||
} catch (SQLException | AuthorizeException sqle) {
|
||||
log.debug(sqle.getMessage());
|
||||
result = itemHandle + ": error";
|
||||
status = Curator.CURATE_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result = itemHandle + ": oh snap, we have countries and codes... not sure what to do";
|
||||
status = Curator.CURATE_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,4 +126,4 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
|
||||
return status;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user