mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2024-11-26 16:48:22 +01:00
Add working tagging of ISO 3166-1 countries
If an item has country metadata (cg.coverage.country) but no alpha_2 codes, we check for name matches in ISO 3166-1 and add the corresponding alpha_2 codes. The name matching is a case-insensitive comparison against each country's ISO 3166-1 name, official name, or common name.
This commit is contained in:
parent
6995d7a864
commit
6477b923b6
@ -19,6 +19,7 @@
|
|||||||
package org.cgiar.cgspace.ctasks;
|
package org.cgiar.cgspace.ctasks;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
import org.dspace.authorize.AuthorizeException;
|
||||||
import org.dspace.content.DSpaceObject;
|
import org.dspace.content.DSpaceObject;
|
||||||
import org.dspace.content.Item;
|
import org.dspace.content.Item;
|
||||||
import org.dspace.content.Metadatum;
|
import org.dspace.content.Metadatum;
|
||||||
@ -30,8 +31,10 @@ import org.dspace.curate.Curator;
|
|||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
public class CountryCodeTagger extends AbstractCurationTask
|
public class CountryCodeTagger extends AbstractCurationTask
|
||||||
{
|
{
|
||||||
@ -46,6 +49,8 @@ public class CountryCodeTagger extends AbstractCurationTask
|
|||||||
|
|
||||||
private List<String> results = new ArrayList<String>();
|
private List<String> results = new ArrayList<String>();
|
||||||
|
|
||||||
|
private static Logger log = Logger.getLogger(CountryCodeTagger.class);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int perform(DSpaceObject dso) throws IOException
|
public int perform(DSpaceObject dso) throws IOException
|
||||||
{
|
{
|
||||||
@ -60,14 +65,12 @@ public class CountryCodeTagger extends AbstractCurationTask
|
|||||||
Item item = (Item)dso;
|
Item item = (Item)dso;
|
||||||
String itemHandle = item.getHandle();
|
String itemHandle = item.getHandle();
|
||||||
|
|
||||||
// Always succeed?
|
|
||||||
status = Curator.CURATE_SUCCESS;
|
|
||||||
|
|
||||||
Metadatum[] itemCountries = item.getMetadataByMetadataString(iso3166Field);
|
Metadatum[] itemCountries = item.getMetadataByMetadataString(iso3166Field);
|
||||||
|
|
||||||
// skip items that don't have country metadata
|
// skip items that don't have country metadata
|
||||||
if (itemCountries.length == 0) {
|
if (itemCountries.length == 0) {
|
||||||
result = itemHandle + ": no countries, skipping.";
|
result = itemHandle + ": no countries, skipping.";
|
||||||
|
status = Curator.CURATE_SKIP;
|
||||||
} else {
|
} else {
|
||||||
Gson gson = new Gson();
|
Gson gson = new Gson();
|
||||||
|
|
||||||
@ -80,20 +83,40 @@ public class CountryCodeTagger extends AbstractCurationTask
|
|||||||
CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
|
CountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CountriesVocabulary.class);
|
||||||
reader.close();
|
reader.close();
|
||||||
|
|
||||||
System.out.println(isocodesCountriesJson.getClass());
|
//System.out.println(itemHandle + ": " + itemCountries.length + " countries possibly need tagging");
|
||||||
System.out.println(cgspaceCountriesJson.getClass());
|
|
||||||
|
|
||||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
|
||||||
System.out.println(country.getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
result = itemHandle + ": " + itemCountries.length + " countries possibly need tagging";
|
|
||||||
|
|
||||||
// check the item's country codes, if any
|
// check the item's country codes, if any
|
||||||
Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(iso3166Alpha2Field);
|
Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(iso3166Alpha2Field);
|
||||||
|
|
||||||
if (itemAlpha2CountryCodes.length == 0) {
|
if (itemAlpha2CountryCodes.length == 0) {
|
||||||
System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
//System.out.println(itemHandle + ": Should add codes for " + itemCountries.length + " countries.");
|
||||||
|
|
||||||
|
Integer addedCodeCount = 0;
|
||||||
|
for (Metadatum itemCountry : itemCountries) {
|
||||||
|
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||||
|
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.getOfficialName()) || itemCountry.value.equalsIgnoreCase(country.getCommonName())) {
|
||||||
|
System.out.println(itemHandle + ": adding country code " + country.getAlpha_2());
|
||||||
|
|
||||||
|
try {
|
||||||
|
// we have the field as a string, so we need to split/tokenize it here actually
|
||||||
|
item.addMetadata("cg", "coverage", "iso3166-alpha2", "en_US", country.getAlpha_2());
|
||||||
|
item.update();
|
||||||
|
|
||||||
|
addedCodeCount++;
|
||||||
|
|
||||||
|
result = itemHandle + ": added " + addedCodeCount + " country code(s)";
|
||||||
|
status = Curator.CURATE_SUCCESS;
|
||||||
|
} catch (SQLException | AuthorizeException sqle) {
|
||||||
|
log.debug(sqle.getMessage());
|
||||||
|
result = itemHandle + ": error";
|
||||||
|
status = Curator.CURATE_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result = itemHandle + ": oh snap, we have countries and codes... not sure what to do";
|
||||||
|
status = Curator.CURATE_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user