mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2024-11-26 16:48:22 +01:00
CountryCodeTagger.java: Refactor adding of alpha2 codes
We can append the codes we will add to a List of Strings, then apply them all later in a single addMetadata call and update the item with a single item.update() call. This removes duplicated code and is more efficient. Note that when testing this on a collection with thousands of items I realized that it is really important both to limit the cache size and to set the database transaction scope to per object/item, or else the task will crash due to Java heap exhaustion. For example: $ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -l 500 -s object See: https://wiki.lyrasis.org/display/DSPACE/Curation+Task+Cookbook
This commit is contained in:
parent
1c866bdf64
commit
e158e4bc98
@ -126,46 +126,39 @@ public class CountryCodeTagger extends AbstractCurationTask
|
||||
Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field);
|
||||
|
||||
if (itemAlpha2CountryCodes.length == 0) {
|
||||
int addedCodeCount = 0;
|
||||
List<String> newAlpha2Codes = new ArrayList<String>();
|
||||
for (Metadatum itemCountry : itemCountries) {
|
||||
//check ISO 3166-1 countries
|
||||
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
|
||||
if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) {
|
||||
try {
|
||||
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2());
|
||||
item.update();
|
||||
newAlpha2Codes.add(country.getAlpha_2());
|
||||
}
|
||||
}
|
||||
|
||||
addedCodeCount++;
|
||||
|
||||
alpha2Result.setResult(itemHandle + ": added " + addedCodeCount + " country code(s)");
|
||||
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||
} catch (SQLException | AuthorizeException sqle) {
|
||||
config.log.debug(sqle.getMessage());
|
||||
alpha2Result.setResult(itemHandle + ": error");
|
||||
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
//check CGSpace countries
|
||||
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
|
||||
if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) {
|
||||
newAlpha2Codes.add(country.getAlpha_2());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (newAlpha2Codes.size() > 0) {
|
||||
try {
|
||||
// we have the field as a string, so we need to split/tokenize it here actually
|
||||
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2());
|
||||
// add metadata values (casting the List<String> to an array)
|
||||
item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes.toArray(new String[0]));
|
||||
item.update();
|
||||
|
||||
addedCodeCount++;
|
||||
|
||||
alpha2Result.setResult(itemHandle + ": added " + addedCodeCount + " country code(s)");
|
||||
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||
} catch (SQLException | AuthorizeException sqle) {
|
||||
config.log.debug(sqle.getMessage());
|
||||
alpha2Result.setResult(itemHandle + ": error");
|
||||
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||
}
|
||||
|
||||
alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)");
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": no matching countries found");
|
||||
}
|
||||
}
|
||||
}
|
||||
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": item has country codes, skipping");
|
||||
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||
|
Loading…
Reference in New Issue
Block a user