From e158e4bc987ca2c6e78054550822402423054302 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Sun, 2 Aug 2020 18:33:32 +0300 Subject: [PATCH] CountryCodeTagger.java: Refactor adding of alpha2 codes We can append the codes we will add to a List of Strings and then actually apply them later in one addMetadata call, and update the item with one item.update() call. This removes duplicated code and is more efficient. Note that when testing this on a collection with thousands of items I realized that it is really important to both limit the cache size and set the database transaction scope to per object/item, or else the task will crash with Java heap errors. For example: $ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -l 500 -s object See: https://wiki.lyrasis.org/display/DSPACE/Curation+Task+Cookbook --- .../cgspace/ctasks/CountryCodeTagger.java | 49 ++++++++----------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java b/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java index 5639d09..c4ca5c6 100644 --- a/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java +++ b/src/main/java/org/cgiar/cgspace/ctasks/CountryCodeTagger.java @@ -126,46 +126,39 @@ public class CountryCodeTagger extends AbstractCurationTask Metadatum[] itemAlpha2CountryCodes = item.getMetadataByMetadataString(config.iso3166Alpha2Field); if (itemAlpha2CountryCodes.length == 0) { - int addedCodeCount = 0; + List newAlpha2Codes = new ArrayList(); for (Metadatum itemCountry : itemCountries) { //check ISO 3166-1 countries for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) { if (itemCountry.value.equalsIgnoreCase(country.getName()) || itemCountry.value.equalsIgnoreCase(country.get_official_name()) || itemCountry.value.equalsIgnoreCase(country.get_common_name())) { - try { - item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], 
iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2()); - item.update(); - - addedCodeCount++; - - alpha2Result.setResult(itemHandle + ": added " + addedCodeCount + " country code(s)"); - alpha2Result.setStatus(Curator.CURATE_SUCCESS); - } catch (SQLException | AuthorizeException sqle) { - config.log.debug(sqle.getMessage()); - alpha2Result.setResult(itemHandle + ": error"); - alpha2Result.setStatus(Curator.CURATE_ERROR); - } + newAlpha2Codes.add(country.getAlpha_2()); } } + //check CGSpace countries for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) { if (itemCountry.value.equalsIgnoreCase(country.getCgspace_name())) { - try { - // we have the field as a string, so we need to split/tokenize it here actually - item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", country.getAlpha_2()); - item.update(); - - addedCodeCount++; - - alpha2Result.setResult(itemHandle + ": added " + addedCodeCount + " country code(s)"); - alpha2Result.setStatus(Curator.CURATE_SUCCESS); - } catch (SQLException | AuthorizeException sqle) { - config.log.debug(sqle.getMessage()); - alpha2Result.setResult(itemHandle + ": error"); - alpha2Result.setStatus(Curator.CURATE_ERROR); - } + newAlpha2Codes.add(country.getAlpha_2()); } } } + + if (newAlpha2Codes.size() > 0) { + try { + // add metadata values (casting the List to an array) + item.addMetadata(iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes.toArray(new String[0])); + item.update(); + } catch (SQLException | AuthorizeException sqle) { + config.log.debug(sqle.getMessage()); + alpha2Result.setResult(itemHandle + ": error"); + alpha2Result.setStatus(Curator.CURATE_ERROR); + } + + alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)"); + } else { + alpha2Result.setResult(itemHandle + ": no matching countries found"); + } + 
alpha2Result.setStatus(Curator.CURATE_SUCCESS); } else { alpha2Result.setResult(itemHandle + ": item has country codes, skipping"); alpha2Result.setStatus(Curator.CURATE_SKIP);