Compare commits

..

No commits in common. "b396fba04342d5cb416e412692f2d5ca9358edc8" and "f0754ab4193dc3809bb0210de2c38188814b563a" have entirely different histories.

6 changed files with 178 additions and 252 deletions

View File

@ -1,8 +1,8 @@
/* /*
* Copyright (C) 2020 Alan Orth * Copyright (C) 2020 Alan Orth
* *
* SPDX-License-Identifier: GPL-3.0-or-later * SPDX-License-Identifier: GPL-3.0-or-later
*/ */
package io.github.ilri.cgspace.ctasks; package io.github.ilri.cgspace.ctasks;

View File

@ -1,8 +1,8 @@
/* /*
* Copyright (C) 2020 Alan Orth * Copyright (C) 2020 Alan Orth
* *
* SPDX-License-Identifier: GPL-3.0-or-later * SPDX-License-Identifier: GPL-3.0-or-later
*/ */
package io.github.ilri.cgspace.ctasks; package io.github.ilri.cgspace.ctasks;
@ -11,29 +11,26 @@ import javax.annotation.Nullable;
public class CountriesVocabulary { public class CountriesVocabulary {
class Country { class Country {
private String name; // required private String name; //required
private String common_name; // optional private String common_name; //optional
private String official_name; // optional private String official_name; //optional
private String cgspace_name; // optional private String cgspace_name; //optional
private String numeric; // required Hmmmm need to cast this... private String numeric; //required Hmmmm need to cast this...
private String alpha_2; // required private String alpha_2; //required
private String alpha_3; // required private String alpha_3; //required
public Country( public Country(String name,
String name, @Nullable String common_name,
@Nullable String common_name, @Nullable String official_name,
@Nullable String official_name, @Nullable String cgspace_name,
@Nullable String cgspace_name, String numeric,
String numeric, String alpha_2,
String alpha_2, String alpha_3) {
String alpha_3) {
this.name = name; this.name = name;
this.common_name = common_name; this.common_name = common_name;
this.official_name = official_name; this.official_name = official_name;
this.cgspace_name = cgspace_name; this.cgspace_name = cgspace_name;
this.numeric = this.numeric = numeric; // fuuuuu this is a string and we can't cast to Integer because some values are zeropadded like "004"
numeric; // fuuuuu this is a string and we can't cast to Integer because some
// values are zeropadded like "004"
this.alpha_2 = alpha_2; this.alpha_2 = alpha_2;
this.alpha_3 = alpha_3; this.alpha_3 = alpha_3;
} }

View File

@ -1,13 +1,12 @@
/* /*
* Copyright (C) 2020 Alan Orth * Copyright (C) 2020 Alan Orth
* *
* SPDX-License-Identifier: GPL-3.0-or-later * SPDX-License-Identifier: GPL-3.0-or-later
*/ */
package io.github.ilri.cgspace.ctasks; package io.github.ilri.cgspace.ctasks;
import com.google.gson.Gson; import com.google.gson.Gson;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject; import org.dspace.content.DSpaceObject;
@ -24,11 +23,11 @@ import java.sql.SQLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
public class CountryCodeTagger extends AbstractCurationTask { public class CountryCodeTagger extends AbstractCurationTask
{
public class CountryCodeTaggerConfig { public class CountryCodeTaggerConfig {
private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json"; private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
private final String cgspaceCountriesJsonPath = private final String cgspaceCountriesJsonPath = "/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
"/io/github/ilri/cgspace/ctasks/cgspace-countries.json";
private final String iso3166Field = taskProperty("iso3166.field"); private final String iso3166Field = taskProperty("iso3166.field");
private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field"); private final String iso3166Alpha2Field = taskProperty("iso3166-alpha2.field");
private final boolean forceupdate = taskBooleanProperty("forceupdate", false); private final boolean forceupdate = taskBooleanProperty("forceupdate", false);
@ -58,15 +57,17 @@ public class CountryCodeTagger extends AbstractCurationTask {
} }
@Override @Override
public int perform(DSpaceObject dso) throws IOException { public int perform(DSpaceObject dso) throws IOException
{
// gotta define this here so we can access it after the if context... // gotta define this here so we can access it after the if context...
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult(); CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
if (dso.getType() == Constants.ITEM) { if (dso.getType() == Constants.ITEM)
{
// Load configuration // Load configuration
CountryCodeTaggerConfig config = new CountryCodeTaggerConfig(); CountryCodeTaggerConfig config = new CountryCodeTaggerConfig();
Item item = (Item) dso; Item item = (Item)dso;
try { try {
alpha2Result = performAlpha2(item, config); alpha2Result = performAlpha2(item, config);
@ -76,18 +77,16 @@ public class CountryCodeTagger extends AbstractCurationTask {
setResult(alpha2Result.getResult()); setResult(alpha2Result.getResult());
report(alpha2Result.getResult()); report(alpha2Result.getResult());
} }
return alpha2Result.getStatus(); return alpha2Result.getStatus();
} }
public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config) throws IOException, SQLException {
throws IOException, SQLException {
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult(); CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
String itemHandle = item.getHandle(); String itemHandle = item.getHandle();
List<MetadataValue> itemCountries = List<MetadataValue> itemCountries = itemService.getMetadataByMetadataString(item, config.iso3166Field);
itemService.getMetadataByMetadataString(item, config.iso3166Field);
// skip items that don't have country metadata // skip items that don't have country metadata
if (itemCountries.size() == 0) { if (itemCountries.size() == 0) {
@ -96,60 +95,36 @@ public class CountryCodeTagger extends AbstractCurationTask {
} else { } else {
Gson gson = new Gson(); Gson gson = new Gson();
// TODO: convert to try: // TODO: convert to try: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
// https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html BufferedReader reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.isocodesJsonPath)));
BufferedReader reader = ISO3166CountriesVocabulary isocodesCountriesJson = gson.fromJson(reader, ISO3166CountriesVocabulary.class);
new BufferedReader(
new InputStreamReader(
this.getClass().getResourceAsStream(config.isocodesJsonPath)));
ISO3166CountriesVocabulary isocodesCountriesJson =
gson.fromJson(reader, ISO3166CountriesVocabulary.class);
reader.close(); reader.close();
reader = reader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(config.cgspaceCountriesJsonPath)));
new BufferedReader( CGSpaceCountriesVocabulary cgspaceCountriesJson = gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
new InputStreamReader(
this.getClass()
.getResourceAsStream(config.cgspaceCountriesJsonPath)));
CGSpaceCountriesVocabulary cgspaceCountriesJson =
gson.fromJson(reader, CGSpaceCountriesVocabulary.class);
reader.close(); reader.close();
// split the alpha2 country code field into schema, element, and qualifier so we can use // split the alpha2 country code field into schema, element, and qualifier so we can use it with item.addMetadata()
// it with item.addMetadata()
String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\."); String[] iso3166Alpha2FieldParts = config.iso3166Alpha2Field.split("\\.");
if (config.forceupdate) { if (config.forceupdate) {
itemService.clearMetadata( itemService.clearMetadata(Curator.curationContext(), item, iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], Item.ANY);
Curator.curationContext(),
item,
iso3166Alpha2FieldParts[0],
iso3166Alpha2FieldParts[1],
iso3166Alpha2FieldParts[2],
Item.ANY);
} }
// check the item's country codes, if any // check the item's country codes, if any
List<MetadataValue> itemAlpha2CountryCodes = List<MetadataValue> itemAlpha2CountryCodes = itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
itemService.getMetadataByMetadataString(item, config.iso3166Alpha2Field);
if (itemAlpha2CountryCodes.size() == 0) { if (itemAlpha2CountryCodes.size() == 0) {
List<String> newAlpha2Codes = new ArrayList<String>(); List<String> newAlpha2Codes = new ArrayList<String>();
for (MetadataValue itemCountry : itemCountries) { for (MetadataValue itemCountry : itemCountries) {
// check ISO 3166-1 countries //check ISO 3166-1 countries
for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) { for (CountriesVocabulary.Country country : isocodesCountriesJson.countries) {
if (itemCountry.getValue().equalsIgnoreCase(country.getName()) if (itemCountry.getValue().equalsIgnoreCase(country.getName()) || itemCountry.getValue().equalsIgnoreCase(country.get_official_name()) || itemCountry.getValue().equalsIgnoreCase(country.get_common_name())) {
|| itemCountry
.getValue()
.equalsIgnoreCase(country.get_official_name())
|| itemCountry
.getValue()
.equalsIgnoreCase(country.get_common_name())) {
newAlpha2Codes.add(country.getAlpha_2()); newAlpha2Codes.add(country.getAlpha_2());
} }
} }
// check CGSpace countries //check CGSpace countries
for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) { for (CountriesVocabulary.Country country : cgspaceCountriesJson.countries) {
if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) { if (itemCountry.getValue().equalsIgnoreCase(country.getCgspace_name())) {
newAlpha2Codes.add(country.getAlpha_2()); newAlpha2Codes.add(country.getAlpha_2());
@ -159,14 +134,7 @@ public class CountryCodeTagger extends AbstractCurationTask {
if (newAlpha2Codes.size() > 0) { if (newAlpha2Codes.size() > 0) {
try { try {
itemService.addMetadata( itemService.addMetadata(Curator.curationContext(), item, iso3166Alpha2FieldParts[0], iso3166Alpha2FieldParts[1], iso3166Alpha2FieldParts[2], "en_US", newAlpha2Codes);
Curator.curationContext(),
item,
iso3166Alpha2FieldParts[0],
iso3166Alpha2FieldParts[1],
iso3166Alpha2FieldParts[2],
"en_US",
newAlpha2Codes);
itemService.update(Curator.curationContext(), item); itemService.update(Curator.curationContext(), item);
} catch (SQLException | AuthorizeException sqle) { } catch (SQLException | AuthorizeException sqle) {
config.log.debug(sqle.getMessage()); config.log.debug(sqle.getMessage());
@ -174,11 +142,7 @@ public class CountryCodeTagger extends AbstractCurationTask {
alpha2Result.setStatus(Curator.CURATE_ERROR); alpha2Result.setStatus(Curator.CURATE_ERROR);
} }
alpha2Result.setResult( alpha2Result.setResult(itemHandle + ": added " + newAlpha2Codes.size() + " alpha2 country code(s)");
itemHandle
+ ": added "
+ newAlpha2Codes.size()
+ " alpha2 country code(s)");
} else { } else {
alpha2Result.setResult(itemHandle + ": no matching countries found"); alpha2Result.setResult(itemHandle + ": no matching countries found");
} }

View File

@ -1,18 +1,15 @@
/* /*
* Copyright (C) 2020 Alan Orth * Copyright (C) 2020 Alan Orth
* *
* SPDX-License-Identifier: GPL-3.0-or-later * SPDX-License-Identifier: GPL-3.0-or-later
*/ */
package io.github.ilri.cgspace.ctasks; package io.github.ilri.cgspace.ctasks;
import com.google.gson.annotations.SerializedName; import com.google.gson.annotations.SerializedName;
import java.util.List; import java.util.List;
public class ISO3166CountriesVocabulary extends CountriesVocabulary { public class ISO3166CountriesVocabulary extends CountriesVocabulary {
// support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since // support reading iso_3166-1.json from Debian's iso-codes package using SerializedName since our class needs to match the JSON exactly
// our class needs to match the JSON exactly @SerializedName("3166-1") List<Country> countries;
@SerializedName("3166-1")
List<Country> countries;
} }

View File

@ -1,27 +1,21 @@
/* /*
* Copyright (C) 2020 Alan Orth * Copyright (C) 2020 Alan Orth
* *
* SPDX-License-Identifier: GPL-3.0-or-later * SPDX-License-Identifier: GPL-3.0-or-later
*/ */
package io.github.ilri.cgspace.scripts; package io.github.ilri.cgspace.scripts;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream; import org.dspace.content.*;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.BundleService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants; import org.dspace.core.Constants;
import org.dspace.core.Context; import org.dspace.core.Context;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.handle.factory.HandleServiceFactory; import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService; import org.dspace.handle.service.HandleService;
import org.dspace.content.service.BundleService;
import java.io.IOException; import java.io.IOException;
import java.sql.SQLException; import java.sql.SQLException;
@ -35,139 +29,118 @@ import java.util.List;
* @since 5.1 * @since 5.1
*/ */
public class FixJpgJpgThumbnails { public class FixJpgJpgThumbnails {
// note: static members belong to the class itself, not any one instance //note: static members belong to the class itself, not any one instance
public static ItemService itemService = ContentServiceFactory.getInstance().getItemService(); public static ItemService itemService = ContentServiceFactory.getInstance().getItemService();
public static HandleService handleService = public static HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
HandleServiceFactory.getInstance().getHandleService(); public static BundleService bundleService = ContentServiceFactory.getInstance().getBundleService();
public static BundleService bundleService =
ContentServiceFactory.getInstance().getBundleService();
public static void main(String[] args) { public static void main(String[] args) {
String parentHandle = null; String parentHandle = null;
if (args.length >= 1) { if (args.length >= 1) {
parentHandle = args[0]; parentHandle = args[0];
} }
Context context = null; Context context = null;
try { try {
context = new Context(); context = new Context();
context.turnOffAuthorisationSystem(); context.turnOffAuthorisationSystem();
if (StringUtils.isBlank(parentHandle)) { if (StringUtils.isBlank(parentHandle)) {
process(context, itemService.findAll(context)); process(context, itemService.findAll(context));
} else { } else {
DSpaceObject parent = handleService.resolveToObject(context, parentHandle); DSpaceObject parent = handleService.resolveToObject(context, parentHandle);
if (parent != null) { if (parent != null) {
switch (parent.getType()) { switch (parent.getType()) {
case Constants.COLLECTION: case Constants.COLLECTION:
process( process(context, itemService.findByCollection(context, (Collection) parent));
context, break;
itemService.findByCollection(context, (Collection) parent)); case Constants.COMMUNITY:
break; List<Collection> collections = ((Community) parent).getCollections();
case Constants.COMMUNITY: for (Collection collection : collections) {
List<Collection> collections = ((Community) parent).getCollections(); process(context, itemService.findAllByCollection(context, collection));
for (Collection collection : collections) { }
process( break;
context, case Constants.SITE:
itemService.findAllByCollection(context, collection)); process(context, itemService.findAll(context));
} break;
break; case Constants.ITEM:
case Constants.SITE: processItem(context, (Item) parent);
process(context, itemService.findAll(context)); context.commit();
break; break;
case Constants.ITEM: }
processItem(context, (Item) parent); }
context.commit(); }
break; } catch (SQLException | AuthorizeException | IOException e) {
} e.printStackTrace(System.err);
} } finally {
} if (context != null && context.isValid()) {
} catch (SQLException | AuthorizeException | IOException e) { context.abort();
e.printStackTrace(System.err); }
} finally { }
if (context != null && context.isValid()) { }
context.abort();
}
}
}
private static void process(Context context, Iterator<Item> items) private static void process(Context context, Iterator<Item> items) throws SQLException, IOException, AuthorizeException {
throws SQLException, IOException, AuthorizeException { while (items.hasNext()) {
while (items.hasNext()) { Item item = items.next();
Item item = items.next(); processItem(context, item);
processItem(context, item); itemService.update(context, item);
itemService.update(context, item); }
} }
}
private static void processItem(Context context, Item item) private static void processItem(Context context, Item item) throws SQLException, AuthorizeException, IOException {
throws SQLException, AuthorizeException, IOException { // Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't
// Some bitstreams like Infographics and Maps are large JPEGs and put in the ORIGINAL bundle // swap them.
// on purpose so we shouldn't List<MetadataValue> itemTypes = itemService.getMetadataByMetadataString(item, "dcterms.type");
// swap them. boolean itemHasInfographic = false;
List<MetadataValue> itemTypes = for (MetadataValue itemType: itemTypes) {
itemService.getMetadataByMetadataString(item, "dcterms.type"); if (itemType.getValue().equals("Infographic")) {
for (MetadataValue itemType : itemTypes) { itemHasInfographic = true;
if (itemType.getValue().equals("Infographic") || itemType.getValue().equals("Map")) { }
System.out.println( }
item.getHandle() + ": item has an Infographic or Map, skipping.");
return;
}
}
List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL"); List<Bundle> thumbnailBundles = item.getBundles("THUMBNAIL");
for (Bundle thumbnailBundle : thumbnailBundles) { for (Bundle thumbnailBundle : thumbnailBundles) {
List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams(); List<Bitstream> thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) { for (Bitstream thumbnailBitstream : thumbnailBundleBitstreams) {
String thumbnailName = thumbnailBitstream.getName(); String thumbnailName = thumbnailBitstream.getName();
String thumbnailDescription = thumbnailBitstream.getDescription();
// There is no point continuing if the thumbnail's description is empty or null if (thumbnailName.toLowerCase().contains(".jpg.jpg")) {
if (StringUtils.isEmpty(thumbnailDescription)) { List<Bundle> originalBundles = item.getBundles("ORIGINAL");
continue; for (Bundle originalBundle : originalBundles) {
} List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
if (thumbnailName.toLowerCase().contains(".jpg.jpg")) { for (Bitstream originalBitstream : originalBundleBitstreams) {
List<Bundle> originalBundles = item.getBundles("ORIGINAL"); String originalName = originalBitstream.getName();
for (Bundle originalBundle : originalBundles) {
List<Bitstream> originalBundleBitstreams = originalBundle.getBitstreams();
for (Bitstream originalBitstream : originalBundleBitstreams) { long originalBitstreamBytes = originalBitstream.getSize();
String originalName = originalBitstream.getName();
long originalBitstreamBytes = originalBitstream.getSize(); /*
- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
- check if the thumbnail description indicates it was automatically generated
- check if the item has dc.type Infographic (JPG could be the "real" item!)
- check if the original bitstream is less than ~100KiB
- Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
bytes for an average of about 98KiB so ~100KiB seems like a good cut off
*/
if (
originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
&& ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))
&& !itemHasInfographic
&& originalBitstreamBytes < 100000
) {
System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
/* //add the original bitstream to the THUMBNAIL bundle
- check if the original file name is the same as the thumbnail name minus the extra ".jpg" bundleService.addBitstream(context, thumbnailBundle, originalBitstream);
- check if the thumbnail description indicates it was automatically generated //remove the original bitstream from the ORIGINAL bundle
- check if the original bitstream is less than ~100KiB originalBundle.removeBitstream(originalBitstream);
- Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249 //remove the JpgJpg bitstream from the THUMBNAIL bundle
bytes for an average of about 98KiB so ~100KiB seems like a good cut off thumbnailBundle.removeBitstream(thumbnailBitstream);
*/ }
if (originalName.equalsIgnoreCase( }
StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) }
&& ("Generated Thumbnail".equals(thumbnailDescription) }
|| "IM Thumbnail".equals(thumbnailDescription)) }
&& originalBitstreamBytes < 100000) { }
System.out.println( }
item.getHandle()
+ ": replacing "
+ thumbnailName
+ " with "
+ originalName);
// add the original bitstream to the THUMBNAIL bundle
bundleService.addBitstream(
context, thumbnailBundle, originalBitstream);
// remove the original bitstream from the ORIGINAL bundle
originalBundle.removeBitstream(originalBitstream);
// remove the JpgJpg bitstream from the THUMBNAIL bundle
thumbnailBundle.removeBitstream(thumbnailBitstream);
}
}
}
}
}
}
}
} }

View File

@ -1,19 +1,14 @@
/* /*
* Copyright (C) 2022 Alan Orth * Copyright (C) 2022 Alan Orth
* *
* SPDX-License-Identifier: GPL-3.0-or-later * SPDX-License-Identifier: GPL-3.0-or-later
*/ */
package io.github.ilri.cgspace.scripts; package io.github.ilri.cgspace.scripts;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream; import org.dspace.content.*;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.BundleService; import org.dspace.content.service.BundleService;
import org.dspace.content.service.ItemService; import org.dspace.content.service.ItemService;