mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2025-01-10 22:13:23 +01:00
src/main/java: Tune FixJpgJpgThumbnails a bit
Make sure we don't modify thumbnails if the item is an Infographic because the JPG in the ORIGINAL bundle might actually be the "real" file, in which case the THUMBNAIL bundle would have a legitimate ".jpg.jpg" file. Also, limit the criteria for replacement to original bitstreams that are less than 100KiB. In my tests I found that we had 4,022 items with ".jpg.jpg" thumbnails, and the average file size of the originals in those items was 98KiB. Without considering the large infographics, which are several megabytes apiece, the average of the remaining 3,765 originals was ~20KiB so 100KiB should be very safe.
This commit is contained in:
parent
fdc910f93b
commit
26d3cbd778
@ -13,8 +13,8 @@ import java.sql.SQLException;
|
||||
/**
|
||||
* @author Andrea Schweer schweer@waikato.ac.nz for the LCoNZ Institutional Research Repositories
|
||||
* @author Alan Orth for the International Livestock Research Institute
|
||||
* @version 5.1-SNAPSHOT
|
||||
* @since 5.1-SNAPSHOT
|
||||
* @version 5.3
|
||||
* @since 5.1
|
||||
*/
|
||||
public class FixJpgJpgThumbnails {
|
||||
|
||||
@ -73,6 +73,16 @@ public class FixJpgJpgThumbnails {
|
||||
}
|
||||
|
||||
private static void processItem(Item item) throws SQLException, AuthorizeException, IOException {
|
||||
// Some bitstreams like Infographics are large JPGs and put in the ORIGINAL bundle on purpose so we shouldn't
|
||||
// swap them.
|
||||
Metadatum[] itemTypes = item.getMetadataByMetadataString("dc.type");
|
||||
Boolean itemHasInfographic = false;
|
||||
for (Metadatum itemType: itemTypes) {
|
||||
if (itemType.value.equals("Infographic")) {
|
||||
itemHasInfographic = true;
|
||||
}
|
||||
}
|
||||
|
||||
Bundle[] thumbnailBundles = item.getBundles("THUMBNAIL");
|
||||
for (Bundle thumbnailBundle : thumbnailBundles) {
|
||||
Bitstream[] thumbnailBundleBitstreams = thumbnailBundle.getBitstreams();
|
||||
@ -84,11 +94,25 @@ public class FixJpgJpgThumbnails {
|
||||
for (Bundle originalBundle : originalBundles) {
|
||||
Bitstream[] originalBundleBitstreams = originalBundle.getBitstreams();
|
||||
|
||||
for(Bitstream originalBitstream : originalBundleBitstreams) {
|
||||
for (Bitstream originalBitstream : originalBundleBitstreams) {
|
||||
String originalName = originalBitstream.getName();
|
||||
|
||||
//check if the original file name is the same as the thumbnail name minus the extra ".jpg"
|
||||
if (originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg")) && ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))) {
|
||||
Long originalBitstreamBytes = originalBitstream.getSize();
|
||||
|
||||
/*
|
||||
- check if the original file name is the same as the thumbnail name minus the extra ".jpg"
|
||||
- check if the thumbnail description indicates it was automatically generated
|
||||
- check if the item has dc.type Infographic (JPG could be the "real" item!)
|
||||
- check if the original bitstream is less than ~100KiB
|
||||
- Note: in my tests there were 4022 items with ".jpg.jpg" thumbnails totaling 394549249
|
||||
bytes for an average of about 98KiB so ~100KiB seems like a good cut off
|
||||
*/
|
||||
if (
|
||||
originalName.equalsIgnoreCase(StringUtils.removeEndIgnoreCase(thumbnailName, ".jpg"))
|
||||
&& ("Generated Thumbnail".equals(thumbnailBitstream.getDescription()) || "IM Thumbnail".equals(thumbnailBitstream.getDescription()))
|
||||
&& !itemHasInfographic
|
||||
&& originalBitstreamBytes < 100000
|
||||
) {
|
||||
System.out.println(item.getHandle() + ": replacing " + thumbnailName + " with " + originalName);
|
||||
|
||||
//add the original bitstream to the THUMBNAIL bundle
|
||||
|
Loading…
x
Reference in New Issue
Block a user