mirror of
https://github.com/ilri/cgspace-java-helpers.git
synced 2025-05-10 23:26:05 +02:00
Compare commits
11 Commits
Author | SHA1 | Date | |
---|---|---|---|
8531992412
|
|||
27016f5f77
|
|||
3a583c4f86
|
|||
28668f76c9
|
|||
e0153fd38a
|
|||
12a606ac61
|
|||
692a62b454
|
|||
d4ca92066a
|
|||
5ad8c556e9
|
|||
77425c13bf
|
|||
5e0a456fb5
|
@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [7.6.1.3] - 2024-06-26
|
||||
### Changed
|
||||
- Add more formats to `NormalizeDOIs` curation task
|
||||
|
||||
## [7.6.1.2] - 2024-04-25
|
||||
### Changed
|
||||
- Remove reporting from curation tasks since "results" are enough
|
||||
|
||||
## [7.6.1.1] - 2024-04-23
|
||||
### Added
|
||||
- New `NormalizeDOIs` curation task
|
||||
|
@ -17,7 +17,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>7.6.1.1-SNAPSHOT</version>
|
||||
<version>7.6.1.3-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -33,14 +33,14 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```console
|
||||
$ cp target/cgspace-java-helpers-7.6.1.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6.1.3-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Configuration
|
||||
Please refer to the appropriate README.md file:
|
||||
|
||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
|
||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace6/src/main/java/io/github/ilri/cgspace/scripts/README.md)
|
||||
- Curation Tasks: [src/main/java/io/github/ilri/cgspace/ctasks/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/ctasks/README.md)
|
||||
- Scripts: [src/main/java/io/github/ilri/cgspace/scripts/README.md](https://github.com/ilri/cgspace-java-helpers/blob/dspace7/src/main/java/io/github/ilri/cgspace/scripts/README.md)
|
||||
|
||||
## TODO
|
||||
|
||||
|
2
pom.xml
2
pom.xml
@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>7.6.1.1-SNAPSHOT</version>
|
||||
<version>7.6.1.3-SNAPSHOT</version>
|
||||
|
||||
<name>cgspace-java-helpers</name>
|
||||
<url>https://github.com/ilri/cgspace-java-helpers</url>
|
||||
|
@ -26,6 +26,13 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/*
|
||||
* Add ISO 3166-1 Alpha2 country codes to items based on their existing country metadata.
|
||||
*
|
||||
* @author Alan Orth for the International Livestock Research Institute
|
||||
* @version 7.6.1.2
|
||||
* @since 5.1
|
||||
*/
|
||||
public class CountryCodeTagger extends AbstractCurationTask {
|
||||
public class CountryCodeTaggerConfig {
|
||||
private final String isocodesJsonPath = "/io/github/ilri/cgspace/ctasks/iso_3166-1.json";
|
||||
@ -77,7 +84,6 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
}
|
||||
|
||||
setResult(alpha2Result.getResult());
|
||||
report(alpha2Result.getResult());
|
||||
}
|
||||
|
||||
return alpha2Result.getStatus();
|
||||
@ -86,14 +92,13 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
public CountryCodeTaggerResult performAlpha2(Item item, CountryCodeTaggerConfig config)
|
||||
throws IOException, SQLException {
|
||||
CountryCodeTaggerResult alpha2Result = new CountryCodeTaggerResult();
|
||||
String itemHandle = item.getHandle();
|
||||
|
||||
List<MetadataValue> itemCountries =
|
||||
itemService.getMetadataByMetadataString(item, config.iso3166Field);
|
||||
|
||||
// skip items that don't have country metadata
|
||||
if (itemCountries.isEmpty()) {
|
||||
alpha2Result.setResult(itemHandle + ": no countries, skipping.");
|
||||
alpha2Result.setResult("No countries, skipping.");
|
||||
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||
} else {
|
||||
Gson gson = new Gson();
|
||||
@ -172,21 +177,20 @@ public class CountryCodeTagger extends AbstractCurationTask {
|
||||
itemService.update(Curator.curationContext(), item);
|
||||
} catch (SQLException | AuthorizeException sqle) {
|
||||
config.log.debug(sqle.getMessage());
|
||||
alpha2Result.setResult(itemHandle + ": error");
|
||||
alpha2Result.setResult("Error");
|
||||
alpha2Result.setStatus(Curator.CURATE_ERROR);
|
||||
}
|
||||
|
||||
alpha2Result.setResult(
|
||||
itemHandle
|
||||
+ ": added "
|
||||
"Added "
|
||||
+ newAlpha2Codes.size()
|
||||
+ " alpha2 country code(s)");
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": no matching countries found");
|
||||
alpha2Result.setResult("No matching countries found");
|
||||
}
|
||||
alpha2Result.setStatus(Curator.CURATE_SUCCESS);
|
||||
} else {
|
||||
alpha2Result.setResult(itemHandle + ": item has country codes, skipping");
|
||||
alpha2Result.setResult("Item already has country codes, skipping unless forced");
|
||||
alpha2Result.setStatus(Curator.CURATE_SKIP);
|
||||
}
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ import java.util.List;
|
||||
* TODO: allow operation on communities and collections (currently only works on items)
|
||||
*
|
||||
* @author Alan Orth for the International Livestock Research Institute
|
||||
* @version 7.6.1.1
|
||||
* @version 7.6.1.3
|
||||
* @since 7.6.1.1
|
||||
*/
|
||||
@Suspendable
|
||||
@ -68,7 +68,6 @@ public class NormalizeDOIs extends AbstractCurationTask {
|
||||
} else {
|
||||
result = "All DOIs already normalized";
|
||||
}
|
||||
report(result);
|
||||
setResult(result);
|
||||
|
||||
return Curator.CURATE_SUCCESS;
|
||||
@ -79,17 +78,21 @@ public class NormalizeDOIs extends AbstractCurationTask {
|
||||
}
|
||||
|
||||
private static String getNormalizedDOI(MetadataValue itemDOI) {
|
||||
// 1. Convert to lowercase
|
||||
// Convert to lowercase
|
||||
String newDOI = itemDOI.getValue().toLowerCase();
|
||||
// 2. Strip leading and trailing whitespace
|
||||
// Strip leading and trailing whitespace
|
||||
newDOI = newDOI.strip();
|
||||
// 3. Convert to HTTPS
|
||||
// Convert to HTTPS
|
||||
newDOI = newDOI.replace("http://", "https://");
|
||||
// 4. Prefer doi.org to dx.doi.org
|
||||
// Prefer doi.org to dx.doi.org
|
||||
newDOI = newDOI.replace("dx.doi.org", "doi.org");
|
||||
// 5. Replace values like doi: 10.11648/j.jps.20140201.14
|
||||
// Prefer doi.org to www.doi.org
|
||||
newDOI = newDOI.replace("www.doi.org", "doi.org");
|
||||
// Fix URL encoded slashes (%2f)
|
||||
newDOI = newDOI.replace("%2f", "/");
|
||||
// Replace values like doi: 10.11648/j.jps.20140201.14
|
||||
newDOI = newDOI.replaceAll("^doi: 10\\.", "https://doi.org/10.");
|
||||
// 6. Replace values like 10.3390/foods12010115
|
||||
// Replace values like 10.3390/foods12010115
|
||||
newDOI = newDOI.replaceAll("^10\\.", "https://doi.org/10.");
|
||||
|
||||
return newDOI;
|
||||
|
@ -15,7 +15,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>7.6.1.1-SNAPSHOT</version>
|
||||
<version>7.6.1.3-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -31,7 +31,7 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```
|
||||
$ cp target/cgspace-java-helpers-7.6.1.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6.1.3-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Configuration
|
||||
@ -62,7 +62,7 @@ countrycodetagger.iso3166-alpha2.field = cg.coverage.iso3166-alpha2
|
||||
Once the jar is installed and you have added appropriate configuration in `~/dspace/config/modules`:
|
||||
|
||||
```
|
||||
$ ~/dspace/bin/dspace curate -t countrycodetagger -i 10568/3 -r - -s object
|
||||
$ ~/dspace/bin/dspace curate -e eperson@repo.org -t countrycodetagger -i 10568/3 -r - -s object
|
||||
```
|
||||
|
||||
*Note*: it is very important to set the database transaction scope to something sensible (`object`) if you're curating a community or collection with more than a few hundred items.
|
||||
|
@ -15,7 +15,7 @@ To use these curation tasks in a DSpace project add the following dependency to
|
||||
<dependency>
|
||||
<groupId>io.github.ilri.cgspace</groupId>
|
||||
<artifactId>cgspace-java-helpers</artifactId>
|
||||
<version>7.6.1.1-SNAPSHOT</version>
|
||||
<version>7.6.1.3-SNAPSHOT</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -31,7 +31,7 @@ $ mvn package
|
||||
Copy the resulting jar to the DSpace `lib` directory:
|
||||
|
||||
```console
|
||||
$ cp target/cgspace-java-helpers-7.6.1.1-SNAPSHOT.jar ~/dspace/lib/
|
||||
$ cp target/cgspace-java-helpers-7.6.1.3-SNAPSHOT.jar ~/dspace/lib/
|
||||
```
|
||||
|
||||
## Invocation
|
||||
|
Reference in New Issue
Block a user