CGSpace Notes

Documenting day-to-day work on the CGSpace repository.

June, 2017

2017-06-01

  • After discussion with WLE and CGSpace content people, we decided to just add one metadata field for the WLE Research Themes
  • The cg.identifier.wletheme field will be used for both Phase I and Phase II Research Themes
  • Then we’ll create a new sub-community for Phase II and create collections for the research themes there
  • The current “Research Themes” community will be renamed to “WLE Phase I Research Themes”
  • Tagged all items in the current Phase I collections with their appropriate themes
  • Create pull request to add Phase II research themes to the submission form: #328
  • Add cg.subject.system to CGSpace metadata registry, for subject from the upcoming CGIAR Library migration
handle/10568/80731

2017-05-05

$ [dspace]/bin/dspace curate -t requiredmetadata -i 10568/1 -r - > /tmp/curation.out

2017-05-06

2017-05-07

$ ./fix-metadata-values.py -i ccafs-flagships-may7.csv -f cg.subject.ccafs -t correct -m 210 -d dspace -u dspace -p fuuu

2017-05-08

$ export JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx2048m -XX:-UseGCOverheadLimit"
$ [dspace]/bin/dspace packager -s -o ignoreHandle=false -t AIP -e some@user.com -p 10568/87775 /home/aorth/10947-1/10947-1.zip
$ for collection in /home/aorth/10947-1/COLLECTION@10947-*; do [dspace]/bin/dspace packager -s -o ignoreHandle=false -t AIP -e some@user.com -p 10947/1 $collection; done
$ for item in /home/aorth/10947-1/ITEM@10947-*; do [dspace]/bin/dspace packager -r -f -u -t AIP -e some@user.com $item; done

2017-05-09

dspace=# delete from metadatavalue where resource_type_id=2 and text_value=''
Caused by: org.postgresql.util.PSQLException: ERROR: duplicate key value violates unique constraint "handle_pkey"
  Detail: Key (handle_id)=(80928) already exists.

2017-05-10

$ export JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx3072m -XX:-UseGCOverheadLimit"
$ [dspace]/bin/dspace packager -r -a -t AIP -o skipIfParentMissing=true -e some@user.com -p 10568/80923 /home/aorth/10947-2517/10947-2517.zip
$ [dspace]/bin/dspace packager -r -a -t AIP -o skipIfParentMissing=true -e some@user.com -p 10568/80923 /home/aorth/10947-2515/10947-2515.zip
$ [dspace]/bin/dspace packager -r -a -t AIP -o skipIfParentMissing=true -e some@user.com -p 10568/80923 /home/aorth/10947-2516/10947-2516.zip
$ [dspace]/bin/dspace packager -r -a -t AIP -o skipIfParentMissing=true -e some@user.com -p 10568/80923 /home/aorth/10947-1/10947-1.zip
$ [dspace]/bin/dspace packager -s -t AIP -o ignoreHandle=false -e some@user.com -p 10568/80923 /home/aorth/10947-1/10947-1.zip
$ for collection in /home/aorth/10947-1/COLLECTION@10947-*; do [dspace]/bin/dspace packager -s -o ignoreHandle=false -t AIP -e some@user.com -p 10947/1 $collection; done
$ for item in /home/aorth/10947-1/ITEM@10947-*; do [dspace]/bin/dspace packager -r -f -u -t AIP -e some@user.com $item; done
dspace=# delete from metadatavalue where resource_type_id=2 and text_value='';

2017-05-13

2017-05-15

$ ./fix-metadata-values.py -i /tmp/ccafs-flagships-may7.csv -f cg.subject.ccafs -t correct -m 210 -d dspace -u dspace -p 'fuuu'

2017-05-16

2017-05-17

ERROR: duplicate key value violates unique constraint "handle_pkey" Detail: Key (handle_id)=(84834) already exists.
dspace=# select * from handle where handle_id=84834;
 handle_id |   handle   | resource_type_id | resource_id
-----------+------------+------------------+-------------
     84834 | 10947/1332 |                2 |       87113
dspace=# select * from handle where handle_id=(select max(handle_id) from handle);
 handle_id |  handle  | resource_type_id | resource_id
-----------+----------+------------------+-------------
     86873 | 10947/99 |                2 |       89153
(1 row)
dspace=# select setval('handle_seq',86873);

2017-05-21

2017-05-22

$ grep 10947/ /tmp/collections | grep -v cocoon | awk -F/ '{print $3"/"$4}' | awk -F\" '{print $1}' | vim -
dspace=# select distinct text_value
from metadatavalue
where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author')
AND resource_type_id = 2
AND resource_id IN (select item_id from collection2item where collection_id IN (select resource_id from handle where handle in ('10947/2', '10947/3', '10947/1
0', '10947/4', '10947/5', '10947/6', '10947/7', '10947/8', '10947/9', '10947/11', '10947/25', '10947/12', '10947/26', '10947/27', '10947/28', '10947/29', '109
47/30', '10947/13', '10947/14', '10947/15', '10947/16', '10947/31', '10947/32', '10947/33', '10947/34', '10947/35', '10947/36', '10947/37', '10947/17', '10947
/18', '10947/38', '10947/19', '10947/39', '10947/40', '10947/41', '10947/42', '10947/43', '10947/2512', '10947/44', '10947/20', '10947/21', '10947/45', '10947
/46', '10947/47', '10947/48', '10947/49', '10947/22', '10947/23', '10947/24', '10947/50', '10947/51', '10947/2518', '10947/2776', '10947/2790', '10947/2521',
'10947/2522', '10947/2782', '10947/2525', '10947/2836', '10947/2524', '10947/2878', '10947/2520', '10947/2523', '10947/2786', '10947/2631', '10947/2589', '109
47/2519', '10947/2708', '10947/2526', '10947/2871', '10947/2527', '10947/4467', '10947/3457', '10947/2528', '10947/2529', '10947/2533', '10947/2530', '10947/2
531', '10947/2532', '10947/2538', '10947/2534', '10947/2540', '10947/2900', '10947/2539', '10947/2784', '10947/2536', '10947/2805', '10947/2541', '10947/2535'
, '10947/2537', '10568/93761')));
dspace=# \copy (select distinct text_value, count(*)
from metadatavalue
where metadata_field_id = (select metadata_field_id from metadatafieldregistry where element = 'contributor' and qualifier = 'author')
AND resource_type_id = 2
AND resource_id IN (select item_id from collection2item where collection_id IN (select resource_id from handle where handle in ('10947/2', '10947/3', '10947/10', '10947/4', '10947/5', '10947/6', '10947/7', '10947/8', '10947/9', '10947/11', '10947/25', '10947/12', '10947/26', '10947/27', '10947/28', '10947/29', '10947/30', '10947/13', '10947/14', '10947/15', '10947/16', '10947/31', '10947/32', '10947/33', '10947/34', '10947/35', '10947/36', '10947/37', '10947/17', '10947/18', '10947/38', '10947/19', '10947/39', '10947/40', '10947/41', '10947/42', '10947/43', '10947/2512', '10947/44', '10947/20', '10947/21', '10947/45', '10947/46', '10947/47', '10947/48', '10947/49', '10947/22', '10947/23', '10947/24', '10947/50', '10947/51', '10947/2518', '10947/2776', '10947/2790', '10947/2521', '10947/2522', '10947/2782', '10947/2525', '10947/2836', '10947/2524', '10947/2878', '10947/2520', '10947/2523', '10947/2786', '10947/2631', '10947/2589', '10947/2519', '10947/2708', '10947/2526', '10947/2871', '10947/2527', '10947/4467', '10947/3457', '10947/2528', '10947/2529', '10947/2533', '10947/2530', '10947/2531', '10947/2532', '10947/2538', '10947/2534', '10947/2540', '10947/2900', '10947/2539', '10947/2784', '10947/2536', '10947/2805', '10947/2541', '10947/2535', '10947/2537', '10568/93761'))) group by text_value order by count desc) to /tmp/cgiar-librar-authors.csv with csv;

2017-05-23

dspace=# \copy (select distinct text_value from metadatavalue where resource_type_id=2 and metadata_field_id=119) to /tmp/wle.csv with csv;
COPY 111

2017-05-26

2017-05-28

dspace=# select distinct text_value, authority, confidence from metadatavalue where metadata_field_id=3 and text_value like 'Omore, A%';
dspace=# update metadatavalue set authority='4428ee88-90ef-4107-b837-3c0ec988520b', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Omore, A%';
UPDATE 187
dspace=# select distinct text_value, authority, confidence from metadatavalue where metadata_field_id=3 and text_value like 'Twine, E%';
dspace=# update metadatavalue set authority='f70d0a01-d562-45b8-bca3-9cf7f249bc8b', confidence=600 where metadata_field_id=3 and resource_type_id=2 and text_value like 'Twine, E%';

2017-05-29