mirror of
				https://github.com/ilri/csv-metadata-quality.git
				synced 2025-10-31 04:41:17 +01:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			82261f7fe0
			...
			c95261f522
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| c95261f522 | |||
| 787fa9e8d9 | |||
| @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | |||||||
| well as dcterms.bibliographicCitation) in `experimental.correct_language()` | well as dcterms.bibliographicCitation) in `experimental.correct_language()` | ||||||
| - Regular expression to match dc.title and dcterms.title, but | - Regular expression to match dc.title and dcterms.title, but | ||||||
| ignore dc.title.alternative `check.duplicate_items()` | ignore dc.title.alternative `check.duplicate_items()` | ||||||
|  | - Missing field name in `fix.newlines()` output | ||||||
|  |  | ||||||
| ## [0.4.7] - 2021-03-17 | ## [0.4.7] - 2021-03-17 | ||||||
| ### Changed | ### Changed | ||||||
|   | |||||||
| @@ -89,7 +89,7 @@ def run(argv): | |||||||
|  |  | ||||||
|         # Fix: newlines |         # Fix: newlines | ||||||
|         if args.unsafe_fixes: |         if args.unsafe_fixes: | ||||||
|             df[column] = df[column].apply(fix.newlines) |             df[column] = df[column].apply(fix.newlines, field_name=column) | ||||||
|  |  | ||||||
|         # Fix: missing space after comma. Only run on author and citation |         # Fix: missing space after comma. Only run on author and citation | ||||||
|         # fields for now, as this problem is mostly an issue in names. |         # fields for now, as this problem is mostly an issue in names. | ||||||
|   | |||||||
| @@ -180,7 +180,7 @@ def duplicates(field, field_name): | |||||||
|     return new_field |     return new_field | ||||||
|  |  | ||||||
|  |  | ||||||
| def newlines(field): | def newlines(field, field_name): | ||||||
|     """Fix newlines. |     """Fix newlines. | ||||||
|  |  | ||||||
|     Single metadata values should not span multiple lines because this is not |     Single metadata values should not span multiple lines because this is not | ||||||
| @@ -205,7 +205,7 @@ def newlines(field): | |||||||
|     match = re.findall(r"\n", field) |     match = re.findall(r"\n", field) | ||||||
|  |  | ||||||
|     if match: |     if match: | ||||||
|         print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}") |         print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}") | ||||||
|         field = field.replace("\n", "") |         field = field.replace("\n", "") | ||||||
|  |  | ||||||
|     return field |     return field | ||||||
|   | |||||||
| @@ -76,8 +76,9 @@ def test_fix_newlines(): | |||||||
|  |  | ||||||
|     value = """Ken |     value = """Ken | ||||||
| ya""" | ya""" | ||||||
|  |     field_name = "dcterms.subject" | ||||||
|  |  | ||||||
|     assert fix.newlines(value) == "Kenya" |     assert fix.newlines(value, field_name) == "Kenya" | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_fix_comma_space(): | def test_fix_comma_space(): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user