mirror of
				https://github.com/ilri/csv-metadata-quality.git
				synced 2025-10-31 12:51:14 +01:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			82261f7fe0
			...
			c95261f522
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
| c95261f522 | |||
| 787fa9e8d9 | 
| @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | ||||
| well as dcterms.bibliographicCitation) in `experimental.correct_language()` | ||||
| - Regular expression to match dc.title and dcterms.title, but | ||||
| ignore dc.title.alternative in `check.duplicate_items()` | ||||
| - Missing field name in `fix.newlines()` output | ||||
|  | ||||
| ## [0.4.7] - 2021-03-17 | ||||
| ### Changed | ||||
|   | ||||
| @@ -89,7 +89,7 @@ def run(argv): | ||||
|  | ||||
|         # Fix: newlines | ||||
|         if args.unsafe_fixes: | ||||
|             df[column] = df[column].apply(fix.newlines) | ||||
|             df[column] = df[column].apply(fix.newlines, field_name=column) | ||||
|  | ||||
|         # Fix: missing space after comma. Only run on author and citation | ||||
|         # fields for now, as this problem is mostly an issue in names. | ||||
|   | ||||
| @@ -180,7 +180,7 @@ def duplicates(field, field_name): | ||||
|     return new_field | ||||
|  | ||||
|  | ||||
| def newlines(field): | ||||
| def newlines(field, field_name): | ||||
|     """Fix newlines. | ||||
|  | ||||
|     Single metadata values should not span multiple lines because this is not | ||||
| @@ -205,7 +205,7 @@ def newlines(field): | ||||
|     match = re.findall(r"\n", field) | ||||
|  | ||||
|     if match: | ||||
|         print(f"{Fore.GREEN}Removing newline: {Fore.RESET}{field}") | ||||
|         print(f"{Fore.GREEN}Removing newline ({field_name}): {Fore.RESET}{field}") | ||||
|         field = field.replace("\n", "") | ||||
|  | ||||
|     return field | ||||
|   | ||||
| @@ -76,8 +76,9 @@ def test_fix_newlines(): | ||||
|  | ||||
|     value = """Ken | ||||
| ya""" | ||||
|     field_name = "dcterms.subject" | ||||
|  | ||||
|     assert fix.newlines(value) == "Kenya" | ||||
|     assert fix.newlines(value, field_name) == "Kenya" | ||||
|  | ||||
|  | ||||
| def test_fix_comma_space(): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user