mirror of
				https://github.com/ilri/csv-metadata-quality.git
				synced 2025-11-04 14:39:08 +01:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			ba4637ea34
			...
			530cd5863b
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						
						
							
						
						530cd5863b
	
				 | 
					
					
						|||
| 
						
						
							
						
						f6018c51b6
	
				 | 
					
					
						|||
| 
						
						
							
						
						80c3f5b45a
	
				 | 
					
					
						
@@ -85,7 +85,7 @@ def run(argv):
 | 
				
			|||||||
        # user should be careful not to include spaces here.
 | 
					        # user should be careful not to include spaces here.
 | 
				
			||||||
        exclude = args.exclude_fields.split(",")
 | 
					        exclude = args.exclude_fields.split(",")
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        exclude = list()
 | 
					        exclude = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # enable transparent request cache with thirty days expiry
 | 
					    # enable transparent request cache with thirty days expiry
 | 
				
			||||||
    expire_after = timedelta(days=30)
 | 
					    expire_after = timedelta(days=30)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -202,7 +202,7 @@ def agrovoc(field, field_name, drop):
 | 
				
			|||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Initialize an empty list to hold the validated AGROVOC values
 | 
					    # Initialize an empty list to hold the validated AGROVOC values
 | 
				
			||||||
    values = list()
 | 
					    values = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Try to split multi-value field on "||" separator
 | 
					    # Try to split multi-value field on "||" separator
 | 
				
			||||||
    for value in field.split("||"):
 | 
					    for value in field.split("||"):
 | 
				
			||||||
@@ -358,7 +358,7 @@ def duplicate_items(df):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if items_count_unique < items_count_total:
 | 
					    if items_count_unique < items_count_total:
 | 
				
			||||||
        # Create a list to hold our items while we check for duplicates
 | 
					        # Create a list to hold our items while we check for duplicates
 | 
				
			||||||
        items = list()
 | 
					        items = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for index, row in df.iterrows():
 | 
					        for index, row in df.iterrows():
 | 
				
			||||||
            item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
 | 
					            item_title_type_date = f"{row[title_column_name]}{row[type_column_name]}{row[date_column_name]}"
 | 
				
			||||||
@@ -539,7 +539,7 @@ def countries_match_regions(row, exclude):
 | 
				
			|||||||
        if row[region_column_name] is not None:
 | 
					        if row[region_column_name] is not None:
 | 
				
			||||||
            regions = row[region_column_name].split("||")
 | 
					            regions = row[region_column_name].split("||")
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            regions = list()
 | 
					            regions = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for country in countries:
 | 
					        for country in countries:
 | 
				
			||||||
            # Look up the UN M.49 regions for this country code. CoCo seems to
 | 
					            # Look up the UN M.49 regions for this country code. CoCo seems to
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -20,7 +20,7 @@ def correct_language(row, exclude):
 | 
				
			|||||||
    # Initialize some variables at global scope so that we can set them in the
 | 
					    # Initialize some variables at global scope so that we can set them in the
 | 
				
			||||||
    # loop scope below and still be able to access them afterwards.
 | 
					    # loop scope below and still be able to access them afterwards.
 | 
				
			||||||
    language = ""
 | 
					    language = ""
 | 
				
			||||||
    sample_strings = list()
 | 
					    sample_strings = []
 | 
				
			||||||
    title = None
 | 
					    title = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Iterate over the labels of the current row's values. Before we transposed
 | 
					    # Iterate over the labels of the current row's values. Before we transposed
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -23,7 +23,7 @@ def whitespace(field, field_name):
 | 
				
			|||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Initialize an empty list to hold the cleaned values
 | 
					    # Initialize an empty list to hold the cleaned values
 | 
				
			||||||
    values = list()
 | 
					    values = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Try to split multi-value field on "||" separator
 | 
					    # Try to split multi-value field on "||" separator
 | 
				
			||||||
    for value in field.split("||"):
 | 
					    for value in field.split("||"):
 | 
				
			||||||
@@ -64,7 +64,7 @@ def separators(field, field_name):
 | 
				
			|||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Initialize an empty list to hold the cleaned values
 | 
					    # Initialize an empty list to hold the cleaned values
 | 
				
			||||||
    values = list()
 | 
					    values = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Try to split multi-value field on "||" separator
 | 
					    # Try to split multi-value field on "||" separator
 | 
				
			||||||
    for value in field.split("||"):
 | 
					    for value in field.split("||"):
 | 
				
			||||||
@@ -175,7 +175,7 @@ def duplicates(field, field_name):
 | 
				
			|||||||
    values = field.split("||")
 | 
					    values = field.split("||")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Initialize an empty list to hold the de-duplicated values
 | 
					    # Initialize an empty list to hold the de-duplicated values
 | 
				
			||||||
    new_values = list()
 | 
					    new_values = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Iterate over all values
 | 
					    # Iterate over all values
 | 
				
			||||||
    for value in values:
 | 
					    for value in values:
 | 
				
			||||||
@@ -355,10 +355,10 @@ def countries_match_regions(row, exclude):
 | 
				
			|||||||
        if row[region_column_name] is not None:
 | 
					        if row[region_column_name] is not None:
 | 
				
			||||||
            regions = row[region_column_name].split("||")
 | 
					            regions = row[region_column_name].split("||")
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            regions = list()
 | 
					            regions = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # An empty list for our regions so we can keep track for all countries
 | 
					        # An empty list for our regions so we can keep track for all countries
 | 
				
			||||||
        missing_regions = list()
 | 
					        missing_regions = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for country in countries:
 | 
					        for country in countries:
 | 
				
			||||||
            # Look up the UN M.49 regions for this country code. CoCo seems to
 | 
					            # Look up the UN M.49 regions for this country code. CoCo seems to
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										868
									
								
								poetry.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										868
									
								
								poetry.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -30,6 +30,7 @@ black = "^23.1.0"
 | 
				
			|||||||
isort = "^5.12.0"
 | 
					isort = "^5.12.0"
 | 
				
			||||||
csvkit = "^1.1.0"
 | 
					csvkit = "^1.1.0"
 | 
				
			||||||
ipython = "^8.10.0"
 | 
					ipython = "^8.10.0"
 | 
				
			||||||
 | 
					fixit = "^2.1.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[build-system]
 | 
					[build-system]
 | 
				
			||||||
requires = ["poetry>=0.12"]
 | 
					requires = ["poetry>=0.12"]
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user