|
5 | 5 | (1) Merges the content of the csv files found in all YEAR_MONTH subdirectories into a single global_csv_file. |
6 | 6 | This file will map all jp2 to their labels without any overlaps. This csv only maps file basenames. |
7 | 7 |
|
8 | | -(2) This script also creates csv files mapping relative YEAR_MONTH-based file path to new common jp2 and labels directory |
9 | | -directly under a new parent directory. You may this e.g. for moving the files into a new more "global" tree. |
| 8 | +(2) This script also creates csv files mapping relative YEAR_MONTH-based file paths to new common jp2 and labels directories
| 9 | +directly under a new parent directory. You may run this e.g. for moving the files into a new more "global" tree. |
10 | 10 |
|
11 | | -E.g: given a parent directory "parent_dir" hosting the original data tree (parent_dir), |
12 | | -the csv file will map everything in |
| 11 | +E.g: given a parent directory "parent_dir", the csv file will map things from |
13 | 12 |
|
14 | 13 | parent_dir |
15 | 14 | 2010_12 |
|
47 | 46 | import glob |
48 | 47 | import pandas as pd |
49 | 48 | import csv |
| 49 | +from pathlib import Path |
50 | 50 |
|
51 | 51 |
|
52 | 52 | ############# Set some data directories - update to your personal case ############# |
53 | 53 |
|
54 | 54 | # Parent directory of all YEAR_MONTH subdirectories that will also contain the global csv file |
55 | | -parent_dir = '/Volumes/SolarData/LabeledImages/' |
| 55 | +parent_dir = '/media/raphael/SolarData/V2' |
56 | 56 | # Common directory where all files will be moved, without duplicates. |
57 | 57 | parent_dir2 = parent_dir |
58 | 58 | # Filename of csv file that will be the aggregation of all csv files of all YEAR_MONTH subdirectories without duplicates
59 | | -global_csv_file = os.path.join(parent_dir, 'label_jp2_map_global.csv') |
| 59 | +global_csv_file = os.path.join(parent_dir2, 'label_jp2_map_global.csv') |
| 60 | +# csv output for empty data |
| 61 | +csv_empty_data = os.path.join(parent_dir2, 'empty_data.csv') |
| 62 | + |
60 | 63 |
|
61 | 64 | ######### (1) Creating the aggregated map of jp2 and label masks ########### |
62 | 65 |
|
63 | 66 | # Fetch the csv file paths recursively |
64 | 67 | csv_files = sorted(glob.glob(os.path.join(parent_dir, '20*/label_jp2_map.csv'))) |
65 | 68 | # Read their content and concatenate in a unique dataframe |
66 | 69 | dfs = [] |
| 70 | +empty_csvs = [] |
67 | 71 | for csvf in csv_files: |
68 | 72 | print(csvf) |
69 | | - dfs.append(pd.read_csv(csvf, header=None)) |
| 73 | + try: |
| 74 | + # Sometimes the CSV file can be empty if no complete set exists at all
| 75 | + dfs.append(pd.read_csv(csvf, header=None)) |
| 76 | + except pd.errors.EmptyDataError: |
| 77 | + print('Empty csv file') |
| 78 | + # Record the name of the parent directory of the empty csv file (written to file after the loop)
| 79 | + empty_csvs.append([Path(csvf).parent.name]) |
| 80 | + continue |
| 81 | + |
| 82 | +with open(csv_empty_data, 'w') as csvFile: |
| 83 | + writer = csv.writer(csvFile) |
| 84 | + writer.writerows(empty_csvs) |
| 85 | +csvFile.close() |
70 | 86 |
|
71 | 87 | # Concatenate the dataframes into a single one while dropping all duplicates |
72 | 88 | label_jp2_map_global = pd.concat(dfs).drop_duplicates().reset_index(drop=True) |
|
96 | 112 | jp2f_csv = os.path.join(parent_dir2, 'map_non_duplicated_jp2_paths.csv') |
97 | 113 | labels_csv = os.path.join(parent_dir2, 'map_non_duplicated_labels_paths.csv') |
98 | 114 |
|
99 | | -# Map the jp2 files |
| 115 | +# Map the jp2 files of all sub-directories into a single list
100 | 116 | new_files = [] |
101 | 117 | for file in jp2f: |
102 | 118 | new_file = os.path.join(jp2_dir, os.path.basename(file)) |
|
108 | 124 | jp2f_list.append([original_file_relative, new_file_relative]) |
109 | 125 | new_files.append(new_file) |
110 | 126 |
|
111 | | -# Write the csv file mapping the jp2 YEAR_MONTH-based path to new common directory |
| 127 | +# Write the csv file mapping the jp2 YEAR_MONTH-based path to a new single directory |
112 | 128 | with open(jp2f_csv, 'w') as csvFile: |
113 | 129 | writer = csv.writer(csvFile) |
114 | 130 | writer.writerows(jp2f_list) |
|
129 | 145 | labels_list.append([original_file_relative, new_file_relative]) |
130 | 146 | new_files.append(new_file) |
131 | 147 |
|
132 | | -# Create the restore csv of .npz files (including png files) mapping the .npz and png YEAR_MONTH-based path to new common directory |
| 148 | +# Create the csv mapping the .npz and png YEAR_MONTH-based path to new common directory |
133 | 149 | with open(labels_csv, 'w') as csvFile: |
134 | 150 | writer = csv.writer(csvFile) |
135 | 151 | writer.writerows(labels_list) |
|
0 commit comments