Skip to content

Commit 14e1924

Browse files
authored
Merge pull request #4 from MSKirk/wip_raphael
Added in changes from @WaaallEEE
2 parents 3417714 + a7bb54b commit 14e1924

9 files changed

+92
-31
lines changed

README.txt

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
To handle the overlapping events and associated duplicated files across consecutive months,
22
you will find:
3-
(1) an aggregated map of all overlap-free events in a csv file. This only shows file basenames
4-
(2) csv files for mapping the data relative paths into a single parent directory.
5-
All these maps get rid of the overlapping events and duplicated files by keeping only one
6-
wherever they occur.
73

8-
The aggregation script to produce these csv files is available
4+
(1) an aggregated map of all duplicate-free events in a csv file. This only shows file basenames:
5+
label_jp2_map_global.csv
6+
7+
(2) csv files for mapping, copying or moving duplicate-free data relative paths into a single parent directory:
8+
- map_non_duplicated_jp2_paths.csv
9+
- map_non_duplicated_labels_paths.csv
10+
11+
12+
The aggregation script to produce these csv files using the data on the original disk is available
913
at https://github.com/MSKirk/MachineLearning/blob/master/script_aggregation.py
1014

1115

calibration.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ def scale_rotate(image, angle=0, scale_factor=1, reference_pixel=None):
1616
"""
1717
Perform scaled rotation with opencv. About 20 times faster than with Sunpy & scikit/skimage warp methods.
1818
The output is a padded image that holds the entire rescaled,rotated image, recentered around the reference pixel.
19-
Positive-angle rotation will go counterclockwise if the array is displayed with the origin on top (default),
20-
and clockwise with the origin at bottom.
19+
Positive-angle rotation rotates the image clockwise if the array origin (0,0) maps to the bottom left of the image,
20+
and counterclockwise if the array origin maps to the top left of the image.
2121
2222
:param image: Numpy 2D array
23-
:param angle: rotation angle in degrees. Positive angle will rotate counterclocwise if array origin on top-left
23+
:param angle: rotation angle in degrees. Positive-angle rotation rotates image clockwise if the array origin (0,0)
24+
maps to the bottom left of the image, and counterclockwise if the array origin maps to the top left of the image.
2425
:param scale_factor: ratio of the wavelength-dependent pixel scale over the target scale of 0.6 arcsec
2526
:param reference_pixel: tuple of (x, y) coordinate. Given as (x, y) = (col, row) and not (row, col).
2627
:return: padded scaled and rotated image
1.03 MB
Binary file not shown.

read_jp2.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,19 @@ def read_solar_jp2(filepath, verbose=False):
2323
img = sunpy.io.read_file(filepath, filetype='jp2')[0]
2424
prepped_header = img.header
2525

26-
# Rotation of image to Solar North
26+
# Rotation of image to get vertical y-axis Top-to-Bottom parallel to Solar North-to-South axis.
2727
if img.header['CROTA2'] != 0:
2828
if verbose:
2929
print('Rotating image to solar north')
3030
prepped_data = calibration.scale_rotate(img.data, img.header['CROTA2'])
31+
prepped_header['CROTA2'] = 0
3132

3233
center = ((np.array(prepped_data.shape) - 1) / 2.0).astype(int)
3334
half_size = int(aia_image_size / 2)
34-
prepped_data = prepped_data[center[1] - half_size:center[1] + half_size, center[0] - half_size:center[0] + half_size]
35-
prepped_header['CROTA2'] = 0
35+
prepped_data = prepped_data[center[1] - half_size:center[1] + half_size, center[0] - half_size:center[0] + half_size].astype(np.float64)
36+
3637
else:
37-
prepped_data = img.data
38+
prepped_data = img.data.astype(np.float64)
3839

3940
# Normalizing the image intensity to levels at the start of the mission for AIA
4041
if 'AIA' in img.header['INSTRUME']:
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import os
2+
from read_jp2 import read_solar_jp2
3+
import matplotlib
4+
matplotlib.use('Tkagg')
5+
import matplotlib.pyplot as plt
6+
import numpy as np
7+
import calibration
8+
9+
# Get a jp2 sample. here using the jp2 included in this github repo
10+
filepath = '../images/2011_06_25__00_59_43_71__SDO_AIA_AIA_1700.jp2'
11+
12+
pdata, pheader = read_solar_jp2(filepath)
13+
pdata[0:1000, :] = 0
14+
15+
rdata = calibration.scale_rotate(pdata, 45)
16+
17+
# Display the image and make sure it's in the correct orientation with respect to the png sample
18+
vmax = np.percentile(pdata, 99.5)
19+
20+
fs = 20
21+
plt.figure(0, figsize=(18, 18))
22+
plt.subplot(2, 2, 1)
23+
plt.imshow(pdata, vmin=pdata.min(), vmax=vmax, origin='lower', cmap='gray')
24+
plt.title('origin lower, no rotation', fontsize=fs)
25+
plt.subplot(2, 2, 2)
26+
plt.imshow(rdata, vmin=pdata.min(), vmax=vmax, origin='lower', cmap='gray')
27+
plt.title('origin lower, rotation argument +45 deg', fontsize=fs)
28+
plt.subplot(2, 2, 3)
29+
plt.imshow(pdata, vmin=pdata.min(), vmax=vmax, cmap='gray')
30+
plt.title('origin top, no rotation', fontsize=fs)
31+
plt.subplot(2, 2, 4)
32+
plt.imshow(rdata, vmin=pdata.min(), vmax=vmax, cmap='gray')
33+
plt.title('origin top, rotation argument +45 deg', fontsize=fs)
34+
plt.tight_layout()
35+
plt.show()

script_aggregation.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,10 @@
55
(1) Merges the content of the csv files found in all YEAR_MONTH subdirectories into a single global_csv_file.
66
This file will map all jp2 to their labels without any overlaps. This csv only map file basenames.
77
8-
(2) This script also creates csv files mapping relative YEAR_MONTH-based file path to new common jp2 and labels directory
9-
directly under a new parent directory. You may this e.g. for moving the files into a new more "global" tree.
8+
(2) This script also creates csv files mapping relative YEAR_MONTH-based file path to new common jp2 and labels
9+
directly under a new parent directory. You may run this e.g. for moving the files into a new more "global" tree.
1010
11-
E.g: given a parent directory "parent_dir" hosting the original data tree (parent_dir),
12-
the csv file will map everything in
11+
E.g: given a parent directory "parent_dir", the csv file will map things from
1312
1413
parent_dir
1514
2010_12
@@ -47,26 +46,43 @@
4746
import glob
4847
import pandas as pd
4948
import csv
49+
from pathlib import Path
5050

5151

5252
############# Set some data directories - update to your personal case #############
5353

5454
# Parent directory of all YEAR_MONTH subdirectories that will also contain the global csv file
55-
parent_dir = '/Volumes/SolarData/LabeledImages/'
55+
parent_dir = '/media/raphael/SolarData/V2'
5656
# Common directory where all files will be moved, without duplicates.
5757
parent_dir2 = parent_dir
5858
# Filename of csv file that will be the aggregation all csv files of all YEAR_MONTH subdirectories without duplicates
59-
global_csv_file = os.path.join(parent_dir, 'label_jp2_map_global.csv')
59+
global_csv_file = os.path.join(parent_dir2, 'label_jp2_map_global.csv')
60+
# csv output for empty data
61+
csv_empty_data = os.path.join(parent_dir2, 'empty_data.csv')
62+
6063

6164
######### (1) Creating the aggregated map of jp2 and label masks ###########
6265

6366
# Fetch the csv file paths recursively
6467
csv_files = sorted(glob.glob(os.path.join(parent_dir, '20*/label_jp2_map.csv')))
6568
# Read their content and concatenate in a unique dataframe
6669
dfs = []
70+
empty_csvs = []
6771
for csvf in csv_files:
6872
print(csvf)
69-
dfs.append(pd.read_csv(csvf, header=None))
73+
try:
74+
# Sometimes the CSV file can be empty if no complete set exist at all
75+
dfs.append(pd.read_csv(csvf, header=None))
76+
except pd.errors.EmptyDataError:
77+
print('Empty csv file')
78+
# Write to file the parent directory of the empty data
79+
empty_csvs.append([Path(csvf).parent.name])
80+
continue
81+
82+
with open(csv_empty_data, 'w') as csvFile:
83+
writer = csv.writer(csvFile)
84+
writer.writerows(empty_csvs)
85+
csvFile.close()
7086

7187
# Concatenate the dataframes into a single one while dropping all duplicates
7288
label_jp2_map_global = pd.concat(dfs).drop_duplicates().reset_index(drop=True)
@@ -96,7 +112,7 @@
96112
jp2f_csv = os.path.join(parent_dir2, 'map_non_duplicated_jp2_paths.csv')
97113
labels_csv = os.path.join(parent_dir2, 'map_non_duplicated_labels_paths.csv')
98114

99-
# Map the jp2 files
115+
# Map the jp2 files of each sub-directories into a single list
100116
new_files = []
101117
for file in jp2f:
102118
new_file = os.path.join(jp2_dir, os.path.basename(file))
@@ -108,7 +124,7 @@
108124
jp2f_list.append([original_file_relative, new_file_relative])
109125
new_files.append(new_file)
110126

111-
# Write the csv file mapping the jp2 YEAR_MONTH-based path to new common directory
127+
# Write the csv file mapping the jp2 YEAR_MONTH-based path to a new single directory
112128
with open(jp2f_csv, 'w') as csvFile:
113129
writer = csv.writer(csvFile)
114130
writer.writerows(jp2f_list)
@@ -129,7 +145,7 @@
129145
labels_list.append([original_file_relative, new_file_relative])
130146
new_files.append(new_file)
131147

132-
# Create the restore csv of .npz files (including png files) mapping the .npz and png YEAR_MONTH-based path to new common directory
148+
# Create the csv mapping the .npz and png YEAR_MONTH-based path to new common directory
133149
with open(labels_csv, 'w') as csvFile:
134150
writer = csv.writer(csvFile)
135151
writer.writerows(labels_list)

script_over_days.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
if __name__ == '__main__':
66

7-
save_dir = os.path.abspath(os.path.expanduser('/Users/mskirk/Desktop/MLDataTest/test'))
8-
7+
save_dir = os.path.abspath(os.path.expanduser('~/Data/ML_projects/aia_recognition'))
8+
# DO NOT DOWNLOAD ANYTHING BEFORE 2010/12 (no HMI data in helioviewer)
99
tstart = '2011/06/25 00:00:00'
1010
tend = '2011/06/25 23:30:00'
1111
j = Jpd.Jp2ImageDownload(save_dir, tstart=tstart, tend=tend)

script_over_many_months.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
# SET THESE PARAMETERS:
1212
save_dir = os.path.abspath('/Users/mskirk/Desktop/MLDataTest')
1313
#save_dir = os.path.abspath('/Volumes/RAPH_1TB/Data/Michael/Hek_project')
14+
15+
# DO NOT DOWNLOAD ANYTHING BEFORE 2010/12 (no HMI data in helioviewer)
1416
start_date = '2017/01/01 00:00:00' # inclusive
1517
end_date = '2017/06/01 00:00:00' # not inclusive
1618

script_over_many_months_raphael.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,20 @@
88

99
if __name__ == '__main__':
1010

11-
# Update to your own case
12-
save_dir = os.path.abspath('/Volumes/RAPH_1TB/Data/Michael/Hek_project')
13-
start_date = '2018/05/01 00:00:00' # inclusive
14-
end_date = '2019/02/01 00:00:00' # not inclusive
11+
# SET THESE PARAMETERS:
12+
save_dir = os.path.abspath('~/Data/ML_projects/HEK_project')
1513

14+
# DO NOT DOWNLOAD ANYTHING BEFORE 2010/12 (no HMI data in helioviewer)
15+
start_date = '2017/01/01 00:00:00' # inclusive
16+
end_date = '2017/06/01 00:00:00' # not inclusive
1617

17-
begin_list = [dt for dt in rrule(MONTHLY, dtstart=parse_time(start_date), until=parse_time(end_date))]
18+
# SHOULDN'T NEED TO CHANGE ANYTHING BELOW THIS --------->>
19+
20+
begin_list = [dt for dt in rrule(MONTHLY, dtstart=parse_time(start_date).to_datetime(), until=parse_time(end_date).to_datetime())]
1821
end_list = [elem - datetime.timedelta(minutes=30) for elem in begin_list[1:]]
1922
del begin_list[-1]
2023

24+
2125
for tstart, tend in zip(begin_list, end_list):
2226

2327
j = Jpd.Jp2ImageDownload(save_dir, tstart=tstart, tend=tend)
@@ -45,5 +49,3 @@
4549
print('HEK server error during make_labels(). Trying again...')
4650
logging.warning('HEK server error raised ConnectionResetError during make_labels()')
4751
continue
48-
49-

0 commit comments

Comments
 (0)