Skip to content

Commit 46c4fef

Browse files
Merge pull request #1104 from igmhub/p1d_script_correction
[bump minor] small correction to account for the new bootstrap covariance estimator
2 parents 19499b0 + 3c336ce commit 46c4fef

File tree

1 file changed

+66
-34
lines changed

1 file changed

+66
-34
lines changed

py/picca/pk1d/postproc_pk1d.py

Lines changed: 66 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,13 @@
2525
from scipy.stats import binned_statistic
2626

2727
from picca.constants import ABSORBER_IGM, SPEED_LIGHT
28-
from picca.pk1d.utils import (
29-
MEANPK_FITRANGE_SNR,
30-
fitfunc_variance_pk1d,
31-
)
28+
from picca.pk1d.utils import MEANPK_FITRANGE_SNR, fitfunc_variance_pk1d
3229
from picca.utils import userprint
3330

3431

35-
def read_pk1d(filename, kbin_edges, snrcut=None, zbins_snrcut=None, skymask_matrices=None):
32+
def read_pk1d(
33+
filename, kbin_edges, snrcut=None, zbins_snrcut=None, skymask_matrices=None
34+
):
3635
"""Read Pk1D data from a single file.
3736
3837
Arguments
@@ -100,16 +99,22 @@ def read_pk1d(filename, kbin_edges, snrcut=None, zbins_snrcut=None, skymask_matr
10099
if skymask_matrices is not None:
101100
chunk_table["Pk_raw_skycorr"] = chunk_table["Pk_raw"]
102101
chunk_table["Pk_noise_skycorr"] = chunk_table["Pk_noise"]
103-
w, = np.where(np.isclose(meanz_skymatrices, chunk_header["MEANZ"], atol=1.e-2))
104-
if len(w)==1:
102+
(w,) = np.where(
103+
np.isclose(meanz_skymatrices, chunk_header["MEANZ"], atol=1.0e-2)
104+
)
105+
if len(w) == 1:
105106
correction_matrix = skymask_matrices[w[0]][1]
106107
ll = len(chunk_table["Pk_raw"])
107108
correction_matrix = np.copy(correction_matrix[0:ll, 0:ll])
108-
if len(chunk_table["Pk_raw"])!=correction_matrix.shape[0]:
109-
userprint(f"""file {filename} hdu {i+1}:"""
110-
"""Pk_raw doesnt match shape of skymatrix.""")
109+
if len(chunk_table["Pk_raw"]) != correction_matrix.shape[0]:
110+
userprint(
111+
f"""file {filename} hdu {i+1}:"""
112+
"""Pk_raw doesnt match shape of skymatrix."""
113+
)
111114
else:
112-
chunk_table["Pk_raw_skycorr"] = correction_matrix @ chunk_table["Pk_raw"]
115+
chunk_table["Pk_raw_skycorr"] = (
116+
correction_matrix @ chunk_table["Pk_raw"]
117+
)
113118
chunk_table["Pk_noise_skycorr"] = (
114119
correction_matrix @ chunk_table["Pk_noise"]
115120
)
@@ -364,7 +369,9 @@ def compute_mean_pk1d(
364369
nbins_z * nbins_k * nbins_k
365370
)
366371
if compute_bootstrap_average:
367-
cov_table["boot_average_covariance"] = np.zeros(nbins_z * nbins_k * nbins_k)
372+
cov_table["boot_average_covariance"] = np.zeros(
373+
nbins_z * nbins_k * nbins_k
374+
)
368375

369376
k_index = np.full(len(p1d_table["k"]), -1, dtype=int)
370377
for ikbin, _ in enumerate(kbin_edges[:-1]): # First loop 1) k bins
@@ -441,11 +448,11 @@ def compute_mean_pk1d(
441448
np.outer(mean_p1d_table["N"][select], mean_p1d_table["N"][select])
442449
)
443450
index_cov = cov_table_regular_slice(izbin, nbins_k)
444-
cov_table["zbin"][index_cov[0]:index_cov[1]] = zbin_array
445-
cov_table["index_zbin"][index_cov[0]:index_cov[1]] = index_zbin_array
446-
cov_table["k1"][index_cov[0]:index_cov[1]] = k1_array
447-
cov_table["k2"][index_cov[0]:index_cov[1]] = k2_array
448-
cov_table["N"][index_cov[0]:index_cov[1]] = n_array
451+
cov_table["zbin"][index_cov[0] : index_cov[1]] = zbin_array
452+
cov_table["index_zbin"][index_cov[0] : index_cov[1]] = index_zbin_array
453+
cov_table["k1"][index_cov[0] : index_cov[1]] = k1_array
454+
cov_table["k2"][index_cov[0] : index_cov[1]] = k2_array
455+
cov_table["N"][index_cov[0] : index_cov[1]] = n_array
449456

450457
if n_chunks[izbin] == 0:
451458
p1d_weights_z, p1d_groups_z = [], []
@@ -551,22 +558,26 @@ def fill_average_pk(
551558
median_array,
552559
snrfit_array,
553560
) = (*output_mean[izbin],)
554-
index_mean = mean_p1d_table_regular_slice(izbin,nbins_k)
561+
index_mean = mean_p1d_table_regular_slice(izbin, nbins_k)
555562

556-
mean_p1d_table["zbin"][index_mean[0]:index_mean[1]] = zbin_array
557-
mean_p1d_table["index_zbin"][index_mean[0]:index_mean[1]] = index_zbin_array
558-
mean_p1d_table["N"][index_mean[0]:index_mean[1]] = n_array
563+
mean_p1d_table["zbin"][index_mean[0] : index_mean[1]] = zbin_array
564+
mean_p1d_table["index_zbin"][index_mean[0] : index_mean[1]] = index_zbin_array
565+
mean_p1d_table["N"][index_mean[0] : index_mean[1]] = n_array
559566
for icol, col in enumerate(p1d_table_cols):
560-
mean_p1d_table["mean" + col][index_mean[0]:index_mean[1]] = mean_array[icol]
567+
mean_p1d_table["mean" + col][index_mean[0] : index_mean[1]] = mean_array[
568+
icol
569+
]
561570
mean_p1d_table["error" + col][index_mean[0] : index_mean[1]] = np.sqrt(
562571
variance_array[icol]
563572
)
564573
mean_p1d_table["min" + col][index_mean[0] : index_mean[1]] = min_array[icol]
565-
mean_p1d_table["max" + col][index_mean[0]:index_mean[1]] = max_array[icol]
574+
mean_p1d_table["max" + col][index_mean[0] : index_mean[1]] = max_array[icol]
566575
if not nomedians:
567-
mean_p1d_table["median" + col][index_mean[0]:index_mean[1]] = median_array[icol]
576+
mean_p1d_table["median" + col][index_mean[0] : index_mean[1]] = (
577+
median_array[icol]
578+
)
568579
if weight_method == "fit_snr":
569-
snrfit_table[index_mean[0]:index_mean[1], :] = snrfit_array
580+
snrfit_table[index_mean[0] : index_mean[1], :] = snrfit_array
570581

571582

572583
def compute_average_pk_redshift(
@@ -865,13 +876,19 @@ def compute_average_pk_redshift(
865876
if apply_z_weights:
866877
mean = np.average(p1d_table[col][select], weights=redshift_weights)
867878
# simple analytic expression:
868-
variance = (np.std(p1d_table[col][select]) * (
869-
np.sqrt(np.sum(redshift_weights**2)) / np.sum(redshift_weights)
870-
))**2
879+
variance = (
880+
np.std(p1d_table[col][select])
881+
* (
882+
np.sqrt(np.sum(redshift_weights**2))
883+
/ np.sum(redshift_weights)
884+
)
885+
) ** 2
871886
else:
872887
mean = np.mean(p1d_table[col][select])
873888
# unbiased estimate: num_chunks-1
874-
variance = (np.std(p1d_table[col][select]) / np.sqrt(num_chunks - 1))**2
889+
variance = (
890+
np.std(p1d_table[col][select]) / np.sqrt(num_chunks - 1)
891+
) ** 2
875892

876893
else:
877894
raise ValueError("Option for 'weight_method' argument not found")
@@ -966,7 +983,7 @@ def compute_and_fill_covariance(
966983
for izbin in range(nbins_z):
967984
covariance_array = output_cov[izbin]
968985
index_cov = cov_table_regular_slice(izbin, nbins_k)
969-
cov_table["covariance"][index_cov[0]:index_cov[1]] = covariance_array
986+
cov_table["covariance"][index_cov[0] : index_cov[1]] = covariance_array
970987

971988
if compute_bootstrap:
972989
userprint("Computing covariance matrix with bootstrap method")
@@ -1248,7 +1265,9 @@ def compute_cov(
12481265
if len(p1d_groups) == 0:
12491266
return np.full(nbins_k * nbins_k, np.nan)
12501267

1251-
mean_pk_from_groups = np.nansum(p1d_weights * p1d_groups, axis=0)/np.nansum(p1d_weights, axis=0)
1268+
mean_pk_from_groups = np.nansum(p1d_weights * p1d_groups, axis=0) / np.nansum(
1269+
p1d_weights, axis=0
1270+
)
12521271
mean_pk_groups_product = np.outer(mean_pk_from_groups, mean_pk_from_groups)
12531272

12541273
sum_p1d_weights = np.nansum(p1d_weights, axis=0)
@@ -1274,7 +1293,7 @@ def compute_cov(
12741293

12751294
del p1d_groups, p1d_weights
12761295

1277-
covariance_matrix = ((weights_sum_product /weights_product_sum) - 1)**(-1) * (
1296+
covariance_matrix = ((weights_sum_product / weights_product_sum) - 1) ** (-1) * (
12781297
(p1d_groups_product_sum / weights_product_sum) - mean_pk_groups_product
12791298
)
12801299

@@ -1513,7 +1532,8 @@ def run_postproc_pk1d(
15131532

15141533
with Pool(ncpu) as pool:
15151534
output_readpk1d = pool.starmap(
1516-
read_pk1d, [[f, kbin_edges, snrcut, zbins_snrcut, skymask_matrices] for f in files]
1535+
read_pk1d,
1536+
[[f, kbin_edges, snrcut, zbins_snrcut, skymask_matrices] for f in files],
15171537
)
15181538

15191539
output_readpk1d = [x for x in output_readpk1d if x is not None]
@@ -1776,7 +1796,6 @@ def average_mean_pk1d_files(
17761796
)
17771797

17781798
if weighted_mean:
1779-
weights = 1 / masked_all_error_boot_covariance**2
17801799
combination_cov_table["boot_covariance"] = np.ma.average(
17811800
masked_all_boot_covariance, axis=0, weights=weights
17821801
).filled(np.nan)
@@ -1788,6 +1807,19 @@ def average_mean_pk1d_files(
17881807
masked_all_error_boot_covariance, axis=0
17891808
).filled(np.nan) / np.sqrt(len(cov_tables))
17901809

1810+
if "boot_average_covariance" in cov_tables[0].colnames:
1811+
all_boot_average_covariance = np.array(
1812+
[cov_table["boot_average_covariance"] for cov_table in cov_tables]
1813+
)
1814+
1815+
masked_all_boot_average_covariance = np.ma.masked_array(
1816+
all_boot_average_covariance, np.isnan(all_boot_average_covariance)
1817+
)
1818+
1819+
combination_cov_table["boot_average_covariance"] = np.ma.average(
1820+
masked_all_boot_average_covariance, axis=0
1821+
).filled(np.nan)
1822+
17911823
output_file = os.path.join(output_path, mean_p1d_names[0].split("/")[-1])
17921824
write_mean_pk1d(
17931825
output_file,

0 commit comments

Comments
 (0)