@@ -188,7 +188,7 @@ def compute_bootci(
188188 x ,
189189 y = None ,
190190 func = None ,
191- method = "cper " ,
191+ method = "bca " ,
192192 paired = False ,
193193 confidence = 0.95 ,
194194 n_boot = 2000 ,
@@ -217,9 +217,10 @@ def compute_bootci(
217217 method : str
218218 Method to compute the confidence intervals (see Notes):
219219
220- * ``'cper'``: Bias-corrected percentile method (default)
221- * ``'norm'``: Normal approximation with bootstrapped bias and standard error
220+ * ``'bca'``: Bias-corrected and accelerated (BCa, default)
222221 * ``'per'``: Simple percentile
222+ * ``'basic'``: Basic (pivotal) method
223+ * ``'norm'``: Normal approximation with bootstrapped bias and standard error
223224 paired : boolean
224225 Indicates whether x and y are paired or not. For example, for correlation functions or
225226 paired T-test, x and y are assumed to be paired. Pingouin will resample the pairs
@@ -244,24 +245,18 @@ def compute_bootci(
244245
245246 Notes
246247 -----
247- Results have been tested against the
248- `bootci <https://www.mathworks.com/help/stats/bootci.html>`_ Matlab function.
248+ This function uses :py:func:`scipy.stats.bootstrap` under the hood. Requires SciPy >= 1.10.
249249
250- Since version 1.7, SciPy also includes a built-in bootstrap function
251- :py:func:`scipy.stats.bootstrap`. The SciPy implementation has two advantages over Pingouin: it
252- is faster when using ``vectorized=True``, and it supports the bias-corrected and accelerated
253- (BCa) confidence intervals for univariate functions. However, unlike Pingouin, it does not
254- return the bootstrap distribution.
250+ The bias-corrected and accelerated method (``bca``, default) corrects for both bias and
251+ skewness of the bootstrap distribution using jackknife resampling.
255252
256253 The percentile bootstrap method (``per``) is defined as the
257254 :math:`100 \\ times \\ frac{\\ alpha}{2}` and :math:`100 \\ times \\ frac{1 - \\ alpha}{2}`
258255 percentiles of the distribution of :math:`\\ theta` estimates obtained from resampling, where
259256 :math:`\\ alpha` is the level of significance (1 - confidence, default = 0.05 for 95% CIs).
260257
261- The bias-corrected percentile method (``cper``) corrects for bias of the bootstrap
262- distribution. This method is different from the BCa method — the default in Matlab and SciPy —
263- which corrects for both bias and skewness of the bootstrap distribution using jackknife
264- resampling.
258+ The basic (pivotal) method (``basic``) reflects the bootstrap distribution around the
259+ observed statistic.
265260
266261 The normal approximation method (``norm``) calculates the confidence intervals with the
267262 standard normal distribution using bootstrapped bias and standard error.
@@ -279,7 +274,7 @@ def compute_bootci(
279274
280275 Examples
281276 --------
282- 1. Bootstrapped 95% confidence interval of a Pearson correlation
277+ 1. Bootstrapped 95% BCa confidence interval of a Pearson correlation
283278
284279 >>> import pingouin as pg
285280 >>> import numpy as np
@@ -289,60 +284,36 @@ def compute_bootci(
289284 >>> stat = np.corrcoef(x, y)[0][1]
290285 >>> ci = pg.compute_bootci(x, y, func="pearson", paired=True, seed=42, decimals=4)
291286 >>> print(round(stat, 4), ci)
292- 0.0945 [-0.098 0.2738]
293-
294- Let's compare to SciPy's built-in bootstrap function
295-
296- >>> from scipy.stats import bootstrap
297- >>> bt_scipy = bootstrap(
298- ... data=(x, y),
299- ... statistic=lambda x, y: np.corrcoef(x, y)[0][1],
300- ... method="basic",
301- ... vectorized=False,
302- ... n_resamples=2000,
303- ... paired=True,
304- ... random_state=42,
305- ... )
306- >>> np.round(bt_scipy.confidence_interval, 4)
307- array([-0.0952, 0.2883])
287+ 0.0945 [-0.0986 0.2868]
308288
309- 2. Bootstrapped 95% confidence interval of a Cohen d
289+ 2. Bootstrapped 95% BCa confidence interval of a Cohen d
310290
311291 >>> stat = pg.compute_effsize(x, y, eftype="cohen")
312292 >>> ci = pg.compute_bootci(x, y, func="cohen", seed=42, decimals=3)
313293 >>> print(round(stat, 4), ci)
314- 0.7009 [0.403 1.009 ]
294+ 0.7009 [0.413 1.01 ]
315295
316- 3. Bootstrapped confidence interval of a standard deviation (univariate)
296+ 3. Bootstrapped BCa confidence interval of a standard deviation (univariate)
317297
318298 >>> import numpy as np
319299 >>> stat = np.std(x, ddof=1)
320300 >>> ci = pg.compute_bootci(x, func="std", seed=123)
321301 >>> print(round(stat, 4), ci)
322- 1.5534 [1.38 1.8 ]
302+ 1.5534 [1.39 1.81 ]
323303
324- Compare to SciPy's built-in bootstrap function, which returns the bias-corrected and
325- accelerated CIs (see Notes).
326-
327- >>> def std(x, axis):
328- ... return np.std(x, ddof=1, axis=axis)
329- >>> bt_scipy = bootstrap(data=(x,), statistic=std, n_resamples=2000, random_state=123)
330- >>> np.round(bt_scipy.confidence_interval, 2)
331- array([1.39, 1.81])
332-
333- Changing the confidence intervals type in Pingouin
304+ Changing the confidence intervals method
334305
335306 >>> pg.compute_bootci(x, func="std", seed=123, method="norm")
336307 array([1.37, 1.76])
337308
338309 >>> pg.compute_bootci(x, func="std", seed=123, method="percentile")
339- array([1.35 , 1.75])
310+ array([1.36 , 1.75])
340311
341312 4. Bootstrapped confidence interval using a custom univariate function
342313
343314 >>> from scipy.stats import skew
344315 >>> round(skew(x), 4), pg.compute_bootci(x, func=skew, n_boot=10000, seed=123)
345- (-0.137, array([-0.55 , 0.32 ]))
316+ (-0.137, array([-0.51 , 0.38 ]))
346317
347318 5. Bootstrapped confidence interval using a custom bivariate function. Here, x and y are not
348319 paired and can therefore have different sizes.
@@ -361,21 +332,31 @@ def compute_bootci(
361332 ... f"The bootstrap distribution has {bt.size} samples. The mean and standard "
362333 ... f"{bt.mean():.4f} ± {bt.std():.4f}"
363334 ... )
364- The bootstrap distribution has 10000 samples. The mean and standard 0.8807 ± 0.1704
335+ The bootstrap distribution has 10000 samples. The mean and standard 0.8792 ± 0.1707
365336 """
366337 from inspect import isfunction , isroutine
367338
339+ from scipy .stats import bootstrap as scipy_bootstrap
368340 from scipy .stats import norm
369341
370342 # Check other arguments
371343 assert isinstance (confidence , float )
372344 assert 0 < confidence < 1 , "confidence must be between 0 and 1."
373- assert method in ["norm" , "normal" , "percentile" , "per" , "cpercentile" , "cper" ]
345+ valid_methods = [
346+ "norm" ,
347+ "normal" ,
348+ "percentile" ,
349+ "per" ,
350+ "bca" ,
351+ "BCa" ,
352+ "basic" ,
353+ ]
354+ assert method in valid_methods , f"method must be one of { valid_methods } ."
374355 assert isfunction (func ) or isinstance (func , str ) or isroutine (func ), (
375356 "func must be a function (e.g. np.mean, custom function) or a string (e.g. 'pearson'). "
376357 "See documentation for more details."
377358 )
378- vectorizable = False
359+ vectorized = False
379360
380361 # Check x
381362 x = np .asarray (x )
@@ -416,75 +397,63 @@ def func(x, y):
416397 return compute_effsize (x , y , paired = paired , eftype = func_str )
417398
418399 elif func == "mean" :
419- vectorizable = True
400+ vectorized = True
420401
421- def func (x ):
422- return np .mean (x , axis = 0 )
402+ def func (x , axis = - 1 ):
403+ return np .mean (x , axis = axis )
423404
424405 elif func == "std" :
425- vectorizable = True
406+ vectorized = True
426407
427- def func (x ):
428- return np .std (x , ddof = 1 , axis = 0 )
408+ def func (x , axis = - 1 ):
409+ return np .std (x , ddof = 1 , axis = axis )
429410
430411 elif func == "var" :
431- vectorizable = True
412+ vectorized = True
432413
433- def func (x ):
434- return np .var (x , ddof = 1 , axis = 0 )
414+ def func (x , axis = - 1 ):
415+ return np .var (x , ddof = 1 , axis = axis )
435416
436417 else :
437418 raise ValueError ("Function string not recognized." )
438419
439- # Bootstrap
440- bootstat = np .empty (n_boot )
441- rng = np .random .default_rng (seed ) # Random seed
442- boot_x = rng .choice (np .arange (nx ), size = (nx , n_boot ), replace = True )
443-
444- if y is not None :
445- reference = func (x , y )
446- if paired :
447- for i in range (n_boot ):
448- # Note that here we use a bootstrapping procedure with replacement
449- # of all the pairs (Xi, Yi). This is NOT suited for
450- # hypothesis testing such as p-value estimation). Instead, for the
451- # latter, one must only shuffle the Y values while keeping the X
452- # values constant, i.e.:
453- # >>> boot_x = rng.random_sample((n_boot, n)).argsort(axis=1)
454- # >>> for i in range(n_boot):
455- # >>> bootstat[i] = func(x, y[boot_x[i, :]])
456- bootstat [i ] = func (x [boot_x [:, i ]], y [boot_x [:, i ]])
457- else :
458- boot_y = rng .choice (np .arange (ny ), size = (ny , n_boot ), replace = True )
459- for i in range (n_boot ):
460- bootstat [i ] = func (x [boot_x [:, i ]], y [boot_y [:, i ]])
420+ # Determine scipy bootstrap method
421+ use_custom_ci = method in ["norm" , "normal" ]
422+ if method in ["bca" , "BCa" ]:
423+ _scipy_method = "BCa"
424+ elif method in ["per" , "percentile" ]:
425+ _scipy_method = "percentile"
426+ elif method == "basic" :
427+ _scipy_method = "basic"
461428 else :
462- reference = func (x )
463- if vectorizable :
464- bootstat = func (x [boot_x ])
465- else :
466- for i in range (n_boot ):
467- bootstat [i ] = func (x [boot_x [:, i ]])
429+ _scipy_method = "percentile"
430+
431+ # Run scipy bootstrap
432+ data = (x , y ) if y is not None else (x ,)
433+ _paired = paired if y is not None else False
434+ boot_result = scipy_bootstrap (
435+ data = data ,
436+ statistic = func ,
437+ n_resamples = n_boot ,
438+ confidence_level = confidence ,
439+ method = _scipy_method ,
440+ paired = _paired ,
441+ vectorized = vectorized ,
442+ random_state = seed ,
443+ )
444+ bootstat = boot_result .bootstrap_distribution
468445
469446 # CONFIDENCE INTERVALS
470- # See Matlab bootci function
471- alpha = (1 - confidence ) / 2
472- if method in ["norm" , "normal" ]:
447+ if use_custom_ci :
473448 # Normal approximation
474- za = norm .ppf (alpha ) # = 1.96
449+ alpha = (1 - confidence ) / 2
450+ reference = func (x ) if y is None else func (x , y )
451+ za = norm .ppf (alpha )
475452 se = np .std (bootstat , ddof = 1 )
476453 bias = np .mean (bootstat - reference )
477454 ci = np .array ([reference - bias + se * za , reference - bias - se * za ])
478- elif method in ["percentile" , "per" ]:
479- # Simple percentile
480- interval = 100 * np .array ([alpha , 1 - alpha ])
481- ci = np .percentile (bootstat , interval )
482- pass
483455 else :
484- # Bias-corrected percentile bootstrap
485- from pingouin .regression import _bias_corrected_ci
486-
487- ci = _bias_corrected_ci (bootstat , reference , alpha = (1 - confidence ))
456+ ci = np .array ([boot_result .confidence_interval .low , boot_result .confidence_interval .high ])
488457
489458 ci = np .round (ci , decimals )
490459
0 commit comments