@@ -195,7 +195,7 @@ def compute_bootci(
195195 x ,
196196 y = None ,
197197 func = None ,
198- method = "cper " ,
198+ method = "bca " ,
199199 paired = False ,
200200 confidence = 0.95 ,
201201 n_boot = 2000 ,
@@ -224,9 +224,10 @@ def compute_bootci(
224224 method : str
225225 Method to compute the confidence intervals (see Notes):
226226
227- * ``'cper'``: Bias-corrected percentile method (default)
228- * ``'norm'``: Normal approximation with bootstrapped bias and standard error
227+ * ``'bca'``: Bias-corrected and accelerated (BCa, default)
229228 * ``'per'``: Simple percentile
229+ * ``'basic'``: Basic (pivotal) method
230+ * ``'norm'``: Normal approximation with bootstrapped bias and standard error
230231 paired : boolean
231232 Indicates whether x and y are paired or not. For example, for correlation functions or
232233 paired T-test, x and y are assumed to be paired. Pingouin will resample the pairs
@@ -251,24 +252,18 @@ def compute_bootci(
251252
252253 Notes
253254 -----
254- Results have been tested against the
255- `bootci <https://www.mathworks.com/help/stats/bootci.html>`_ Matlab function.
255+ This function uses :py:func:`scipy.stats.bootstrap` under the hood. Requires SciPy >= 1.10.
256256
257- Since version 1.7, SciPy also includes a built-in bootstrap function
258- :py:func:`scipy.stats.bootstrap`. The SciPy implementation has two advantages over Pingouin: it
259- is faster when using ``vectorized=True``, and it supports the bias-corrected and accelerated
260- (BCa) confidence intervals for univariate functions. However, unlike Pingouin, it does not
261- return the bootstrap distribution.
257+ The bias-corrected and accelerated method (``bca``, default) corrects for both bias and
258+ skewness of the bootstrap distribution using jackknife resampling.
262259
263260 The percentile bootstrap method (``per``) is defined as the
264261 :math:`100 \\ times \\ frac{\\ alpha}{2}` and :math:`100 \\ times \\ frac{1 - \\ alpha}{2}`
265262 percentiles of the distribution of :math:`\\ theta` estimates obtained from resampling, where
266263 :math:`\\ alpha` is the level of significance (1 - confidence, default = 0.05 for 95% CIs).
267264
268- The bias-corrected percentile method (``cper``) corrects for bias of the bootstrap
269- distribution. This method is different from the BCa method — the default in Matlab and SciPy —
270- which corrects for both bias and skewness of the bootstrap distribution using jackknife
271- resampling.
265+ The basic (pivotal) method (``basic``) reflects the bootstrap distribution around the
266+ observed statistic.
272267
273268 The normal approximation method (``norm``) calculates the confidence intervals with the
274269 standard normal distribution using bootstrapped bias and standard error.
@@ -286,7 +281,7 @@ def compute_bootci(
286281
287282 Examples
288283 --------
289- 1. Bootstrapped 95% confidence interval of a Pearson correlation
284+ 1. Bootstrapped 95% BCa confidence interval of a Pearson correlation
290285
291286 >>> import pingouin as pg
292287 >>> import numpy as np
@@ -296,60 +291,36 @@ def compute_bootci(
296291 >>> stat = np.corrcoef(x, y)[0][1]
297292 >>> ci = pg.compute_bootci(x, y, func="pearson", paired=True, seed=42, decimals=4)
298293 >>> print(round(stat, 4), ci)
299- 0.0945 [-0.098 0.2738]
300-
301- Let's compare to SciPy's built-in bootstrap function
302-
303- >>> from scipy.stats import bootstrap
304- >>> bt_scipy = bootstrap(
305- ... data=(x, y),
306- ... statistic=lambda x, y: np.corrcoef(x, y)[0][1],
307- ... method="basic",
308- ... vectorized=False,
309- ... n_resamples=2000,
310- ... paired=True,
311- ... random_state=42,
312- ... )
313- >>> np.round(bt_scipy.confidence_interval, 4)
314- array([-0.0952, 0.2883])
294+ 0.0945 [-0.0986 0.2868]
315295
316- 2. Bootstrapped 95% confidence interval of a Cohen d
296+ 2. Bootstrapped 95% BCa confidence interval of a Cohen d
317297
318298 >>> stat = pg.compute_effsize(x, y, eftype="cohen")
319299 >>> ci = pg.compute_bootci(x, y, func="cohen", seed=42, decimals=3)
320300 >>> print(round(stat, 4), ci)
321- 0.7009 [0.403 1.009 ]
301+ 0.7009 [0.413 1.01 ]
322302
323- 3. Bootstrapped confidence interval of a standard deviation (univariate)
303+ 3. Bootstrapped BCa confidence interval of a standard deviation (univariate)
324304
325305 >>> import numpy as np
326306 >>> stat = np.std(x, ddof=1)
327307 >>> ci = pg.compute_bootci(x, func="std", seed=123)
328308 >>> print(round(stat, 4), ci)
329- 1.5534 [1.38 1.8 ]
309+ 1.5534 [1.39 1.81 ]
330310
331- Compare to SciPy's built-in bootstrap function, which returns the bias-corrected and
332- accelerated CIs (see Notes).
333-
334- >>> def std(x, axis):
335- ... return np.std(x, ddof=1, axis=axis)
336- >>> bt_scipy = bootstrap(data=(x,), statistic=std, n_resamples=2000, random_state=123)
337- >>> np.round(bt_scipy.confidence_interval, 2)
338- array([1.39, 1.81])
339-
340- Changing the confidence intervals type in Pingouin
311+ Changing the confidence intervals method
341312
342313 >>> pg.compute_bootci(x, func="std", seed=123, method="norm")
343314 array([1.37, 1.76])
344315
345316 >>> pg.compute_bootci(x, func="std", seed=123, method="percentile")
346- array([1.35 , 1.75])
317+ array([1.36 , 1.75])
347318
348319 4. Bootstrapped confidence interval using a custom univariate function
349320
350321 >>> from scipy.stats import skew
351322 >>> round(skew(x), 4), pg.compute_bootci(x, func=skew, n_boot=10000, seed=123)
352- (-0.137, array([-0.55 , 0.32 ]))
323+ (-0.137, array([-0.51 , 0.38 ]))
353324
354325 5. Bootstrapped confidence interval using a custom bivariate function. Here, x and y are not
355326 paired and can therefore have different sizes.
@@ -368,21 +339,31 @@ def compute_bootci(
368339 ... f"The bootstrap distribution has {bt.size} samples. The mean and standard "
369340 ... f"{bt.mean():.4f} ± {bt.std():.4f}"
370341 ... )
371- The bootstrap distribution has 10000 samples. The mean and standard 0.8807 ± 0.1704
342+ The bootstrap distribution has 10000 samples. The mean and standard 0.8792 ± 0.1707
372343 """
373344 from inspect import isfunction , isroutine
374345
346+ from scipy .stats import bootstrap as scipy_bootstrap
375347 from scipy .stats import norm
376348
377349 # Check other arguments
378350 assert isinstance (confidence , float )
379351 assert 0 < confidence < 1 , "confidence must be between 0 and 1."
380- assert method in ["norm" , "normal" , "percentile" , "per" , "cpercentile" , "cper" ]
352+ valid_methods = [
353+ "norm" ,
354+ "normal" ,
355+ "percentile" ,
356+ "per" ,
357+ "bca" ,
358+ "BCa" ,
359+ "basic" ,
360+ ]
361+ assert method in valid_methods , f"method must be one of { valid_methods } ."
381362 assert isfunction (func ) or isinstance (func , str ) or isroutine (func ), (
382363 "func must be a function (e.g. np.mean, custom function) or a string (e.g. 'pearson'). "
383364 "See documentation for more details."
384365 )
385- vectorizable = False
366+ vectorized = False
386367
387368 # Check x
388369 x = np .asarray (x )
@@ -423,75 +404,63 @@ def func(x, y):
423404 return compute_effsize (x , y , paired = paired , eftype = func_str )
424405
425406 elif func == "mean" :
426- vectorizable = True
407+ vectorized = True
427408
428- def func (x ):
429- return np .mean (x , axis = 0 )
409+ def func (x , axis = - 1 ):
410+ return np .mean (x , axis = axis )
430411
431412 elif func == "std" :
432- vectorizable = True
413+ vectorized = True
433414
434- def func (x ):
435- return np .std (x , ddof = 1 , axis = 0 )
415+ def func (x , axis = - 1 ):
416+ return np .std (x , ddof = 1 , axis = axis )
436417
437418 elif func == "var" :
438- vectorizable = True
419+ vectorized = True
439420
440- def func (x ):
441- return np .var (x , ddof = 1 , axis = 0 )
421+ def func (x , axis = - 1 ):
422+ return np .var (x , ddof = 1 , axis = axis )
442423
443424 else :
444425 raise ValueError ("Function string not recognized." )
445426
446- # Bootstrap
447- bootstat = np .empty (n_boot )
448- rng = np .random .default_rng (seed ) # Random seed
449- boot_x = rng .choice (np .arange (nx ), size = (nx , n_boot ), replace = True )
450-
451- if y is not None :
452- reference = func (x , y )
453- if paired :
454- for i in range (n_boot ):
455- # Note that here we use a bootstrapping procedure with replacement
456- # of all the pairs (Xi, Yi). This is NOT suited for
457- # hypothesis testing such as p-value estimation). Instead, for the
458- # latter, one must only shuffle the Y values while keeping the X
459- # values constant, i.e.:
460- # >>> boot_x = rng.random_sample((n_boot, n)).argsort(axis=1)
461- # >>> for i in range(n_boot):
462- # >>> bootstat[i] = func(x, y[boot_x[i, :]])
463- bootstat [i ] = func (x [boot_x [:, i ]], y [boot_x [:, i ]])
464- else :
465- boot_y = rng .choice (np .arange (ny ), size = (ny , n_boot ), replace = True )
466- for i in range (n_boot ):
467- bootstat [i ] = func (x [boot_x [:, i ]], y [boot_y [:, i ]])
427+ # Determine scipy bootstrap method
428+ use_custom_ci = method in ["norm" , "normal" ]
429+ if method in ["bca" , "BCa" ]:
430+ _scipy_method = "BCa"
431+ elif method in ["per" , "percentile" ]:
432+ _scipy_method = "percentile"
433+ elif method == "basic" :
434+ _scipy_method = "basic"
468435 else :
469- reference = func (x )
470- if vectorizable :
471- bootstat = func (x [boot_x ])
472- else :
473- for i in range (n_boot ):
474- bootstat [i ] = func (x [boot_x [:, i ]])
436+ _scipy_method = "percentile"
437+
438+ # Run scipy bootstrap
439+ data = (x , y ) if y is not None else (x ,)
440+ _paired = paired if y is not None else False
441+ boot_result = scipy_bootstrap (
442+ data = data ,
443+ statistic = func ,
444+ n_resamples = n_boot ,
445+ confidence_level = confidence ,
446+ method = _scipy_method ,
447+ paired = _paired ,
448+ vectorized = vectorized ,
449+ random_state = seed ,
450+ )
451+ bootstat = boot_result .bootstrap_distribution
475452
476453 # CONFIDENCE INTERVALS
477- # See Matlab bootci function
478- alpha = (1 - confidence ) / 2
479- if method in ["norm" , "normal" ]:
454+ if use_custom_ci :
480455 # Normal approximation
481- za = norm .ppf (alpha ) # = 1.96
456+ alpha = (1 - confidence ) / 2
457+ reference = func (x ) if y is None else func (x , y )
458+ za = norm .ppf (alpha )
482459 se = np .std (bootstat , ddof = 1 )
483460 bias = np .mean (bootstat - reference )
484461 ci = np .array ([reference - bias + se * za , reference - bias - se * za ])
485- elif method in ["percentile" , "per" ]:
486- # Simple percentile
487- interval = 100 * np .array ([alpha , 1 - alpha ])
488- ci = np .percentile (bootstat , interval )
489- pass
490462 else :
491- # Bias-corrected percentile bootstrap
492- from pingouin .regression import _bias_corrected_ci
493-
494- ci = _bias_corrected_ci (bootstat , reference , alpha = (1 - confidence ))
463+ ci = np .array ([boot_result .confidence_interval .low , boot_result .confidence_interval .high ])
495464
496465 ci = np .round (ci , decimals )
497466
0 commit comments