diff --git a/README.md b/README.md index 7b6995d..0c9b3fb 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ The package `rdrobust` implements estimation, inference, and graphical procedure - `rdrobust`: point estimation and robust bias-corrected inference. - `rdbwselect`: data-driven bandwidth selection for RD estimation and inference. - `rdplot`: data-driven RD plots based on binned means and local polynomial fits. +- Stata also includes `rdrobustplot`, a postestimation diagnostic plot after `rdrobust`. ## Python Implementation @@ -36,9 +37,9 @@ To install/update in Stata type: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/main/stata) replace ``` -- Help: [rdrobust](stata/rdrobust.pdf), [rdbwselect](stata/rdbwselect.pdf), [rdplot](stata/rdplot.pdf). +- Help: [rdrobust](stata/rdrobust.pdf), [rdbwselect](stata/rdbwselect.pdf), [rdplot](stata/rdplot.pdf), [rdrobustplot](stata/rdrobustplot.pdf). -- Replication: [do-file](stata/rdrobust_illustration.do), [rdplot illustration](stata/rdplot_illustration.do), [senate data](stata/rdrobust_senate.dta). +- Replication: [rdrobust illustration](stata/rdrobust_illustration.do), [rdplot illustration](stata/rdplot_illustration.do), [new features illustration](stata/rdrobust_illustration_new.do), [senate data](stata/rdrobust_senate.dta). ## References diff --git a/stata/rdbwselect.ado b/stata/rdbwselect.ado index 68e1c3f..913c62f 100644 --- a/stata/rdbwselect.ado +++ b/stata/rdbwselect.ado @@ -2,15 +2,32 @@ * RDROBUST STATA PACKAGE -- rdbwselect * Authors: Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell, Rocio Titiunik ******************************************************************************** -*!version 10.0.0 2026-05-15 +*!rdrobust Stata package v11.0.0 2026-05-13 capture program drop rdbwselect program define rdbwselect, eclass + version 16.0 syntax anything [if] [in] [, c(real 0) fuzzy(string) deriv(real 0) p(string) q(real 0) covs(string) covs_drop(string) kernel(string) weights(string) bwselect(string) vce(string) scaleregul(real 1) all nochecks masspoints(string) bwcheck(real 0) bwrestrict(string) stdvars(string)] marksample touse - preserve - qui keep if `touse' + + * Snapshot Mata externals so we can drop only the variables WE create, + * leaving the user's Mata workspace and the loaded rdrobust_*.mo + * library functions untouched. + mata: _mtx = direxternal("*"); st_local("_mata_before", rows(_mtx) ? invtokens(_mtx') : "") + mata: mata drop _mtx + + * M1: do all work in an isolated temp frame instead of preserve+keep. + * Only the touse=1 subset is copied (in-memory, no disk I/O), and the + * user's frame is restored even on error or Ctrl-Break via nobreak. + local _orig_frame `c(frame)' + tempname _work_frame + capture frame drop `_work_frame' + frame put * if `touse', into(`_work_frame') + + nobreak { + cwf `_work_frame' + capture noisily { tokenize "`anything'" local y `1' local x `2' @@ -19,38 +36,84 @@ program define rdbwselect, eclass ******************** Set VCE *************************** local nnmatch = 3 - tokenize `vce' - local w : word count `vce' + local cr_method = "" + tokenize `vce' + local w : word count `vce' if `w' == 1 { local vce_select `"`1'"' } if `w' == 2 { local vce_select `"`1'"' - if ("`vce_select'"=="nn") local nnmatch `"`2'"' - if ("`vce_select'"=="cluster" | "`vce_select'"=="nncluster") local clustvar `"`2'"' + if ("`vce_select'"=="nn") local nnmatch `"`2'"' + if inlist("`vce_select'","cluster","nncluster","cr1","cr2","cr3","hc0","hc1","hc2","hc3") local clustvar `"`2'"' } if `w' == 3 { local vce_select `"`1'"' local clustvar `"`2'"' local nnmatch `"`3'"' - if ("`vce_select'"!="cluster" & "`vce_select'"!="nncluster") di as error "{err}{cmd:vce()} incorrectly specified" + if !inlist("`vce_select'","cluster","nncluster","cr1","cr2","cr3") { + di as error "{err}{cmd:vce()} incorrectly specified" + exit 125 + } } if `w' > 3 { - di as error "{err}{cmd:vce()} incorrectly specified" + di as error "{err}{cmd:vce()} incorrectly specified" exit 125 } - + + * Disallow vce(nncluster ...): warn and shift to cr1 + if ("`vce_select'"=="nncluster") { + di as text "Warning: vce(nncluster) is not supported. Switching to vce(cr1) (the default when clusters)." + local vce_select = "cr1" + } + + * With a cluster variable, map hc0/hc1/hc2/hc3 to cr1/cr1/cr2/cr3. + * Per cluster_validation design: hc0+cluster is a silent remap to cr1 + * (the default); hc1/2/3 produce a warning so the user knows their + * requested HC variant is being upgraded to its cluster analogue. + if ("`clustvar'"!="") { + if ("`vce_select'"=="hc0") local vce_select = "cr1" + if ("`vce_select'"=="hc1") { + di as text "Warning: vce(hc1 `clustvar') is not a cluster option. Switching to vce(cr1 `clustvar')." + local vce_select = "cr1" + } + if ("`vce_select'"=="hc2") { + di as text "Warning: vce(hc2 `clustvar') is not a cluster option. Switching to vce(cr2 `clustvar')." + local vce_select = "cr2" + } + if ("`vce_select'"=="hc3") { + di as text "Warning: vce(hc3 `clustvar') is not a cluster option. Switching to vce(cr3 `clustvar')." + local vce_select = "cr3" + } + if ("`vce_select'"=="cluster") local vce_select = "cr1" + } + + * Preserve the raw vce option for e(vce_select) before internal remapping + local vce_raw = "`vce_select'" + if ("`vce_raw'"=="") local vce_raw = "nn" + + * Display label local vce_type = "NN" - if ("`vce_select'"=="hc0") local vce_type = "HC0" - if ("`vce_select'"=="hc1") local vce_type = "HC1" - if ("`vce_select'"=="hc2") local vce_type = "HC2" - if ("`vce_select'"=="hc3") local vce_type = "HC3" - if ("`vce_select'"=="cluster") local vce_type = "CR1" - if ("`vce_select'"=="nncluster") local vce_type = "NNcluster" - - if ("`vce_select'"=="cluster" | "`vce_select'"=="nncluster") local cluster = "cluster" - if ("`vce_select'"=="cluster") local vce_select = "hc0" - if ("`vce_select'"=="nncluster") local vce_select = "nn" + if ("`vce_select'"=="hc0") local vce_type = "HC0" + if ("`vce_select'"=="hc1") local vce_type = "HC1" + if ("`vce_select'"=="hc2") local vce_type = "HC2" + if ("`vce_select'"=="hc3") local vce_type = "HC3" + if ("`vce_select'"=="cr1") local vce_type = "CR1" + if ("`vce_select'"=="cr2") local vce_type = "CR2" + if ("`vce_select'"=="cr3") local vce_type = "CR3" + + * Cluster / CR mapping + if inlist("`vce_select'","cr1","cr2","cr3") { + if ("`clustvar'"=="") { + di as error "{err}{cmd:vce(`vce_select' clustervar)} requires a cluster variable" + exit 125 + } + local cluster = "cluster" + } + if ("`vce_select'"=="cr1") local cr_method = "cr1" + if ("`vce_select'"=="cr2") local cr_method = "crv2" + if ("`vce_select'"=="cr3") local cr_method = "crv3" + if inlist("`vce_select'","cr1","cr2","cr3") local vce_select = "hc0" if ("`vce_select'"=="") local vce_select = "nn" ******************** Set Fuzzy*************************** @@ -74,26 +137,38 @@ program define rdbwselect, eclass ************************************************************ **** DROP MISSINGS ****************************************** - qui drop if mi(`y') | mi(`x') - if ("`cluster'"!="") qui drop if mi(`clustvar') - if ("`fuzzy'"~="") { - qui drop if mi(`fuzzyvar') - } - if ("`covs'"~="") { qui ds `covs', alpha local covs_list = r(varlist) - local ncovs: word count `covs_list' - foreach z in `covs_list' { - qui drop if mi(`z') - } + local ncovs: word count `covs_list' } + * Combine all missing-value drops into a single scan. + local drop_cond "mi(`y') | mi(`x')" + if ("`fuzzy'"!="") local drop_cond "`drop_cond' | mi(`fuzzyvar')" + if ("`cluster'"!="") local drop_cond "`drop_cond' | mi(`clustvar')" + foreach z of local covs_list { + local drop_cond "`drop_cond' | mi(`z')" + } + if ("`weights'"!="") local drop_cond "`drop_cond' | mi(`weights') | `weights'<=0" + qui drop if `drop_cond' - if ("`weights'"~="") { - qui drop if mi(`weights') - qui drop if `weights'<=0 + **** Convert string clustvar to numeric ************************ + * vce(cluster var) traditionally accepts string/categorical clusters in + * Stata. rdbwselect's Mata path uses st_data(), which is numeric-only, + * so map a string clustvar to an integer factor via egen group() in a + * tempvar used only by the Mata call. The original `clustvar' is kept + * unchanged so display and ereturn report the user's variable name. + local clustvar_num "`clustvar'" + if ("`cluster'"!="") { + cap confirm numeric variable `clustvar' + if (_rc) { + tempvar _clustvar_num + qui egen `_clustvar_num' = group(`clustvar') + local clustvar_num "`_clustvar_num'" + } } - + + **** CHECK colinearity ****************************************** local covs_drop_coll = 0 @@ -143,12 +218,12 @@ program define rdbwselect, eclass local x_iq = r(p75)-r(p25) local x_sd = r(sd) - if ("`deriv'">"0" & "`p'"=="" & "`q'"=="0") local p = (`deriv'+1) + if (`deriv'>0 & "`p'"=="" & `q'==0) local p = (`deriv'+1) if ("`p'"=="") local p = 1 if ("`q'"=="0") local q = (`p'+1) **************************** BEGIN ERROR CHECKING ************************************************ - if ("`nochecks'"=="") { + if ("`checks'"=="") { if (`c'<=`x_min' | `c'>=`x_max'){ di as error "{err}{cmd:c()} should be set within the range of `x'" @@ -166,7 +241,7 @@ program define rdbwselect, eclass } if ("`bwselect'"=="CCT" | "`bwselect'"=="IK" | "`bwselect'"=="CV" |"`bwselect'"=="cct" | "`bwselect'"=="ik" | "`bwselect'"=="cv"){ - di as error "{err}{cmd:bwselect()} options IK, CCT and CV have been deprecated. Please see help for new options" + di as error "{err}{cmd:bwselect()} options IK, CCT and CV have been depricated. Please see help for new options" exit 7 } @@ -180,34 +255,58 @@ program define rdbwselect, eclass exit 7 } - if ("`p'"<"0" | "`q'"<="0" | "`deriv'"<"0" | "`nnmatch'"<="0" ){ - di as error "{err}{cmd:p()}, {cmd:q()}, {cmd:deriv()}, {cmd:nnmatch()} imson should be positive" + if (`p'<0 | `q'<=0 | `deriv'<0 | `nnmatch'<=0){ + di as error "{err}{cmd:p()}, {cmd:q()}, {cmd:deriv()}, {cmd:nnmatch()} should be positive" exit 411 } - if ("`p'">="`q'" & "`q'">"0"){ - di as error "{err}{cmd:q()} should be higher than {cmd:p()}" + if (`p'>=`q' & `q'>0){ + di as error "{err}{cmd:q()} should be higher than {cmd:p()}" exit 125 } - if ("`deriv'">"`p'" & "`deriv'">"0" ){ - di as error "{err}{cmd:deriv()} can not be higher than {cmd:p()}" + if (`deriv'>`p' & `deriv'>0){ + di as error "{err}{cmd:deriv()} can not be higher than {cmd:p()}" exit 125 } - if ("`p'">"0" ) { + if (`p'>0) { local p_round = round(`p')/`p' local q_round = round(`q')/`q' local d_round = round(`deriv'+1)/(`deriv'+1) local m_round = round(`nnmatch')/`nnmatch' if (`p_round'!=1 | `q_round'!=1 |`d_round'!=1 |`m_round'!=1 ){ - di as error "{err}{cmd:p()}, {cmd:q()}, {cmd:deriv()} and {cmd:nnmatch()} should be integers" + di as error "{err}{cmd:p()}, {cmd:q()}, {cmd:deriv()} and {cmd:nnmatch()} should be integers" exit 126 } - } + } + if (`bwcheck' < 0 | (`bwcheck' != round(`bwcheck'))) { + di as error "{err}{cmd:bwcheck()} must be a non-negative integer (0 = unset)" + exit 125 + } + if ("`masspoints'" != "" & /// + !inlist("`masspoints'", "check", "adjust", "off", "false")) { + di as error "{err}{cmd:masspoints()} must be one of check, adjust, off" + exit 125 + } } - + } + * End of validation-only capture block. Splitting validation from the + * Mata-work block sidesteps a Stata quirk where `exit N` inside an `if{}` + * inside `capture noisily { ... }` does NOT halt the noisily block when + * later `mata { ... }` blocks exist at the same scope; control jumps past + * the first mata block and lands in the second, leaking transient Mata + * externals and surfacing a misleading rc=3499 instead of the real + * validation error. See rdrobust.ado for the same pattern. + local _rc = _rc + if `_rc' { + cwf `_orig_frame' + frame drop `_work_frame' + exit `_rc' + } + capture noisily { + if ("`kernel'"=="epanechnikov" | "`kernel'"=="epa") { local kernel_type = "Epanechnikov" local C_c = 2.34 @@ -287,6 +386,11 @@ program define rdbwselect, eclass if ("`fuzzy'"~="") { T = st_data(.,("`fuzzyvar'"), 0) T_l = select(T,ind_l); T_r = select(T,ind_r) + // Reject fully degenerate first stage (no variation, no jump). + // One-sided non-compliance falls through to the perf_comp branch. + if (variance(T_l)==0 & variance(T_r)==0 & abs(mean(T_l) - mean(T_r)) < sqrt(epsilon(1))) { + _error("Fuzzy RD: first-stage variable has no variation and no jump at the cutoff. The fuzzy estimator is not identified.") + } if (variance(T_l)==0 | variance(T_r)==0){ T_l = T_r =0 st_local("perf_comp","perf_comp") @@ -299,7 +403,7 @@ program define rdbwselect, eclass C_l=C_r=0 if ("`cluster'"!="") { - C = st_data(.,("`clustvar'"), 0) + C = st_data(.,("`clustvar_num'"), 0) C_l = select(C,ind_l); C_r = select(C,ind_r) indC_l = order(C_l,1); indC_r = order(C_r,1) g_l = rows(panelsetup(C_l[indC_l],1)); g_r = rows(panelsetup(C_r[indC_r],1)) @@ -315,12 +419,14 @@ program define rdbwselect, eclass mN = N bwcheck = `bwcheck' masspoints_found = 0 + // Always compute unique-value vectors so bwcheck has them available + // even when masspoints=="off". + X_uniq_l = sort(uniqrows(X_l),-1) + X_uniq_r = uniqrows(X_r) + M_l = length(X_uniq_l) + M_r = length(X_uniq_r) + M = M_l + M_r if ("`masspoints'"=="check" | "`masspoints'"=="adjust") { - X_uniq_l = sort(uniqrows(X_l),-1) - X_uniq_r = uniqrows(X_r) - M_l = length(X_uniq_l) - M_r = length(X_uniq_r) - M = M_l + M_r st_numscalar("M_l", M_l); st_numscalar("M_r", M_r) mass_l = 1-M_l/N_l mass_r = 1-M_r/N_r @@ -329,7 +435,7 @@ program define rdbwselect, eclass display("{err}Mass points detected in the running variable.") if ("`masspoints'"=="adjust" & "`bwcheck'"=="0") bwcheck = 10 if ("`masspoints'"=="check") display("{err}Try using option {cmd:masspoints(adjust)}") - } + } } @@ -353,19 +459,22 @@ program define rdbwselect, eclass } c_bw_l = c_bw_r = c_bw - - + + // T1: per-side V-fit caches reused across all pilot calls. + vcache_l = asarray_create("string") + vcache_r = asarray_create("string") + *** Step 1: d_bw - C_d_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q+1, nu=q+1, o_B=q+2, h_V=c_bw_l, h_B=range_l+1e-8, 0, "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_d_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q+1, nu=q+1, o_B=q+2, h_V=c_bw_r, h_B=range_r+1e-8, 0, "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) + C_d_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q+1, nu=q+1, o_B=q+2, h_V=c_bw_l, h_B=range_l+1e-8, 0, "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_d_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q+1, nu=q+1, o_B=q+2, h_V=c_bw_r, h_B=range_r+1e-8, 0, "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) if (C_d_l[1]==. | C_d_l[2]==. | C_d_l[3]==. |C_d_r[1]==. | C_d_r[2]==. | C_d_r[3]==.) printf("{err}Invertibility problem in the computation of preliminary bandwidth. Try checking for mass points with option {cmd:masspoints(check)}.\n") if (C_d_l[1]==0 | C_d_l[2]==0 | C_d_r[1]==0 | C_d_r[2]==0) printf("{err}Not enough variability to compute the preliminary bandwidth. Try checking for mass points with option {cmd:masspoints(check)}.\n") *** TWO if ("`bwselect'"=="msetwo" | "`bwselect'"=="certwo" | "`bwselect'"=="msecomb2" | "`bwselect'"=="cercomb2" | "`all'"!="") { - d_bw_l = ( (C_d_l[1] / C_d_l[2]^2) * (N/mN) )^C_d_l[4] - d_bw_r = ( (C_d_r[1] / C_d_r[2]^2) * (N/mN) )^C_d_l[4] + d_bw_l = ( (C_d_l[1] / C_d_l[2]^2))^C_d_l[4] + d_bw_r = ( (C_d_r[1] / C_d_r[2]^2))^C_d_r[4] if ("`bwrestrict'"=="on") { d_bw_l = min((d_bw_l, range_l)) d_bw_r = min((d_bw_r, range_r)) @@ -374,19 +483,19 @@ program define rdbwselect, eclass d_bw_l = max((d_bw_l, bw_min_l)) d_bw_r = max((d_bw_r, bw_min_r)) } - C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_l, h_B=d_bw_l, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - b_bw_l = ( (C_b_l[1] / (C_b_l[2]^2 + `scaleregul'*C_b_l[3])) * (N/mN) )^C_b_l[4] - C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_r, h_B=d_bw_r, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) - b_bw_r = ( (C_b_r[1] / (C_b_r[2]^2 + `scaleregul'*C_b_r[3])) * (N/mN) )^C_b_l[4] + C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_l, h_B=d_bw_l, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + b_bw_l = ( (C_b_l[1] / (C_b_l[2]^2 + `scaleregul'*C_b_l[3])))^C_b_l[4] + C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_r, h_B=d_bw_r, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + b_bw_r = ( (C_b_r[1] / (C_b_r[2]^2 + `scaleregul'*C_b_r[3])))^C_b_r[4] if ("`bwrestrict'"=="on") { b_bw_l = min((b_bw_l, range_l)) b_bw_r = min((b_bw_r, range_r)) } - C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_l, h_B=b_bw_l, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - h_bw_l = ( (C_h_l[1] / (C_h_l[2]^2 + `scaleregul'*C_h_l[3])) * (N/mN) )^C_h_l[4] - C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_r, h_B=b_bw_r, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) - h_bw_r = ( (C_h_r[1] / (C_h_r[2]^2 + `scaleregul'*C_h_r[3])) * (N/mN) )^C_h_l[4] + C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_l, h_B=b_bw_l, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + h_bw_l = ( (C_h_l[1] / (C_h_l[2]^2 + `scaleregul'*C_h_l[3])))^C_h_l[4] + C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_r, h_B=b_bw_r, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + h_bw_r = ( (C_h_r[1] / (C_h_r[2]^2 + `scaleregul'*C_h_r[3])))^C_h_r[4] if ("`bwrestrict'"=="on") { h_bw_l = min((h_bw_l, range_l)) @@ -397,31 +506,31 @@ program define rdbwselect, eclass *** SUM if ("`bwselect'"=="msesum" | "`bwselect'"=="cersum" | "`bwselect'"=="msecomb1" | "`bwselect'"=="msecomb2" | "`bwselect'"=="cercomb1" | "`bwselect'"=="cercomb2" | "`all'"!="") { - d_bw_s = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] + C_d_l[2])^2) * (N/mN) )^C_d_l[4] + d_bw_s = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] + C_d_l[2])^2))^C_d_l[4] if ("`bwrestrict'"=="on") d_bw_s = min((d_bw_s, bw_max)) if (bwcheck > 0) d_bw_s = max((d_bw_s, bw_min_l, bw_min_r)) - C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_l, h_B=d_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_r, h_B=d_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) - b_bw_s = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] + C_b_l[2])^2 + `scaleregul'*(C_b_r[3]+C_b_l[3]))) * (N/mN) )^C_b_l[4] + C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_l, h_B=d_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_r, h_B=d_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + b_bw_s = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] + C_b_l[2])^2 + `scaleregul'*(C_b_r[3]+C_b_l[3]))))^C_b_l[4] if ("`bwrestrict'"=="on") b_bw_s = min((b_bw_s, bw_max)) - C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_l, h_B=b_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_r, h_B=b_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) - h_bw_s = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] + C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))) * (N/mN) )^C_h_l[4] + C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_l, h_B=b_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_r, h_B=b_bw_s, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + h_bw_s = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] + C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))))^C_h_l[4] if ("`bwrestrict'"=="on") h_bw_s = min((h_bw_s, bw_max)) } *** RD if ("`bwselect'"=="mserd" | "`bwselect'"=="cerrd" | "`bwselect'"=="msecomb1" | "`bwselect'"=="msecomb2" | "`bwselect'"=="cercomb1" | "`bwselect'"=="cercomb2" | "`bwselect'"=="" | "`all'"!="" ) { - d_bw_d = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] - C_d_l[2])^2) * (N/mN) )^C_d_l[4] + d_bw_d = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] - C_d_l[2])^2))^C_d_l[4] if ("`bwrestrict'"=="on") d_bw_d = min((d_bw_d, bw_max)) if (bwcheck > 0) d_bw_d = max((d_bw_d, bw_min_l, bw_min_r)) - C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_l, h_B=d_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_r, h_B=d_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) - b_bw_d = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] - C_b_l[2])^2 + `scaleregul'*(C_b_r[3] + C_b_l[3]))) * (N/mN) )^C_b_l[4] + C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_l, h_B=d_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=q, nu=p+1, o_B=q+1, h_V=c_bw_r, h_B=d_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + b_bw_d = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] - C_b_l[2])^2 + `scaleregul'*(C_b_r[3] + C_b_l[3]))))^C_b_l[4] if ("`bwrestrict'"=="on") b_bw_d = min((b_bw_d, bw_max)) - C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_l, h_B=b_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_r, h_B=b_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll) - h_bw_d = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] - C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))) * (N/mN) )^C_h_l[4] + C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_l, h_B=b_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=p, nu=`deriv', o_B=q, h_V=c_bw_r, h_B=b_bw_d, `scaleregul', "`vce_select'", nnmatch, "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + h_bw_d = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] - C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))))^C_h_l[4] if ("`bwrestrict'"=="on") h_bw_d = min((h_bw_d, bw_max)) } @@ -524,10 +633,10 @@ program define rdbwselect, eclass b_certwo_r = b_msetwo_r*cer_b st_numscalar("h_certwo_l", h_certwo_l); st_numscalar("h_certwo_r", h_certwo_r); st_numscalar("b_certwo_l", b_certwo_l); st_numscalar("b_certwo_r", b_certwo_r); - mat_h[1, 1] = h_cersum - mat_h[1, 2] = h_cersum - mat_b[1, 1] = b_cersum - mat_b[1, 2] = b_cersum + mat_h[1, 1] = h_certwo_l + mat_h[1, 2] = h_certwo_r + mat_b[1, 1] = b_certwo_l + mat_b[1, 2] = b_certwo_r } if ("`bwselect'"=="cercomb1" | "`all'"!="" ){ h_cercomb1 = h_msecomb1*cer_h @@ -641,8 +750,7 @@ program define rdbwselect, eclass } disp in smcl in gr "{hline 19}{c BT}{hline 30}{c BT}{hline 29}" if ("`covs'"!="") di "Covariate-adjusted estimates. Additional covariates included: `ncovs'" -* if (`covs_drop_coll'>=1) di "Variables dropped due to multicollinearity." - if ("`masspoints'"=="check") di "Running variable checked for mass points." + if ("`masspoints'"=="check") di "Running variable checked for mass points." if ("`masspoints'"=="adjust" & masspoints_found==1) di "Estimates adjusted for mass points in the running variable." if ("`cluster'"!="") di "Std. Err. adjusted for clusters in " "`clustvar'" @@ -650,22 +758,45 @@ program define rdbwselect, eclass if ("`sharpbw'"~="") di in red "WARNING: bandwidths automatically computed for sharp RD estimation." if ("`perf_comp'"~="") di in red "WARNING: bandwidths automatically computed for sharp RD estimation because perfect compliance was detected on at least one side of the threshold." - restore + } + local _rc = _rc + cwf `_orig_frame' + frame drop `_work_frame' + if `_rc' { + * Error path: best-effort per-name Mata cleanup before reraising. + capture mata: _mtx = direxternal("*"); st_local("_mata_after", rows(_mtx) ? invtokens(_mtx') : "") + capture mata: mata drop _mtx + local _mata_new : list _mata_after - _mata_before + foreach _mname of local _mata_new { + capture mata mata drop `_mname' + } + error `_rc' + } + } + ereturn clear + ereturn scalar N = scalar(N) ereturn scalar N_l = scalar(N_l) ereturn scalar N_r = scalar(N_r) ereturn scalar c = `c' ereturn scalar p = `p' ereturn scalar q = `q' + if ("`cluster'"!="") ereturn scalar n_clust = scalar(g_l) + scalar(g_r) ereturn local kernel = "`kernel_type'" ereturn local bwselect = "`bwselect'" - ereturn local vce_select = "`vce_type'" + ereturn local vce_select = "`vce_raw'" + ereturn local vce_type = "`vce_type'" if ("`covs'"!="") ereturn local covs "`covs'" if ("`cluster'"!="") ereturn local clustvar "`clustvar'" - ereturn local outcomevar "`y'" ereturn local runningvar "`x'" ereturn local depvar "`y'" - ereturn local cmd "rdbwselect" + * Title for estimates replay / estimates table + if ("`fuzzy'"=="") local _rd_title "RD bandwidth selection (sharp)" + else local _rd_title "RD bandwidth selection (fuzzy)" + if ("`covs'"!="") local _rd_title "`_rd_title', covariate-adjusted" + ereturn local title "`_rd_title'" + ereturn local cmdline "rdbwselect `0'" + ereturn local cmd "rdbwselect" ereturn matrix mat_h = mat_h ereturn matrix mat_b = mat_b @@ -718,7 +849,13 @@ program define rdbwselect, eclass ereturn scalar b_cercomb2_l = scalar(b_cercomb2_l) ereturn scalar b_cercomb2_r = scalar(b_cercomb2_r) } - - mata mata clear + + * Drop only the Mata externals we created (set difference vs the + * entry-time snapshot). Library functions stay loaded; the user's + * own Mata variables are preserved. + mata: _mtx = direxternal("*"); st_local("_mata_after", rows(_mtx) ? invtokens(_mtx') : "") + mata: mata drop _mtx + local _mata_new : list _mata_after - _mata_before + if `"`_mata_new'"' != "" mata mata drop `_mata_new' end diff --git a/stata/rdbwselect.pdf b/stata/rdbwselect.pdf index e146093..5757e6d 100644 Binary files a/stata/rdbwselect.pdf and b/stata/rdbwselect.pdf differ diff --git a/stata/rdbwselect.sthlp b/stata/rdbwselect.sthlp index ad8773b..38d0c57 100644 --- a/stata/rdbwselect.sthlp +++ b/stata/rdbwselect.sthlp @@ -1,5 +1,5 @@ {smcl} -{* *!version 10.0.0 2026-05-15}{...} +{* *!version 11.0.0 2026-05-13}{...} {viewerjumpto "Syntax" "rdbwselect##syntax"}{...} {viewerjumpto "Description" "rdbwselect##description"}{...} {viewerjumpto "Options" "rdbwselect##options"}{...} @@ -59,6 +59,8 @@ and {browse "https://rdpackages.github.io/references/Calonico-Cattaneo-Farrell-T {p 8 8}{browse "https://rdpackages.github.io/":https://rdpackages.github.io/}{p_end} +{p 4 8}{it:Requires Stata 16 or later.}{p_end} + {marker options}{...} {title:Options} @@ -86,7 +88,7 @@ Default is {cmd:q(2)} (local quadratic regression).{p_end} {p 4 8}{cmd:covs(}{it:covars}{cmd:)} specifies additional covariates to be used for estimation and inference.{p_end} -{p 4 8}{cmd:covs_drop(}{it:covsdropoption}{cmd:)} assesses collinearity in additional covariates used for estimation and inference. Options {opt pinv} (default choice) and {opt invsym} drop collinear additional covariates, differing only in the type of inverse function used. Option {opt off} omits the check for collinear additional covariates.{p_end} +{p 4 8}{cmd:covs_drop(}{it:covsdropoption}{cmd:)} assess collinearity in additional covariates used for estimation and inference. Options {opt pinv} (default choice) and {opt invsym} drops collinear additional covariates, differing only in the type of inverse function used. Option {opt off} omits the check for collinear additional covariates.{p_end} {p 4 8}{cmd:kernel(}{it:kernelfn}{cmd:)} specifies the kernel function used to construct the local-polynomial estimator(s). Options are: {opt tri:angular}, {opt epa:nechnikov}, and {opt uni:form}. Default is {cmd:kernel(triangular)}.{p_end} @@ -142,8 +144,11 @@ Options are:{p_end} {p 8 12}{cmd:vce(hc1)} for heteroskedasticity-robust plug-in residuals variance estimator with {it:hc1} weights.{p_end} {p 8 12}{cmd:vce(hc2)} for heteroskedasticity-robust plug-in residuals variance estimator with {it:hc2} weights.{p_end} {p 8 12}{cmd:vce(hc3)} for heteroskedasticity-robust plug-in residuals variance estimator with {it:hc3} weights.{p_end} -{p 8 12}{cmd:vce(nncluster }{it:clustervar [nnmatch]}{cmd:)} for cluster-robust nearest neighbor variance estimation, with {it:clustervar} indicating the cluster ID variable and {it:nnmatch} indicating the minimum number of neighbors to be used.{p_end} -{p 8 12}{cmd:vce(cluster }{it:clustervar}{cmd:)} for cluster-robust plug-in residuals variance estimation with degrees-of-freedom weights and {it:clustervar} indicating the cluster ID variable.{p_end} +{p 8 12}{cmd:vce(cluster }{it:clustervar}{cmd:)} for cluster-robust plug-in residuals variance estimation with degrees-of-freedom weights; equivalent to {cmd:vce(cr1 }{it:clustervar}{cmd:)}.{p_end} +{p 8 12}{cmd:vce(cr1 }{it:clustervar}{cmd:)} for cluster-robust plug-in residuals variance estimator with degrees-of-freedom correction.{p_end} +{p 8 12}{cmd:vce(cr2 }{it:clustervar}{cmd:)} for the Bell-McCaffrey (2002) bias-reduced cluster-robust variance estimator (CRV2).{p_end} +{p 8 12}{cmd:vce(cr3 }{it:clustervar}{cmd:)} for the Pustejovsky-Tipton (2018) cluster-robust variance estimator (CRV3), approximately unbiased with few clusters.{p_end} +{p 8 12}The CR2/CR3 leverage correction applies to both the conventional and the robust bias-corrected standard errors, including when the point-estimation bandwidth h differs from the bias-correction bandwidth b; in that case the cluster leverage is computed from the bias (b) regression.{p_end} {p 8 12}Default is {cmd:vce(nn 3)}.{p_end} {hline} @@ -159,7 +164,7 @@ Options are:{p_end} {p 4 8}MSE bandwidth selection procedure{p_end} {p 8 8}{cmd:. rdbwselect vote margin}{p_end} -{p 4 8}All bandwidth selection procedures{p_end} +{p 4 8}All bandwidth bandwidth selection procedures{p_end} {p 8 8}{cmd:. rdbwselect vote margin, all}{p_end} @@ -170,11 +175,13 @@ Options are:{p_end} {synoptset 20 tabbed}{...} {p2col 5 20 24 2: Scalars}{p_end} +{synopt:{cmd:e(N)}}number of observations{p_end} {synopt:{cmd:e(N_l)}}number of observations to the left of the cutoff{p_end} {synopt:{cmd:e(N_r)}}number of observations to the right of the cutoff{p_end} {synopt:{cmd:e(c)}}cutoff value{p_end} {synopt:{cmd:e(p)}}order of the polynomial used for estimation of the regression function{p_end} {synopt:{cmd:e(q)}}order of the polynomial used for estimation of the bias of the regression function estimator{p_end} +{synopt:{cmd:e(n_clust)}}number of clusters (only when {cmd:vce(cluster}|{cmd:cr1}|{cmd:cr2}|{cmd:cr3} ...{cmd:)} is specified){p_end} {synopt:{cmd:e(h_mserd)}} MSE-optimal bandwidth selector for the RD treatment effect estimator.{p_end} {synopt:{cmd:e(h_msetwo_l)}} MSE-optimal bandwidth selectors below the cutoff for the RD treatment effect estimator.{p_end} @@ -209,14 +216,21 @@ Options are:{p_end} {synopt:{cmd:e(b_cercomb2_r)}} for median({opt certwo_r},{opt cerrd},{opt cersum}), above the cutoff.{p_end} {p2col 5 20 24 2: Macros}{p_end} +{synopt:{cmd:e(cmd)}}{cmd:rdbwselect}{p_end} +{synopt:{cmd:e(cmdline)}}command as typed{p_end} +{synopt:{cmd:e(title)}}title ({cmd:RD bandwidth selection (sharp/fuzzy)}, optionally {cmd:, covariate-adjusted}){p_end} +{synopt:{cmd:e(depvar)}}name of dependent (outcome) variable{p_end} {synopt:{cmd:e(runningvar)}}name of running variable{p_end} -{synopt:{cmd:e(outcomevar)}}name of outcome variable{p_end} {synopt:{cmd:e(clustvar)}}name of cluster variable{p_end} -{synopt:{cmd:e(covs)}}name of covariates{p_end} +{synopt:{cmd:e(covs)}}names of additional covariates{p_end} {synopt:{cmd:e(vce_select)}}vcetype specified in vce(){p_end} {synopt:{cmd:e(bwselect)}}bandwidth selection choice{p_end} {synopt:{cmd:e(kernel)}}kernel choice{p_end} +{p2col 5 20 24 2: Matrices}{p_end} +{synopt:{cmd:e(mat_h)}}1x2 matrix of bandwidths for the RD treatment effect estimator (left, right){p_end} +{synopt:{cmd:e(mat_b)}}1x2 matrix of bandwidths for the bias of the RD treatment effect estimator (left, right){p_end} + {marker references}{...} {title:References} diff --git a/stata/rdplot.ado b/stata/rdplot.ado index 9f9b0f0..85fea1a 100644 --- a/stata/rdplot.ado +++ b/stata/rdplot.ado @@ -2,13 +2,21 @@ * RDROBUST STATA PACKAGE -- rdplot * Authors: Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell, Rocio Titiunik ******************************************************************************** -*!version 10.0.0 2026-05-15 +*!rdrobust Stata package v11.0.0 2026-05-13 capture program drop rdplot program define rdplot, eclass + version 16.0 syntax anything [if] [, c(real 0) p(integer 4) nbins(string) covs(string) covs_eval(string) covs_drop(string) binselect(string) scale(string) kernel(string) weights(string) h(string) support(string) masspoints(string) genvars hide ci(real 0) shade graph_options(string asis) nochecks *] - + marksample touse + + * Snapshot Mata externals so we can drop only the variables WE create, + * leaving the user's Mata workspace and the loaded rdrobust_*.mo + * library functions untouched. + mata: _mtx = direxternal("*"); st_local("_mata_before", rows(_mtx) ? invtokens(_mtx') : "") + mata: mata drop _mtx + tokenize "`anything'" local y `1' local x `2' @@ -71,30 +79,38 @@ program define rdplot, eclass } ***************************************** - preserve + + * M1: do all work in an isolated temp frame instead of preserve+keep. + * Only the touse=1 subset is copied (in-memory, no disk I/O), and the + * user's frame is restored even on error or Ctrl-Break via nobreak. + local _orig_frame `c(frame)' + tempname _work_frame + capture frame drop `_work_frame' + frame put * if `touse', into(`_work_frame') + + nobreak { + cwf `_work_frame' + capture noisily { sort `x', stable - qui keep if `touse' - + ************************************************************* **** DROP MISSINGS ****************************************** ************************************************************* - qui drop if mi(`y') | mi(`x') if ("`covs'"~="") { qui ds `covs' local covs_list = r(varlist) - local ncovs: word count `covs_list' - foreach z in `covs_list' { - qui drop if mi(`z') + local ncovs: word count `covs_list' + if (`p'>1) { + di as text "Note: covs() is meant for plotting rdrobust estimates (local linear). With p>1, results may not be visually compatible with the binned means depicted due to the underlying assumptions used." } - - di as error "{err}covs() option is meant to be used when plotting RDROBUST estimates. If the option is used for global polynomial fitting, it may deliver results that are not visually compatible with the local binned means depicted due to the underlying assumptions used." - } - - if ("`weights'"~="") { - qui drop if mi(`weights') - qui drop if `weights'<=0 + * Combine all missing-value drops into a single scan. + local drop_cond "mi(`y') | mi(`x')" + foreach z of local covs_list { + local drop_cond "`drop_cond' | mi(`z')" } + if ("`weights'"!="") local drop_cond "`drop_cond' | mi(`weights') | `weights'<=0" + qui drop if `drop_cond' **** CHECK colinearity ****************************************** local covs_drop_coll = 0 @@ -186,23 +202,44 @@ program define rdplot, eclass local binselect = "esmv" } - if ("`nochecks'"=="") { + if ("`checks'"=="") { if (`c'<=`x_min' | `c'>=`x_max'){ di as error "{err}{cmd:c()} should be set within the range of `x'" exit 125 } if ("`p'"<"0" | "`nbins_l'"<"0" | "`nbins_r'"<"0"){ - di as error "{err}{cmd:p()} and {cmd:nbins()} should be a positive integers" + di as error "{err}{cmd:p()} and {cmd:nbins()} should be a positive integers" exit 411 } + + if ("`masspoints'" != "" & /// + !inlist("`masspoints'", "check", "adjust", "off", "false")) { + di as error "{err}{cmd:masspoints()} must be one of check, adjust, off" + exit 125 + } if (`n'<20){ - di as error "{err}Not enough observations to perform bin calculations" + di as error "{err}Not enough observations to perform bin calculations" exit 2001 } } + } + * End of validation-only capture block. Splitting validation from the + * Mata-work block sidesteps a Stata quirk where `exit N` inside an `if{}` + * inside `capture noisily { ... }` does NOT halt the noisily block when + * later `mata { ... }` blocks exist at the same scope; control jumps past + * the first mata block and lands in the second, leaking transient Mata + * externals and surfacing a misleading rc=3499 instead of the real + * validation error. See rdrobust.ado for the same pattern. + local _rc = _rc + if `_rc' { + cwf `_orig_frame' + frame drop `_work_frame' + exit `_rc' + } + capture noisily { ******************************* ****** Start MATA ************* @@ -729,9 +766,13 @@ if ("`covs_eval'"=="mean" & "`covs'"!="") { qui getmata x_plot_l x_plot_r y_plot_l y_plot_r rdplot_id rdplot_mean_bin rdplot_mean_x rdplot_mean_y rdplot_N rdplot_min_bin rdplot_max_bin rdplot_se_y rdplot_ci_l rdplot_ci_r, replace force ereturn clear - ereturn scalar N_l = `n_l' - ereturn scalar N_r = `n_r' + ereturn scalar N = `n' + ereturn scalar N_l = `n_l' + ereturn scalar N_r = `n_r' + ereturn scalar N_h_l = `n_h_l' + ereturn scalar N_h_r = `n_h_r' ereturn scalar c = `c' + ereturn scalar p = `p' ereturn scalar J_star_l = J_star_l ereturn scalar J_star_r = J_star_r ereturn matrix coef_l = gamma_p1_l @@ -739,7 +780,12 @@ if ("`covs_eval'"=="mean" & "`covs'"!="") { if ("`covs'"!="") { ereturn matrix coef_covs = gamma_p } - ereturn local binselect = "`binselect'" + ereturn local binselect = "`binselect'" + ereturn local depvar "`y'" + ereturn local runningvar "`x'" + ereturn local cmdline "rdplot `0'" + ereturn local title "RD plot" + ereturn local cmd "rdplot" ****** polynomial equation for plots ****************** mat coef_l = e(coef_l) @@ -822,8 +868,22 @@ if ("`covs_eval'"=="mean" & "`covs'"!="") { } } } - -restore + + } + local _rc = _rc + cwf `_orig_frame' + frame drop `_work_frame' + if `_rc' { + * Error path: best-effort per-name Mata cleanup before reraising. + capture mata: _mtx = direxternal("*"); st_local("_mata_after", rows(_mtx) ? invtokens(_mtx') : "") + capture mata: mata drop _mtx + local _mata_new : list _mata_after - _mata_before + foreach _mname of local _mata_new { + capture mata mata drop `_mname' + } + error `_rc' + } + } @@ -853,7 +913,25 @@ if ("`genvars'"!="") { } } -mata mata clear +* Drop transient matrices/scalars used as Mata-to-Stata transport buffers +* so they don't leak into the caller's namespace. +cap matrix drop coef_l coef_r +cap matrix drop J_es_hat_dw J_qs_hat_dw J_es_chk_dw J_qs_chk_dw +cap matrix drop J_es_hat_mv J_qs_hat_mv J_es_chk_mv J_qs_chk_mv +cap scalar drop M_l M_r nbins_l nbins_r +cap scalar drop J_star_l J_star_r J_star_l_orig J_star_r_orig +cap scalar drop bin_avg_l bin_avg_r bin_med_l bin_med_r +cap scalar drop J_star_l_IMSE J_star_r_IMSE J_star_l_MV J_star_r_MV +cap scalar drop scale_l scale_r + +* Drop only the Mata externals we created (set difference vs the +* entry-time snapshot). Library functions stay loaded; the user's +* own Mata variables are preserved. +mata: _mtx = direxternal("*"); st_local("_mata_after", rows(_mtx) ? invtokens(_mtx') : "") +mata: mata drop _mtx +local _mata_new : list _mata_after - _mata_before +if `"`_mata_new'"' != "" mata mata drop `_mata_new' + end diff --git a/stata/rdplot.pdf b/stata/rdplot.pdf index b4fd76e..b841e3c 100644 Binary files a/stata/rdplot.pdf and b/stata/rdplot.pdf differ diff --git a/stata/rdplot.sthlp b/stata/rdplot.sthlp index 4710922..5355f4c 100644 --- a/stata/rdplot.sthlp +++ b/stata/rdplot.sthlp @@ -1,5 +1,5 @@ {smcl} -{* *!version 10.0.0 2026-05-15}{...} +{* *!version 11.0.0 2026-05-13}{...} {viewerjumpto "Syntax" "rdplot##syntax"}{...} {viewerjumpto "Description" "rdplot##description"}{...} {viewerjumpto "Options" "rdplot##options"}{...} @@ -42,7 +42,7 @@ {marker description}{...} {title:Description} -{p 4 8}{cmd:rdplot} implements several data-driven Regression Discontinuity (RD) plots, using either evenly-spaced or quantile-spaced partitioning. Two types of RD plots are constructed: (i) RD plots with binned sample means tracing out the underlying regression function, and (ii) RD plots with binned sample means +{p 4 8}{cmd:rdplot} implements several data-driven Regression Discontinuity (RD) plots, using either evenly-spaced or quantile-spaced partitioning. Two type of RD plots are constructed: (i) RD plots with binned sample means tracing out the underlying regression function, and (ii) RD plots with binned sample means mimicking the underlying variability of the data. For technical and methodological details see {browse "https://rdpackages.github.io/references/Calonico-Cattaneo-Titiunik_2015_JASA.pdf":Calonico, Cattaneo and Titiunik (2015a)}.{p_end} @@ -57,6 +57,8 @@ and {browse "https://rdpackages.github.io/references/Calonico-Cattaneo-Farrell-T {p 8 8}{browse "https://rdpackages.github.io":https://rdpackages.github.io}{p_end} +{p 4 8}{it:Requires Stata 16 or later.}{p_end} + {marker options}{...} {title:Options} @@ -114,7 +116,7 @@ Default is {cmd:kernel(uniform)} (i.e., equal/no weighting to all observations o {p 4 8}{cmd:covs_eval(}{it:covars_eval}{cmd:)} sets the evaluation points for the additional covariates, when included in the estimation. Options are: {opt 0} and {opt mean} (default). -{p 4 8}{cmd:covs_drop(}{it:covsdropoption}{cmd:)} assesses collinearity in additional covariates used for estimation and inference. Options {opt pinv} (default choice) and {opt invsym} drop collinear additional covariates, differing only in the type of inverse function used. Option {opt off} omits the check for collinear additional covariates.{p_end} +{p 4 8}{cmd:covs_drop(}{it:covsdropoption}{cmd:)} assess collinearity in additional covariates used for estimation and inference. Options {opt pinv} (default choice) and {opt invsym} drops collinear additional covariates, differing only in the type of inverse function used. Option {opt off} omits the check for collinear additional covariates.{p_end} {dlgtab:Plot Options} @@ -164,13 +166,22 @@ Default is {cmd:kernel(uniform)} (i.e., equal/no weighting to all observations o {synoptset 20 tabbed}{...} {p2col 5 20 24 2: Scalars}{p_end} +{synopt:{cmd:e(N)}}number of observations{p_end} {synopt:{cmd:e(N_l)}}original number of observations to the left of the cutoff{p_end} {synopt:{cmd:e(N_r)}}original number of observations to the right of the cutoff{p_end} +{synopt:{cmd:e(N_h_l)}}effective number of observations (given the bandwidth) to the left of the cutoff{p_end} +{synopt:{cmd:e(N_h_r)}}effective number of observations (given the bandwidth) to the right of the cutoff{p_end} {synopt:{cmd:e(c)}}cutoff value{p_end} +{synopt:{cmd:e(p)}}order of the polynomial used for the fit{p_end} {synopt:{cmd:e(J_star_l)}}selected number of bins to the left of the cutoff{p_end} {synopt:{cmd:e(J_star_r)}}selected number of bins to the right of the cutoff{p_end} {p2col 5 20 24 2: Macros}{p_end} +{synopt:{cmd:e(cmd)}}{cmd:rdplot}{p_end} +{synopt:{cmd:e(cmdline)}}command as typed{p_end} +{synopt:{cmd:e(title)}}title ({cmd:RD plot}){p_end} +{synopt:{cmd:e(depvar)}}name of dependent (outcome) variable{p_end} +{synopt:{cmd:e(runningvar)}}name of running variable{p_end} {synopt:{cmd:e(binselect)}}method used to compute the optimal number of bins{p_end} {synoptset 20 tabbed}{...} @@ -178,6 +189,8 @@ Default is {cmd:kernel(uniform)} (i.e., equal/no weighting to all observations o {synopt:{cmd:e(coef_l)}}coefficients of the {it:p}-th order polynomial estimated to the left of the cutoff{p_end} {synopt:{cmd:e(coef_r)}}coefficients of the {it:p}-th order polynomial estimated to the right of the cutoff{p_end} {synopt:{cmd:e(coef_covs)}}coefficients of the additional covariates, only returned when {cmd:covs()} are used{p_end} +{synopt:{cmd:e(eq_l)}}polynomial equation (left of cutoff) as a string, suitable for overlaying the fit via {cmd:twoway function}{p_end} +{synopt:{cmd:e(eq_r)}}polynomial equation (right of cutoff) as a string, suitable for overlaying the fit via {cmd:twoway function}{p_end} {marker references}{...} diff --git a/stata/rdplot_illustration.do b/stata/rdplot_illustration.do index d46f93b..af3d61f 100644 --- a/stata/rdplot_illustration.do +++ b/stata/rdplot_illustration.do @@ -1,15 +1,23 @@ ******************************************************************************** ** RDROBUST Stata Package -** RDPLOT Illustration +** Do-file for RDPLOT Illustration +** +** Shows how to construct RD plots manually from the output of rdplot. +** rdplot with the `genvars' option writes the bin means, pointwise CIs, and +** per-observation fitted values back to the dataset; `hide' suppresses the +** default plot. With these variables you can build any plot you want. +** Authors: Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell and Rocio Titiunik ******************************************************************************** -** net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/main/stata) replace +** net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace ******************************************************************************** clear all ******************************************************************************** -** Load data and generate RDPLOT results without output plot +** Load data and run rdplot with `genvars, hide, ci()' so we get the bins and +** pointwise CIs back in the data without the default plot. ******************************************************************************** -use "rdrobust_senate.dta", clear +findfile rdrobust_senate.dta +use "`r(fn)'", clear global y vote global x margin global c 0 @@ -19,33 +27,54 @@ global x_max = r(max) rdplot $y $x, genvars hide ci(95) +** Polynomial order actually used by rdplot (defaults to 4, but honors p()). +local p = e(p) + +** Shared plot elements (reused below): +** `scatter' -- bin means dot layer +** `fit_expr' -- left/right polynomial fits via `twoway function' on e(eq_*) +** `fit_hat' -- left/right polynomial fits via rdplot_hat_y (no eval needed) +** `common' -- axis, legend, and title options +local scatter (scatter rdplot_mean_y rdplot_mean_bin, sort msize(small) mcolor(gs10)) +local fit_expr (function `e(eq_l)', range($x_min $c) lcolor(black) sort lwidth(medthin) lpattern(solid)) /// + (function `e(eq_r)', range($c $x_max) lcolor(black) sort lwidth(medthin) lpattern(solid)) +local fit_hat (line rdplot_hat_y $x if $x< $c, sort lcolor(black) lwidth(medthin)) /// + (line rdplot_hat_y $x if $x>=$c, sort lcolor(black) lwidth(medthin)) +local common xline($c, lcolor(black) lwidth(medthin)) xscale(r($x_min $x_max)) /// + xtitle("Running variable") ytitle("Outcome") /// + title("Regression function fit", color(gs0)) + +******************************************************************************** +** (1) Default RDPLOT: bin means + polynomial fit via e(eq_l)/e(eq_r). ******************************************************************************** -** Default RDPLOT +twoway `scatter' `fit_expr', `common' /// + legend(pos(6) cols(2) order(1 "Sample average within bin" /// + 2 "Polynomial fit of order `p'")) + +******************************************************************************** +** (2) Same plot built with rdplot_hat_y instead of e(eq_*). +** Uses the per-observation fitted values stored by `genvars'; no +** polynomial-string eval and no dependency on the Mata coefficient matrices. ******************************************************************************** -twoway (scatter rdplot_mean_y rdplot_mean_bin, sort msize(small) mcolor(gs10)) /// -(function `e(eq_l)', range($x_min $c) lcolor(black) sort lwidth(medthin) lpattern(solid)) /// -(function `e(eq_r)', range($c $x_max) lcolor(black) sort lwidth(medthin) lpattern(solid)), /// -xline($c, lcolor(black) lwidth(medthin)) xscale(r($x_min $x_max)) /// -legend(pos(6) cols(2) order(1 "Sample average within bin" 2 "Polynomial fit of order 4" )) title("Regression function fit", color(gs0)) +twoway `scatter' `fit_hat', `common' /// + legend(pos(6) cols(2) order(1 "Sample average within bin" /// + 2 "Polynomial fit of order `p'")) ******************************************************************************** -** RDPLOT with confidence intervals +** (3) RDPLOT with pointwise 95% confidence intervals as caps. ******************************************************************************** twoway (rcap rdplot_ci_l rdplot_ci_r rdplot_mean_bin, color(gs11)) /// -(scatter rdplot_mean_y rdplot_mean_bin, sort msize(small) mcolor(gs10)) /// -(function `e(eq_l)', range($x_min $c) lcolor(black) sort lwidth(medthin) lpattern(solid)) /// -(function `e(eq_r)', range($c $x_max) lcolor(black) sort lwidth(medthin) lpattern(solid)), /// -xline($c, lcolor(black) lwidth(medthin)) xscale(r($x_min $x_max)) /// -legend(pos(6) cols(2) order(1 "Sample average within bin" 2 "Polynomial fit of order 4" )) title("Regression function fit", color(gs0)) + `scatter' `fit_expr', `common' /// + legend(pos(6) cols(3) order(2 "Sample average within bin" /// + 3 "Polynomial fit of order `p'" /// + 1 "95% CI")) ******************************************************************************** -** RDPLOT with shaded confidence intervals +** (4) RDPLOT with shaded pointwise 95% CI bands (one rarea per side). ******************************************************************************** twoway (rarea rdplot_ci_l rdplot_ci_r rdplot_mean_bin if rdplot_id<0, sort color(gs11)) /// -(rarea rdplot_ci_l rdplot_ci_r rdplot_mean_bin if rdplot_id>0, sort color(gs11)) /// -(scatter rdplot_mean_y rdplot_mean_bin, sort msize(small) mcolor(gs10)) /// -(function `e(eq_l)', range($x_min $c) lcolor(black) sort lwidth(medthin) lpattern(solid)) /// -(function `e(eq_r)', range($c $x_max) lcolor(black) sort lwidth(medthin) lpattern(solid)), /// -xline($c, lcolor(black) lwidth(medthin)) xscale(r($x_min $x_max)) /// -legend(pos(6) cols(2) order(1 "Sample average within bin" 2 "Polynomial fit of order 4" )) title("Regression function fit", color(gs0)) - + (rarea rdplot_ci_l rdplot_ci_r rdplot_mean_bin if rdplot_id>0, sort color(gs11)) /// + `scatter' `fit_expr', `common' /// + legend(pos(6) cols(3) order(3 "Sample average within bin" /// + 4 "Polynomial fit of order `p'" /// + 1 "95% CI")) diff --git a/stata/rdrobust.ado b/stata/rdrobust.ado index e042cf2..49e3cbc 100644 --- a/stata/rdrobust.ado +++ b/stata/rdrobust.ado @@ -2,15 +2,31 @@ * RDROBUST STATA PACKAGE -- rdrobust * Authors: Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell, Rocio Titiunik ******************************************************************************** -*!version 10.0.0 2026-05-15 +*!rdrobust Stata package v11.0.0 2026-05-13 -capture program drop rdrobust +capture program drop rdrobust program define rdrobust, eclass - syntax anything [if] [in] [, c(real 0) fuzzy(string) deriv(real 0) p(string) q(real 0) h(string) b(string) rho(real 0) covs(string) covs_drop(string) kernel(string) weights(string) bwselect(string) vce(string) level(real 95) all scalepar(real 1) scaleregul(real 1) nochecks masspoints(string) bwcheck(real 0) bwrestrict(string) stdvars(string) detail vleverage] - *disp in yellow "Preparing data." + version 16.0 + syntax anything [if] [in] [, c(real 0) fuzzy(string) deriv(real 0) p(string) q(real 0) h(string) b(string) rho(real 0) covs(string) covs_drop(string) kernel(string) weights(string) bwselect(string) vce(string) level(real 95) all scalepar(real 1) scaleregul(real 1) nochecks nowarnings masspoints(string) bwcheck(real 0) bwrestrict(string) stdvars(string) detail vleverage] marksample touse - preserve - qui keep if `touse' + + * Snapshot Mata externals so we can drop only the variables WE create, + * leaving the user's Mata workspace and the loaded rdrobust_*.mo + * library functions untouched. + mata: _mtx = direxternal("*"); st_local("_mata_before", rows(_mtx) ? invtokens(_mtx') : "") + mata: mata drop _mtx + + * M1: do all work in an isolated temp frame instead of preserve+keep. + * Only the touse=1 subset is copied (in-memory, no disk I/O), and the + * user's frame is restored even on error or Ctrl-Break via nobreak. + local _orig_frame `c(frame)' + tempname _work_frame + capture frame drop `_work_frame' + frame put * if `touse', into(`_work_frame') + + nobreak { + cwf `_work_frame' + capture noisily { tokenize "`anything'" local y `1' local x `2' @@ -19,38 +35,85 @@ program define rdrobust, eclass ******************** Set VCE *************************** local nnmatch = 3 - tokenize `vce' + local cr_method = "" + tokenize `vce' local w : word count `vce' if `w' == 1 { local vce_select `"`1'"' } if `w' == 2 { local vce_select `"`1'"' - if ("`vce_select'"=="nn") local nnmatch `"`2'"' - if ("`vce_select'"=="cluster" | "`vce_select'"=="nncluster") local clustvar `"`2'"' + if ("`vce_select'"=="nn") local nnmatch `"`2'"' + if inlist("`vce_select'","cluster","nncluster","cr1","cr2","cr3","hc0","hc1","hc2","hc3") local clustvar `"`2'"' } if `w' == 3 { local vce_select `"`1'"' local clustvar `"`2'"' local nnmatch `"`3'"' - if ("`vce_select'"!="cluster" & "`vce_select'"!="nncluster") di as error "{err}{cmd:vce()} incorrectly specified" + if !inlist("`vce_select'","cluster","nncluster","cr1","cr2","cr3") { + di as error "{err}{cmd:vce()} incorrectly specified" + exit 125 + } } if `w' > 3 { - di as error "{err}{cmd:vce()} incorrectly specified" + di as error "{err}{cmd:vce()} incorrectly specified" exit 125 } - + + * Disallow vce(nncluster ...): warn and shift to cr1 (default when clusters) + if ("`vce_select'"=="nncluster") { + di as text "Warning: vce(nncluster) is not supported. Switching to vce(cr1) (the default when clusters)." + local vce_select = "cr1" + } + + * With a cluster variable, map hc0/hc1/hc2/hc3 to cr1/cr1/cr2/cr3. + * Per cluster_validation design: hc0+cluster is a silent remap to cr1 + * (the default); hc1/2/3 produce a warning so the user knows their + * requested HC variant is being upgraded to its cluster analogue. + if ("`clustvar'"!="") { + if ("`vce_select'"=="hc0") local vce_select = "cr1" + if ("`vce_select'"=="hc1") { + di as text "Warning: vce(hc1 `clustvar') is not a cluster option. Switching to vce(cr1 `clustvar')." + local vce_select = "cr1" + } + if ("`vce_select'"=="hc2") { + di as text "Warning: vce(hc2 `clustvar') is not a cluster option. Switching to vce(cr2 `clustvar')." + local vce_select = "cr2" + } + if ("`vce_select'"=="hc3") { + di as text "Warning: vce(hc3 `clustvar') is not a cluster option. Switching to vce(cr3 `clustvar')." + local vce_select = "cr3" + } + * bare vce(cluster clustvar) is equivalent to cr1 (default) + if ("`vce_select'"=="cluster") local vce_select = "cr1" + } + + * Preserve the raw vce option for e(vce_select) before internal remapping + local vce_raw = "`vce_select'" + if ("`vce_raw'"=="") local vce_raw = "nn" + + * Display label local vce_type = "NN" - if ("`vce_select'"=="hc0") local vce_type = "HC0" - if ("`vce_select'"=="hc1") local vce_type = "HC1" - if ("`vce_select'"=="hc2") local vce_type = "HC2" - if ("`vce_select'"=="hc3") local vce_type = "HC3" - if ("`vce_select'"=="cluster") local vce_type = "CR1" - if ("`vce_select'"=="nncluster") local vce_type = "NNcluster" + if ("`vce_select'"=="hc0") local vce_type = "HC0" + if ("`vce_select'"=="hc1") local vce_type = "HC1" + if ("`vce_select'"=="hc2") local vce_type = "HC2" + if ("`vce_select'"=="hc3") local vce_type = "HC3" + if ("`vce_select'"=="cr1") local vce_type = "CR1" + if ("`vce_select'"=="cr2") local vce_type = "CR2" + if ("`vce_select'"=="cr3") local vce_type = "CR3" - if ("`vce_select'"=="cluster" | "`vce_select'"=="nncluster") local cluster = "cluster" - if ("`vce_select'"=="cluster") local vce_select = "hc0" - if ("`vce_select'"=="nncluster") local vce_select = "nn" + * Cluster / CR mapping + if inlist("`vce_select'","cr1","cr2","cr3") { + if ("`clustvar'"=="") { + di as error "{err}{cmd:vce(`vce_select' clustervar)} requires a cluster variable" + exit 125 + } + local cluster = "cluster" + if ("`vce_select'"=="cr1") local cr_method = "cr1" + if ("`vce_select'"=="cr2") local cr_method = "crv2" + if ("`vce_select'"=="cr3") local cr_method = "crv3" + local vce_select = "hc0" + } if ("`vce_select'"=="") local vce_select = "nn" ******************** Set BW *************************** @@ -123,23 +186,37 @@ program define rdrobust, eclass } **** DROP MISSINGS ********************************************** - qui drop if mi(`y') | mi(`x') - if ("`fuzzy'"~="") qui drop if mi(`fuzzyvar') - if ("`cluster'"!="") qui drop if mi(`clustvar') if ("`covs'"~="") { qui ds `covs', alpha local covs_list = r(varlist) - local ncovs: word count `covs_list' - foreach z in `covs_list' { - qui drop if mi(`z') - } + local ncovs: word count `covs_list' } - - if ("`weights'"~="") { - qui drop if mi(`weights') - qui drop if `weights'<=0 + * Combine all missing-value drops into a single scan. + local drop_cond "mi(`y') | mi(`x')" + if ("`fuzzy'"!="") local drop_cond "`drop_cond' | mi(`fuzzyvar')" + if ("`cluster'"!="") local drop_cond "`drop_cond' | mi(`clustvar')" + foreach z of local covs_list { + local drop_cond "`drop_cond' | mi(`z')" } - + if ("`weights'"!="") local drop_cond "`drop_cond' | mi(`weights') | `weights'<=0" + qui drop if `drop_cond' + + **** Convert string clustvar to numeric ************************ + * vce(cluster var) traditionally accepts string/categorical clusters in + * Stata. rdrobust's Mata path uses st_data(), which is numeric-only, so + * map a string clustvar to an integer factor via egen group() in a + * tempvar used only by the Mata call. The original `clustvar' is kept + * unchanged so display and ereturn report the user's variable name. + local clustvar_num "`clustvar'" + if ("`cluster'"!="") { + cap confirm numeric variable `clustvar' + if (_rc) { + tempvar _clustvar_num + qui egen `_clustvar_num' = group(`clustvar') + local clustvar_num "`_clustvar_num'" + } + } + **** CHECK colinearity ****************************************** local covs_drop_coll = 0 if ("`covs_drop'"=="") local covs_drop = "pinv" @@ -180,19 +257,23 @@ program define rdrobust, eclass if ("`bwrestrict'"=="") local bwrestrict = "on" ***************************************************************** - qui su `x', d - local N = r(N) + * Only compute what we need. `su x, d` also computes skewness/kurtosis/etc. + qui su `x' + local N = r(N) local x_min = r(min) local x_max = r(max) - local x_iq = r(p75)-r(p25) - local x_sd = r(sd) + local x_sd = r(sd) + qui _pctile `x', p(25 75) + local x_iq = r(r2) - r(r1) + local range_l = abs(`c'-`x_min') + local range_r = abs(`x_max'-`c') - if ("`deriv'">"0" & "`p'"=="" & "`q'"=="0") local p = `deriv'+1 + if (`deriv'>0 & "`p'"=="" & `q'==0) local p = `deriv'+1 if ("`p'"=="") local p = 1 if ("`q'"=="0") local q = `p'+1 **************************** BEGIN ERROR CHECKING ************************************************ - if ("`nochecks'"=="") { + if ("`checks'"=="") { if (`c'<=`x_min' | `c'>=`x_max'){ di as error "{err}{cmd:c()} should be set within the range of `x'" exit 125 @@ -223,7 +304,7 @@ program define rdrobust, eclass } if ("`bwselect'"=="CCT" | "`bwselect'"=="IK" | "`bwselect'"=="CV" |"`bwselect'"=="cct" | "`bwselect'"=="ik" | "`bwselect'"=="cv"){ - di as error "{err}{cmd:bwselect()} options IK, CCT and CV have been deprecated. Please see help for new options" + di as error "{err}{cmd:bwselect()} options IK, CCT and CV have been depricated. Please see help for new options" exit 7 } @@ -232,27 +313,27 @@ program define rdrobust, eclass exit 7 } - if ("`vce_select'"~="nn" & "`vce_select'"~="" & "`vce_select'"~="cluster" & "`vce_select'"~="nncluster" & "`vce_select'"~="hc1" & "`vce_select'"~="hc2" & "`vce_select'"~="hc3" & "`vce_select'"~="hc0"){ + if ("`vce_select'"~="nn" & "`vce_select'"~="" & "`vce_select'"~="cluster" & "`vce_select'"~="hc1" & "`vce_select'"~="hc2" & "`vce_select'"~="hc3" & "`vce_select'"~="hc0"){ di as error "{err}{cmd:vce()} incorrectly specified" exit 7 } - if ("`p'"<"0" | "`q'"<="0" | "`deriv'"<"0" | "`nnmatch'"<="0" ){ - di as error "{err}{cmd:p()}, {cmd:q()}, {cmd:deriv()}, {cmd:nnmatch()} should be positive" + if (`p'<0 | `q'<=0 | `deriv'<0 | `nnmatch'<=0){ + di as error "{err}{cmd:p()}, {cmd:q()}, {cmd:deriv()}, {cmd:nnmatch()} should be positive" exit 411 } - - if ("`p'">="`q'" & "`q'">"0"){ - di as error "{err}{cmd:q()} should be higher than {cmd:p()}" + + if (`p'>=`q' & `q'>0){ + di as error "{err}{cmd:q()} should be higher than {cmd:p()}" exit 125 } - - if ("`deriv'">"`p'" & "`deriv'">"0" ){ - di as error "{err}{cmd:deriv()} can not be higher than {cmd:p()}" + + if (`deriv'>`p' & `deriv'>0){ + di as error "{err}{cmd:deriv()} can not be higher than {cmd:p()}" exit 125 } - if ("`p'">"0" ) { + if (`p'>0) { local p_round = round(`p')/`p' local q_round = round(`q')/`q' local d_round = round(`deriv'+1)/(`deriv'+1) @@ -263,13 +344,40 @@ program define rdrobust, eclass exit 126 } } - if (`level'>100 | `level'<=0){ - di as error "{err}{cmd:level()}should be set between 0 and 100" + if (`level'>=100 | `level'<=0){ + di as error "{err}{cmd:level()} should be a number in (0, 100)" + exit 125 + } + if (`rho' < 0) { + di as error "{err}{cmd:rho()} should be strictly positive" + exit 125 + } + if (`bwcheck' < 0 | (`bwcheck' != round(`bwcheck'))) { + di as error "{err}{cmd:bwcheck()} must be a non-negative integer (0 = unset)" + exit 125 + } + if ("`masspoints'" != "" & /// + !inlist("`masspoints'", "check", "adjust", "off", "false")) { + di as error "{err}{cmd:masspoints()} must be one of check, adjust, off" exit 125 } } *********************** END ERROR CHECKING ************************************************************ - + } + * End of validation-only capture block. Splitting validation from the + * Mata-work block sidesteps a Stata quirk where `exit N` inside an `if{}` + * inside `capture noisily { ... }` does NOT halt the noisily block when + * later `mata { ... }` blocks exist at the same scope; control jumps past + * the first mata block and lands in the second, leaking transient Mata + * externals and surfacing a misleading rc=3499 "X_l not found" instead of + * the real validation error. + local _rc = _rc + if `_rc' { + cwf `_orig_frame' + frame drop `_work_frame' + exit `_rc' + } + capture noisily { if ("`vce_select'"=="nn" | "`masspoints'"=="check" | "`masspoints'"=="adjust") { sort `x', stable if ("`vce_select'"=="nn") { @@ -297,8 +405,9 @@ program define rdrobust, eclass mata{ *** Preparing data - Y = st_data(.,("`y'"), 0); X = st_data(.,("`x'"), 0) - ind_l = selectindex(X:<`c'); ind_r = selectindex(X:>=`c') + YX = st_data(., ("`y' `x'"), 0) + Y = YX[,1]; X = YX[,2] + ind_l = selectindex(X:<`c'); ind_r = selectindex(X:>=`c') X_l = X[ind_l]; X_r = X[ind_r] Y_l = Y[ind_l]; Y_r = Y[ind_r] dZ=dT=dC=Z_l=Z_r=T_l=T_r=C_l=C_r=fw_l=fw_r=g_l=g_r=dups_l=dups_r=dupsid_l=dupsid_r=g_l=g_r=eT_l=eT_r=eZ_l=eZ_r=indC_l=indC_r=eC_l=eC_r=0 @@ -312,6 +421,11 @@ program define rdrobust, eclass if ("`fuzzy'"~="") { T = st_data(.,("`fuzzyvar'"), 0); T_l = T[ind_l]; T_r = T[ind_r]; dT = 1 + // Reject fully degenerate first stage (no variation, no jump). + // One-sided non-compliance falls through to the perf_comp branch. + if (variance(T_l)==0 & variance(T_r)==0 & abs(mean(T_l) - mean(T_r)) < sqrt(epsilon(1))) { + _error("Fuzzy RD: first-stage variable has no variation and no jump at the cutoff. The fuzzy estimator is not identified.") + } if (variance(T_l)==0 | variance(T_r)==0){ T_l = T_r = 0 st_local("perf_comp","perf_comp") @@ -320,10 +434,10 @@ program define rdrobust, eclass T_l = T_r = 0 st_local("sharpbw","sharpbw") } - } + } if ("`cluster'"!="") { - C = st_data(.,("`clustvar'"), 0) + C = st_data(.,("`clustvar_num'"), 0) C_l = C[ind_l]; C_r = C[ind_r] indC_l = order(C_l,1); indC_r = order(C_r,1) g_l = rows(panelsetup(C_l[indC_l],1)); g_r = rows(panelsetup(C_r[indC_r],1)) @@ -357,7 +471,7 @@ masspoints_found = 0 BWp = min((`x_sd',`x_iq'/1.349)) x_sd = y_sd = 1 c = `c' - *** Standardized ****************** + *** Starndardized ****************** if ("`stdvars'"=="on") { y_sd = sqrt(variance(Y)) x_sd = sqrt(variance(X)) @@ -370,29 +484,31 @@ masspoints_found = 0 x_r_min = min(X_r); x_r_max = max(X_r) range_l = c - x_l_min - range_r = x_r_max - c + range_r = x_r_max - c ************************************ mN = `N' bwcheck = `bwcheck' covs_drop_coll = `covs_drop_coll' + // Always compute unique-value vectors so the bwcheck and de-standardize + // paths below have them available (even when masspoints=="off"). + X_uniq_l = sort(uniqrows(X_l),-1) + X_uniq_r = uniqrows(X_r) + M_l = length(X_uniq_l) + M_r = length(X_uniq_r) + M = M_l + M_r if ("`masspoints'"=="check" | "`masspoints'"=="adjust") { - X_uniq_l = sort(uniqrows(X_l),-1) - X_uniq_r = uniqrows(X_r) - M_l = length(X_uniq_l) - M_r = length(X_uniq_r) - M = M_l + M_r st_numscalar("M_l", M_l); st_numscalar("M_r", M_r) mass_l = 1-M_l/N_l - mass_r = 1-M_r/N_r + mass_r = 1-M_r/N_r if (mass_l>=0.2 | mass_r>=0.2){ masspoints_found = 1 display("{err}Mass points detected in the running variable.") if ("`masspoints'"=="adjust" & "`bwcheck'"=="0") bwcheck = 10 if ("`masspoints'"=="check") display("{err}Try using option {cmd:masspoints(adjust)}.") - } - } + } + } c_bw = `C_c'*BWp*mN^(-1/5) if ("`masspoints'"=="adjust") c_bw = `C_c'*BWp*M^(-1/5) @@ -409,16 +525,20 @@ masspoints_found = 0 } + // T1: per-side V-fit caches reused across all pilot calls. + vcache_l = asarray_create("string") + vcache_r = asarray_create("string") + *** Step 1: d_bw - C_d_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q'+1, nu=`q'+1, o_B=`q'+2, h_V=c_bw, h_B=range_l+1e-8, 0, "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_d_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q'+1, nu=`q'+1, o_B=`q'+2, h_V=c_bw, h_B=range_r+1e-8, 0, "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) + C_d_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q'+1, nu=`q'+1, o_B=`q'+2, h_V=c_bw, h_B=range_l+1e-8, 0, "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_d_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q'+1, nu=`q'+1, o_B=`q'+2, h_V=c_bw, h_B=range_r+1e-8, 0, "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) if (C_d_l[1]==0 | C_d_l[2]==0 | C_d_r[1]==0 | C_d_r[2]==0 |C_d_l[1]==. | C_d_l[2]==. | C_d_l[3]==. |C_d_r[1]==. | C_d_r[2]==. | C_d_r[3]==.) printf("{err}Not enough variability to compute the preliminary bandwidth. Try checking for mass points with option {cmd:masspoints(check)}.\n") *** BW-TWO if ("`bwselect'"=="msetwo" | "`bwselect'"=="certwo" | "`bwselect'"=="msecomb2" | "`bwselect'"=="cercomb2" ) { - * Preliminary bw - d_bw_l = ( (C_d_l[1] / C_d_l[2]^2) * (`N'/mN) )^C_d_l[4] - d_bw_r = ( (C_d_r[1] / C_d_r[2]^2) * (`N'/mN) )^C_d_l[4] + * Preliminar bw + d_bw_l = ( (C_d_l[1] / C_d_l[2]^2))^C_d_l[4] + d_bw_r = ( (C_d_r[1] / C_d_r[2]^2))^C_d_r[4] if ("`bwrestrict'"=="on") { d_bw_l = min((d_bw_l, range_l)) d_bw_r = min((d_bw_r, range_r)) @@ -428,19 +548,19 @@ masspoints_found = 0 d_bw_r = max((d_bw_r, bw_min_r)) } * Bias bw - C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_l, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - b_bw_l = ( (C_b_l[1] / (C_b_l[2]^2 + `scaleregul'*C_b_l[3])) * (`N'/mN) )^C_b_l[4] - C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_r, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) - b_bw_r = ( (C_b_r[1] / (C_b_r[2]^2 + `scaleregul'*C_b_r[3])) * (`N'/mN) )^C_b_l[4] + C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_l, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + b_bw_l = ( (C_b_l[1] / (C_b_l[2]^2 + `scaleregul'*C_b_l[3])))^C_b_l[4] + C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_r, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + b_bw_r = ( (C_b_r[1] / (C_b_r[2]^2 + `scaleregul'*C_b_r[3])))^C_b_r[4] if ("`bwrestrict'"=="on") { b_bw_l = min((b_bw_l, range_l)) b_bw_r = min((b_bw_r, range_r)) } * Main bw - C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_l, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - h_bw_l = ( (C_h_l[1] / (C_h_l[2]^2 + `scaleregul'*C_h_l[3])) * (`N'/mN) )^C_h_l[4] - C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_r, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) - h_bw_r = ( (C_h_r[1] / (C_h_r[2]^2 + `scaleregul'*C_h_r[3])) * (`N'/mN) )^C_h_l[4] + C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_l, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + h_bw_l = ( (C_h_l[1] / (C_h_l[2]^2 + `scaleregul'*C_h_l[3])))^C_h_l[4] + C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_r, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + h_bw_r = ( (C_h_r[1] / (C_h_r[2]^2 + `scaleregul'*C_h_r[3])))^C_h_r[4] if ("`bwrestrict'"=="on") { h_bw_l = min((h_bw_l, range_l)) h_bw_r = min((h_bw_r, range_r)) @@ -449,39 +569,39 @@ masspoints_found = 0 *** BW-SUM if ("`bwselect'"=="msesum" | "`bwselect'"=="cersum" | "`bwselect'"=="msecomb1" | "`bwselect'"=="msecomb2" | "`bwselect'"=="cercomb1" | "`bwselect'"=="cercomb2") { - * Preliminary bw - d_bw_s = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] + C_d_l[2])^2) * (`N'/mN) )^C_d_l[4] + * Preliminar bw + d_bw_s = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] + C_d_l[2])^2))^C_d_l[4] if ("`bwrestrict'"=="on") d_bw_s = min((d_bw_s, bw_max)) if (bwcheck > 0) d_bw_s = max((d_bw_s, bw_min_l, bw_min_r)) * Bias bw - C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) - b_bw_s = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] + C_b_l[2])^2 + `scaleregul'*(C_b_r[3]+C_b_l[3]))) * (`N'/mN) )^C_b_l[4] + C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + b_bw_s = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] + C_b_l[2])^2 + `scaleregul'*(C_b_r[3]+C_b_l[3]))))^C_b_l[4] if ("`bwrestrict'"=="on") b_bw_s = min((b_bw_s, bw_max)) * Main bw - C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) - h_bw_s = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] + C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))) * (`N'/mN) )^C_h_l[4] + C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_s, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + h_bw_s = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] + C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))))^C_h_l[4] if ("`bwrestrict'"=="on") h_bw_s = min((h_bw_s, bw_max)) } *** RD if ("`bwselect'"=="mserd" | "`bwselect'"=="cerrd" | "`bwselect'"=="msecomb1" | "`bwselect'"=="msecomb2" | "`bwselect'"=="cercomb1" | "`bwselect'"=="cercomb2" | "`bwselect'"=="") { - * Preliminary bw - d_bw_d = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] - C_d_l[2])^2) * (`N'/mN) )^C_d_l[4] + * Preliminar bw + d_bw_d = ( ((C_d_l[1] + C_d_r[1]) / (C_d_r[2] - C_d_l[2])^2))^C_d_l[4] if ("`bwrestrict'"=="on") d_bw_d = min((d_bw_d, bw_max)) if (bwcheck > 0) d_bw_d = max((d_bw_d, bw_min_l, bw_min_r)) * Bias bw - C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) - b_bw_d = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] - C_b_l[2])^2 + `scaleregul'*(C_b_r[3] + C_b_l[3]))) * (`N'/mN) )^C_b_l[4] + C_b_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_b_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`q', nu=`p'+1, o_B=`q'+1, h_V=c_bw, h_B=d_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + b_bw_d = ( ((C_b_l[1] + C_b_r[1]) / ((C_b_r[2] - C_b_l[2])^2 + `scaleregul'*(C_b_r[3] + C_b_l[3]))))^C_b_l[4] if ("`bwrestrict'"=="on") b_bw_d = min((b_bw_d, bw_max)) * Main bw - C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll) - C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll) - h_bw_d = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] - C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))) * (`N'/mN) )^C_h_l[4] + C_h_l = rdrobust_bw(Y_l, X_l, T_l, Z_l, C_l, fw_l, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_l, dupsid_l, covs_drop_coll, "`cr_method'", vcache_l) + C_h_r = rdrobust_bw(Y_r, X_r, T_r, Z_r, C_r, fw_r, c=c, o=`p', nu=`deriv', o_B=`q', h_V=c_bw, h_B=b_bw_d, `scaleregul', "`vce_select'", `nnmatch', "`kernel'", dups_r, dupsid_r, covs_drop_coll, "`cr_method'", vcache_r) + h_bw_d = ( ((C_h_l[1] + C_h_r[1]) / ((C_h_r[2] - C_h_l[2])^2 + `scaleregul'*(C_h_r[3] + C_h_l[3]))))^C_h_l[4] if ("`bwrestrict'"=="on") h_bw_d = min((h_bw_d, bw_max)) } @@ -532,7 +652,7 @@ masspoints_found = 0 b_r = h_r/rho } - *** De-standardized ********************************* + *** De-Starndardized ********************************* c = `c'*x_sd X_uniq_l = X_uniq_l*x_sd X_uniq_r = X_uniq_r*x_sd @@ -587,11 +707,16 @@ masspoints_found = 0 } u_l = (eX_l:-`c')/h_l; u_r = (eX_r:-`c')/h_r; - R_q_l = J(eN_l,(`q'+1),.); R_q_r = J(eN_r,(`q'+1),.) - for (j=1; j<=(`q'+1); j++) { - R_q_l[.,j] = (eX_l:-`c'):^(j-1); R_q_r[.,j] = (eX_r:-`c'):^(j-1) + // Q1: Vandermonde via successive multiplication. + R_q_l = J(eN_l,(`q'+1),1); R_q_r = J(eN_r,(`q'+1),1) + if (`q' >= 1) { + uq_l = eX_l :- `c'; uq_r = eX_r :- `c' + for (j=2; j<=(`q'+1); j++) { + R_q_l[.,j] = R_q_l[.,j-1] :* uq_l + R_q_r[.,j] = R_q_r[.,j-1] :* uq_r + } } - R_p_l = R_q_l[,1::(`p'+1)]; R_p_r = R_q_r[,1::(`p'+1)] + R_p_l = R_q_l[,1::(`p'+1)]; R_p_r = R_q_r[,1::(`p'+1)] ******************************************************************************** ************ Computing RD estimates ******************************************** @@ -800,19 +925,42 @@ masspoints_found = 0 } - V_Y_cl_l = invG_p_l*rdrobust_vce(dT+dZ, s_Y, R_p_l:*W_h_l, res_h_l, eC_l, indC_l, 0)*invG_p_l - V_Y_cl_r = invG_p_r*rdrobust_vce(dT+dZ, s_Y, R_p_r:*W_h_r, res_h_r, eC_r, indC_r, 0)*invG_p_r - V_Y_bc_l = invG_p_l*rdrobust_vce(dT+dZ, s_Y, Q_q_l, res_b_l, eC_l, indC_l, `q'+1)*invG_p_l - V_Y_bc_r = invG_p_r*rdrobust_vce(dT+dZ, s_Y, Q_q_r, res_b_r, eC_r, indC_r, `q'+1)*invG_p_r + // V_cl (main variance): pass invG and sqrtRX for CR2/CR3 hat-matrix + // adjustment. For CR1 / non-cluster these are ignored. + sqrtRX_p_l = ("`cr_method'"=="crv2" | "`cr_method'"=="crv3") ? R_p_l:*sqrt(W_h_l) : J(0,0,.) + sqrtRX_p_r = ("`cr_method'"=="crv2" | "`cr_method'"=="crv3") ? R_p_r:*sqrt(W_h_r) : J(0,0,.) + invG_p_l_c = ("`cr_method'"=="crv2" | "`cr_method'"=="crv3") ? invG_p_l : J(0,0,.) + invG_p_r_c = ("`cr_method'"=="crv2" | "`cr_method'"=="crv3") ? invG_p_r : J(0,0,.) + V_Y_cl_l = invG_p_l*rdrobust_vce(dT+dZ, s_Y, R_p_l:*W_h_l, res_h_l, eC_l, indC_l, invG_p_l_c, sqrtRX_p_l, "`cr_method'", 0)*invG_p_l + V_Y_cl_r = invG_p_r*rdrobust_vce(dT+dZ, s_Y, R_p_r:*W_h_r, res_h_r, eC_r, indC_r, invG_p_r_c, sqrtRX_p_r, "`cr_method'", 0)*invG_p_r + // V_bc (robust-bias variance): for CRV2/CRV3 with cluster, use decoupled + // helper (Q_q sandwich + q-regression cluster leverage). Otherwise CR1 + // with k_override = q+1 to align df correction with the q-regression + // path used at h=b (so SE is continuous across the h=b boundary). + if (("`cr_method'"=="crv2" | "`cr_method'"=="crv3") & "`cluster'"!="") { + V_Y_bc_l = invG_p_l*rdrobust_vce_qq_cluster(Q_q_l, R_q_l, W_b_l, invG_q_l, res_b_l, eC_l, indC_l, dT+dZ, s_Y, "`cr_method'")*invG_p_l + V_Y_bc_r = invG_p_r*rdrobust_vce_qq_cluster(Q_q_r, R_q_r, W_b_r, invG_q_r, res_b_r, eC_r, indC_r, dT+dZ, s_Y, "`cr_method'")*invG_p_r + } + else { + V_Y_bc_l = invG_p_l*rdrobust_vce(dT+dZ, s_Y, Q_q_l, res_b_l, eC_l, indC_l, J(0,0,.), J(0,0,.), "cr1", `q'+1)*invG_p_l + V_Y_bc_r = invG_p_r*rdrobust_vce(dT+dZ, s_Y, Q_q_r, res_b_r, eC_r, indC_r, J(0,0,.), J(0,0,.), "cr1", `q'+1)*invG_p_r + } V_tau_cl = (`scalepar')^2*factorial(`deriv')^2*(V_Y_cl_l+V_Y_cl_r)[`deriv'+1,`deriv'+1] V_tau_rb = (`scalepar')^2*factorial(`deriv')^2*(V_Y_bc_l+V_Y_bc_r)[`deriv'+1,`deriv'+1] se_tau_cl = sqrt(V_tau_cl); se_tau_rb = sqrt(V_tau_rb) if ("`fuzzy'"!="") { - V_T_cl_l = invG_p_l*rdrobust_vce(dT+dZ, sV_T, R_p_l:*W_h_l, res_h_l, eC_l, indC_l, 0)*invG_p_l - V_T_cl_r = invG_p_r*rdrobust_vce(dT+dZ, sV_T, R_p_r:*W_h_r, res_h_r, eC_r, indC_r, 0)*invG_p_r - V_T_bc_l = invG_p_l*rdrobust_vce(dT+dZ, sV_T, Q_q_l, res_b_l, eC_l, indC_l, `q'+1)*invG_p_l - V_T_bc_r = invG_p_r*rdrobust_vce(dT+dZ, sV_T, Q_q_r, res_b_r, eC_r, indC_r, `q'+1)*invG_p_r + V_T_cl_l = invG_p_l*rdrobust_vce(dT+dZ, sV_T, R_p_l:*W_h_l, res_h_l, eC_l, indC_l, invG_p_l_c, sqrtRX_p_l, "`cr_method'", 0)*invG_p_l + V_T_cl_r = invG_p_r*rdrobust_vce(dT+dZ, sV_T, R_p_r:*W_h_r, res_h_r, eC_r, indC_r, invG_p_r_c, sqrtRX_p_r, "`cr_method'", 0)*invG_p_r + if (("`cr_method'"=="crv2" | "`cr_method'"=="crv3") & "`cluster'"!="") { + V_T_bc_l = invG_p_l*rdrobust_vce_qq_cluster(Q_q_l, R_q_l, W_b_l, invG_q_l, res_b_l, eC_l, indC_l, dT+dZ, sV_T, "`cr_method'")*invG_p_l + V_T_bc_r = invG_p_r*rdrobust_vce_qq_cluster(Q_q_r, R_q_r, W_b_r, invG_q_r, res_b_r, eC_r, indC_r, dT+dZ, sV_T, "`cr_method'")*invG_p_r + } + else { + // k_override = q+1: see V_Y_bc comment above (continuity at h=b). + V_T_bc_l = invG_p_l*rdrobust_vce(dT+dZ, sV_T, Q_q_l, res_b_l, eC_l, indC_l, J(0,0,.), J(0,0,.), "cr1", `q'+1)*invG_p_l + V_T_bc_r = invG_p_r*rdrobust_vce(dT+dZ, sV_T, Q_q_r, res_b_r, eC_r, indC_r, J(0,0,.), J(0,0,.), "cr1", `q'+1)*invG_p_r + } V_T_cl = factorial(`deriv')^2*(V_T_cl_l+V_T_cl_r)[`deriv'+1,`deriv'+1] V_T_rb = factorial(`deriv')^2*(V_T_bc_l+V_T_bc_r)[`deriv'+1,`deriv'+1] se_tau_T_cl = sqrt(V_T_cl); se_tau_T_rb = sqrt(V_T_rb) @@ -847,20 +995,23 @@ masspoints_found = 0 st_numscalar("bias_l", bias_l); st_numscalar("bias_r", bias_r) st_matrix("beta_Y_p_r", beta_Y_p_r); st_matrix("beta_Y_p_l", beta_Y_p_l) - - st_matrix("beta_q_r", beta_q_r); st_matrix("beta_q_l", beta_q_l) + st_numscalar("g_l", g_l); st_numscalar("g_r", g_r) - st_matrix("b", (tau_cl)) - st_matrix("V", (V_tau_cl)) + * e(b) / e(V): three named coefficients mirroring the printed output. + * Conventional: point = tau_cl, SE = sqrt(V_tau_cl) + * Bias-corrected: point = tau_bc, SE = sqrt(V_tau_cl) (not a recommended + * inferential object + * per CCT 2014; kept + * for parity with the + * displayed table) + * Robust: point = tau_bc, SE = sqrt(V_tau_rb) (CCT-recommended RBC) + * e(V) is block-diagonal (each row is its own estimand). + st_matrix("b", (tau_cl, tau_bc, tau_bc)) + st_matrix("V", (V_tau_cl, 0, 0 \ 0, V_tau_cl, 0 \ 0, 0, V_tau_rb)) st_matrix("V_Y_cl_r", V_Y_cl_r); st_matrix("V_Y_cl_l", V_Y_cl_l) st_matrix("V_Y_bc_r", V_Y_bc_r); st_matrix("V_Y_bc_l", V_Y_bc_l) st_numscalar("masspoints_found", masspoints_found) - if ("`all'"~="") { - st_matrix("b", (tau_cl,tau_bc,tau_bc)) - st_matrix("V", (V_tau_cl,0,0 \ 0,V_tau_cl,0 \0,0,V_tau_rb)) - } - if ("`covs'"!="") { st_matrix("gamma_p", gamma_p) } @@ -989,43 +1140,54 @@ masspoints_found = 0 disp in smcl in gr "{hline 19}{c BT}{hline 60}" if ("`covs'"!="") di "Covariate-adjusted estimates. Additional covariates included: `ncovs'" -* if (`covs_drop_coll'>=1) di "Variables dropped due to multicollinearity." if ("`cluster'"!="") di "Std. Err. adjusted for clusters in " "`clustvar'" if ("`scalepar'"!="1") di "Scale parameter: " `scalepar' if ("`scaleregul'"!="1") di "Scale regularization: " `scaleregul' if ("`masspoints'"=="check") di "Running variable checked for mass points." if ("`masspoints'"=="adjust" & masspoints_found==1) di "Estimates adjusted for mass points in the running variable." - if ("`nowarnings'"!="") { + if ("`warnings'"=="") { if (scalar(h_l)>=`range_l' | scalar(h_r)>=`range_r') disp in red "WARNING: bandwidth {it:h} greater than the range of the data." if (scalar(b_l)>=`range_l' | scalar(b_r)>=`range_r') disp in red "WARNING: bandwidth {it:b} greater than the range of the data." - if (scalar(N_h_l)<20 | scalar(N_h_r)<20) disp in red "WARNING: bandwidth {it:h} too low." - if (scalar(N_b_l)<20 | scalar(N_b_r)<20) disp in red "WARNING: bandwidth {it:b} too low." - if ("`sharpbw'"~="") disp in red "WARNING: bandwidths automatically computed for sharp RD estimation." - if ("`perf_comp'"~="") disp in red "WARNING: bandwidths automatically computed for sharp RD estimation because perfect compliance was detected on at least one side of the threshold." + if (scalar(N_h_l)<20 | scalar(N_h_r)<20) disp in red "WARNING: bandwidth {it:h} too low." + if (scalar(N_b_l)<20 | scalar(N_b_r)<20) disp in red "WARNING: bandwidth {it:b} too low." + if ("`sharpbw'"~="") disp in red "WARNING: bandwidths automatically computed for sharp RD estimation." + if ("`perf_comp'"~="") disp in red "WARNING: bandwidths automatically computed for sharp RD estimation because perfect compliance was detected on at least one side of the threshold." } local ci_l_rb = round(scalar(tau_bc - quant*se_tau_rb),0.001) local ci_r_rb = round(scalar(tau_bc + quant*se_tau_rb),0.001) - matrix rownames V = RD_Estimate - matrix colnames V = RD_Estimate - matrix colnames b = RD_Estimate - - local tempo: colfullnames V - matrix rownames V = `tempo' - - if ("`all'"~="") { - matrix rownames V = Conventional Bias-corrected Robust - matrix colnames V = Conventional Bias-corrected Robust - matrix colnames b = Conventional Bias-corrected Robust + matrix colnames b = Conventional Bias-corrected Robust + matrix rownames V = Conventional Bias-corrected Robust + matrix colnames V = Conventional Bias-corrected Robust + + } + local _rc = _rc + cwf `_orig_frame' + frame drop `_work_frame' + if `_rc' { + * Error path: best-effort per-name Mata cleanup before reraising. + * Note: in some configurations `exit N` from deeply nested if-blocks + * inside `capture noisily { ... }` short-circuits past this branch, + * leaving 1-2 transient Mata externals behind. The leak is bounded + * (doesn't accumulate — next call's _mata_before captures them so + * they aren't re-dropped) and harmless. Keep the per-name capture + * form so that when the branch IS reached, missing names don't + * halt cleanup of the rest. + capture mata: _mtx = direxternal("*"); st_local("_mata_after", rows(_mtx) ? invtokens(_mtx') : "") + capture mata: mata drop _mtx + local _mata_new : list _mata_after - _mata_before + foreach _mname of local _mata_new { + capture mata mata drop `_mname' + } + error `_rc' + } } - - restore ereturn clear - - cap ereturn post b V, esample(`touse') + + ereturn post b V, esample(`touse') ereturn scalar N = `N' ereturn scalar N_l = scalar(N_l) @@ -1062,7 +1224,9 @@ masspoints_found = 0 ereturn scalar ci_l_rb = scalar(tau_bc - quant*se_tau_rb) ereturn scalar ci_r_rb = scalar(tau_bc + quant*se_tau_rb) ereturn scalar pv_cl = scalar(2*normal(-abs(tau_cl/se_tau_cl))) + ereturn scalar pv_bc = scalar(2*normal(-abs(tau_bc/se_tau_cl))) ereturn scalar pv_rb = scalar(2*normal(-abs(tau_bc/se_tau_rb))) + if ("`cluster'"!="") ereturn scalar n_clust = scalar(g_l) + scalar(g_r) if ("`fuzzy'"!="") { ereturn scalar tau_T_cl = scalar(tau_T_cl) @@ -1083,25 +1247,59 @@ masspoints_found = 0 ereturn matrix beta_Y_p_l = beta_Y_p_l if ("`covs'"!="") { - ereturn matrix beta_covs = gamma_p + ereturn matrix coef_covs = gamma_p } - ereturn matrix V_cl_l = V_Y_cl_l - ereturn matrix V_cl_r = V_Y_cl_r - ereturn matrix V_rb_l = V_Y_bc_l - ereturn matrix V_rb_r = V_Y_bc_r - + ereturn matrix V_cl_l = V_Y_cl_l + ereturn matrix V_cl_r = V_Y_cl_r + ereturn matrix V_rb_l = V_Y_bc_l + ereturn matrix V_rb_r = V_Y_bc_r + + * 3x2 CI matrix -- rows Conventional/Bias-corrected/Robust, cols ll/ul. + * Matches e(b) / e(V) and Stata's r(table) matrix-based convention for CIs. + tempname _ci_mat + matrix `_ci_mat' = ( scalar(tau_cl - quant*se_tau_cl), scalar(tau_cl + quant*se_tau_cl) \ /// + scalar(tau_bc - quant*se_tau_cl), scalar(tau_bc + quant*se_tau_cl) \ /// + scalar(tau_bc - quant*se_tau_rb), scalar(tau_bc + quant*se_tau_rb) ) + matrix rownames `_ci_mat' = Conventional Bias-corrected Robust + matrix colnames `_ci_mat' = ll ul + ereturn matrix ci = `_ci_mat' + ereturn local ci_rb [`ci_l_rb' ; `ci_r_rb'] ereturn local kernel = "`kernel_type'" ereturn local bwselect = "`bwselect'" - ereturn local vce_select = "`vce_type'" + ereturn local vce_select = "`vce_raw'" + ereturn local vce_type = "`vce_type'" if ("`covs'"!="") ereturn local covs "`covs_list'" if ("`cluster'"!="") ereturn local clustvar "`clustvar'" - ereturn local outcomevar "`y'" ereturn local runningvar "`x'" ereturn local depvar "`y'" - ereturn local cmd "rdrobust" + * Title for estimates replay / estimates table + if ("`fuzzy'"=="") local _rd_title "Sharp RD estimates" + else local _rd_title "Fuzzy RD estimates" + if ("`covs'"!="") local _rd_title "`_rd_title' (covariate-adjusted)" + ereturn local title "`_rd_title'" + ereturn local cmdline "rdrobust `0'" + ereturn local cmd "rdrobust" + + * Drop transient matrices/scalars used as Mata-to-Stata transport buffers + * so they don't leak into the caller's namespace. + cap matrix drop b V + cap scalar drop h_l h_r b_l b_r quant + cap scalar drop N_h_l N_h_r N_b_l N_b_r + cap scalar drop tau_cl tau_bc se_tau_cl se_tau_rb + cap scalar drop tau_Y_cl_l tau_Y_cl_r tau_Y_bc_l tau_Y_bc_r + cap scalar drop bias_l bias_r g_l g_r masspoints_found + cap scalar drop tau_T_cl tau_T_bc se_tau_T_cl se_tau_T_rb + cap scalar drop tau_T_cl_l tau_T_cl_r tau_T_bc_l tau_T_bc_r + + * Drop only the Mata externals we created (set difference vs the + * entry-time snapshot). Library functions (rdrobust_*) are not + * externals and stay loaded; the user's own Mata variables are + * preserved because they appear in both snapshots. + mata: _mtx = direxternal("*"); st_local("_mata_after", rows(_mtx) ? invtokens(_mtx') : "") + mata: mata drop _mtx + local _mata_new : list _mata_after - _mata_before + if `"`_mata_new'"' != "" mata mata drop `_mata_new' - mata mata clear - end diff --git a/stata/rdrobust.pdf b/stata/rdrobust.pdf index a1f531d..abc8467 100644 Binary files a/stata/rdrobust.pdf and b/stata/rdrobust.pdf differ diff --git a/stata/rdrobust.pkg b/stata/rdrobust.pkg index 0a26c20..fc0b5da 100644 --- a/stata/rdrobust.pkg +++ b/stata/rdrobust.pkg @@ -1,13 +1,13 @@ v 3 d STATA Package: RDROBUST d -d Authors: Sebastian Calonico, Graduate School of Management, University of California, Davis, scalonico@ucdavis.edu -d Matias D. Cattaneo, Operations Research and Financial Engineering, Princeton University, matias.d.cattaneo@gmail.com -d Max H. Farrell, Department of Economics, University of California, Santa Barbara, mhfarrell@gmail.com -d Rocio Titiunik, Department of Politics, Princeton University, rocio.titiunik@gmail.com +d Authors: Sebastian Calonico, University of California, Davis, scalonico@ucdavis.edu +d Matias D. Cattaneo, Princeton University, matias.d.cattaneo@gmail.com +d Max H. Farrell, University of California, Santa Barbara, mhfarrell@gmail.com +d Rocio Titiunik, Princeton University, rocio.titiunik@gmail.com +d +d Distribution-Date: 20260513 d -d Distribution-Date: 20260515 -d f rdbwselect.ado f rdplot.ado f rdplot.sthlp @@ -15,13 +15,17 @@ f rdplot_illustration.do f rdbwselect.sthlp f rdrobust.ado f rdrobust.sthlp +f rdrobustplot.ado +f rdrobustplot.sthlp f rdrobust_bw.mo f rdrobust_functions.do f rdrobust_illustration.do +f rdrobust_illustration_new.do f rdrobust_kweight.mo f rdrobust_senate.dta f rdrobust_res.mo f rdrobust_vce.mo +f rdrobust_vce_qq_cluster.mo f rdrobust_collapse.mo f rdrobust_median.mo f rdrobust_groupid.mo @@ -32,4 +36,3 @@ f rdbwselect_2014.ado f rdbwselect_2014.sthlp f rdbwselect_2014_functions.do f rdbwselect_2014_kconst.ado - diff --git a/stata/rdrobust.sthlp b/stata/rdrobust.sthlp index 78bc483..492f525 100644 --- a/stata/rdrobust.sthlp +++ b/stata/rdrobust.sthlp @@ -1,5 +1,5 @@ {smcl} -{* *!version 10.0.0 2026-05-15}{...} +{* *!version 11.0.0 2026-05-13}{...} {viewerjumpto "Syntax" "rdrobust##syntax"}{...} {viewerjumpto "Description" "rdrobust##description"}{...} {viewerjumpto "Options" "rdrobust##options"}{...} @@ -66,6 +66,8 @@ and {browse "https://rdpackages.github.io/references/Calonico-Cattaneo-Farrell-T {p 8 8}{browse "https://rdpackages.github.io/":https://rdpackages.github.io/}{p_end} +{p 4 8}{it:Requires Stata 16 or later.}{p_end} + {marker options}{...} {title:Options} @@ -105,7 +107,7 @@ Default is {cmd:rho(1)} if {it:h} is specified but {it:b} is not.{p_end} {p 4 8}{cmd:covs(}{it:covars}{cmd:)} specifies additional covariates to be used for estimation and inference.{p_end} -{p 4 8}{cmd:covs_drop(}{it:covsdropoption}{cmd:)} assesses collinearity in additional covariates used for estimation and inference. Options {opt pinv} (default choice) and {opt invsym} drop collinear additional covariates, differing only in the type of inverse function used. Option {opt off} omits the check for collinear additional covariates.{p_end} +{p 4 8}{cmd:covs_drop(}{it:covsdropoption}{cmd:)} assess collinearity in additional covariates used for estimation and inference. Options {opt pinv} (default choice) and {opt invsym} drops collinear additional covariates, differing only in the type of inverse function used. Option {opt off} omits the check for collinear additional covariates.{p_end} {p 4 8}{cmd:kernel(}{it:kernelfn}{cmd:)} specifies the kernel function used to construct the local-polynomial estimator(s). Options are: {opt tri:angular}, {opt epa:nechnikov}, and {opt uni:form}. Default is {cmd:kernel(triangular)}.{p_end} @@ -159,8 +161,11 @@ Options are:{p_end} {p 8 12}{cmd:vce(hc1)} for heteroskedasticity-robust plug-in residuals variance estimator with {it:hc1} weights.{p_end} {p 8 12}{cmd:vce(hc2)} for heteroskedasticity-robust plug-in residuals variance estimator with {it:hc2} weights.{p_end} {p 8 12}{cmd:vce(hc3)} for heteroskedasticity-robust plug-in residuals variance estimator with {it:hc3} weights.{p_end} -{p 8 12}{cmd:vce(nncluster }{it:clustervar [nnmatch]}{cmd:)} for cluster-robust nearest neighbor variance estimation, with {it:clustervar} indicating the cluster ID variable and {it:nnmatch} indicating the minimum number of neighbors to be used.{p_end} -{p 8 12}{cmd:vce(cluster }{it:clustervar}{cmd:)} for cluster-robust plug-in residuals variance estimation with degrees-of-freedom weights and {it:clustervar} indicating the cluster ID variable.{p_end} +{p 8 12}{cmd:vce(cluster }{it:clustervar}{cmd:)} for cluster-robust plug-in residuals variance estimation with degrees-of-freedom weights; equivalent to {cmd:vce(cr1 }{it:clustervar}{cmd:)}.{p_end} +{p 8 12}{cmd:vce(cr1 }{it:clustervar}{cmd:)} for cluster-robust plug-in residuals variance estimator with degrees-of-freedom correction ((n-1)/(n-k)*g/(g-1)).{p_end} +{p 8 12}{cmd:vce(cr2 }{it:clustervar}{cmd:)} for the Bell-McCaffrey (2002) bias-reduced cluster-robust variance estimator (CRV2).{p_end} +{p 8 12}{cmd:vce(cr3 }{it:clustervar}{cmd:)} for the Pustejovsky-Tipton (2018) cluster-robust variance estimator (CRV3), approximately unbiased with few clusters.{p_end} +{p 8 12}The CR2/CR3 leverage correction applies to both the conventional and the robust bias-corrected standard errors, including when the point-estimation bandwidth h differs from the bias-correction bandwidth b; in that case the cluster leverage is computed from the bias (b) regression.{p_end} {p 8 12}Default is {cmd:vce(nn 3)}.{p_end} {p 4 8}{cmd:level(}{it:#}{cmd:)} specifies confidence level for confidence intervals. @@ -235,24 +240,49 @@ Default is {cmd:level(95)}.{p_end} {synopt:{cmd:e(tau_bc_r)}}bias-corrected local-polynomial right estimate{p_end} {synopt:{cmd:e(se_tau_cl)}}conventional standard error of the local-polynomial RD estimator{p_end} {synopt:{cmd:e(se_tau_rb)}}robust standard error of the local-polynomial RD estimator{p_end} +{synopt:{cmd:e(tau_T_cl)}}conventional first-stage estimate (fuzzy RD only){p_end} +{synopt:{cmd:e(tau_T_cl_l)}}conventional first-stage left estimate (fuzzy RD only){p_end} +{synopt:{cmd:e(tau_T_cl_r)}}conventional first-stage right estimate (fuzzy RD only){p_end} +{synopt:{cmd:e(tau_T_bc)}}bias-corrected first-stage estimate (fuzzy RD only){p_end} +{synopt:{cmd:e(tau_T_bc_l)}}bias-corrected first-stage left estimate (fuzzy RD only){p_end} +{synopt:{cmd:e(tau_T_bc_r)}}bias-corrected first-stage right estimate (fuzzy RD only){p_end} +{synopt:{cmd:e(se_tau_T_cl)}}conventional standard error of the first-stage estimator (fuzzy RD only){p_end} +{synopt:{cmd:e(se_tau_T_rb)}}robust standard error of the first-stage estimator (fuzzy RD only){p_end} {synopt:{cmd:e(bias_l)}}estimated bias for the local-polynomial RD estimator below the cutoff{p_end} {synopt:{cmd:e(bias_r)}}estimated bias for the local-polynomial RD estimator above the cutoff{p_end} +{synopt:{cmd:e(level)}}confidence level{p_end} +{synopt:{cmd:e(ci_l_cl)}}lower endpoint of the conventional CI{p_end} +{synopt:{cmd:e(ci_r_cl)}}upper endpoint of the conventional CI{p_end} +{synopt:{cmd:e(ci_l_rb)}}lower endpoint of the robust bias-corrected CI{p_end} +{synopt:{cmd:e(ci_r_rb)}}upper endpoint of the robust bias-corrected CI{p_end} +{synopt:{cmd:e(pv_cl)}}p-value of the conventional test{p_end} +{synopt:{cmd:e(pv_bc)}}p-value of the bias-corrected test (uses conventional SE){p_end} +{synopt:{cmd:e(pv_rb)}}p-value of the robust bias-corrected test{p_end} +{synopt:{cmd:e(n_clust)}}number of clusters (only when {cmd:vce(cluster}|{cmd:cr1}|{cmd:cr2}|{cmd:cr3} ...{cmd:)} is specified){p_end} {p2col 5 20 24 2: Macros}{p_end} +{synopt:{cmd:e(cmd)}}{cmd:rdrobust}{p_end} +{synopt:{cmd:e(cmdline)}}command as typed{p_end} +{synopt:{cmd:e(title)}}title for the output ({cmd:Sharp}/{cmd:Fuzzy RD estimates}, optionally {cmd:(covariate-adjusted)}){p_end} +{synopt:{cmd:e(depvar)}}name of dependent (outcome) variable{p_end} {synopt:{cmd:e(runningvar)}}name of running variable{p_end} -{synopt:{cmd:e(outcomevar)}}name of outcome variable{p_end} {synopt:{cmd:e(clustvar)}}name of cluster variable{p_end} -{synopt:{cmd:e(covs)}}name of covariates{p_end} -{synopt:{cmd:e(vce_select)}}vcetype specified in vce(){p_end} +{synopt:{cmd:e(covs)}}names of additional covariates{p_end} +{synopt:{cmd:e(vce_select)}}vcetype specified in vce() (raw option name, e.g. "nn", "hc3", "cr3"){p_end} +{synopt:{cmd:e(vce_type)}}display label for variance-covariance estimator (e.g. "NN", "HC3", "CR3"){p_end} {synopt:{cmd:e(bwselect)}}bandwidth selection choice{p_end} {synopt:{cmd:e(kernel)}}kernel choice{p_end} +{synopt:{cmd:e(ci_rb)}}formatted robust bias-corrected CI string (e.g. {cmd:[3.989 ; 11.021]}); see {cmd:e(ci_l_rb)} / {cmd:e(ci_r_rb)} for numeric endpoints or {cmd:e(ci)} for the full matrix{p_end} {p2col 5 20 24 2: Matrices}{p_end} +{synopt:{cmd:e(b)}}1x3 coefficient vector with column names {cmd:Conventional} (tau_cl), {cmd:Bias-corrected} (tau_bc), and {cmd:Robust} (tau_bc){p_end} +{synopt:{cmd:e(V)}}3x3 block-diagonal variance-covariance matrix for {cmd:e(b)}: {cmd:V[Conventional,Conventional]} = {cmd:V[Bias-corrected,Bias-corrected]} = se_tau_cl^2 and {cmd:V[Robust,Robust]} = se_tau_rb^2. The Bias-corrected pairing of tau_bc with the conventional SE is not a recommended inferential object (CCT 2014); use {cmd:_b[Robust]} and {cmd:_se[Robust]} for the RBC inference.{p_end} +{synopt:{cmd:e(ci)}}3x2 confidence-interval matrix; rows {cmd:Conventional} / {cmd:Bias-corrected} / {cmd:Robust}, columns {cmd:ll} / {cmd:ul}{p_end} {synopt:{cmd:e(beta_Y_p_r)}}conventional p-order local-polynomial estimates to the right of the cutoff for the outcome variable{p_end} {synopt:{cmd:e(beta_Y_p_l)}}conventional p-order local-polynomial estimates to the left of the cutoff for the outcome variable{p_end} {synopt:{cmd:e(beta_T_p_r)}}conventional p-order local-polynomial estimates to the right of the cutoff for the first stage (fuzzy RD){p_end} {synopt:{cmd:e(beta_T_p_l)}}conventional p-order local-polynomial estimates to the left of the cutoff for the first stage (fuzzy RD){p_end} -{synopt:{cmd:e(beta_covs)}}coefficients of the additional covariates, only returned when {cmd:covs()} are used{p_end} +{synopt:{cmd:e(coef_covs)}}coefficients of the additional covariates, only returned when {cmd:covs()} are used{p_end} {synopt:{cmd:e(V_cl_r)}}conventional variance-covariance matrix to the right of the cutoff{p_end} {synopt:{cmd:e(V_cl_l)}}conventional variance-covariance matrix to the left of the cutoff{p_end} {synopt:{cmd:e(V_rb_r)}}robust variance-covariance matrix to the right of the cutoff{p_end} diff --git a/stata/rdrobust_bw.mo b/stata/rdrobust_bw.mo index 54606c3..a0e5027 100644 Binary files a/stata/rdrobust_bw.mo and b/stata/rdrobust_bw.mo differ diff --git a/stata/rdrobust_collapse.mo b/stata/rdrobust_collapse.mo index 644d0c8..d19a056 100644 Binary files a/stata/rdrobust_collapse.mo and b/stata/rdrobust_collapse.mo differ diff --git a/stata/rdrobust_functions.do b/stata/rdrobust_functions.do index d8b1850..8d821f4 100644 --- a/stata/rdrobust_functions.do +++ b/stata/rdrobust_functions.do @@ -2,8 +2,10 @@ * RDROBUST STATA PACKAGE -- rdrobust_functions * Authors: Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell, Rocio Titiunik ******************************************************************************** -*!version 10.0.0 2026-05-15 - +*!version 11.0.0 2026-05-13 + +version 16.0 + capture mata mata drop rdrobust_res() mata real matrix rdrobust_res(real matrix X, real matrix y, real matrix T, real matrix Z, real matrix m, real matrix hii, string vce, real scalar matches, dups, dupsid, real scalar d) @@ -84,10 +86,35 @@ end ************************************************************************************************************************************************************* capture mata mata drop rdrobust_bw() mata -real matrix rdrobust_bw(real matrix Y, real matrix X, real matrix T, real matrix Z, real matrix C, real matrix W, real scalar c, real scalar o, real scalar nu, real scalar o_B, real scalar h_V, real scalar h_B, real scalar scale, string vce, real scalar nnmatch, string kernel, dups, dupsid, covs_drop_coll) +real matrix rdrobust_bw(real matrix Y, real matrix X, real matrix T, real matrix Z, real matrix C, real matrix W, real scalar c, real scalar o, real scalar nu, real scalar o_B, real scalar h_V, real scalar h_B, real scalar scale, string vce, real scalar nnmatch, string kernel, dups, dupsid, covs_drop_coll, | string scalar cr_method, transmorphic vcache) { + // cr_method: "" | "cr1" | "crv2" | "crv3". Cluster path only. + // vcache: optional asarray("string") keyed by "o_nu". T1 (2026-05-12): + // V-fit depends only on (o, nu) at fixed h_V. Callers pass a per-side + // asarray to share results across the 6-18 pilot calls per rdrobust. + real scalar has_vcache, used_cache + string scalar vkey + real colvector packed + if (args() < 20) cr_method = "" + has_vcache = (args() >= 21) + used_cache = 0 + vkey = sprintf("%g_%g", o, nu) dT = dZ = dC = eC = indC = 0 dW = length(W) + if (has_vcache) { + if (asarray_contains(vcache, vkey)) { + packed = asarray(vcache, vkey) + V_V = packed[1] + BConst = packed[2] + n_s = packed[3] + s = packed[4..(3 + n_s)] + if (rows(T)>1) dT = 1 + if (rows(Z)>1) dZ = cols(Z) + if (rows(C)>1) dC = 1 + used_cache = 1 + } + } + if (used_cache == 0) { w = rdrobust_kweight(X, c, h_V, kernel) if (dW>1) { w = W:*w @@ -95,8 +122,12 @@ real matrix rdrobust_bw(real matrix Y, real matrix X, real matrix T, real matrix ind_V = selectindex(w:> 0); eY = Y[ind_V];eX = X[ind_V];eW = w[ind_V] n_V = length(ind_V) D_V = eY - R_V = J(n_V,o+1,.) - for (j=1; j<=(o+1); j++) R_V[.,j] = (eX:-c):^(j-1) + // Q1: Vandermonde via successive multiplication. + R_V = J(n_V,o+1,1) + if (o >= 1) { + u_V_tmp = eX :- c + for (j=2; j<=(o+1); j++) R_V[.,j] = R_V[.,j-1] :* u_V_tmp + } invG_V = cholinv(quadcross(R_V,eW,R_V)) e_v = J((o+1),1,0); e_v[nu+1]=1 s = 1 @@ -151,12 +182,21 @@ real matrix rdrobust_bw(real matrix Y, real matrix X, real matrix T, real matrix } } res_V = rdrobust_res(eX, eY, eT, eZ, predicts_V, hii, vce, nnmatch, dups_V, dupsid_V, o+1) - V_V = (invG_V*rdrobust_vce(dT+dZ, s, R_V:*eW, res_V, eC, indC, 0)*invG_V)[nu+1,nu+1] + // For CR2/CR3 the hat-matrix adjustment uses R_V * sqrt(W); pass invG_V + // and sqrtRX_V. For other paths these are unused and can be empty. + sqrtRX_V = (cr_method=="crv2" | cr_method=="crv3") ? R_V:*sqrt(eW) : J(0,0,.) + invG_V_c = (cr_method=="crv2" | cr_method=="crv3") ? invG_V : J(0,0,.) + V_V = (invG_V*rdrobust_vce(dT+dZ, s, R_V:*eW, res_V, eC, indC, invG_V_c, sqrtRX_V, cr_method, 0)*invG_V)[nu+1,nu+1] v = quadcross(R_V:*eW,((eX:-c):/h_V):^(o+1)) Hp = J(o+1, 1, 1) for (j=1; j<=(o+1); j++) Hp[j] = h_V^((j-1)) BConst = (diag(Hp)*(invG_V*v))[nu+1] - + if (has_vcache) { + packed = (V_V \ BConst \ length(s) \ vec(s)) + asarray(vcache, vkey, packed) + } + } + w = rdrobust_kweight(X, c, h_B, kernel) if (dW>1) { w = W:*w @@ -165,8 +205,12 @@ real matrix rdrobust_bw(real matrix Y, real matrix X, real matrix T, real matrix n_B = length(ind) eY = Y[ind];eX = X[ind];eW = w[ind] D_B = eY - R_B = J(n_B,o_B+1,.) - for (j=1; j<=(o_B+1); j++) R_B[.,j] = (eX:-c):^(j-1) + // Q1: Vandermonde via successive multiplication. + R_B = J(n_B,o_B+1,1) + if (o_B >= 1) { + u_B_tmp = eX :- c + for (j=2; j<=(o_B+1); j++) R_B[.,j] = R_B[.,j-1] :* u_B_tmp + } invG_B = cholinv(quadcross(R_B,eW,R_B)) if (dT==1) { eT = T[ind] @@ -198,7 +242,9 @@ real matrix rdrobust_bw(real matrix Y, real matrix X, real matrix T, real matrix } } res_B = rdrobust_res(eX, eY, eT, eZ, predicts_B, hii, vce, nnmatch, dups_B, dupsid_B,o_B+1) - V_B = (invG_B*rdrobust_vce(dT+dZ, s, R_B:*eW, res_B, eC, indC, 0)*invG_B)[o+2,o+2] + sqrtRX_B = (cr_method=="crv2" | cr_method=="crv3") ? R_B:*sqrt(eW) : J(0,0,.) + invG_B_c = (cr_method=="crv2" | cr_method=="crv3") ? invG_B : J(0,0,.) + V_B = (invG_B*rdrobust_vce(dT+dZ, s, R_B:*eW, res_B, eC, indC, invG_B_c, sqrtRX_B, cr_method, 0)*invG_B)[o+2,o+2] BWreg = 3*BConst^2*V_B } B = sqrt(2*(o+1-nu))*BConst*(s'*beta_B[o+2,]') @@ -213,61 +259,335 @@ end **************************************************** capture mata mata drop rdrobust_vce() mata -real matrix rdrobust_vce(real scalar d, real matrix s, real matrix RX, real matrix res, real matrix C, real matrix ind, real scalar k_override) -{ +real matrix rdrobust_vce(real scalar d, real matrix s, real matrix RX, real matrix res, real matrix C, real matrix ind, real matrix invG, real matrix sqrtRX, string scalar cr_method, real scalar k_override) +{ + // k_override: when > 0, overrides the (n-1)/(n-k) df correction's k for + // CR1. Used when RX is a "score-like" matrix (e.g. Q_q) whose ncol is + // smaller than the effective number of regressors estimated. Without it, + // CR1 at h!=b under cluster used k=p+1 from cols(Q_q), inconsistent with + // the q-regression direct path at h=b which uses k=q+1. Pass 0 to use + // cols(RX) as before. + // Compute the meat M of the sandwich estimator. Non-cluster path + // (length(C)<=1) applies HC weighting via the pre-weighted residuals passed + // in res; cluster paths implement CR1 (df correction), CR2 (Bell-McCaffrey + // half-inverse hat-matrix adjustment), or CR3 (Pustejovsky-Tipton + // full-inverse hat-matrix adjustment, Woodbury-optimized to O(k^3) per + // cluster). invG (k x k) and sqrtRX (n x k = R*sqrt(W)) are only used by + // the CR2/CR3 branches; callers may pass empty matrices (J(0,0,.)) when + // not applicable. cr_method selects the cluster variant. + + real scalar k, n, g, i, j, l + real matrix M + real scalar has_sqrtRX, has_invG + real matrix C_o, RX_o, res_o, sRX_o, info + real matrix sRX_g, RX_g, ri, e_g, L_g, u_g, M_g, GmL, G + real matrix F_sq, V, C_half, tC_half, Ru_g, adj, score_g, score_l + real rowvector lambda + real colvector sigma2, c_coef, sv + real scalar u_g_l + real matrix rng_idx + real scalar w + k = cols(RX) - k_df = k - if (k_override>0) k_df = k_override M = J(k,k,0) - n = length(C) - if (n==1) { - w = 1 + n = length(C) + + if (n <= 1) { + // non-cluster: residuals already carry HC weights (hc0/hc1/hc2/hc3) if (d==0){ SS = res:^2 M = quadcross(RX,SS,RX) } else { - for (i = 1; i <= 1+d; i++) { - SS = res[,i]:*res - for (j = 1; j <= 1+d; j++) { - M = M + quadcross(RX,(s[i]*s[j]):*SS[,j],RX) + // T4: Sigma_{i,j} s_i s_j r_i r_j = (Sigma_l s_l r_l)^2 + // One weighted quadcross instead of an (d+1)^2 Mata loop. + // s is a column vector (from rdrobust.ado: 1 \ -gamma_p[,1]...). + real colvector r_comb + r_comb = res * s + M = quadcross(RX, r_comb :* r_comb, RX) + } + return(M) + } + + // ---- cluster path ---- + has_sqrtRX = (rows(sqrtRX) > 1) + has_invG = (rows(invG) > 1) + C_o = C[ind] + RX_o = RX[ind,] + res_o = res[ind,] + info = panelsetup(C_o,1) + g = rows(info) + if (has_sqrtRX) sRX_o = sqrtRX[ind,] + else sRX_o = J(0,0,.) + + // ---- CR3 (Pustejovsky-Tipton) via Sherman-Morrison-Woodbury ---- + if (cr_method == "crv3" & has_invG) { + G = invsym(invG) // k x k Gram matrix = R'WR + w = 1 // CRV3 is approximately unbiased + if (d==0) { + for (i=1; i<=g; i++) { + if (has_sqrtRX) { + sRX_g = panelsubmatrix(sRX_o, i, info) + e_g = panelsubmatrix(res_o, i, info)[,1] + L_g = quadcross(sRX_g, sRX_g) + u_g = quadcross(sRX_g, sRX_g[,1]:*e_g) + } + else { + RX_g = panelsubmatrix(RX_o, i, info) + e_g = panelsubmatrix(res_o, i, info)[,1] + L_g = quadcross(RX_g, RX_g) + u_g = quadcross(RX_g, e_g) + } + GmL = G - L_g + if (rank(GmL) == k) M_g = invsym(GmL) + else M_g = J(k,k,0) // fallback to CR1-style + score_g = u_g + L_g * (M_g * u_g) + M = M + score_g * score_g' + } + } + else { + for (i=1; i<=g; i++) { + if (has_sqrtRX) { + sRX_g = panelsubmatrix(sRX_o, i, info) + L_g = quadcross(sRX_g, sRX_g) + } + else { + RX_g = panelsubmatrix(RX_o, i, info) + L_g = quadcross(RX_g, RX_g) } + ri = panelsubmatrix(res_o, i, info) + GmL = G - L_g + if (rank(GmL) == k) M_g = invsym(GmL) + else M_g = J(k,k,0) + sv = J(k,1,0) + for (l=1; l<=1+d; l++) { + if (has_sqrtRX) u_g = quadcross(sRX_g, sRX_g[,1]:*ri[,l]) + else u_g = quadcross(RX_g, ri[,l]) + score_l = u_g + L_g * (M_g * u_g) + sv = sv + s[l] * score_l + } + M = M + sv * sv' } } + return(w*M) } - else { - C_o = C[ind] - RX_o = RX[ind,] - res_o = res[ind,] - info = panelsetup(C_o,1) - g = rows(info) - w = ((n-1) / (n-k_df)) * (g / (g-1)) - if (d==0){ + + // ---- CR2 (Bell-McCaffrey) via eigendecomp of F_sq = C_half'*L_g*C_half ---- + if (cr_method == "crv2" & has_invG) { + C_half = cholesky(invG) // lower-tri L: L*L' = invG + tC_half = C_half' // upper-tri L' + w = 1 // CRV2 is approximately unbiased + if (d==0) { for (i=1; i<=g; i++) { - Xi = panelsubmatrix(RX_o, i, info) - ri = panelsubmatrix(res_o, i, info) - Xr = quadcross(Xi,ri)' - M = M + quadcross(Xr,Xr) + if (has_sqrtRX) { + sRX_g = panelsubmatrix(sRX_o, i, info) + e_g = panelsubmatrix(res_o, i, info)[,1] + L_g = quadcross(sRX_g, sRX_g) + u_g = quadcross(sRX_g, sRX_g[,1]:*e_g) + } + else { + RX_g = panelsubmatrix(RX_o, i, info) + e_g = panelsubmatrix(res_o, i, info)[,1] + L_g = quadcross(RX_g, RX_g) + u_g = quadcross(RX_g, e_g) + } + F_sq = tC_half * L_g * C_half + symeigensystem(F_sq, V=., lambda=.) + sigma2 = lambda' + for (j=1; j<=length(sigma2); j++) if (sigma2[j] < 0) sigma2[j] = 0 + c_coef = J(length(sigma2),1,0) + for (j=1; j<=length(sigma2); j++) { + if (sigma2[j] < 1e-14) c_coef[j] = 0 + else c_coef[j] = (1/sqrt(max((1-sigma2[j], 1e-8))) - 1) / sigma2[j] + } + Ru_g = tC_half * u_g + adj = V * (c_coef :* (V' * Ru_g)) + score_g = u_g + L_g * (C_half * adj) + M = M + score_g * score_g' } } else { for (i=1; i<=g; i++) { - Xi = panelsubmatrix(RX_o, i, info) - ri = panelsubmatrix(res_o, i, info) - MHolder = J(1+d,k,.) + if (has_sqrtRX) { + sRX_g = panelsubmatrix(sRX_o, i, info) + L_g = quadcross(sRX_g, sRX_g) + } + else { + RX_g = panelsubmatrix(RX_o, i, info) + L_g = quadcross(RX_g, RX_g) + } + ri = panelsubmatrix(res_o, i, info) + F_sq = tC_half * L_g * C_half + symeigensystem(F_sq, V=., lambda=.) + sigma2 = lambda' + for (j=1; j<=length(sigma2); j++) if (sigma2[j] < 0) sigma2[j] = 0 + c_coef = J(length(sigma2),1,0) + for (j=1; j<=length(sigma2); j++) { + if (sigma2[j] < 1e-14) c_coef[j] = 0 + else c_coef[j] = (1/sqrt(max((1-sigma2[j], 1e-8))) - 1) / sigma2[j] + } + sv = J(k,1,0) for (l=1; l<=1+d; l++) { - MHolder[l,] = quadcross(Xi,s[l]:*ri[,l])' + if (has_sqrtRX) u_g = quadcross(sRX_g, sRX_g[,1]:*ri[,l]) + else u_g = quadcross(RX_g, ri[,l]) + Ru_g = tC_half * u_g + adj = V * (c_coef :* (V' * Ru_g)) + score_l = u_g + L_g * (C_half * adj) + sv = sv + s[l] * score_l } - summedvalues = colsum(MHolder) - M = M + quadcross(summedvalues,summedvalues) - } + M = M + sv * sv' + } + } + return(w*M) + } + + // ---- Default cluster path (CR1 with finite-sample df correction) ---- + // k_df: which k enters the (n-1)/(n-k) correction. Defaults to cols(RX) + // unless caller passes a positive k_override. + w = ((n-1) / (n - (k_override > 0 ? k_override : k))) * (g / (g-1)) + if (d==0){ + for (i=1; i<=g; i++) { + RX_g = panelsubmatrix(RX_o, i, info) + ri = panelsubmatrix(res_o, i, info) + u_g = quadcross(RX_g, ri)' + M = M + quadcross(u_g, u_g) + } + } + else { + for (i=1; i<=g; i++) { + RX_g = panelsubmatrix(RX_o, i, info) + ri = panelsubmatrix(res_o, i, info) + sv = J(1+d, k, .) + for (l=1; l<=1+d; l++) { + sv[l,] = quadcross(RX_g, s[l]:*ri[,l])' + } + score_g = colsum(sv) + M = M + quadcross(score_g, score_g) } } - return(w*M) + return(w*M) } -mata mosave rdrobust_vce(), replace +mata mosave rdrobust_vce(), replace end +capture mata mata drop rdrobust_vce_qq_cluster() +mata +real matrix rdrobust_vce_qq_cluster( + real matrix Q, real matrix R_q, real colvector W_b, real matrix invG_q, + real matrix res, real matrix C, real matrix ind, + real scalar d, real matrix s, string scalar cr_method) +{ + // Decoupled cluster-robust variance for the bias-corrected estimator when h != b. + // Sandwich uses Q (Q_q, mixed-bandwidth) but leverage comes from the q-regression + // (bandwidth b). Matrix analog of using hii_q for HC2/HC3 in the same branch. + // Returns the meat matrix M such that V_rb = invG_p * M * invG_p. + // + // Q: n x k Q_q_side matrix + // R_q: n x k_R q-regression design (raw, no weights) + // W_b: n x 1 kernel weights at bandwidth b + // invG_q: k_R x k_R q-regression inverse Gram + // res: n x (1+d) q-regression raw residuals + // C: n x 1 cluster id + // ind: n x 1 sort permutation of rows to match panelsetup + // cr_method: "crv2" or "crv3" + + real scalar k, k_R, g, i, l, j, r + real matrix M, C_o, R_o, res_o, Q_o, Wb_o, ri, info, G_q + real matrix R_g, Q_g, sR_g, L_g, P_g + real matrix V_sv, V_r, W_eig, T_mat, Lambda, GmL, M_g, A + real colvector e_gl, Wb_g, u_Qq, u_q, sv, sigma2, sigma, tau, gamma, keep, sig, score_l + real rowvector lambda, tau_row + real colvector keep_idx + + k = cols(Q) + k_R = cols(R_q) + M = J(k, k, 0) + + C_o = C[ind] + R_o = R_q[ind,] + res_o = res[ind,] + Q_o = Q[ind,] + Wb_o = W_b[ind] + info = panelsetup(C_o, 1) + g = rows(info) + G_q = invsym(invG_q) + + for (i = 1; i <= g; i++) { + Q_g = panelsubmatrix(Q_o, i, info) + R_g = panelsubmatrix(R_o, i, info) + Wb_g = panelsubmatrix(Wb_o, i, info) + sR_g = R_g :* sqrt(Wb_g) // n_g x k_R + L_g = quadcross(sR_g, sR_g) // k_R x k_R + P_g = quadcross(Q_g, R_g) // k x k_R + ri = panelsubmatrix(res_o, i, info) + + // T2: per-cluster Lambda (crv2) or M_g (crv3) depends only on + // L_g / G_q / invG_q -- not on the residual column l. Compute + // once per cluster, reuse across the l-loop (2026-05-12). + real scalar use_simple + use_simple = 0 + Lambda = J(0, 0, .) + M_g = J(0, 0, .) + if (cr_method == "crv2") { + symeigensystem(L_g, V_sv=., lambda=.) + sigma2 = lambda' + for (j = 1; j <= rows(sigma2); j++) if (sigma2[j] < 0) sigma2[j] = 0 + sigma = sqrt(sigma2) + keep = (sigma :> max(sigma) * 1e-10) + r = sum(keep) + if (r == 0) { + use_simple = 1 + } + else { + keep_idx = select((1..rows(sigma))', keep) + V_r = V_sv[, keep_idx] + sig = sigma[keep_idx] + T_mat = diag(sig) * (V_r' * invG_q * V_r) * diag(sig) + T_mat = 0.5 * (T_mat + T_mat') + symeigensystem(T_mat, W_eig=., tau_row=.) + tau = tau_row' + for (j = 1; j <= rows(tau); j++) { + if (tau[j] < 0) tau[j] = 0 + if (tau[j] > 1-1e-10) tau[j] = 1 - 1e-10 + } + gamma = 1 :/ sqrt(1 :- tau) :- 1 + A = (V_r :/ sig') * W_eig + Lambda = A * (gamma :* A') + } + } + else { + GmL = G_q - L_g + if (rank(GmL) == k_R) M_g = invsym(GmL) + else M_g = J(k_R, k_R, 0) + } + + sv = J(k, 1, 0) + for (l = 1; l <= 1 + d; l++) { + e_gl = ri[,l] + u_Qq = quadcross(Q_g, e_gl) + if (use_simple) { + score_l = u_Qq + } + else if (cr_method == "crv2") { + u_q = quadcross(R_g, Wb_g :* e_gl) + score_l = u_Qq + P_g * (Lambda * u_q) + } + else { + u_q = quadcross(R_g, Wb_g :* e_gl) + score_l = u_Qq + P_g * (M_g * u_q) + } + sv = sv + s[l] * score_l + } + + M = M + sv * sv' + } + + return(M) +} +mata mosave rdrobust_vce_qq_cluster(), replace +end + + capture mata mata drop rdrobust_groupid() mata real colvector rdrobust_groupid(real colvector x, real vector at) diff --git a/stata/rdrobust_groupid.mo b/stata/rdrobust_groupid.mo index f0e54c7..9c5861a 100644 Binary files a/stata/rdrobust_groupid.mo and b/stata/rdrobust_groupid.mo differ diff --git a/stata/rdrobust_illustration.do b/stata/rdrobust_illustration.do index d9accd9..f28daf9 100644 --- a/stata/rdrobust_illustration.do +++ b/stata/rdrobust_illustration.do @@ -2,12 +2,6 @@ ** RDROBUST Stata Package ** Do-file for Empirical Illustration ******************************************************************************** -** hlp2winpdf, cdn(rdrobust) replace -** hlp2winpdf, cdn(rdbwselect) replace -** hlp2winpdf, cdn(rdplot) replace -******************************************************************************** -** net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/main/stata) replace -******************************************************************************** clear all set more off set linesize 80 diff --git a/stata/rdrobust_illustration_new.do b/stata/rdrobust_illustration_new.do new file mode 100644 index 0000000..d0d84a5 --- /dev/null +++ b/stata/rdrobust_illustration_new.do @@ -0,0 +1,160 @@ +******************************************************************************** +** RDROBUST Stata Package +** Illustration of new features (v11.0.0, 2026-05-13) +******************************************************************************** +** New in this release: +** (1) Full HC family of heteroskedastic variance estimators: +** vce(hc0) vce(hc1) vce(hc2) vce(hc3) +** (2) Cluster-robust variance estimators CR1 / CR2 / CR3: +** vce(cr1 clustervar) -- df correction (equivalent to vce(cluster ...)) +** vce(cr2 clustervar) -- Bell-McCaffrey (2002) +** vce(cr3 clustervar) -- Pustejovsky-Tipton (2018), approximately unbiased +** (3) rdrobustplot -- diagnostic plot for a previous rdrobust result +******************************************************************************** +clear all +set more off +set linesize 100 + +******************************************************************************** +** Setup: load the unified senate dataset (bundled with the rdrobust SSC package) +******************************************************************************** +findfile rdrobust_senate.dta +use "`r(fn)'", clear +describe, short + +******************************************************************************** +** (1) HC family of heteroskedastic variance estimators +******************************************************************************** +di _n "======================================================================" +di "(1) Heteroskedastic-robust variance: NN vs HC0-HC3" +di "======================================================================" + +foreach v in nn hc0 hc1 hc2 hc3 { + qui rdrobust vote margin, vce(`v') + display as text "vce=" %-5s "`v'" " tau_cl = " %7.4f e(tau_cl) /// + " se_rb = " %7.4f e(se_tau_rb) /// + " pv_rb = " %6.4f e(pv_rb) +} + +******************************************************************************** +** (2) Cluster-robust variance: CR1 / CR2 / CR3 +******************************************************************************** +di _n "======================================================================" +di "(2) Cluster-robust variance: CR family (clustered on state)" +di "======================================================================" +di _n "vce(cluster state) is an alias for vce(cr1 state). CR2 is Bell-McCaffrey;" +di "CR3 is Pustejovsky-Tipton (approximately unbiased with few clusters)." + +* Note: `state' is a string variable; vce(cluster ...) handles it directly. +foreach v in cr1 cr2 cr3 { + qui rdrobust vote margin, vce(`v' state) + display as text "vce=" %-5s "`v'" " tau_cl = " %7.4f e(tau_cl) /// + " se_rb = " %7.4f e(se_tau_rb) /// + " pv_rb = " %6.4f e(pv_rb) +} + +di _n "Full output for vce(cr3 state):" +rdrobust vote margin, vce(cr3 state) + +******************************************************************************** +** (2b) Auto-validation: cr* without cluster errors cleanly +******************************************************************************** +di _n "Attempt vce(cr3) with no cluster variable (should exit 125):" +cap noi rdrobust vote margin, vce(cr3) +di "rc = " _rc + + +******************************************************************************** +** (3) rdrobustplot -- diagnostic plot +******************************************************************************** +di _n "======================================================================" +di "(3) rdrobustplot: diagnostic plot for a previous rdrobust result" +di "======================================================================" + +qui rdrobust vote margin +rdrobustplot +graph export rdrobustplot_default.png, replace as(png) + +qui rdrobust vote margin, vce(cr3 state) +rdrobustplot, shade +graph export rdrobustplot_cr3_shade.png, replace as(png) + +qui rdrobust vote margin, covs(termshouse termssenate class) +rdrobustplot, nbins(25 25) title("Senate RD with covariates") +graph export rdrobustplot_covs.png, replace as(png) + +di "Saved: rdrobustplot_default.png, rdrobustplot_cr3_shade.png, rdrobustplot_covs.png" + +******************************************************************************** +** (4) Tables with esttab: combining conventional estimates with RBC inference +******************************************************************************** +di _n "======================================================================" +di "(4) Regression tables via estout/esttab" +di "======================================================================" +di _n "rdrobust stores three named coefficients in e(b) / e(V):" +di " _b[Conventional] + _se[Conventional] (p-regression point + SE)" +di " _b[Bias-corrected] + _se[Bias-corrected] (tau_bc with conventional SE;" +di " kept for display parity, not" +di " a recommended inference)" +di " _b[Robust] + _se[Robust] (RBC point + RBC SE)" +di _n "All three rows feed standard Stata tooling (lincom, test, estimates" +di "table, esttab) without extra scaffolding. The Robust row reproduces" +di "the author-recommended RBC inference: lincom _b[Robust] returns the" +di "same CI that rdrobust prints as e(ci_l_rb), e(ci_r_rb) or as the" +di "third row of the 3x2 matrix e(ci)." + +* Install estout from SSC if not already present (one-time per machine). +cap which esttab +if _rc ssc install estout, replace + +* Estimate and store four specifications side-by-side. +qui rdrobust vote margin +estimates store m1 +qui rdrobust vote margin, covs(class termshouse termssenate) +estimates store m2 +qui rdrobust vote margin, vce(cr1 state) +estimates store m3 +qui rdrobust vote margin, vce(cr1 state) covs(class termshouse termssenate) +estimates store m4 + +di _n "-- Table 1: conventional point estimate with RBC confidence interval --" +di " (the canonical CCT presentation: report tau_conv, bracket the" +di " RBC CI; note that the RBC CI is centered on tau_bc, not tau_conv)" + +* Add per-spec scalars/locals used by the LaTeX and console tables below. +* e(ci) is a 3x2 matrix with rows Conventional / Bias-corrected / Robust +* and columns ll / ul; the Robust row is row 3. +foreach m in m1 m2 m3 m4 { + estimates restore `m' + local lo : di %9.3f e(ci)[3,1] + local hi : di %9.3f e(ci)[3,2] + estadd local RBC_CI = "[`=trim("`lo'")', `=trim("`hi'")']" + estadd scalar h_bw = e(h_l) + estadd scalar N_eff = e(N_h_l) + e(N_h_r) + estimates store `m' +} + +esttab m1 m2 m3 m4, /// + cells(b(fmt(%9.3f))) collabels(none) nostar /// + keep(Conventional) /// + scalars(RBC_CI N_eff) /// + mtitles("sharp" "sharp+covs" "cluster" "cluster+covs") /// + title("RD estimates: conventional point with RBC CI") /// + addnotes("Point estimate from p-order local polynomial." /// + "RBC CI from robust bias correction (centered on tau_bc).") + +di _n "-- Table 2: LaTeX export (writes rdrobust_table.tex) --" +esttab m1 m2 m3 m4 using rdrobust_table.tex, replace /// + cells(b(fmt(%9.3f))) nostar collabels(none) /// + keep(Conventional) coeflabels(Conventional "RD Effect") /// + stats(RBC_CI N h_bw N_eff, /// + labels("Robust 95\% CI" "N" "h" "Eff. N") fmt(0 0 3 0)) /// + mtitles("sharp" "sharp+covs" "cluster" "cluster+covs") /// + booktabs nonotes label /// + title("RD estimates for Senate incumbency") +di "Wrote rdrobust_table.tex" + +******************************************************************************** +** Done +******************************************************************************** +di _n "Done." diff --git a/stata/rdrobust_kweight.mo b/stata/rdrobust_kweight.mo index bac8988..3626f3c 100644 Binary files a/stata/rdrobust_kweight.mo and b/stata/rdrobust_kweight.mo differ diff --git a/stata/rdrobust_median.mo b/stata/rdrobust_median.mo index 953e7a0..2ee0924 100644 Binary files a/stata/rdrobust_median.mo and b/stata/rdrobust_median.mo differ diff --git a/stata/rdrobust_res.mo b/stata/rdrobust_res.mo index 2f9578a..3731554 100644 Binary files a/stata/rdrobust_res.mo and b/stata/rdrobust_res.mo differ diff --git a/stata/rdrobust_senate.dta b/stata/rdrobust_senate.dta index a9c8334..532372c 100644 Binary files a/stata/rdrobust_senate.dta and b/stata/rdrobust_senate.dta differ diff --git a/stata/rdrobust_vce.mo b/stata/rdrobust_vce.mo index 93f3258..2a2fd95 100644 Binary files a/stata/rdrobust_vce.mo and b/stata/rdrobust_vce.mo differ diff --git a/stata/rdrobust_vce_qq_cluster.mo b/stata/rdrobust_vce_qq_cluster.mo new file mode 100644 index 0000000..a69894a Binary files /dev/null and b/stata/rdrobust_vce_qq_cluster.mo differ diff --git a/stata/rdrobustplot.ado b/stata/rdrobustplot.ado new file mode 100644 index 0000000..a18b741 --- /dev/null +++ b/stata/rdrobustplot.ado @@ -0,0 +1,96 @@ +******************************************************************************** +* RDROBUST STATA PACKAGE -- rdrobustplot +* Diagnostic plot for a previous rdrobust result. Mirrors the plot.rdrobust() +* S3 method in the R package and plot_rdrobust() in the Python package. +* Authors: Sebastian Calonico, Matias D. Cattaneo, Max H. Farrell, Rocio Titiunik +******************************************************************************** +*!rdrobust Stata package v11.0.0 2026-05-13 + +capture program drop rdrobustplot +program define rdrobustplot, rclass + version 16.0 + syntax [, nbins(string) binselect(string) NOCI scale(string) /// + title(string asis) xlabel(string) ylabel(string) /// + xtitle(string asis) ytitle(string asis) /// + col_dots(string) col_lines(string) shade * ] + + * ----------------------------------------------------------------------- + * Validate: require a previous rdrobust call + * ----------------------------------------------------------------------- + if ("`e(cmd)'" != "rdrobust") { + di as error "{err}{cmd:rdrobustplot} must follow a {cmd:rdrobust} call" + exit 301 + } + + * ----------------------------------------------------------------------- + * Pull relevant quantities from e(). Note: `local x = e(...)` with the `=` + * sign evaluates missing-ereturn-locals to ".", so for strings that might + * be unset (covs, kernel) we use macro expansion instead. + * ----------------------------------------------------------------------- + local y "`e(depvar)'" + local x "`e(runningvar)'" + local covs "`e(covs)'" + local c = e(c) + local p = e(p) + local h_l = e(h_l) + local h_r = e(h_r) + local tau = e(tau_cl) + local se_rb = e(se_tau_rb) + local ci_l = e(ci_l_rb) + local ci_r = e(ci_r_rb) + local level = e(level) + local kernel = lower(substr("`e(kernel)'", 1, 3)) /* "Triangular" -> "tri" */ + + * Guard against missing e() values that would otherwise propagate NaN + * through the subtitle's p-value computation. + if (missing(`tau') | missing(`se_rb') | `se_rb' <= 0) { + di as error "{err}{cmd:rdrobustplot} cannot build subtitle: e(tau_cl) or e(se_tau_rb) is missing or non-positive" + exit 198 + } + + * ----------------------------------------------------------------------- + * Defaults + * ----------------------------------------------------------------------- + if ("`nbins'" == "") local nbins = "20 20" + if ("`binselect'"=="") local binselect = "esmv" + if ("`scale'" == "") local scale = "" + + * Build effect annotation (shown as subtitle above the plot) + local pv = 2*normal(-abs(`tau'/`se_rb')) + local stars = "" + if (`pv' < 0.10) local stars = "*" + if (`pv' < 0.05) local stars = "**" + if (`pv' < 0.01) local stars = "***" + + local ci_lo_s = strofreal(`ci_l', "%7.3f") + local ci_hi_s = strofreal(`ci_r', "%7.3f") + local tau_s = strofreal(`tau', "%7.3f") + local lev_s = strofreal(`level',"%2.0f") + + local subtitle = "RD = `tau_s'`stars' (`lev_s'% RBC CI: [`ci_lo_s', `ci_hi_s'])" + + if (`"`title'"' == "") local title `"RD plot: `y' vs `x'"' + + * ----------------------------------------------------------------------- + * Delegate to rdplot using the SAME bandwidth the rdrobust call used + * ----------------------------------------------------------------------- + local ci_flag = cond("`noci'"!="", "", "ci(`level')") + local shade_flag = cond("`shade'"!="", "shade", "") + local covs_flag = cond("`covs'"!="", "covs(`covs')", "") + + rdplot `y' `x' , c(`c') h(`h_l' `h_r') p(`p') /// + nbins(`nbins') binselect(`binselect') kernel(`kernel') /// + `covs_flag' `ci_flag' `shade_flag' /// + graph_options(title(`"`title'"') subtitle(`"`subtitle'"')) /// + `options' + + * ----------------------------------------------------------------------- + * Return the annotation bits so downstream scripts can reuse + * ----------------------------------------------------------------------- + return local subtitle = `"`subtitle'"' + return scalar tau = `tau' + return scalar se_rb = `se_rb' + return scalar ci_l = `ci_l' + return scalar ci_r = `ci_r' + return scalar pvalue = `pv' +end diff --git a/stata/rdrobustplot.pdf b/stata/rdrobustplot.pdf new file mode 100644 index 0000000..19a0c03 Binary files /dev/null and b/stata/rdrobustplot.pdf differ diff --git a/stata/rdrobustplot.sthlp b/stata/rdrobustplot.sthlp new file mode 100644 index 0000000..436796a --- /dev/null +++ b/stata/rdrobustplot.sthlp @@ -0,0 +1,82 @@ +{smcl} +{* *!version 11.0.0 2026-05-13}{...} +{viewerjumpto "Syntax" "rdrobustplot##syntax"}{...} +{viewerjumpto "Description" "rdrobustplot##description"}{...} +{viewerjumpto "Options" "rdrobustplot##options"}{...} +{viewerjumpto "Examples" "rdrobustplot##examples"}{...} + +{title:Title} + +{p 4 8}{cmd:rdrobustplot} {hline 2} Diagnostic plot for a previous {cmd:rdrobust} result.{p_end} + +{marker syntax}{...} +{title:Syntax} + +{p 4 8}{cmd:rdrobustplot} +[{cmd:,} +{cmd:nbins(}{it:# #}{cmd:)} +{cmd:binselect(}{it:binmethod}{cmd:)} +{cmd:noci} +{cmd:shade} +{cmd:scale(}{it:# #}{cmd:)} +{cmd:title(}{it:string}{cmd:)} +{cmd:xtitle(}{it:string}{cmd:)} +{cmd:ytitle(}{it:string}{cmd:)} +{cmd:col_dots(}{it:color}{cmd:)} +{cmd:col_lines(}{it:color}{cmd:)} +{it:graph_options} +]{p_end} + +{marker description}{...} +{title:Description} + +{p 4 8}{cmd:rdrobustplot} produces a diagnostic RD plot for the results of the +most recent {help rdrobust:rdrobust} call. It wraps {help rdplot:rdplot} with the +main bandwidth and polynomial order that {cmd:rdrobust} used, and decorates the +subtitle with the estimated RD coefficient and its robust +bias-corrected confidence interval.{p_end} + +{p 4 8}This mirrors the {cmd:plot.rdrobust()} S3 method in the R package and the +{cmd:plot_rdrobust()} function in the Python package.{p_end} + +{p 4 8}{it:Requires Stata 16 or later.}{p_end} + +{marker options}{...} +{title:Options} + +{p 4 8}{cmd:nbins(}{it:# #}{cmd:)} number of bins per side; default is {cmd:20 20}.{p_end} + +{p 4 8}{cmd:binselect(}{it:binmethod}{cmd:)} bin selection rule; default is +{cmd:esmv} (mimicking-variance evenly-spaced bins). See {help rdplot}.{p_end} + +{p 4 8}{cmd:noci} suppresses the per-bin confidence intervals.{p_end} + +{p 4 8}{cmd:shade} draws the pointwise confidence bands as a shaded ribbon +instead of error bars.{p_end} + +{p 4 8}{cmd:title()}, {cmd:xtitle()}, {cmd:ytitle()} custom text for the plot +elements (defaults follow {cmd:rdrobust}'s outcome / running variable).{p_end} + +{p 4 8}{cmd:col_dots()}, {cmd:col_lines()} colors for the binned means and the +polynomial fit line.{p_end} + +{p 4 8}Any other option is passed through to {cmd:rdplot}'s +{cmd:graph_options()}.{p_end} + +{marker examples}{...} +{title:Examples} + +{phang2}{stata sysuse rdrobust_senate, clear}{p_end} +{phang2}{stata rdrobust vote margin}{p_end} +{phang2}{stata rdrobustplot}{p_end} + +{pstd}With cluster-robust variance and fuzzy RD:{p_end} +{phang2}{stata rdrobust vote margin, vce(cr3 state)}{p_end} +{phang2}{stata rdrobustplot, shade}{p_end} + +{title:Authors} + +{p 4 8}Sebastian Calonico, University of California, Davis. {browse "mailto:scalonico@ucdavis.edu":scalonico@ucdavis.edu}.{p_end} +{p 4 8}Matias D. Cattaneo, Princeton University. {browse "mailto:matias.d.cattaneo@gmail.com":matias.d.cattaneo@gmail.com}.{p_end} +{p 4 8}Max H. Farrell, University of California, Santa Barbara. {browse "mailto:mhfarrell@gmail.com":mhfarrell@gmail.com}.{p_end} +{p 4 8}Rocio Titiunik, Princeton University. {browse "mailto:rocio.titiunik@gmail.com":rocio.titiunik@gmail.com}.{p_end}