@@ -88,98 +88,6 @@ def semipar_fit(dict_, data):
8888 return rslt
8989
9090
def mte_unobserved(
    X, Y, b0, b1_b0, prop_score, bandwidth, gridsize, startgrid, endgrid
):
    """
    This function computes the unobserved component of the MTE
    in MTE = mte_x + *mte_u*, where *mte_u* depends on the unobserved
    resistance to treatment u_D.

    Parameters
    ----------
    X: pandas.DataFrame
        DataFrame of observables (i.e. covariates).
    Y: pandas.DataFrame
        Individuals' wage data.
    b0: np.ndarray
        Beta0 coefficient in the Double Residual Regression,
        i.e. the no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*.
    b1_b0: np.ndarray
        Difference of the coefficients in the Double Residual Regression,
        i.e. the no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*.
    prop_score: pandas.Series
        Propensity score (range between [0, 1]). Values closer to 1
        denote a higher inclination to treatment.
        Sorted in ascending order.
    bandwidth: float
        Kernel bandwidth for the local polynomial regression.
    gridsize: int
        Number of equally-spaced grid points of u_D over which the
        MTE shall be estimated.
    startgrid: int
        Start point of the grid of unobservable resistance (u_D),
        over which the MTE is evaluated.
    endgrid: int
        End point of the grid of unobservable resistance (u_D),
        over which the MTE is evaluated.

    Returns
    -------
    mte_u: np.ndarray
        Part of the MTE that depends on the unobserved resistance
        to treatment (u_D).
    """
    # 0) Construct Xp := X * P(z)
    Xp = construct_Xp(X, prop_score)

    # Turn the X, Xp, and Y DataFrames as well as the
    # propensity score Series into np.ndarrays.
    X_arr = np.array(X)
    Xp_arr = np.array(Xp)
    Y_arr = np.array(Y).ravel()
    prop_score = np.array(prop_score)

    # Compute the unobserved part of Y, i.e. Y net of the part
    # explained by the observables (X * b0 + Xp * (b1 - b0)).
    Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)

    # Estimate mte_u, the unobserved component of the MTE,
    # through a locally quadratic regression (derivative 1, degree 2).
    mte_u = locpoly(prop_score, Y_tilde, 1, 2, bandwidth, gridsize, startgrid, endgrid)

    return mte_u
155-
156-
def mte_observed(X, b1_b0):
    """
    Compute the observed component of the MTE (*mte_x*),
    the part of the MTE that depends on the observables X:

        mte = *mte_x* + mte_u

    Parameters
    ----------
    X: pandas.DataFrame
        Data of observables (covariates).
    b1_b0: np.ndarray
        Difference of the coefficients in the Double Residual Regression,
        i.e. the no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*.

    Returns
    -------
    mte_x: np.ndarray
        Part of the MTE that depends on observables X.
    """
    # The observed component is simply the linear index X * (b1 - b0).
    return np.dot(X, b1_b0)
181-
182-
18391def process_primary_inputs (dict_ ):
18492 """
18593 This functions processes the parameters specified
@@ -399,7 +307,7 @@ def trim_support(
399307 """
400308 # Find common support
401309 prop_score = data ["prop_score" ]
402- common_support = define_common_support (dict_ , data , bins , show_output )
310+ common_support = _define_common_support (dict_ , data , bins , show_output )
403311
404312 # Trim the data. Recommended.
405313 if trim is True :
@@ -432,7 +340,154 @@ def trim_support(
432340 return X , Y , prop_score
433341
434342
435- def define_common_support (
def double_residual_reg(X, Y, prop_score, rbandwidth=0.05, show_output=False):
    """
    This function performs a Double Residual Regression (DRR)
    of X, Xp, and Y on *prop_score*.

    A local linear kernel regression (polynomial of degree 1)
    is implemented to generate the residuals.

    Parameters
    ----------
    X: pandas.DataFrame
        DataFrame of observables (i.e. covariates).
    Y: pandas.DataFrame
        Individuals' wage data.
    prop_score: pandas.Series
        Propensity score (range between [0, 1]). Values closer to 1
        denote a higher inclination to treatment.
        Sorted in ascending order.
    rbandwidth: float
        Kernel bandwidth for the local linear regressions that
        generate the residuals.
    show_output: bool
        If True, print the summary of the no-intercept OLS regression.

    Returns
    -------
    b0: np.ndarray
        Beta0 coefficient of the DRR (no-intercept OLS regression
        of the residuals of X, Xp, and Y on *prop_score*).
    b1_b0: np.ndarray
        Difference (beta_1 - beta_0) of the coefficients of the DRR
        (no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*).
    """
    # 0) Construct Xp := X * P(z)
    Xp = _construct_Xp(X, prop_score)

    # 1) Fit a separate local linear regression of X, Xp, and Y on prop_score,
    # which yields residuals e_X, e_Xp, and e_Y.
    res_X = _generate_residuals(prop_score, X, rbandwidth)
    res_Xp = _generate_residuals(prop_score, Xp, rbandwidth)
    res_Y = _generate_residuals(prop_score, Y, rbandwidth)

    # Append res_X and res_Xp.
    col_names = list(X) + list(Xp)
    res_X_Xp = pd.DataFrame(np.append(res_X, res_Xp, axis=1), columns=col_names)

    # 2) Run a single OLS regression of e_Y on e_X and e_Xp without intercept:
    # e_Y = e_X * beta_0 + e_Xp * (beta_1 - beta_0),
    # to estimate the values of beta_0 and (beta_1 - beta_0).
    model = sm.OLS(res_Y, res_X_Xp)
    results = model.fit()

    # The first len(X.columns) parameters belong to the e_X block (beta_0),
    # the remainder to the e_Xp block (beta_1 - beta_0).
    num_X = len(X.columns)
    b0 = results.params[:num_X]
    b1_b0 = results.params[num_X:]

    if show_output is True:
        print(results.summary())

    return np.array(b0), np.array(b1_b0)
397+
def mte_observed(X, b1_b0):
    """
    Compute the observed component of the MTE (*mte_x*),
    the part of the MTE that depends on the observables X:

        mte = *mte_x* + mte_u

    Parameters
    ----------
    X: pandas.DataFrame
        Data of observables (covariates).
    b1_b0: np.ndarray
        Difference of the coefficients in the Double Residual Regression,
        i.e. the no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*.

    Returns
    -------
    mte_x: np.ndarray
        Part of the MTE that depends on observables X.
    """
    # The observed component is the linear index X * (b1 - b0).
    mte_x = np.dot(X, b1_b0)
    return mte_x
423+
def mte_unobserved(
    X, Y, b0, b1_b0, prop_score, bandwidth, gridsize, startgrid, endgrid
):
    """
    This function computes the unobserved component of the MTE
    in MTE = mte_x + *mte_u*, where *mte_u* depends on the unobserved
    resistance to treatment u_D.

    Parameters
    ----------
    X: pandas.DataFrame
        DataFrame of observables (i.e. covariates).
    Y: pandas.DataFrame
        Individuals' wage data.
    b0: np.ndarray
        Beta0 coefficient in the Double Residual Regression,
        i.e. the no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*.
    b1_b0: np.ndarray
        Difference of the coefficients in the Double Residual Regression,
        i.e. the no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*.
    prop_score: pandas.Series
        Propensity score (range between [0, 1]). Values closer to 1
        denote a higher inclination to treatment.
        Sorted in ascending order.
    bandwidth: float
        Kernel bandwidth for the local polynomial regression.
    gridsize: int
        Number of equally-spaced grid points of u_D over which the
        MTE shall be estimated.
    startgrid: int
        Start point of the grid of unobservable resistance (u_D),
        over which the MTE is evaluated.
    endgrid: int
        End point of the grid of unobservable resistance (u_D),
        over which the MTE is evaluated.

    Returns
    -------
    mte_u: np.ndarray
        Part of the MTE that depends on the unobserved resistance
        to treatment (u_D).
    """
    # 0) Construct Xp := X * P(z)
    Xp = _construct_Xp(X, prop_score)

    # Turn the X, Xp, and Y DataFrames as well as the
    # propensity score Series into np.ndarrays.
    X_arr = np.array(X)
    Xp_arr = np.array(Xp)
    Y_arr = np.array(Y).ravel()
    prop_score = np.array(prop_score)

    # Compute the unobserved part of Y, i.e. Y net of the part
    # explained by the observables (X * b0 + Xp * (b1 - b0)).
    Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)

    # Estimate mte_u, the unobserved component of the MTE,
    # through a locally quadratic regression (derivative 1, degree 2).
    mte_u = locpoly(prop_score, Y_tilde, 1, 2, bandwidth, gridsize, startgrid, endgrid)

    return mte_u
488+
489+
490+ def _define_common_support (
436491 dict_ ,
437492 data ,
438493 bins = 25 ,
@@ -479,10 +534,10 @@ def define_common_support(
479534 common_support: list
480535 List containing lower and upper bound of the propensity score.
481536 """
482- hist , treated , untreated = make_histogram (
537+ hist , treated , untreated = _make_histogram (
483538 dict_ , data , bins , show_output , figsize , fontsize , plot_title
484539 )
485- lower_limit , upper_limit = find_limits (hist , treated , untreated )
540+ lower_limit , upper_limit = _find_limits (hist , treated , untreated )
486541 common_support = [lower_limit , upper_limit ]
487542
488543 if show_output is True :
@@ -508,7 +563,7 @@ def define_common_support(
508563 return common_support
509564
510565
511- def make_histogram (
566+ def _make_histogram (
512567 dict_ ,
513568 data ,
514569 bins = 25 ,
@@ -590,7 +645,7 @@ def make_histogram(
590645 return hist , treated , untreated
591646
592647
593- def find_limits (hist , treated , untreated ):
648+ def _find_limits (hist , treated , untreated ):
594649 """
595650 Find the upper and lower limit of the common support.
596651
@@ -676,7 +731,7 @@ def find_limits(hist, treated, untreated):
676731 return lower_limit , upper_limit
677732
678733
679- def construct_Xp (X , prop_score ):
734+ def _construct_Xp (X , prop_score ):
680735 """
681736 This function generates the X * *prop_score* regressors:
682737
@@ -708,7 +763,7 @@ def construct_Xp(X, prop_score):
708763 return Xp
709764
710765
711- def generate_residuals (exog , endog , bandwidth = 0.05 ):
766+ def _generate_residuals (exog , endog , bandwidth = 0.05 ):
712767 """
713768 This function runs a series of loess regressions (degree=1)
714769 for a set of response variables (*endog*) on a single explanatory
@@ -771,58 +826,3 @@ def generate_residuals(exog, endog, bandwidth=0.05):
771826 res [:, col ] = y_fit .outputs .fitted_residuals
772827
773828 return res
774-
775-
def double_residual_reg(X, Y, prop_score, rbandwidth=0.05, show_output=False):
    """
    This function performs a Double Residual Regression (DRR)
    of X, Xp, and Y on *prop_score*.

    A local linear kernel regression (polynomial of degree 1)
    is implemented to generate the residuals.

    Parameters
    ----------
    X: pandas.DataFrame
        DataFrame of observables (i.e. covariates).
    Y: pandas.DataFrame
        Individuals' wage data.
    prop_score: pandas.Series
        Propensity score (range between [0, 1]). Values closer to 1
        denote a higher inclination to treatment.
        Sorted in ascending order.
    rbandwidth: float
        Kernel bandwidth for the local linear regressions that
        generate the residuals.
    show_output: bool
        If True, print the summary of the no-intercept OLS regression.

    Returns
    -------
    b0: np.ndarray
        Beta0 coefficient of the DRR (no-intercept OLS regression
        of the residuals of X, Xp, and Y on *prop_score*).
    b1_b0: np.ndarray
        Difference (beta_1 - beta_0) of the coefficients of the DRR
        (no-intercept OLS regression of the residuals of
        X, Xp, and Y on *prop_score*).
    """
    # 0) Construct Xp := X * P(z)
    Xp = construct_Xp(X, prop_score)

    # 1) Fit a separate local linear regression of X, Xp, and Y on prop_score,
    # which yields residuals e_X, e_Xp, and e_Y.
    res_X = generate_residuals(prop_score, X, rbandwidth)
    res_Xp = generate_residuals(prop_score, Xp, rbandwidth)
    res_Y = generate_residuals(prop_score, Y, rbandwidth)

    # Append res_X and res_Xp.
    col_names = list(X) + list(Xp)
    res_X_Xp = pd.DataFrame(np.append(res_X, res_Xp, axis=1), columns=col_names)

    # 2) Run a single OLS regression of e_Y on e_X and e_Xp without intercept:
    # e_Y = e_X * beta_0 + e_Xp * (beta_1 - beta_0),
    # to estimate the values of beta_0 and (beta_1 - beta_0).
    model = sm.OLS(res_Y, res_X_Xp)
    results = model.fit()

    # The first len(X.columns) parameters belong to the e_X block (beta_0),
    # the remainder to the e_Xp block (beta_1 - beta_0).
    num_X = len(X.columns)
    b0 = results.params[:num_X]
    b1_b0 = results.params[num_X:]

    if show_output is True:
        print(results.summary())

    return np.array(b0), np.array(b1_b0)
0 commit comments