This repository was archived by the owner on Oct 31, 2024. It is now read-only.

Commit 138c230

committed
Added _ (underscore) to internal functions in estimate_semipar.py
1 parent 4af2ef9 commit 138c230

2 files changed

Lines changed: 162 additions & 162 deletions

File tree

grmpy/estimate/estimate_semipar.py

Lines changed: 155 additions & 155 deletions
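
The substance of the commit is the naming convention: module-level helpers gain a leading underscore to mark them as internal, while the entry points keep their public names. A minimal sketch of what the convention buys in Python (hypothetical module, not grmpy code):

    # naming_demo.py -- hypothetical module, only to illustrate the convention
    # applied in this commit.

    def public_api():
        """Part of the module's public surface."""
        return _helper() + 1

    def _helper():
        """The leading underscore marks this function as internal."""
        return 41

    # In another module:
    #   from naming_demo import *   # binds public_api only: wildcard imports
    #                               # skip "_"-prefixed names when no __all__ is set
    #   import naming_demo
    #   naming_demo._helper()       # still callable; the underscore is a convention
    #                               # and a linter signal, not an access modifier

In estimate_semipar.py this makes _define_common_support, _make_histogram, _find_limits, _construct_Xp, and _generate_residuals internal, while semipar_fit, trim_support, double_residual_reg, mte_observed, and mte_unobserved stay public; the public functions are also regrouped after trim_support, which accounts for most of the changed lines.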
@@ -88,98 +88,6 @@ def semipar_fit(dict_, data):
     return rslt


-def mte_unobserved(
-    X, Y, b0, b1_b0, prop_score, bandwidth, gridsize, startgrid, endgrid
-):
-    """
-    This function computes the unobserved component of the MTE
-    in MTE = mte_x + *mte_u*, where *mte_u* depends on the unobserved
-    resistance to treatment u_D.
-
-    Parameters
-    ----------
-    X: pandas.DataFrame
-        DataFrame of observables (i.e. covariates).
-    Xp: pandas.DataFrame
-        X data multiplied by *prop_score*: X * P(z).
-    Y: pandas.DataFrame
-        Individuals' wage data.
-    b0: np.ndarray
-        Beta0 coefficient in the Double Residual Regression,
-        i.e. the no-intercept OLS regression of the residuals of
-        X, Xp, and Y on *prop_score*.
-    b1_b0: np.ndarray
-        Difference of the coefficients in the Double Residual Regression,
-        i.e. the no-intercept OLS regression of the residuals of
-        X, Xp, and Y on *prop_score*.
-    prop_score: pandas.Series
-        Propensity score (range between [0, 1]). Values closer to 1
-        denote a higher inclination to treatment.
-        Sorted in ascending order.
-    bandwidth: float
-        Kernel bandwidth for the local polynomial regression.
-    gridsize: int
-        Number of equally-spaced grid points of u_D over which the
-        MTE shall be estimated.
-    startgrid: int
-        Start point of the grid of unobservable resistance (u_D),
-        over which the MTE is evaluated.
-    endgrid: int
-        End point of the grid of unobservable resistance (u_D),
-        over which the MTE is evaluated.
-
-    Returns
-    -------
-    mte_u: np.ndarray
-        Part of the MTE that depends on the unobserved resistance
-        to treatment (u_D).
-    """
-    # 0) Construct Xp := X * P(z)
-    Xp = construct_Xp(X, prop_score)
-
-    # Turn the X, Xp, and Y DataFrames as well as the
-    # propensity score Series into np.ndarrays
-    X_arr = np.array(X)
-    Xp_arr = np.array(Xp)
-    Y_arr = np.array(Y).ravel()
-    prop_score = np.array(prop_score)
-
-    # Compute the unobserved part of Y
-    Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)
-
-    # Estimate mte_u, the unobserved component of the MTE,
-    # through a locally quadratic regression
-    mte_u = locpoly(prop_score, Y_tilde, 1, 2, bandwidth, gridsize, startgrid, endgrid)
-
-    return mte_u
-
-
-def mte_observed(X, b1_b0):
-    """
-    This function computes the observed component of the MTE (*mte_x*)
-    that depends on observables X:
-
-    mte = *mte_x* + mte_u
-
-    Parameters
-    ----------
-    X: pandas.DataFrame
-        Data of observables (covariates).
-    b1_b0: np.ndarray
-        Difference of the coefficients in the Double Residual Regression,
-        i.e. the no-intercept OLS regression of the residuals of
-        X, Xp, and Y on *prop_score*.
-
-    Returns
-    -------
-    mte_x: np.ndarray
-        Part of the MTE that depends on observables X.
-    """
-    mte_x = np.dot(X, b1_b0)
-
-    return mte_x
-
-
 def process_primary_inputs(dict_):
     """
     This function processes the parameters specified
@@ -399,7 +307,7 @@ def trim_support(
     """
     # Find common support
     prop_score = data["prop_score"]
-    common_support = define_common_support(dict_, data, bins, show_output)
+    common_support = _define_common_support(dict_, data, bins, show_output)

     # Trim the data. Recommended.
     if trim is True:
@@ -432,7 +340,154 @@ def trim_support(
     return X, Y, prop_score


-def define_common_support(
+def double_residual_reg(X, Y, prop_score, rbandwidth=0.05, show_output=False):
+    """
+    This function performs a Double Residual Regression (DRR)
+    of X, Xp, and Y on *prop_score*.
+
+    A local linear kernel regression (polynomial of degree 1)
+    is implemented to generate the residuals.
+
+    Parameters
+    ----------
+    X: pandas.DataFrame
+        DataFrame of observables (i.e. covariates).
+    Y: pandas.DataFrame
+        Individuals' wage data.
+    prop_score: pandas.Series
+        Propensity score (range between [0, 1]). Values closer to 1
+        denote a higher inclination to treatment.
+        Sorted in ascending order.
+
+    Returns
+    -------
+    b0: np.ndarray
+        Beta0 coefficient of the DRR (no-intercept OLS regression
+        of the residuals of X, Xp, and Y on *prop_score*).
+    b1: np.ndarray
+        Beta1 coefficient of the DRR (no-intercept OLS regression
+        of the residuals of X, Xp, and Y on *prop_score*).
+    """
+    # 0) Construct Xp := X * P(z)
+    Xp = _construct_Xp(X, prop_score)
+
+    # 1) Fit a separate local linear regression of X, Xp, and Y on prop_score,
+    # which yields residuals e_X, e_Xp, and e_Y.
+    res_X = _generate_residuals(prop_score, X, rbandwidth)
+    res_Xp = _generate_residuals(prop_score, Xp, rbandwidth)
+    res_Y = _generate_residuals(prop_score, Y, rbandwidth)
+
+    # Append res_X and res_Xp.
+    col_names = list(X) + list(Xp)
+    res_X_Xp = pd.DataFrame(np.append(res_X, res_Xp, axis=1), columns=col_names)
+
+    # 2) Run a single OLS regression of e_Y on e_X and e_Xp without intercept:
+    # e_Y = e_X * beta_0 + e_Xp * (beta_1 - beta_0),
+    # to estimate the values of beta_0 and (beta_1 - beta_0).
+    model = sm.OLS(res_Y, res_X_Xp)
+    results = model.fit()
+    b0 = results.params[: len(list(X))]
+    b1_b0 = results.params[len(list(X)) :]
+
+    if show_output is True:
+        print(results.summary())
+
+    return np.array(b0), np.array(b1_b0)
+
+
+def mte_observed(X, b1_b0):
+    """
+    This function computes the observed component of the MTE (*mte_x*)
+    that depends on observables X:
+
+    mte = *mte_x* + mte_u
+
+    Parameters
+    ----------
+    X: pandas.DataFrame
+        Data of observables (covariates).
+    b1_b0: np.ndarray
+        Difference of the coefficients in the Double Residual Regression,
+        i.e. the no-intercept OLS regression of the residuals of
+        X, Xp, and Y on *prop_score*.
+
+    Returns
+    -------
+    mte_x: np.ndarray
+        Part of the MTE that depends on observables X.
+    """
+    mte_x = np.dot(X, b1_b0)
+
+    return mte_x
+
+
+def mte_unobserved(
+    X, Y, b0, b1_b0, prop_score, bandwidth, gridsize, startgrid, endgrid
+):
+    """
+    This function computes the unobserved component of the MTE
+    in MTE = mte_x + *mte_u*, where *mte_u* depends on the unobserved
+    resistance to treatment u_D.
+
+    Parameters
+    ----------
+    X: pandas.DataFrame
+        DataFrame of observables (i.e. covariates).
+    Xp: pandas.DataFrame
+        X data multiplied by *prop_score*: X * P(z).
+    Y: pandas.DataFrame
+        Individuals' wage data.
+    b0: np.ndarray
+        Beta0 coefficient in the Double Residual Regression,
+        i.e. the no-intercept OLS regression of the residuals of
+        X, Xp, and Y on *prop_score*.
+    b1_b0: np.ndarray
+        Difference of the coefficients in the Double Residual Regression,
+        i.e. the no-intercept OLS regression of the residuals of
+        X, Xp, and Y on *prop_score*.
+    prop_score: pandas.Series
+        Propensity score (range between [0, 1]). Values closer to 1
+        denote a higher inclination to treatment.
+        Sorted in ascending order.
+    bandwidth: float
+        Kernel bandwidth for the local polynomial regression.
+    gridsize: int
+        Number of equally-spaced grid points of u_D over which the
+        MTE shall be estimated.
+    startgrid: int
+        Start point of the grid of unobservable resistance (u_D),
+        over which the MTE is evaluated.
+    endgrid: int
+        End point of the grid of unobservable resistance (u_D),
+        over which the MTE is evaluated.
+
+    Returns
+    -------
+    mte_u: np.ndarray
+        Part of the MTE that depends on the unobserved resistance
+        to treatment (u_D).
+    """
+    # 0) Construct Xp := X * P(z)
+    Xp = _construct_Xp(X, prop_score)
+
+    # Turn the X, Xp, and Y DataFrames as well as the
+    # propensity score Series into np.ndarrays
+    X_arr = np.array(X)
+    Xp_arr = np.array(Xp)
+    Y_arr = np.array(Y).ravel()
+    prop_score = np.array(prop_score)
+
+    # Compute the unobserved part of Y
+    Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)
+
+    # Estimate mte_u, the unobserved component of the MTE,
+    # through a locally quadratic regression
+    mte_u = locpoly(prop_score, Y_tilde, 1, 2, bandwidth, gridsize, startgrid, endgrid)
+
+    return mte_u
+
+
+def _define_common_support(
     dict_,
     data,
     bins=25,
@@ -479,10 +534,10 @@ def define_common_support(
     common_support: list
        List containing lower and upper bound of the propensity score.
     """
-    hist, treated, untreated = make_histogram(
+    hist, treated, untreated = _make_histogram(
         dict_, data, bins, show_output, figsize, fontsize, plot_title
     )
-    lower_limit, upper_limit = find_limits(hist, treated, untreated)
+    lower_limit, upper_limit = _find_limits(hist, treated, untreated)
     common_support = [lower_limit, upper_limit]

     if show_output is True:
@@ -508,7 +563,7 @@ def define_common_support(
     return common_support


-def make_histogram(
+def _make_histogram(
     dict_,
     data,
     bins=25,
@@ -590,7 +645,7 @@ def make_histogram(
     return hist, treated, untreated


-def find_limits(hist, treated, untreated):
+def _find_limits(hist, treated, untreated):
     """
     Find the upper and lower limit of the common support.

@@ -676,7 +731,7 @@ def find_limits(hist, treated, untreated):
     return lower_limit, upper_limit


-def construct_Xp(X, prop_score):
+def _construct_Xp(X, prop_score):
     """
     This function generates the X * *prop_score* regressors:

@@ -708,7 +763,7 @@ def construct_Xp(X, prop_score):
     return Xp


-def generate_residuals(exog, endog, bandwidth=0.05):
+def _generate_residuals(exog, endog, bandwidth=0.05):
     """
     This function runs a series of loess regressions (degree=1)
     for a set of response variables (*endog*) on a single explanatory
@@ -771,58 +826,3 @@ def generate_residuals(exog, endog, bandwidth=0.05):
        res[:, col] = y_fit.outputs.fitted_residuals

    return res
-
-
-def double_residual_reg(X, Y, prop_score, rbandwidth=0.05, show_output=False):
-    """
-    This function performs a Double Residual Regression (DRR)
-    of X, Xp, and Y on *prop_score*.
-
-    A local linear kernel regression (polynomial of degree 1)
-    is implemented to generate the residuals.
-
-    Parameters
-    ----------
-    X: pandas.DataFrame
-        DataFrame of observables (i.e. covariates).
-    Y: pandas.DataFrame
-        Individuals' wage data.
-    prop_score: pandas.Series
-        Propensity score (range between [0, 1]). Values closer to 1
-        denote a higher inclination to treatment.
-        Sorted in ascending order.
-
-    Returns
-    -------
-    b0: np.ndarray
-        Beta0 coefficient of the DRR (no-intercept OLS regression
-        of the residuals of X, Xp, and Y on *prop_score*).
-    b1: np.ndarray
-        Beta1 coefficient of the DRR (no-intercept OLS regression
-        of the residuals of X, Xp, and Y on *prop_score*).
-    """
-    # 0) Construct Xp := X * P(z)
-    Xp = construct_Xp(X, prop_score)
-
-    # 1) Fit a separate local linear regression of X, Xp, and Y on prop_score,
-    # which yields residuals e_X, e_Xp, and e_Y.
-    res_X = generate_residuals(prop_score, X, rbandwidth)
-    res_Xp = generate_residuals(prop_score, Xp, rbandwidth)
-    res_Y = generate_residuals(prop_score, Y, rbandwidth)
-
-    # Append res_X and res_Xp.
-    col_names = list(X) + list(Xp)
-    res_X_Xp = pd.DataFrame(np.append(res_X, res_Xp, axis=1), columns=col_names)
-
-    # 2) Run a single OLS regression of e_Y on e_X and e_Xp without intercept:
-    # e_Y = e_X * beta_0 + e_Xp * (beta_1 - beta_0),
-    # to estimate the values of beta_0 and (beta_1 - beta_0).
-    model = sm.OLS(res_Y, res_X_Xp)
-    results = model.fit()
-    b0 = results.params[: len(list(X))]
-    b1_b0 = results.params[len(list(X)) :]
-
-    if show_output is True:
-        print(results.summary())
-
-    return np.array(b0), np.array(b1_b0)
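
Taken together, the moved functions implement the two-stage estimator their docstrings describe. As a compact restatement (a summary of the in-code comments, not a new derivation; reading locpoly's positional arguments 1, 2 as derivative and degree is an assumption about the smoother's signature):

    % Stages 1-2 (double residual regression): residualize X, Xp := X * P(z),
    % and Y on the propensity score, then regress the residuals on each other
    % without an intercept.
    e_Y = e_X \beta_0 + e_{X_p} (\beta_1 - \beta_0) + \varepsilon

    % MTE decomposition used by mte_observed / mte_unobserved:
    \mathrm{MTE}(x, u_D)
      = \underbrace{x (\beta_1 - \beta_0)}_{\texttt{mte\_x}}
      + \underbrace{\left. \frac{\partial\, \mathbb{E}[\tilde{Y} \mid P(z) = p]}{\partial p} \right|_{p = u_D}}_{\texttt{mte\_u}},
    \qquad
    \tilde{Y} = Y - X \beta_0 - X_p (\beta_1 - \beta_0)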

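To see the double residual regression run end to end without grmpy's internals, here is a self-contained toy version on synthetic data. It is a sketch, not the package's implementation: lowess stands in for grmpy's local linear smoother (so the bandwidth semantics differ), and residuals_on is a made-up helper:

    import numpy as np
    import statsmodels.api as sm
    from statsmodels.nonparametric.smoothers_lowess import lowess

    rng = np.random.default_rng(0)
    n = 500
    p = rng.uniform(0.05, 0.95, n)   # stand-in for the propensity score P(z)
    x = rng.normal(size=n)           # a single observable covariate
    xp = x * p                       # Xp := X * P(z)
    # Partially linear model with beta_0 = 1.0 and beta_1 - beta_0 = 0.5,
    # plus a nonlinear term in p that the residualization removes.
    y = 1.0 * x + 0.5 * xp + np.sin(3 * p) + rng.normal(scale=0.1, size=n)

    def residuals_on(score, v):
        """Residuals of v after a nonparametric regression on score."""
        fitted = lowess(v, score, frac=0.3, return_sorted=False)
        return v - fitted

    e_x, e_xp, e_y = (residuals_on(p, v) for v in (x, xp, y))

    # No-intercept OLS of e_Y on (e_X, e_Xp); the two coefficients are
    # beta_0 and (beta_1 - beta_0), as in double_residual_reg.
    res = sm.OLS(e_y, np.column_stack([e_x, e_xp])).fit()
    b0, b1_b0 = res.params
    print(f"beta_0 ~ {b0:.3f}, beta_1 - beta_0 ~ {b1_b0:.3f}")  # expect ~1.0, ~0.5
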
0 commit comments