In [10]:
# ライブラリをインポート

# データ操作と数値計算のため
import pandas as pd
import numpy as np

# データ可視化のため
import matplotlib.pyplot as plt
import seaborn as sns

# 統計モデリングと計量経済分析のため
import statsmodels.api as sm
from linearmodels.iv import IV2SLS
from statsmodels.iolib.summary2 import summary_col

# for ignore warning
import warnings
warnings.filterwarnings('ignore')

In [31]:
url = "https://www.fbc.keio.ac.jp/~tyabu/keiryo/laborsupply_data.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,morekids,samesex,age,black,hispan,othrace,weeks
0,0,0,27,0,0,0,0
1,0,0,30,0,0,0,30
2,0,0,27,0,0,0,0
3,0,0,35,1,0,0,0
4,0,1,30,0,0,0,22


## 13.6節の推定結果

### OLS

In [32]:
X = sm.add_constant(df['morekids'])
y = df['weeks']
out_1 = sm.OLS(y, X).fit(cov_type='HC1')
print(out_1.summary())

                            OLS Regression Results                            
Dep. Variable:                  weeks   R-squared:                       0.014
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     3821.
Date:                Fri, 30 Aug 2024   Prob (F-statistic):               0.00
Time:                        17:31:18   Log-Likelihood:            -1.1451e+06
No. Observations:              254654   AIC:                         2.290e+06
Df Residuals:                  254652   BIC:                         2.290e+06
Df Model:                           1                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         21.0684      0.056    375.765      0.0

### IV
#### First-stage

In [46]:
X = sm.add_constant(df['samesex'])
y = df['morekids']
out_2_1st = sm.OLS(y, X).fit(cov_type='HC1')
print(out_2_1st.summary())

                            OLS Regression Results                            
Dep. Variable:               morekids   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                     1238.
Date:                Fri, 30 Aug 2024   Prob (F-statistic):          1.39e-270
Time:                        17:35:05   Log-Likelihood:            -1.7673e+05
No. Observations:              254654   AIC:                         3.535e+05
Df Residuals:                  254652   BIC:                         3.535e+05
Df Model:                           1                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.3464      0.001    258.335      0.0

#### Second-stage

In [49]:
x = sm.add_constant(df['morekids'])
y = df['weeks']
w = None
Z = df['samesex']
out_2 = IV2SLS(y, x, w, Z).fit()
print(out_2)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                  weeks   R-squared:                      0.0143
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0143
No. Observations:              254654   F-statistic:                    3820.9
Date:                Fri, Aug 30 2024   P-value (F-stat)                0.0000
Time:                        17:37:29   Distribution:                  chi2(1)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const          21.068     0.0561     375.77     0.0000      20.959      21.178
morekids      -5.3870     0.0871    -61.814     0.00

## 練習問題他のコントロール変数を含めた分析

### OLS

In [48]:
X = sm.add_constant(df[['morekids', 'age', 'black', 'hispan', 'othrace']])
y = df['weeks']
out_3 = sm.OLS(y, X).fit(cov_type='HC1')
print(out_3.summary())

                            OLS Regression Results                            
Dep. Variable:                  weeks   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.044
Method:                 Least Squares   F-statistic:                     2545.
Date:                Fri, 30 Aug 2024   Prob (F-statistic):               0.00
Time:                        17:35:25   Log-Likelihood:            -1.1412e+06
No. Observations:              254654   AIC:                         2.283e+06
Df Residuals:                  254648   BIC:                         2.283e+06
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -4.8345      0.367    -13.161      0.0

### IV
#### First-stage

In [36]:
X = sm.add_constant(df[['samesex', 'age', 'black', 'hispan', 'othrace']])
y = df['morekids']
out_4_1st = sm.OLS(y, X).fit(cov_type='HC1')
print(out_4_1st.summary())

                            OLS Regression Results                            
Dep. Variable:               morekids   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.024
Method:                 Least Squares   F-statistic:                     1304.
Date:                Fri, 30 Aug 2024   Prob (F-statistic):               0.00
Time:                        17:31:39   Log-Likelihood:            -1.7423e+05
No. Observations:              254654   AIC:                         3.485e+05
Df Residuals:                  254648   BIC:                         3.485e+05
Df Model:                           5                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1395      0.008    -16.494      0.0

#### Second-stage

In [41]:
exog = sm.add_constant(df[['age', 'black', 'hispan', 'othrace']])
endog = df['morekids']
y = df['weeks']
instruments = df[['samesex']]
out_4 = IV2SLS(y, exog, endog, instruments).fit(cov_type='robust')
print(out_4.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                  weeks   R-squared:                      0.0437
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0437
No. Observations:              254654   F-statistic:                    6955.0
Date:                Fri, Aug 30 2024   P-value (F-stat)                0.0000
Time:                        17:32:56   Distribution:                  chi2(5)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -4.7919     0.3898    -12.294     0.0000     -5.5559     -4.0279
age            0.8316     0.0226     36.730     0.00