In [1]:
# ライブラリをインポート

# データ操作と数値計算のため
import pandas as pd
import numpy as np

# データ可視化のため
import matplotlib.pyplot as plt
import seaborn as sns

# 統計モデリングと計量経済分析のため
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

# Suppress all warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Remote CSV file that holds the data set analysed in this notebook.
url = "https://www.fbc.keio.ac.jp/~tyabu/keiryo/black_data.csv"

# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

Unnamed: 0,name,call,black,experience,female
0,Allison,0,0,6,1
1,Kristen,0,0,6,1
2,Lakisha,0,1,6,1
3,Latonya,0,1,6,1
4,Carrie,0,0,22,1


# 9.3節の推定結果

In [7]:
# Simple regression of `call` on `black` with an intercept,
# using statsmodels' default (non-robust) covariance estimator.
endog = df['call']
exog = sm.add_constant(df['black'])  # prepend the intercept column

model_1 = sm.OLS(endog=endog, exog=exog)
results_1 = model_1.fit()

print(results_1.summary())

                            OLS Regression Results                            
Dep. Variable:                   call   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     16.93
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           3.94e-05
Time:                        15:24:53   Log-Likelihood:                -562.24
No. Observations:                4870   AIC:                             1128.
Df Residuals:                    4868   BIC:                             1141.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0965      0.006     17.532      0.0

In [9]:
# Same specification as the simple `call` ~ `black` regression, but the
# standard errors are computed with the HC1 heteroskedasticity-robust
# covariance estimator.
endog = df['call']
exog = sm.add_constant(df['black'])  # prepend the intercept column

model_1_robust = sm.OLS(endog=endog, exog=exog)
results_1_robust = model_1_robust.fit(cov_type="HC1")

print(results_1_robust.summary())

                            OLS Regression Results                            
Dep. Variable:                   call   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     16.93
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           3.94e-05
Time:                        15:25:47   Log-Likelihood:                -562.24
No. Observations:                4870   AIC:                             1128.
Df Residuals:                    4868   BIC:                             1141.
Df Model:                           1                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0965      0.006     16.124      0.0

In [10]:
# Multiple regression of `call` on `black` and `experience` with an
# intercept, using the default (non-robust) covariance estimator.
endog = df['call']
exog = sm.add_constant(df[['black', 'experience']])  # prepend the intercept

model_2 = sm.OLS(endog=endog, exog=exog)
results_2 = model_2.fit()

print(results_2.summary())

                            OLS Regression Results                            
Dep. Variable:                   call   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     17.70
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           2.19e-08
Time:                        15:26:26   Log-Likelihood:                -553.05
No. Observations:                4870   AIC:                             1112.
Df Residuals:                    4867   BIC:                             1132.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0705      0.008      8.630      0.0

In [11]:
# `call` regressed on `black` and `experience` with an intercept; standard
# errors come from the HC1 heteroskedasticity-robust covariance estimator.
endog = df['call']
exog = sm.add_constant(df[['black', 'experience']])  # prepend the intercept

model_2_robust = sm.OLS(endog=endog, exog=exog)
results_2_robust = model_2_robust.fit(cov_type="HC1")

print(results_2_robust.summary())

                            OLS Regression Results                            
Dep. Variable:                   call   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     15.87
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           1.35e-07
Time:                        15:27:32   Log-Likelihood:                -553.05
No. Observations:                4870   AIC:                             1112.
Df Residuals:                    4867   BIC:                             1132.
Df Model:                           2                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0705      0.008      8.341      0.0

# 9.4.2節の推定結果

In [12]:
# First-stage OLS of `call` on `black` and `experience` (with intercept and
# HC1 robust standard errors). Its fitted values are reused by the next
# cell to build the weights for the transformed regression.
endog = df['call']
exog = sm.add_constant(df[['black', 'experience']])  # prepend the intercept

model_3 = sm.OLS(endog=endog, exog=exog)
result_3 = model_3.fit(cov_type="HC1")

print(result_3.summary())

                            OLS Regression Results                            
Dep. Variable:                   call   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                     15.87
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           1.35e-07
Time:                        15:28:07   Log-Likelihood:                -553.05
No. Observations:                4870   AIC:                             1112.
Df Residuals:                    4867   BIC:                             1132.
Df Model:                           2                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0705      0.008      8.341      0.0

In [13]:
# Weighted (feasible GLS) re-estimation of the linear probability model.
#
# Step 1: take the first-stage OLS fitted values as the estimated
# probability p of `call` == 1 for each observation.
df['p'] = result_3.fittedvalues

# The weight h = sqrt(p * (1 - p)) is only defined when every fitted value
# lies strictly between 0 and 1. A linear probability model does not
# guarantee that, and a single violation would put 0/NaN into h and
# inf/NaN into every transformed variable below. Fail early with a clear
# message instead (the comparison is written so that NaN is also caught).
if not ((df['p'] > 0) & (df['p'] < 1)).all():
    raise ValueError(
        "Fitted probabilities from result_3 must lie strictly inside (0, 1) "
        "to build the weights h = sqrt(p * (1 - p))."
    )

# Step 2: estimated standard deviation of the error term, sqrt(p * (1 - p)).
df['h'] = ((1 - df['p']) * df['p'])**0.5

# Step 3: divide the dependent variable, the regressors and the intercept
# (a column of ones, hence 1 / h) by h.
df['x0'] = 1 / df['h']
df['call_h'] = df['call'] / df['h']
df['black_h'] = df['black'] / df['h']
df['experience_h'] = df['experience'] / df['h']

# Step 4: OLS on the transformed data. No constant is added here because
# the transformed intercept `x0` already plays that role.
endog = df['call_h']
exog = df[['x0', 'black_h', 'experience_h']]

model_4 = sm.OLS(endog, exog)
result_4 = model_4.fit()

print(result_4.summary())

                                 OLS Regression Results                                
Dep. Variable:                 call_h   R-squared (uncentered):                   0.081
Model:                            OLS   Adj. R-squared (uncentered):              0.080
Method:                 Least Squares   F-statistic:                              143.1
Date:                Fri, 30 Aug 2024   Prob (F-statistic):                    7.53e-89
Time:                        15:30:52   Log-Likelihood:                         -6911.4
No. Observations:                4870   AIC:                                  1.383e+04
Df Residuals:                    4867   BIC:                                  1.385e+04
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                   coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------