In [1]:
# Import libraries

# Data manipulation and numerical computation
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Statistical modeling and econometric analysis
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

# Suppress warnings: no category or module filter is passed, so every
# warning raised after this point is silenced, not just a specific one.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Remote CSV holding one row per country and year, with the columns
# country_name, year, unemployment and suicide.
url = "https://www.fbc.keio.ac.jp/~tyabu/keiryo/suicide_data.csv"

# Read the CSV straight from the URL into the DataFrame used by every
# regression below.
# NOTE(review): the first CSV column has no header and loads as a regular
# column named 'Unnamed: 0'; it looks like a saved row index, so
# index_col=0 may be what is wanted here -- confirm before changing.
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,country_name,year,unemployment,suicide
0,Albania,2000,16.8,4.9
1,Australia,2000,6.28,12.7
2,Austria,2000,4.69,19.9
3,Azerbaijan,2000,11.78,3.1
4,Barbados,2000,9.35,2.6


# 11.3.1節の推定

In [11]:
# Baseline specification: suicide regressed on unemployment only, with no
# country or year terms. The covariance matrix is cluster-robust, using
# country_name as the cluster identifier.
model_1 = sm.OLS.from_formula(formula='suicide ~ unemployment', data=df)
results_1 = model_1.fit(
    cov_type='cluster',
    cov_kwds=dict(groups=df['country_name']),
)
print(results_1.summary())

                            OLS Regression Results                            
Dep. Variable:                suicide   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                    0.8968
Date:                Fri, 30 Aug 2024   Prob (F-statistic):              0.347
Time:                        15:45:23   Log-Likelihood:                -4594.7
No. Observations:                1280   AIC:                             9193.
Df Residuals:                    1278   BIC:                             9204.
Df Model:                           1                                         
Covariance Type:              cluster                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept       11.9896      1.548      7.747   

In [7]:
# Country fixed effects via dummy variables: C(country_name) adds a
# categorical term for the country on top of unemployment. The covariance
# matrix is cluster-robust, using country_name as the cluster identifier.
model_2 = sm.OLS.from_formula(
    formula='suicide ~ unemployment + C(country_name)',
    data=df,
)
results_2 = model_2.fit(
    cov_type='cluster',
    cov_kwds=dict(groups=df['country_name']),
)
print(results_2.summary())

                            OLS Regression Results                            
Dep. Variable:                suicide   R-squared:                       0.916
Model:                            OLS   Adj. R-squared:                  0.911
Method:                 Least Squares   F-statistic:                     180.4
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           3.73e-20
Time:                        15:41:46   Log-Likelihood:                -3016.8
No. Observations:                1280   AIC:                             6164.
Df Residuals:                    1215   BIC:                             6499.
Df Model:                          64                                         
Covariance Type:              cluster                                         
                                            coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------

# 11.4.2節の推定

In [8]:
# Unemployment plus a categorical country term, with a cluster-robust
# covariance matrix clustered on country_name.
# NOTE(review): the formula and covariance options here are identical to
# model_2, so this cell reproduces the same estimates -- confirm whether
# the section 11.4.2 estimation was meant to use a different specification
# or covariance type.
model_3 = sm.OLS.from_formula(
    formula='suicide ~ unemployment + C(country_name)',
    data=df,
)
results_3 = model_3.fit(
    cov_type='cluster',
    cov_kwds=dict(groups=df['country_name']),
)
print(results_3.summary())

                            OLS Regression Results                            
Dep. Variable:                suicide   R-squared:                       0.916
Model:                            OLS   Adj. R-squared:                  0.911
Method:                 Least Squares   F-statistic:                     180.4
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           3.73e-20
Time:                        15:42:38   Log-Likelihood:                -3016.8
No. Observations:                1280   AIC:                             6164.
Df Residuals:                    1215   BIC:                             6499.
Df Model:                          64                                         
Covariance Type:              cluster                                         
                                            coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------

# 個別効果と時間効果を考慮した固定効果モデル

In [10]:
# Two-way fixed effects via dummy variables: categorical terms for both
# country_name (individual effects) and year (time effects) are added to
# unemployment. The covariance matrix is cluster-robust, clustered on
# country_name.
model_4 = sm.OLS.from_formula(
    formula='suicide ~ unemployment + C(country_name) + C(year)',
    data=df,
)
results_4 = model_4.fit(
    cov_type='cluster',
    cov_kwds=dict(groups=df['country_name']),
)
print(results_4.summary())

                            OLS Regression Results                            
Dep. Variable:                suicide   R-squared:                       0.927
Model:                            OLS   Adj. R-squared:                  0.922
Method:                 Least Squares   F-statistic:                     25.91
Date:                Fri, 30 Aug 2024   Prob (F-statistic):           4.56e-23
Time:                        15:43:14   Log-Likelihood:                -2927.3
No. Observations:                1280   AIC:                             6023.
Df Residuals:                    1196   BIC:                             6456.
Df Model:                          83                                         
Covariance Type:              cluster                                         
                                            coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------