In [1]:
# ライブラリをインポート

# データ操作と数値計算のため
import pandas as pd
import numpy as np

# データ可視化のため
import matplotlib.pyplot as plt
import seaborn as sns

# 統計モデリングと計量経済分析のため
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

# Suppress all warning messages (note: this also hides library diagnostics)
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Location of the CSV file that the rest of the notebook analyses.
url = "https://www.fbc.keio.ac.jp/~tyabu/keiryo/stayhome_data.csv"

# Read the remote CSV into a DataFrame (requires network access).
df = pd.read_csv(filepath_or_buffer=url)

# Show the first five rows as a quick sanity check of the columns.
df.head(5)

Unnamed: 0,date,prefecture,time,stay,infection,rain,emerg_start,emerg_end,close_start,close_end,mobilephones
0,2020/1/6,北海道,1,3.2074,0.0,1.5,0,0,0,0,1561577
1,2020/1/7,北海道,2,0.8432,0.0,1.5,0,0,0,0,1561577
2,2020/1/8,北海道,3,3.4106,0.0,2.0,0,0,0,0,1561577
3,2020/1/9,北海道,4,2.2151,0.0,0.0,0,0,0,0,1561577
4,2020/1/10,北海道,5,-0.5728,0.0,0.0,0,0,0,0,1561577


# 変数の定義

In [3]:
# Rain dummy: 1 when the recorded `rain` value is positive, 0 otherwise.
df['drain'] = (df['rain'] > 0).astype(int)

# Inverse hyperbolic sine of `infection`: asinh(x) = log(x + sqrt(x**2 + 1)).
# Unlike a plain log it is defined at x = 0, and `infection` contains zeros
# (see the df.head() output above).
# np.arcsinh is used instead of spelling the formula out by hand: it is the
# library implementation of the same function and avoids the cancellation /
# overflow problems the manual expression has for large-magnitude inputs.
df['linf'] = np.arcsinh(df['infection'])

# Policy indicators built as (start flag - end flag).
# NOTE(review): presumably each *_start / *_end column is a step indicator, so
# the difference is 1 only while the measure is in force -- confirm against
# the data description.
df['emerg'] = df['emerg_start'] - df['emerg_end']
df['close'] = df['close_start'] - df['close_end']

# 11.4.2節の推定結果

In [6]:
# Regression of `stay` on the two policy variables plus controls.
# C(prefecture) and C(date) enter as categorical terms, i.e. one set of
# dummies per prefecture and one per date.
formula = 'stay ~ close + emerg + linf + drain + C(prefecture) + C(date)'

# Weighted least squares, with each observation weighted by `mobilephones`.
model = sm.WLS.from_formula(
    formula=formula,
    data=df,
    weights=df['mobilephones'],
)

# Cluster-robust covariance, clustering on prefecture.
# NOTE(review): the recorded summary reports a negative F-statistic with
# Prob (F-statistic) = 1.00. Presumably the clustered covariance matrix is
# rank-deficient because there are fewer prefecture clusters than estimated
# parameters (Df Model = 224), which would make the joint F-test
# uninformative -- confirm before citing it.
results = model.fit(
    cov_type='cluster',
    cov_kwds=dict(groups=df['prefecture']),
)

print(results.summary())

                            WLS Regression Results                            
Dep. Variable:                   stay   R-squared:                       0.945
Model:                            WLS   Adj. R-squared:                  0.944
Method:                 Least Squares   F-statistic:                -1.424e+12
Date:                Fri, 30 Aug 2024   Prob (F-statistic):               1.00
Time:                        15:54:32   Log-Likelihood:                -24501.
No. Observations:                8225   AIC:                         4.945e+04
Df Residuals:                    8000   BIC:                         5.103e+04
Df Model:                         224                                         
Covariance Type:              cluster                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
Intercept               -3.4533 