python statsmodelsでロジスティック回帰した
参考文献: - 手を動かしながら学ぶ ビジネスに活かすデータマイニング
動機
注意点
コーディング
# Fit a logistic regression (binomial GLM with a logit link) of `cvr` on
# d11, d12 and d13 using the statsmodels formula API, print the fit summary,
# and plot the fitted probabilities against the observed training labels.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import statsmodels.formula.api as smf

# NOTE: when running inside a Jupyter notebook, execute the IPython magic
# `%matplotlib inline` at this point. It is notebook-only syntax and is not
# valid in a plain Python script, so it is kept here as a comment.
sns.set()

# The data file is whitespace-delimited. A raw string is used so that `\s`
# reaches pandas as a regex escape rather than being an invalid Python
# string escape.
df_6_4_1 = pd.read_csv("ch6_4_1.txt", sep=r"\s+")
X = df_6_4_1[["d11", "d12", "d13"]]
Y = df_6_4_1[["cvr"]]

# Hold out 20% of the rows; the fixed seed makes the split reproducible.
# X_test / Y_test are not used further in this snippet.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# The formula API needs the response and the predictors in a single frame.
data = pd.concat([X_train, Y_train], axis=1)

formula = "cvr ~ 1 + d11 + d12 + d13"
# Pass a link *instance*: handing the link class itself to the family is
# deprecated, and the lowercase `logit` alias is deprecated as well.
link = sm.families.links.Logit()
family = sm.families.Binomial(link=link)
mod = smf.glm(formula=formula, data=data, family=family)
result = mod.fit()
print(result.summary())

# Predicted values on the training rows, plotted against the observed labels.
y_pred = result.predict(X_train)
plt.plot(y_pred, Y_train, ".")
出力
                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:                    cvr   No. Observations:                   24
Model:                            GLM   Df Residuals:                       20
Model Family:                Binomial   Df Model:                            3
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -7.0982
Date:                Sat, 30 Oct 2021   Deviance:                       5.4885
Time:                        06:59:20   Pearson chi2:                     5.51
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -4.9468      5.487     -0.902      0.367     -15.701       5.807
d11            0.0024      0.001      2.513      0.012       0.001       0.004
d12           -0.0005      0.001     -0.767      0.443      -0.002       0.001
d13           -0.0002      0.001     -0.248      0.804      -0.002       0.001
==============================================================================
感想
- ちょっとだけ勉強になった気がする