社会科学のためのデータ分析入門 章末問題解答(2章-2) Rコード
これまでの章の解答はこちら
章末問題解答(1章-1)と(1章-2)のRコードの記事は、こちら(1章-1)とこちら(1章-2)から確認できます。
はじめに (Textbook Solution: Quantitative Social Science: An Introduction )
Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。
欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません(2018年冬ごろの時点では)。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate it if you could point out any mistakes.
また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。
2章-2 (Chapter 2 - Section 2)
スクリプトをベタ貼りしています。
## Chapter 2 Causality
## Exercise Solution
##
## Works through Q1-Q7 of the Section 2 exercise using gay.csv: mean support
## for same-sex marriage (ssm) is computed per treatment group for a given
## study and wave, and treatment effects are estimated as
## difference-in-differences (DID) against the "No Contact" group.

## NOTE: setwd() ties the script to one machine. Point it at the folder that
## contains gay.csv.
setwd("~/qss/CAUSALITY") # choose your own directory

## -----------------------------------------------------
## The author of this script uses Japanese-Version QSS.
## -----------------------------------------------------

## Treatment-group labels. Group means are looked up by label instead of by
## row position: a positional index such as [4] only works when all five
## groups are present in the subset, which is not guaranteed once
## read.csv() returns character columns (the default since R 4.0.0).
## NOTE(review): spellings are assumed to match gay$treatment exactly; the
## stopifnot() after loading the data fails loudly if they do not.
no_contact <- "No Contact"
recycle_gay <- "Recycling Script by Gay Canvasser"
recycle_straight <- "Recycling Script by Straight Canvasser"
ssm_gay <- "Same-Sex Marriage Script by Gay Canvasser"
ssm_straight <- "Same-Sex Marriage Script by Straight Canvasser"

## Difference-in-differences estimate.
##   after, before: named group means (as returned by tapply()) for the
##                  later and the baseline wave.
##   treated:       label of the treated group.
##   control:       label of the comparison group.
## Returns (change in treated mean) - (change in control mean).
## `[[` raises an error when a label is absent, so a missing group cannot
## silently turn into NA.
did_estimate <- function(after, before, treated, control = "No Contact") {
  (after[[treated]] - before[[treated]]) -
    (after[[control]] - before[[control]])
}

## -----------------------------------------------------
## Section 2
## Q1
## Canvasser: a person who asks respondents the survey script
## (the Japanese gloss in the original comment was lost to an encoding error).
gay <- read.csv("gay.csv")
head(gay)
dim(gay)

stopifnot(all(
  c(no_contact, recycle_gay, recycle_straight, ssm_gay, ssm_straight) %in%
    gay$treatment
))

## Create subset for wave = 1 & study = 1.
wave1 <- subset(gay, study == 1 & wave == 1)

## Number of missing values in ssm (0 means there are none).
sum(is.na(gay$ssm))

## Baseline mean of ssm per treatment group; similar values across groups
## suggest that randomization worked.
wave1a <- tapply(wave1$ssm, wave1$treatment, mean)
wave1a

## Q2
## Create subset for wave = 2 & study = 1.
wave2 <- subset(gay, study == 1 & wave == 2)

## Calculate mean of ssm for each treatment group.
wave2a <- tapply(wave2$ssm, wave2$treatment, mean)

## Sample Average Treatment Effect for the Treated for Gay & Straight
## Difference-in-Difference Estimators
DIDgay1 <- did_estimate(wave2a, wave1a, ssm_gay)
DIDstraight1 <- did_estimate(wave2a, wave1a, ssm_straight)
## Open question from the original author: is the control-group change
## (wave 2 - wave 1 for "No Contact") needed? It is what makes this a DID
## estimator; a plain wave-2 difference in means would omit it.
## Feedback is welcome.

## DID
DIDgay1 - DIDstraight1

## Q3
## Same comparison for the recycling script.
DIDgay2 <- did_estimate(wave2a, wave1a, recycle_gay)
DIDstraight2 <- did_estimate(wave2a, wave1a, recycle_straight)

## Interpretation: there is no big difference b/w the two,
## which means there is a possibility that the bias does not exist.
DIDgay2 - DIDstraight2

## Q4
## Create subsets for wave = 3:7 & study = 1.
wave3 <- subset(gay, study == 1 & wave == 3)
wave4 <- subset(gay, study == 1 & wave == 4)
wave5 <- subset(gay, study == 1 & wave == 5)
wave6 <- subset(gay, study == 1 & wave == 6)
wave7 <- subset(gay, study == 1 & wave == 7)

## Calculate mean of ssm for each treatment group.
wave3a <- tapply(wave3$ssm, wave3$treatment, mean)
wave4a <- tapply(wave4$ssm, wave4$treatment, mean)
wave5a <- tapply(wave5$ssm, wave5$treatment, mean)
wave6a <- tapply(wave6$ssm, wave6$treatment, mean)
wave7a <- tapply(wave7$ssm, wave7$treatment, mean)

## ATE (each wave compared with the wave-1 baseline)
DIDgay3 <- did_estimate(wave3a, wave1a, ssm_gay)
DIDstraight3 <- did_estimate(wave3a, wave1a, ssm_straight)
DIDgay4 <- did_estimate(wave4a, wave1a, ssm_gay)
DIDstraight4 <- did_estimate(wave4a, wave1a, ssm_straight)
DIDgay5 <- did_estimate(wave5a, wave1a, ssm_gay)
DIDstraight5 <- did_estimate(wave5a, wave1a, ssm_straight)
DIDgay6 <- did_estimate(wave6a, wave1a, ssm_gay)
DIDstraight6 <- did_estimate(wave6a, wave1a, ssm_straight)
DIDgay7 <- did_estimate(wave7a, wave1a, ssm_gay)
DIDstraight7 <- did_estimate(wave7a, wave1a, ssm_straight)

DIDgay3; DIDgay4; DIDgay5; DIDgay6; DIDgay7
DIDstraight3; DIDstraight4; DIDstraight5; DIDstraight6; DIDstraight7

## DID
DIDgay1 - DIDstraight1
DIDgay3 - DIDstraight3
DIDgay4 - DIDstraight4
DIDgay5 - DIDstraight5
DIDgay6 - DIDstraight6
DIDgay7 - DIDstraight7
## The difference b/w gay & straight canvassers is still positive
## even in wave 7, so the effect seems to last.

## Q5
## Create subset & calculate means (study 2, wave 1 baseline).
study2 <- subset(gay, study == 2 & wave == 1)
study2a <- tapply(study2$ssm, study2$treatment, mean)
study2a
## Randomization seems to be done properly
## (because of almost no difference b/w two means).

## Q6
## Create subset & calculate means (study 2, wave 2).
study2.2 <- subset(gay, study == 2 & wave == 2)
study2.2a <- tapply(study2.2$ssm, study2.2$treatment, mean)

DIDstudy2 <- did_estimate(study2.2a, study2a, ssm_gay)

## Compare the results of study 1 & study 2.
DIDstudy2; DIDgay1
## The difference seems not so big. We can conclude that
## the result of study 2 of wave 2 is consistent with study 1.

## Q7
## Create subsets of different waves (study 2).
study3 <- subset(gay, study == 2 & wave == 3)
study4 <- subset(gay, study == 2 & wave == 4)
study7 <- subset(gay, study == 2 & wave == 7)

## Calculate means for the different waves.
study3a <- tapply(study3$ssm, study3$treatment, mean)
study4a <- tapply(study4$ssm, study4$treatment, mean)
study7a <- tapply(study7$ssm, study7$treatment, mean)

## DID against the study-2 wave-1 baseline.
diff3 <- did_estimate(study3a, study2a, ssm_gay)
diff4 <- did_estimate(study4a, study2a, ssm_gay)
diff7 <- did_estimate(study7a, study2a, ssm_gay)

diff3; diff4; diff7
## Study 2 also shows a positive effect of the marriage script delivered by
## a gay canvasser. Overall, if a gay canvasser asks about marriage,
## the rate of support for same-sex marriage becomes higher.
章末問題解答(1章-1) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-1
章末問題解答(1章-2) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-2