社会科学のためのデータ分析入門章末問題解答(2章-2) Rコード - 開発経済学とその他応用分野を学ぶ院生

これまでの章の解答はこちら

章末問題解答(1章-1)と(1章-2)のRコードの記事は、こちら(1章-1)とこちら(1章-2)から確認できます。

はじめに (Textbook Solution: Quantitative Social Science: An Introduction )

Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。

欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません（2018年冬ごろの時点では）。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate it if you could point out any mistakes.

また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。

2章-2 (Chapter 2 - Section 2)

スクリプトをベタ貼りしています。

## Chapter 2 Causality
## Exercise Solution

setwd("~/qss/CAUSALITY") # choose your own working directory

## -----------------------------------------------------
## The author of this script uses Japanese-Version QSS.
## -----------------------------------------------------
## -----------------------------------------------------
## Section 2
## Q1

## Canvasser: a person who visits households door to door.
## (The Japanese gloss originally given here was lost to an encoding error.)

## Load the data, then look at the first rows and the dimensions.
gay <- read.csv("gay.csv")
head(gay)
dim(gay)

## Create subset for wave = 1 & study = 1.
## Equivalent to subset(gay, gay$study == 1 & gay$wave == 1).
wave1 <- subset(gay, subset = (study == 1) & (wave == 1))

## Count missing values of ssm directly; 0 means none are missing.
## (A bare sum(gay$ssm) only hints at missingness by returning NA.)
sum(is.na(gay$ssm))

## Mean of ssm in each treatment group at wave 1.
## Printed so that the groups can be compared.
wave1a <- tapply(wave1$ssm, wave1$treatment, mean)
wave1a


## Q2

## Restrict the data to study 1, wave 2.
wave2 <- subset(gay, subset = (study == 1) & (wave == 2))

## Mean of ssm within each treatment group at wave 2.
wave2a <- with(wave2, tapply(ssm, treatment, mean))

## Store the wave-1 and wave-2 group means as one-column data frames
## so that a group can be picked by its row position below.
wave1b <- as.data.frame(wave1a)
wave2b <- as.data.frame(wave2a)

## Change in mean ssm from wave 1 to wave 2 in row 1, the group this
## script uses as the comparison (control) group.
control_change_w2 <- wave2b[1, ] - wave1b[1, ]

## Difference-in-differences estimates: the wave 1 -> wave 2 change in
## row 4 (gay canvasser) and row 5 (straight canvasser), each net of
## the change in the comparison group.
## NOTE(review): rows are selected by position; confirm the order of the
## treatment groups with names(wave1a).
DIDgay1 <- (wave2b[4, ] - wave1b[4, ]) - control_change_w2
DIDstraight1 <- (wave2b[5, ] - wave1b[5, ]) - control_change_w2

## Open question from the author: is subtracting the comparison-group
## change (wave2b[1, ] - wave1b[1, ]) needed at all?
## Feedback is welcome :D.

## Difference between the two estimates.
DIDgay1 - DIDstraight1


## Q3

## Same estimator as in Q2, applied to rows 2 and 3 of the group means
## (the remaining gay / straight canvasser groups).
## NOTE(review): rows are selected by position; confirm which treatment
## each row holds with rownames(wave1b).
control_change_q3 <- wave2b[1, ] - wave1b[1, ]

DIDgay2 <- (wave2b[2, ] - wave1b[2, ]) - control_change_q3
DIDstraight2 <- (wave2b[3, ] - wave1b[3, ]) - control_change_q3

## Interpretation: there is no big difference between the two, which
## means it is possible that the bias does not exist.

DIDgay2 - DIDstraight2


## Q4

## Difference-in-differences against a baseline wave:
##   (change in `row` from baseline to later)
##     minus (change in `control_row` from baseline to later).
## `later` and `baseline` are one-column data frames of group means, as
## produced by as.data.frame(tapply(...)) in this script.
did_from_baseline <- function(later, baseline, row, control_row = 1) {
  treated_change <- later[row, ] - baseline[row, ]
  control_change <- later[control_row, ] - baseline[control_row, ]
  treated_change - control_change
}

## Create subsets for wave = 3:7 & study = 1
wave3 <- subset(gay, subset = (study == 1) & (wave == 3))
wave4 <- subset(gay, subset = (study == 1) & (wave == 4))
wave5 <- subset(gay, subset = (study == 1) & (wave == 5))
wave6 <- subset(gay, subset = (study == 1) & (wave == 6))
wave7 <- subset(gay, subset = (study == 1) & (wave == 7))

## Calculate the mean of ssm for each treatment group.
wave3a <- tapply(wave3$ssm, wave3$treatment, mean)
wave4a <- tapply(wave4$ssm, wave4$treatment, mean)
wave5a <- tapply(wave5$ssm, wave5$treatment, mean)
wave6a <- tapply(wave6$ssm, wave6$treatment, mean)
wave7a <- tapply(wave7$ssm, wave7$treatment, mean)

## Convert the group means into data frames so that groups can be
## selected by row position.
wave3b <- as.data.frame(wave3a)
wave4b <- as.data.frame(wave4a)
wave5b <- as.data.frame(wave5a)
wave6b <- as.data.frame(wave6a)
wave7b <- as.data.frame(wave7a)

## Effect estimates relative to wave 1 (wave1b comes from the Q2 code).
## Row 4 is used for the gay-canvasser group, row 5 for the
## straight-canvasser group, and row 1 for the comparison group.
## NOTE(review): rows are selected by position; confirm the order with
## rownames(wave1b).
DIDgay3 <- did_from_baseline(wave3b, wave1b, 4)
DIDstraight3 <- did_from_baseline(wave3b, wave1b, 5)

DIDgay4 <- did_from_baseline(wave4b, wave1b, 4)
DIDstraight4 <- did_from_baseline(wave4b, wave1b, 5)

DIDgay5 <- did_from_baseline(wave5b, wave1b, 4)
DIDstraight5 <- did_from_baseline(wave5b, wave1b, 5)

DIDgay6 <- did_from_baseline(wave6b, wave1b, 4)
DIDstraight6 <- did_from_baseline(wave6b, wave1b, 5)

DIDgay7 <- did_from_baseline(wave7b, wave1b, 4)
DIDstraight7 <- did_from_baseline(wave7b, wave1b, 5)

## Print the estimates.  (The original line referred to the undefined
## objects DIDga4 and DIDga6, which stopped the script with an error.)
DIDgay3; DIDgay4; DIDgay5; DIDgay6; DIDgay7
DIDstraight3; DIDstraight4; DIDstraight5; DIDstraight6; DIDstraight7

## Difference between the gay- and straight-canvasser estimates, by wave
## (the wave 2 values DIDgay1 / DIDstraight1 come from the Q2 code).

DIDgay1 - DIDstraight1
DIDgay3 - DIDstraight3
DIDgay4 - DIDstraight4
DIDgay5 - DIDstraight5
DIDgay6 - DIDstraight6
DIDgay7 - DIDstraight7

## The difference between gay and straight canvassers is still positive
## even in wave 7, so the effect seems to last.


## Q5

## All treatment labels found in the full data, in sorted order.
## Grouping on a factor with these fixed levels keeps one slot per label
## in every tapply() result below, even for labels that never occur in
## study 2 (the comment further down mentions only two means).  Without
## this, when read.csv() returns treatment as character (the default
## since R 4.0.0), tapply() drops the absent labels, the results have
## fewer rows, and the positional look-ups [1, ] and [4, ] return NA or
## the wrong group.  Empty slots are simply NA.
treatment_levels <- sort(unique(as.character(gay$treatment)))

## Mean of ssm per treatment label, with one slot for every label.
mean_ssm_by_treatment <- function(data, levels = treatment_levels) {
  tapply(data$ssm, factor(data$treatment, levels = levels), mean)
}

## Create subset & calculate means (study 2, wave 1).
study2 <- subset(gay, subset = (study == 2) & (wave == 1))
study2a <- mean_ssm_by_treatment(study2)
study2b <- as.data.frame(study2a)

## Show the baseline means so the groups can be compared.
study2a

## Randomization seems to be done properly
## (because there is almost no difference between the two means).


## Q6

## Create subset & calculate means (study 2, wave 2).
study2.2 <- subset(gay, subset = (study == 2) & (wave == 2))
study2.2a <- mean_ssm_by_treatment(study2.2)
study2.2b <- as.data.frame(study2.2a)

## Difference-in-differences for study 2: the wave 1 -> wave 2 change in
## row 4 (gay canvasser, same position as in the study 1 code) minus
## the change in row 1 (comparison group).
## NOTE(review): confirm the positions with rownames(study2b).
DIDstudy2 <- (study2.2b[4, ] - study2b[4, ]) - (study2.2b[1, ] - study2b[1, ])

## Compare the results of study 1 & study 2.
DIDstudy2; DIDgay1

## The difference does not seem large.  We can conclude that the wave 2
## result of study 2 is consistent with study 1.


## Q7


## Create subsets of different waves
study3 <- subset(gay, subset = (study == 2) & (wave == 3))
study4 <- subset(gay, subset = (study == 2) & (wave == 4))
study7 <- subset(gay, subset = (study == 2) & (wave == 7))

## Create means of different waves
study3a <- mean_ssm_by_treatment(study3)
study4a <- mean_ssm_by_treatment(study4)
study7a <- mean_ssm_by_treatment(study7)

## Convert the three sets of means into data frames so that groups can
## be selected by row position.
study3b <- as.data.frame(study3a)
study4b <- as.data.frame(study4a)
study7b <- as.data.frame(study7a)

## Same estimator as in Q6, for waves 3, 4 and 7 against wave 1.
diff3 <- (study3b[4, ] - study2b[4, ]) - (study3b[1, ] - study2b[1, ])
diff4 <- (study4b[4, ] - study2b[4, ]) - (study4b[1, ] - study2b[1, ])
diff7 <- (study7b[4, ] - study2b[4, ]) - (study7b[1, ] - study2b[1, ])

diff3; diff4; diff7

## Study 2 also shows a positive effect of a gay canvasser delivering the
## marriage script.  Overall, if a gay canvasser asks about marriage,
## the rate of support for same-sex marriage becomes higher.

章末問題解答(1章-1) Rコード https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-1
章末問題解答(1章-2) Rコード https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-2

f:id:econgrad:20201012235846p:plain