社会科学のためのデータ分析入門 章末問題解答(3章-2) Rコード
これまでの章の解答はこちら
章末問題解答(1章-1)と(1章-2)のRコードの記事は、こちら(1章-1)とこちら(1章-2)から確認できます。
はじめに (Textbook Solution: Quantitative Social Science: An Introduction )
Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。
欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません(2018年冬ごろの時点では)。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate it if you could point out any mistakes.
また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。
3章-2 (Chapter 3 - Section 2)
スクリプトをそのままベタ貼りしています。
## Chapter 3 Measurement
## Exercise Solution

# Choose your own directory.
# NOTE(review): the path says CAUSALITY although this chapter is Measurement;
# confirm that vignettes.csv really lives in this folder.
setwd("~/qss/CAUSALITY")

## -----------------------------------------------------
## The author of this script uses Japanese-Version QSS.
## -----------------------------------------------------

## -----------------------------------------------------
## Section 2
## -----------------------------------------------------

## Q1 ----
vign <- read.csv("vignettes.csv")
head(vign)
dim(vign)

# Split the respondents by country using the `china` indicator column.
china <- subset(vign, vign$china == 1)
mexico <- subset(vign, vign$china != 1)

# Share of respondents at each self-evaluation score.
Cself <- prop.table(table(china$self))
Mself <- prop.table(table(mexico$self))

## Plot barplot of self-evaluation in China & Mexico.
barplot(Cself, xlab = "Score of Self Evaluation in China", ylab = "ratio")
barplot(Mself, xlab = "Score of Self Evaluation in Mexico", ylab = "ratio")

## Calculate the means.
Cself
Mself
# Average the scores themselves. Averaging the proportion table would always
# return 1 / (number of categories), whatever the data look like.
# table() drops NA by default, so drop NA here as well.
mean(china$self, na.rm = TRUE)
mean(mexico$self, na.rm = TRUE)
## Compare the two means and the shapes of the two distributions.
## NOTE(review): an earlier note here said the means were identical; that was
## an artifact of averaging the proportion tables. Re-check the interpretation
## (consistency with each country's political conditions) against the
## corrected output.

## Q2 ----
# Medians are computed once and reused for the reference lines and labels,
# so the printed label can never disagree with the data.
china_median_age <- median(china$age)
mexico_median_age <- median(mexico$age)

hist(china$age, ylim = c(0, 80))
abline(v = china_median_age, lty = "dashed", col = "red")
text(x = 50, y = 60, paste("median =", china_median_age))

hist(mexico$age, ylim = c(0, 80))
abline(v = mexico_median_age, lty = "dashed", col = "red")
text(x = 40, y = 80, paste("median =", mexico_median_age))

## Extra work: plotting hists in one picture (using "add = TRUE").
hist(china$age,
     ylim = c(0, 80), xlab = "age", main = "hist of age",
     col = "#0000ff40", border = "#0000ff")
abline(v = china_median_age, lty = "dashed", col = "red")
text(x = 50, y = 60, paste("median =", china_median_age))

hist(mexico$age,
     ylim = c(0, 80), col = "#ff00ff40", border = "#ff00ff", add = TRUE)
abline(v = mexico_median_age, lty = "dashed", col = "red")
text(x = 40, y = 80, paste("median =", mexico_median_age))

cols <- c("#0000ff40", "#ff00ff40")
legend("topright", c("China", "Mexico"), col = cols, pch = 19)
## End the extra work.

## Plot the Q-Q plots.
## Q-Q plot of age: China's quantiles on the x-axis, Mexico's on the y-axis.
qqplot(china$age, mexico$age)
abline(0, 1) # 45-degree reference line
## The points do not lie on the 45-degree line,
## which means the two age distributions are not similar.
## NOTE(review): the author observed the points falling below the line. With
## China on x and Mexico on y, that means each quantile of Mexico's age is
## smaller than China's counterpart (Mexican respondents tend to be younger);
## it does not by itself say anything about the spread. Confirm on the plot.

## Q3 ----
## Percentage of respondents who give a self-evaluation
## lower than their evaluation of moses.

# A logical vector sums to the number of TRUEs, so no ifelse() is needed.
Cmo.se <- sum(china$moses > china$self)
Cmo.se.l <- nrow(china)
Mmo.se <- sum(mexico$moses > mexico$self)
Mmo.se.l <- nrow(mexico)

## Answers
Cmo.se / Cmo.se.l # China
Mmo.se / Mmo.se.l # Mexico

## Q4 ----
## Create subsets which satisfy the ordering alison >= jane >= moses
## (ties between vignettes are allowed).
china3 <- subset(china, subset = (alison >= jane) & (jane >= moses))
mexico3 <- subset(mexico, subset = (alison >= jane) & (jane >= moses))
vign3 <- subset(vign, subset = (alison >= jane) & (jane >= moses))

# Number of respondents kept in each country subset.
nrow(mexico3)
nrow(china3)

## Create categorical variables in China & Mexico, and overall.
# Rank each respondent's self-evaluation relative to the three vignettes.
#
# `d` is a data frame with numeric columns `self`, `moses`, `jane`, `alison`.
# Returns a numeric vector with one entry per row of `d`:
#   1 = self is below moses
#   2 = self equals moses, or lies in [moses, jane)
#   3 = self equals jane, or lies in [jane, alison)
#   4 = self is at or above alison
# The assignments run in this order on purpose: when vignettes are tied, a
# later (higher) category overwrites an earlier one. Rows matching no rule
# stay NA.
rank_self_position <- function(d) {
  pos <- rep(NA_real_, nrow(d))
  pos[d$moses > d$self] <- 1
  pos[(d$moses == d$self) |
        ((d$moses <= d$self) & (d$self < d$jane))] <- 2
  pos[(d$jane == d$self) |
        ((d$jane <= d$self) & (d$self < d$alison))] <- 3
  pos[d$alison <= d$self] <- 4
  pos
}

## overall
vign3$self.pos <- rank_self_position(vign3)

## China
china3$self.pos <- rank_self_position(china3)
china3$self.pos # confirm

## Mexico
# Stored as `self.pos`, the same column name as the other two data frames,
# because the Q5 code below reads `self.pos` from the Mexico subsets.
mexico3$self.pos <- rank_self_position(mexico3)

## bar plot
Vself.pop <- prop.table(table(vign3$self.pos))
Cself.pop <- prop.table(table(china3$self.pos))
Mself.pop <- prop.table(table(mexico3$self.pos))
barplot(Vself.pop)
barplot(Cself.pop)
barplot(Mself.pop)

## mean
# Average the category variable itself. The mean of a proportion table is
# always 1 / (number of categories) and carries no information.
# table() drops NA by default, so drop NA here as well.
mean(vign3$self.pos, na.rm = TRUE)
mean(china3$self.pos, na.rm = TRUE)
mean(mexico3$self.pos, na.rm = TRUE)

## Q5 ----
## under & over 40
Vu40 <- subset(vign3, vign3$age < 40)
Vo40 <- subset(vign3, vign3$age >= 40)
Cu40 <- subset(china3, china3$age < 40)
Co40 <- subset(china3, china3$age >= 40)
Mu40 <- subset(mexico3, mexico3$age < 40)
Mo40 <- subset(mexico3, mexico3$age >= 40)

## proportion tables
Vu40.pop <- prop.table(table(Vu40$self.pos))
Vo40.pop <- prop.table(table(Vo40$self.pos))
Cu40.pop <- prop.table(table(Cu40$self.pos))
Co40.pop <- prop.table(table(Co40$self.pos))
Mu40.pop <- prop.table(table(Mu40$self.pos))
Mo40.pop <- prop.table(table(Mo40$self.pos))

## bar plots: overall, China, Mexico (under 40 first, then 40 and over)
barplot(Vu40.pop)
barplot(Vo40.pop)

barplot(Cu40.pop)
barplot(Co40.pop)

barplot(Mu40.pop)
barplot(Mo40.pop)

## mean
mean(Vu40$self.pos, na.rm = TRUE)
mean(Vo40$self.pos, na.rm = TRUE)
mean(Cu40$self.pos, na.rm = TRUE)
mean(Co40$self.pos, na.rm = TRUE)
mean(Mu40$self.pos, na.rm = TRUE)
mean(Mo40$self.pos, na.rm = TRUE)
章末問題解答(1章-1) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-1
章末問題解答(1章-2) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-2