社会科学のためのデータ分析入門 章末問題解答(3章-1) Rコード
これまでの章の解答はこちら
章末問題解答(1章-1)と(1章-2)のRコードの記事はこちらこちら(1章-1)(1章-2)から確認できます。
はじめに (Textbook Solution: Quantitative Social Science: An Introduction )
Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。
欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません(2018年冬ごろの時点では)。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate if you could point out mistakes.
また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。
2章-1 (Chapter2 - Section 1)
スクリプトをベタ張りしています。
## Chapter 3 Measurement ## Exercise Solution setwd("~/qss/CAUSALITY") # ご自身のディレクトリを選択 ## ----------------------------------------------------- ## The author of this script uses Japanese-Version QSS. ## ----------------------------------------------------- ## ----------------------------------------------------- ## ----------------------------------------------------- ## Section 1 ## Q1 reshaped <- read.csv("gayreshaped.csv") ccap <- read.csv("ccap2012.csv") head(reshaped) head(ccap) ## "complete.obs" perform a calculation for rows which have perfect matching. ## Correlation is strong b/w the two. cor(reshaped$therm1, reshaped$therm2, use = "complete.obs") ## Q2 ## Create the subset study2 <- subset(reshaped, subset = (study == 2) & (treatment == "No Contact")) study2a <- study2[, 3:6] class(study2a) ## "pairwise.complete.obs" perform a calculation for a data.frame which ## has several variables & missing values. cor(study2a, use = "pairwise.complete.obs") # correlation coefficient matrix ## The result is slightly different form the argument "pairwise.complete.obs". ## See. http://keita43a.hatenablog.com/entry/2018/04/10/034230. cor(study2a, use = "complete.obs") ## All correlation coefficients are nealy 1 (means storong correlation). ## Extra exercise, ploting the matrix using packages psych & corrplot ## install.packages("psych") library(psych) psych::pairs.panels(study2a) psych::pairs.panels(study2a, hist.col = "white", rug = F, ellipses = F, lm = T) ## install.packages("corrplot") library(corrplot) corrplot::corrplot(cor(study2a, use = "complete.obs")) cor.plot(cor(study2a, use = "complete.obs"), numbers=T) ## Q3 ## You might encounter "Error in plot.new() : figure margins too large" ## use dev.new() & dev.off() before and after the command plot plot(study2a$therm1, study2a$therm2, pch = 20, col = "blue", xlab = "Therm 1", ylab = "Therm 2") plot(study2a$therm1, study2a$therm3, pch = 20, col = "red", xlab = "Therm 1", ylab = "Therm 3") plot(study2a$therm1, study2a$therm4, pch = 20, col = "green", xlab = "Therm 1", ylab = "Therm 4") ## Plot & save 3 scatter plots in 1 screan (a bit weird though). png("aaa.png") split.screen(figs = c(1, 2)) split.screen(figs = c(2, 1), screen = 2) screen(1) plot(study2a$therm1, study2a$therm2, pch = 20, col = "blue", xlab = "Therm 1", ylab = "Therm 2") screen(3) plot(study2a$therm1, study2a$therm3, pch = 20, col = "red", xlab = "Therm 1", ylab = "Therm 3") screen(4) plot(study2a$therm1, study2a$therm4, pch = 20, col = "green", xlab = "Therm 1", ylab = "Therm 4") dev.off() ## Get back to 1 screan format. ## close.screen(all = T) ## Another way to plot. ## Plot 3 groups in 1 picture. plot(0, 0, type = "n", xlim = c(0, max(study2a$therm1)), ylim = c(0, max(study2a$therm1)), xlab = "therm1", ylab = "therm2-4") points(study2a$therm1, study2a$therm2, pch = 20, col = "red") points(study2a$therm1, study2a$therm3, pch = 20, col = "blue") points(study2a$therm1, study2a$therm4, pch = 20, col = "green") plot(0, 0, type = "n", xlim = c(0, max(study2a$therm1)), ylim = c(0, max(study2a$therm1)), xlab = "therm1", ylab = "therm2-4") points(study2a$therm1, study2a$therm2, pch = 20, col = "#30303020") points(study2a$therm1, study2a$therm3, pch = 20, col = "#ff990020") points(study2a$therm1, study2a$therm4, pch = 20, col = "#ffb6c120") ## Q4 ## Plot CCAP's gaytherm hist(ccap$gaytherm, col = "#0000ff20", las = 1, freq = FALSE, xlab = "therm", main = "Histogram of GayTherm", breaks = 20) ## Plot some lines abline(v = mean(ccap$gaytherm, na.rm = TRUE), lty = "dashed", col = "red") text(x = 67, y = 0.030, "mean(58.7)", col = "red") lines(density(ccap$gaytherm, na.rm = TRUE), col = "orange", lwd = 2) mean(ccap$gaytherm, na.rm = TRUE) ## Plot study1's gaytherm head(reshaped) study1 <- subset(reshaped, reshaped$study == 1) hist(study1$therm1, col = "#0000ff20", las = 1, freq = FALSE, xlab = "therm", main = "Histogram of Study1's Therm1", breaks = 20) hist(study2$therm1, col = "#0000ff20", las = 1, freq = FALSE, xlab = "therm", main = "Histogram of Study2's Therm1", breaks = 20) ## Q5 qqplot(study2a$therm1, study2a$therm2, pch = 20, col = "#0000ff") abline(0, 1) # 45-degree line qqplot(study2a$therm1, study2a$therm3, pch = 20, col = "#0000ff") abline(0, 1) qqplot(study2a$therm1, study2a$therm4, pch = 20, col = "#0000ff") abline(0, 1) ## It is difficult to put a interpretation. ## Q-Q plot is almost on the 45-degree line. ## This means two variables' distributions are almost the same.
章末問題解答(1章-1) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-1www.econ-stat-grad.com
章末問題解答(1章-2) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-2www.econ-stat-grad.com