開発経済学とその他応用分野を学ぶ院生

人間万事塞翁が馬を大切にしている応用経済学徒. 2020年4月から開発・計量・プログラミング関連の記事を書きます.

社会科学のためのデータ分析入門 章末問題解答(3章-3) Rコード

これまでの章の解答はこちら

章末問題解答(1章-1)と(1章-2)のRコードの記事はこちらこちら(1章-1)(1章-2)から確認できます。
 

はじめに (Textbook Solution: Quantitative Social Science: An Introduction )

Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。

 
欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません(2018年冬ごろの時点では)。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate if you could point out mistakes.
 
また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。
 

2章-1 (Chapter2 - Section 1)

 
スクリプトをベタ張りしています。

## Chapter 3 Measurement
## Exercise Solution

setwd("~/qss/CAUSALITY") # ご自身のディレクトリを選択

## -----------------------------------------------------
## The author of this script uses Japanese-Version QSS.
## -----------------------------------------------------
## -----------------------------------------------------
## -----------------------------------------------------
## -----------------------------------------------------
## -----------------------------------------------------
## Section 3
## Q1

un <- read.csv("unvoting.csv")
unique(un$Year)

## in the year 1980 & 2000
un80 <- un[c(un$Year == "1980"), ]
un00 <- un[c(un$Year == "2000"), ]
dim(un80); dim(un00)

## plot histgrams
hist(un80$idealpoint, col = "#0000ff40", border = "#0000ff")
abline(v = median(un80$idealpoint), lty = "dashed", col = "red")
hist(un00$idealpoint, col = "#ff00ff40", border = "#ff00ff")
abline(v = median(un00$idealpoint), lty = "dashed", col = "blue")

## Extra work: plotting hists in one picture (using "add = TRUE").
hist(un80$idealpoint, col = "#0000ff40", border = "#0000ff",
     xlab = "", main = "Estimated Ideal Point in 1980", 
     xlim = c(-3, 3), ylim = c(0, 60))
abline(v = median(un80$idealpoint), lty = "dashed", col = "red")
text(0.5, 50, "median(-0.09)", col = "red")

hist(un00$idealpoint, col = "#ff00ff40", border = "#ff00ff", 
     add = TRUE)
abline(v = median(un00$idealpoint), lty = "dashed", col = "blue")
text(-0.5, 60, "median(-0.35)", col = "blue")

cols <- c("#0000ff40", "#ff00ff40")
legend("topright", c("1980", "2000"), col = cols, pch = 19)

## calculate quantiles
quantile(un80$idealpoint)
quantile(un00$idealpoint)


## Q2

## time-series plot
trans.us <- tapply(un$PctAgreeUS, un$Year, mean)
trans.russia <- tapply(un$PctAgreeRUSSIA, un$Year, mean)

length(trans.mean)
plot(names(trans.us), trans.us, type = "l", col = "deepskyblue", 
     ylim = c(0, 0.9), xlab = "Year", ylab = "", 
     main = "Transition: Mean Percentage of Agree with US & Russia")
text(2010, 0.1, "US", col = "deepskyblue")
lines(names(trans.russia), trans.russia, type = "l", col = "pink")
text(2010, 0.6, "Russia", col = "pink")

## Find the countries where support the US & Russia the most. 
usMost <- tapply(un$PctAgreeUS, un$CountryName, mean)
russiaMost <- tapply(un$PctAgreeRUSSIA, un$CountryName, mean)

## [2] finds the socond highest element
## We can also get result w/o [2]
sort(usMost, decreasing = TRUE)[2]
sort(russiaMost, decreasing = TRUE)[2]

## FYI: Command which.max gets the place of maximum number.
## However now we need to find the socond best because US and Russia
## have obviously concordance rate 1. 
which.max(russiaMost) # length 143
russiaMost[143] # finds 143th element


## Q3

## Create subsets of US & Russia.
usIde <- subset(un, un$CountryName == "United States of America")
russiaIde <- subset(un, un$CountryName == "Russia")

## Create subsets.
usIde <- usIde[, c("Year", "idealpoint")]
russiaIde <- russiaIde[, c("Year", "idealpoint")]
medianIde <- tapply(un$idealpoint, un$Year, median)

## Plot transition. 
plot(usIde, type = "l", ylim = c(-3, 3), col = "red")
lines(russiaIde, type = "l", col = "blue")
lines(names(medianIde), medianIde, lty = "dashed")
text(2010, 3, "US", col = "red")
text(2010, 0.6, "Russia", col = "blue")
text(2010, -1, "UN-Median")


## Q4

## Soviet Union subset
soviet <- subset(un, subset = (CountryName == "Estonia") | 
         (CountryName == "Latvia") | 
         (CountryName == "Lithuania") | 
         (CountryName == "Belarus") | 
         (CountryName == "Moldova") | 
         (CountryName == "Ukraine") | 
         (CountryName == "Armenia") | 
         (CountryName == "Azerbaijan") | 
         (CountryName == "Georgia") | 
         (CountryName == "Kazakhstan") |
         (CountryName == "Kyrgyzstan") |
         (CountryName == "Tajikistan") |
         (CountryName == "Turkmenistan") |
         (CountryName == "Uzbekistan") |
         (CountryName == "Russia") 
)

## 2012 data
un2012 <- un[c(un$Year == "2012"), ]
soviet2012 <- soviet[c(soviet$Year == "2012"), ]

a <- un2012$CountryName %in% soviet2012$CountryName

## Soviet Union dummy
un2012$sov <- NA
un2012$sov <- ifelse((un2012$CountryName %in% soviet2012$CountryName) == TRUE, 1, 0)

un2012a <- subset(un2012, un2012$sov == 0)
un2012a <- un2012a[, c("idealpoint", "PctAgreeUS")]
soviet2012a <- soviet2012[, c("idealpoint", "PctAgreeUS")]

## Plot 2 groups in 1 picture. 
plot(0, 0, type = "n", 
     xlim = c(min(un2012a$idealpoint), max(un2012a$idealpoint)), 
     ylim = c(min(un2012a$PctAgreeUS), max(un2012a$PctAgreeUS)), 
     xlab = "idealpoint", ylab = "percentage agree with US", 
     main = "Soviet & non-Soviet in 2012")

points(un2012a, pch = 22, col = "blue")
points(soviet2012a, pch = 20, col = "red")

## same way to plot the points. 
points(un2012a$idealpoint, un2012a$PctAgreeUS, pch = 22, col = "deepskyblue") 
points(soviet2012a$idealpoint, soviet2012a$PctAgreeUS, pch = 20, col = "red") 

## There exists storong correlation b/w idealpoint & PctAgreeUS. 
## Tendency seems to be the same b/w Soviet and non-Soviet countries. 


## Q5

## Create non-Soviet subset
un$sov <- NA
un$sov <- ifelse((un$CountryName %in% soviet$CountryName) == TRUE, 1, 0)

un.sov <- subset(un, un$sov == 0)

trans.sov.m <- tapply(soviet$idealpoint, soviet$Year, median)
trans.un.m <- tapply(un.sov$idealpoint, un.sov$Year, median)

## plot lines
plot(names(trans.sov.m), trans.sov.m, type = "l", 
     xlab = "", ylab = "median ideal point", col = "red",
     ylim = c(-3, 1))
lines(names(trans.un.m), trans.un.m, type = "l", col = "blue")
abline(v = 1989, lty = "dashed") # Fall of the Berlin Wall
text(2007, 0.5, "Soviet", col = "red")
text(2000, -0.2, "non-Soviet", col = "blue")
text(1985, 1, "Fall of the Berlin Wall")

 
章末問題解答(1章-1) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-1www.econ-stat-grad.com
章末問題解答(1章-2) Rコード https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-2www.econ-stat-grad.com

f:id:econgrad:20201012235846p:plain