社会科学のためのデータ分析入門 章末問題解答(3章-2) Rコード



はじめに (Textbook Solution: Quantitative Social Science: An Introduction)


I would highly appreciate it if you could point out any mistakes.

## Chapter 3 Measurement
## Exercise Solution

## Set the working directory so that the relative file names used later in
## this script (e.g. "vignettes.csv") resolve against it.
## NOTE(review): the path names a CAUSALITY folder although this script covers
## Chapter 3 (Measurement) -- confirm that the data file actually lives here.
setwd("~/qss/CAUSALITY") # choose your own directory

## -----------------------------------------------------
## The author of this script uses Japanese-Version QSS.
## -----------------------------------------------------
## -----------------------------------------------------
## -----------------------------------------------------
## -----------------------------------------------------
## Section 2
## Q1

## Load the vignette survey data and take a first look at it.
vign <- read.csv("vignettes.csv")
head(vign); dim(vign)

## Split the respondents by country using the `china` indicator column:
## rows with china == 1 go to `china`, all remaining rows go to `mexico`.
china <- subset(vign, vign$china == 1)
mexico <- subset(vign, vign$china != 1)

## Share of respondents choosing each self-evaluation score, per country.
## table() drops NA by default, so the shares are among non-missing answers.
Cself <- prop.table(table(china$self))
Mself <- prop.table(table(mexico$self))

## Plot barplots of self-evaluation in China & Mexico.
barplot(Cself, xlab = "Score of Self Evaluation in China", ylab = "ratio")
barplot(Mself, xlab = "Score of Self Evaluation in Mexico", ylab = "ratio")

## Print the distributions and calculate the mean self-evaluation score.
## The mean must be taken over the raw scores: averaging a proportion table
## always yields 1 / (number of categories), whatever the data look like.
## na.rm = TRUE keeps the means consistent with the tables above.
Cself; Mself
mean(china$self, na.rm = TRUE); mean(mexico$self, na.rm = TRUE)
## Compare the two means together with the shapes of the distributions, and
## judge whether the result is consistent with the countries' political
## conditions.
## NOTE(review): an earlier version of this script concluded that the two
## means were identical; that was an artifact of averaging the proportion
## tables. Re-check the interpretation against the corrected means.

## Q2

## Median ages, computed once so that every reference line and label below is
## derived from the data instead of a hand-typed number. NAs are dropped to
## match hist(), which ignores non-finite values.
china_med <- median(china$age, na.rm = TRUE)
mexico_med <- median(mexico$age, na.rm = TRUE)
china_lab <- paste("median =", china_med)
mexico_lab <- paste("median =", mexico_med)

## Separate histograms of age, each with its median as a dashed red line.
hist(china$age, ylim = c(0, 80))
abline(v = china_med, lty = "dashed", col = "red")
text(x = 50, y = 60, china_lab)

hist(mexico$age, ylim = c(0, 80))
abline(v = mexico_med, lty = "dashed", col = "red")
text(x = 40, y = 80, mexico_lab)

## Extra work: plotting both histograms in one picture (using "add = TRUE").
## Semi-transparent fills keep the overlapping bars visible.
cols <- c("#0000ff40", "#ff00ff40")

hist(china$age, ylim = c(0, 80), xlab = "age", main = "hist of age",
     col = cols[1], border = "#0000ff")
abline(v = china_med, lty = "dashed", col = "red")
text(x = 50, y = 60, china_lab)

hist(mexico$age, ylim = c(0, 80), col = cols[2], border = "#ff00ff",
     add = TRUE)
abline(v = mexico_med, lty = "dashed", col = "red")
text(x = 40, y = 80, mexico_lab)
legend("topright", c("China", "Mexico"), col = cols, pch = 19)

## End the extra work.

## Plot the Q-Q plot of age: quantiles of China on the horizontal axis
## against quantiles of Mexico on the vertical axis, plus the 45-degree line.
qqplot(china$age, mexico$age, xlab = "Age in China", ylab = "Age in Mexico")
abline(0, 1)

## How to read the plot:
## - Points off the 45-degree line mean the two age distributions differ.
## - Points below the line mean each quantile of Mexican age is smaller than
##   the matching Chinese quantile, i.e. Mexican respondents are younger.
## - The spread is judged from the slope: a slope flatter than 1 would mean
##   Mexican ages are less dispersed than Chinese ages.
## The original analysis reports that the points fall below the line.

## Q3

## Share of respondents whose self-evaluation is strictly lower than the
## score they gave to the Moses vignette.

## Numerators: count the respondents for whom moses > self. The comparison
## is turned into 1/0 values and summed.
Cmo.se <- sum(as.numeric(china$moses > china$self))
Mmo.se <- sum(as.numeric(mexico$moses > mexico$self))

## Denominators: number of respondents in each country.
Cmo.se.l <- nrow(china)
Mmo.se.l <- nrow(mexico)

## Answers
Cmo.se / Cmo.se.l # China
Mmo.se / Mmo.se.l # Mexico

## Q4

## Keep only respondents who ranked the vignettes in the expected order,
## i.e. alison >= jane >= moses (ties allowed).
china3 <- subset(china, subset = (alison >= jane) & (jane >= moses))
mexico3 <- subset(mexico, subset = (alison >= jane) & (jane >= moses))
vign3 <- subset(vign, subset = (alison >= jane) & (jane >= moses))

## Rank each respondent's self-evaluation relative to the three vignettes.
##
## data: a data frame with columns self, moses, jane and alison.
## Returns a numeric vector with one entry per row of `data`:
##   1  self <  moses
##   2  moses  <= self < jane
##   3  jane   <= self < alison
##   4  alison <= self
## Later assignments overwrite earlier ones, so when vignette scores tie the
## highest applicable category wins. Rows whose comparisons are NA stay NA.
self_position <- function(data) {
  pos <- rep(NA_real_, nrow(data))
  pos[data$self < data$moses] <- 1
  pos[(data$self >= data$moses) & (data$self < data$jane)] <- 2
  pos[(data$self >= data$jane) & (data$self < data$alison)] <- 3
  pos[data$self >= data$alison] <- 4
  pos
}

## Create the categorical variable overall and in China & Mexico.
## The same column name (self.pos) is used in all three data frames so the
## age-group analysis in Q5 can read it from any of them.
vign3$self.pos <- self_position(vign3)
china3$self.pos <- self_position(china3)
mexico3$self.pos <- self_position(mexico3)

china3$self.pos # confirm

## Share of respondents in each category (table() drops NA by default).
Vself.pop <- prop.table(table(vign3$self.pos))
Cself.pop <- prop.table(table(china3$self.pos))
Mself.pop <- prop.table(table(mexico3$self.pos))

## bar plot
barplot(Cself.pop, xlab = "Self position relative to vignettes in China",
        ylab = "ratio")
barplot(Mself.pop, xlab = "Self position relative to vignettes in Mexico",
        ylab = "ratio")

## mean
## Average the ranking variable itself: the mean of a proportion table is
## always 1 / (number of categories) and carries no information.
mean(vign3$self.pos, na.rm = TRUE)
mean(china3$self.pos, na.rm = TRUE)
mean(mexico3$self.pos, na.rm = TRUE)

## Q5

## under & over 40
Vu40 <- subset(vign3, age < 40)
Vo40 <- subset(vign3, age >= 40)

Cu40 <- subset(china3, age < 40)
Co40 <- subset(china3, age >= 40)

Mu40 <- subset(mexico3, age < 40)
Mo40 <- subset(mexico3, age >= 40)

## proportion tables
Vu40.pop <- prop.table(table(Vu40$self.pos))
Vo40.pop <- prop.table(table(Vo40$self.pos))

Cu40.pop <- prop.table(table(Cu40$self.pos))
Co40.pop <- prop.table(table(Co40$self.pos))

Mu40.pop <- prop.table(table(Mu40$self.pos))
Mo40.pop <- prop.table(table(Mo40$self.pos))

## bar plots, one per group, so the two age groups can be compared
barplot(Vu40.pop, main = "Overall, under 40", ylab = "ratio")
barplot(Vo40.pop, main = "Overall, 40 and over", ylab = "ratio")
barplot(Cu40.pop, main = "China, under 40", ylab = "ratio")
barplot(Co40.pop, main = "China, 40 and over", ylab = "ratio")
barplot(Mu40.pop, main = "Mexico, under 40", ylab = "ratio")
barplot(Mo40.pop, main = "Mexico, 40 and over", ylab = "ratio")

## mean of the ranking variable in each age group
mean(Vu40$self.pos, na.rm = TRUE); mean(Vo40$self.pos, na.rm = TRUE)
mean(Cu40$self.pos, na.rm = TRUE); mean(Co40$self.pos, na.rm = TRUE)
mean(Mu40$self.pos, na.rm = TRUE); mean(Mo40$self.pos, na.rm = TRUE)

