社会科学のためのデータ分析入門 章末問題解答(1章-2) Rコード
章末問題解答(1章-1) Rコードの記事はこちらこちらから確認できます。
はじめに (Textbook Solution: Quantitative Social Science: An Introduction )
Rを使った統計学の日本語のテキストとして非常に定評のある社会科学のためのデータ分析入門の章末問題の解答(Rコード)です。
欠点なのかは分かりませんが、こちらのテキストには章末問題の解答がついていません。そして日本語でも英語でもwebで公開されていません(2018年冬ごろの時点では)。2018年冬に私が上巻の章末問題を解いたのですが、一度公開してみようと思ったので複数の記事に分けて投稿していこうと思います。誰かの役に立てればとも思っているのですが、私のコードにミスがあった場合に指摘していただけると嬉しいです。
I would highly appreciate if you could point out mistakes.
また同じ変数に関するプロットをする場合でも複数の方法を使ったりもしています。
1章-2 (Chapter1 - Section 2)
スクリプトをベタ張りしています。
## Chapter 1 Introduction ## Exercise Solution ## ----------------------------------------------------- ## Taka(the author of this script) uses Japanese-Version QSS. ## ----------------------------------------------------- ## Section 2 ## Q1 kenya <- read.csv("Kenya.csv") sweden <- read.csv("Sweden.csv") world <- read.csv("World.csv") dim(kenya); dim(sweden); dim(world) ## Create sum of person-year. kenya$py <- kenya$py.men + kenya$py.women sweden$py <- sweden$py.men + sweden$py.women world$py <- world$py.men + world$py.women ## We will create CBRs in 3 ways. ## Create Kenya's CBR. kc1 <- sum(kenya[1:15, "births"]) / sum(kenya[1:15, "py"]) kc2 <- sum(kenya[16:30, "births"]) / sum(kenya[16:30, "py"]) kCBR <- c(kc1, kc2) ## Create Sweden's CBR. sc1 <- sum(sweden[c(1:15), "births"]) / sum(sweden[c(1:15), "py"]) sc2 <- sum(sweden[c(16:30), "births"]) / sum(sweden[c(16:30), "py"]) sCBR <- c(sc1, sc2) ## Create World's CBR. ## First, we create & add CBR in the data.frame. world$crb <- world$births / world$py ## Divide data into two periods. wp1 <- world[world$period=="1950-1955", ] # first half wp2 <- world[world$period=="2005-2010", ] # second half ## Calculate CBRs wc1 <- sum(wp1$births) / sum(wp1$py) wc2 <- sum(wp2$births) / sum(wp2$py) ## Save CBR as a new vector which has length=2 wCBR <- c(wc1, wc2) ## Print 3 countries' CBRs kCBR; sCBR; wCBR ## Q2 ## We will create ASFR in the different ways again. ## childbearing age: CBA ## Kenya ## 1950-55 Kcba1 <- kenya[4:10, ] K.ASFR1 <- Kcba1$births / Kcba1$py.women ## 2005-10 Kcba2 <- kenya[19:25, ] K.ASFR2 <- Kcba2$births / Kcba2$py.women ## total ## K.ASFR <- c(K.ASFR1, K.ASFR2) # easiest way Kcba3 <- rbind(kenya[4:10, ], kenya[19:25, ]) K.ASFR <- Kcba3$births / Kcba3$py.women ## Sweden ## 1950-55 Scbab1 <- sweden[4:10, "births"] Scbaw1 <- sweden[4:10, "py.women"] S.ASFR1 <- Scbab1 / Scbaw1 ## 2005-10 Scbaw2 <- sweden[19:25, "py.women"] Scbab2 <- sweden[19:25, "births"] S.ASFR2 <- Scbab2 / Scbaw2 ## total Scbab <- c(Scbab1, Scbab2) Scbaw <- c(Scbaw1, Scbaw2) S.ASFR <- Scbab / Scbaw ## World ## 1950-55 W.ASFR <- world$births / world$py.women W.ASFR1 <- W.ASFR[4:10] ## 2005-10 W.ASFR2 <- W.ASFR[c(19:25)] ## total W.ASFR <- W.ASFR[c(4,5,6,7,8,9,10,19,20,21,22,23,24,25)] ## kenya[kenya$age == "15-19", ] K.ASFR1; S.ASFR1; W.ASFR1 mean(K.ASFR1); mean(S.ASFR1); mean(W.ASFR1) K.ASFR2; S.ASFR2; W.ASFR2 mean(K.ASFR2); mean(S.ASFR2); mean(W.ASFR2) K.ASFR; S.ASFR; W.ASFR mean(K.ASFR); mean(S.ASFR); mean(W.ASFR) ## Q3 ## Kenya's TFR K.TFR <- c(sum(K.ASFR1 * 5), sum(K.ASFR2 * 5)) ## the number of women kw1 <- sum(kenya[kenya$period=="1950-1955", "py.women"]) # first half kw2 <- sum(kenya[kenya$period=="2005-2010", "py.women"]) # second half ## Sweden's TFR S.TFR <- c(sum(S.ASFR1 * 5), sum(S.ASFR2 * 5)) ## the number of women sw1 <- sum(sweden[sweden$period=="1950-1955", "py.women"]) # first half sw2 <- sum(sweden[sweden$period=="2005-2010", "py.women"]) # second half ## World's TFR W.TFR <- c(sum(W.ASFR1 * 5), sum(W.ASFR2 * 5)) ## the number of women ww1 <- sum(world[world$period=="1950-1955", "py.women"]) # first half ww2 <- sum(world[world$period=="2005-2010", "py.women"]) # second half ## the number of births wb1 <- sum(world[world$period=="1950-1955", "births"]) # first half wb2 <- sum(world[world$period=="2005-2010", "births"]) # second half ## print TFR & #women K.TFR; S.TFR; W.TFR kw1; kw2; sw1; sw2; ww1; ww2 wb1; wb2 ## Q4 ## Kenya's CDR K.CDR1 <- sum(kenya[kenya$period=="1950-1955", "deaths"]) / sum(kenya[kenya$period=="1950-1955", "py"]) K.CDR2 <- sum(kenya[kenya$period=="2005-2010", "deaths"]) / sum(kenya[kenya$period=="2005-2010", "py"]) K.CDR <- c(K.CDR1, K.CDR2) ## Sweden's CDR S.CDR1 <- sum(sweden[1:15, "deaths"]) / sum(sweden[1:15, "py"]) S.CDR2 <- sum(sweden[16:30, "deaths"]) / sum(sweden[16:30, "py"]) S.CDR <- c(S.CDR1, S.CDR2) ## World's CDR W.CDR1 <- sum(world[c(1:15), "deaths"]) / sum(world[c(1:15), "py"]) W.CDR2 <- sum(world[c(16:30), "deaths"]) / sum(world[c(16:30), "py"]) W.CDR <- c(W.CDR1, W.CDR2) ## Print CDRs. K.CDR; S.CDR; W.CDR ## Q5 K.ASDR <- kenya[16:30, "deaths"] / kenya[16:30, "py"] S.ASDR <- sweden[16:30, "deaths"] / sweden[16:30, "py"] options(scipen = 1) # avoid exponential notation K.ASDR; S.ASDR S.ASDR - K.ASDR ## In the every age class Kenya's ASDR is higher. ## Q5 ## rate of Sweden's each age class, P sp <- sweden$py[16:30] / sum(sweden$py[16:30]) ## Kenya's counterfactual CDR cfK.CDR <- sum(K.ASDR * sp) # result ## Compare the two K.CDR[2]; cfK.CDR # result: 0.01038914 0.02321646 K.CDR[2] - cfK.CDR ## This means if Kenya has Sweden's population distribution, ## CDR will be 1 percent point higher than factual CDR. ## Thus, the comparison of factual CDRs b/w Kenya & Sweden ## does not necessarily give a meaningful result.
章末問題解答(1章-1) Rコード
https://www.econ-stat-grad.com/entry/statistics/qss/solution/ch1-1www.econ-stat-grad.com