python numpy and pandas R R vs Stata R graph LaTeX asymptote Blog

目次

散布図

トップへ

日本棋院囲碁棋士の賞金ランキング(2011): type="h"で棒グラフ

x <- read.csv("shokin2011.csv", as.is=T)

plot(x$RANK, x$SHOKIN/10000, type="h", xlab="Rank", ylab="prize (10,000 yen)", main="Prize Money Ranking of Japanese Go Players (2011)", family="serif", ylim=c(0, 10000), yaxs="i", yaxt="n", xaxt="n")
lab <- seq(2, 10, by=2)*1000
axis(side=2, at=lab, labels=prettyNum(lab, big.mark=","), family="serif", lty=0)
axis(side=1, at=x$RANK, x$RANK, family="serif", lty=0)
grid()
points(x$RANK, x$SHOKIN/10000, pch=19)

日本プロ野球リーグの勝率(2012): エラーバーを描く

x <- read.csv("npb2012.csv", as.is=T)
x$N <- x$W + x$L
q <- qnorm(.975)
se <- (x$PCT*(1-x$PCT)/x$N)^(1/2)
x$ub <- x$PCT + q*se
x$lb <- x$PCT - q*se

# moc data for setting plot area
tmpy <- c(x$PCT, x$ub, x$lb)
tmpx <- c(x$rank, x$rank, x$rank)

# central
plot(tmpy ~ tmpx, type="n", xaxt="n", xlab="Team", ylab="PCT", main="Central League (2012)", family="Bookman")
ss <- (x$league=="C")
axis(side=1, at=x$rank[ss], labels=x$team[ss], lty=0)
grid()
arrows(x$rank[ss], x$ub[ss], x$rank[ss], x$lb[ss], angle=90, length=.05, code=3, col="brown4", lty=2)
points(PCT ~ rank, data=x[ss,], cex=2.5, pch=15, col="white")
points(PCT ~ rank, data=x[ss,], cex=2.25, pch=0)

# pacific
plot(tmpy ~ tmpx, type="n", xaxt="n", xlab="Team", ylab="PCT", main="Pacific League (2012)", family="Bookman")
ss <- (x$league=="P")
axis(side=1, at=x$rank[ss], labels=x$team[ss], lty=0)
grid()
arrows(x$rank[ss], x$ub[ss], x$rank[ss], x$lb[ss], angle=90, length=.05, code=3, col="darkgreen", lty=2)
points(PCT ~ rank, data=x[ss,], cex=2.5, pch=15, col="white")
points(PCT ~ rank, data=x[ss,], cex=2.25, pch=0)

自殺と犯罪: 文字をプロット、Bubble plot

y <- read.csv("pref-data.csv", as.is=T)
y$srate <- y$suicide / y$pop * 10^5
y$crate1 <- y$crime / y$pop * 10^3
y$pop.dens <- y$pop / y$res.area
y$pbrate <- y$psy.bed / y$pop * 10^4
y$aname <- gsub("[県府]", "", y$aname)
y$aname[y$acode==13000] <- "東京"

# text
plot(y$crate1[y$year %in% c(1995, 2005)], y$srate[y$year %in% c(1995, 2005)], type="n", log="xy", xlab="Crimes / 1,000 persons", ylab="Suicide / 100,000 persons", main="Suicide & Crime")
col <- rep(NA, nrow(y))
col[y$year==1995] <- "blue"
col[y$year==2005] <- "red"
ss <- y$year %in% c(1995, 2005)
text(y$crate1[ss], y$srate[ss], labels=y$aname[ss], col=col[ss])
smo1 <- smooth.spline(y$crate1[y$year == 2005], y$srate[y$year == 2005], spar=0.85)
smo2 <- smooth.spline(y$crate1[y$year == 1995], y$srate[y$year == 1995], spar=0.85)
lines(smo1)
lines(smo2)
legend("topright", legend=c(1995, 2005), lwd=2, col=c("blue", "red"))

# pop
plot(y$crate1[y$year %in% c(1995, 2005)], y$srate[y$year %in% c(1995, 2005)], type="n", log="xy", xlab="Crimes / 1,000 persons", ylab="Suicide / 100,000 persons")
col <- rep(NA, nrow(y))
col[y$year==1995] <- "blue"
col[y$year==2005] <- "red"
ss <- y$year %in% c(1995, 2005)
par(new=T)
symbols(log(y$crate1[ss]), log(y$srate[ss]), circle=(y$pop[ss])^(2/3), bg=col[ss], fg="aliceblue", inches=.2, main="Suicide & Crime (Blbble Size: Population)", xaxt="n", yaxt="n", xlab="", ylab="")
legend("topright", legend=c(1995, 2005), lwd=2, col=c("blue", "red"))

# density
plot(y$crate1[y$year %in% c(1995, 2005)], y$srate[y$year %in% c(1995, 2005)], type="n", log="xy", xlab="Crimes / 1,000 persons", ylab="Suicide / 100,000 persons")
col <- rep(NA, nrow(y))
col[y$year==1995] <- "blue"
col[y$year==2005] <- "red"
ss <- y$year %in% c(1995, 2005)
par(new=T)
symbols(log(y$crate1[ss]), log(y$srate[ss]), circle=(y$pop.dens[ss])^(1/2), bg=col[ss], fg="aliceblue", inches=.2, main="Suicide & Crime (Blbble Size: Population Density)", xaxt="n", yaxt="n", xlab="", ylab="")
legend("topright", legend=c(1995, 2005), lwd=2, col=c("blue", "red"))

# bed
plot(y$crate1[y$year %in% c(1995, 2005)], y$srate[y$year %in% c(1995, 2005)], type="n", log="xy", xlab="Crimes / 1,000 persons", ylab="Suicide / 100,000 persons")
col <- rep(NA, nrow(y))
col[y$year==1995] <- "blue"
col[y$year==2005] <- "red"
ss <- y$year %in% c(1995, 2005)
par(new=T)
symbols(log(y$crate1[ss]), log(y$srate[ss]), circle=(y$pbrate[ss])^(1.7), bg=col[ss], fg="aliceblue", inches=.2, main="Suicide & Crime (Blbble Size: Psychiatry Bed per Person)", xaxt="n", yaxt="n", xlab="", ylab="")
legend("topright", legend=c(1995, 2005), lwd=2, col=c("blue", "red"))

その他の散布図

トップへ

市町村人口と面積: Contour plot

library(MASS)
library(gplots)
x <- read.csv("city-data.csv", as.is=T)
x <- x[which(!x$seirei.ku),]
x$pop <- x$pop.male.2010 + x$pop.female.2010
x <- x[which(x$pop > 0) & x$res.area.2010,]
png("cont0.png")
plot(x$pop, x$res.area.2010, log="xy", family="serif", xlab="人口 (2010)", ylab="可住地面積 (2010)")
dev.off()

# 密度を常用対数で推計
dens <- kde2d(log10(x$pop), log10(x$res.area.2010), n=50)

# 軸を後で自分で作る用意
lev <- pretty(range(dens$z), 10)
axy <- pretty(range(dens$y))
laby <- format(10^(axy), digits=1)
axx <- pretty(range(dens$x))
labx <- format(10^(axx), digits=1)

png("cont1.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"))
dev.off()

png("cont2.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=gray((length(lev):1)/length(lev)))
dev.off()

png("cont3.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=hcl(seq(0,360,length=length(lev))))
dev.off()

png("cont4.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=rev(rainbow(length(lev))))
dev.off()

png("cont5.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=rev(heat.colors(length(lev))))
dev.off()

png("cont6.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=rev(topo.colors(length(lev))))
dev.off()

png("cont7.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=rev(terrain.colors(length(lev))))
dev.off()

png("cont8.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), color.palette=colorRampPalette(c("red", "orange", "yellow", "green", "blue"), space="rgb"))
dev.off()

png("cont9.png")
par(family="serif")
filled.contour(dens, levels=lev, plot.axes={axis(side=2, at=axy, label=laby); axis(side=1, at=axx, label=labx)}, plot.title=title(xlab="人口 (2010)", ylab="可住地面積 (2010)"), col=rich.colors(length(lev)))
dev.off()

Oldfaithful: Contour plot with 等高線

library(lattice)
library(MASS)
x <- read.csv("faithful.txt", as.is=T, sep=" ", header=F)

dens <- kde2d(x$V1, x$V2, n=250)

lev <- pretty(range(dens$z), 12)
png("faithful1.png")
par(family="Palatino")
filled.contour(dens, levels=lev, col=gray(seq(.9,.05,length=length(lev))), main="Oldfaithful", xlab="Duration of the eruption (min)", ylab="Interval (min)")
dev.off()

png("faithful2.png")
par(family="Palatino")
filled.contour(dens, levels=lev, col=rev(terrain.colors(length(lev))), main="Oldfaithful", xlab="Duration of the eruption (min)", ylab="Interval (min)")
dev.off()

# latticeでやるには行列ではだめ?
dat <- expand.grid(x=dens$x, y=dens$y)
dat$z <- as.vector(dens$z)

png("faithful3.png")
par(family="Platino")
contourplot(z~x*y, data=dat, cuts=10, region=T, col.regions=gray(seq(.95,.05,length=100)), main="Oldfaithful", xlab="Duration of the eruption (min)", ylab="Interval (min)")
dev.off()

png("faithful4.png")
par(family="Platino")
contourplot(z~x*y, data=dat, cuts=10, region=T, alpha.regions=.9, col.regions=rev(heat.colors(100)), main="Oldfaithful", xlab="Duration of the eruption (min)", ylab="Interval (min)")
dev.off()

# Ian Taylor's modified function
# see http://wiki.cbr.washington.edu/qerm/index.php/R/Contour_Plots
source("http://tips.futene.net/rsouko/filled.contour2.R")

png("faithful1b.png")
par(family="Palatino")
filled.contour2(dens, levels=lev, col=gray(seq(.9,.05,length=length(lev))), main="Oldfaithful", xlab="Duration of the eruption (min)", ylab="Interval (min)")
contour(dens, add=T, levels=lev)
dev.off()

png("faithful2b.png")
par(family="Palatino")
filled.contour2(dens, levels=lev, col=rev(terrain.colors(length(lev))), main="Oldfaithful", xlab="Duration of the eruption (min)", ylab="Interval (min)")
contour(dens, add=T, levels=lev)
dev.off()

飛ばないボール: Boxplot

打者成績
投手成績
x <- read.csv("npb2010-2011_b.csv", as.is=T)
lab <- ""
lab[x$league %in% "C"] <- "セ"
lab[x$league %in% "P"] <- "パ"
lab <- paste(lab, x$year)
levels <- c("セ 2010", "セ 2011", "パ 2010", "パ 2011")
x$lab <- factor(lab, levels=levels)


par(family="serif")
boxplot(AVG ~ lab, ylab="打率", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=x)

par(family="serif")
boxplot(SLG ~ lab, ylab="長打率", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=x)

par(family="serif")
boxplot(SLG/AVG ~ lab, ylab="長打率 / 打率", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=x)

par(family="serif")
boxplot(K/AB ~ lab, ylab="三振数 / 打数", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=x)


y <- read.csv("npb2010-2011_p.csv", as.is=T)
lab <- ""
lab[y$league %in% "C"] <- "セ"
lab[y$league %in% "P"] <- "パ"
lab <- paste(lab, y$year)
levels <- c("セ 2010", "セ 2011", "パ 2010", "パ 2011")
lab <- factor(lab, levels=levels)

par(family="serif")
boxplot(ERA ~ lab, ylab="防御率", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=y)

par(family="serif")
boxplot(HR/IP*9 ~ lab, ylab="被本塁打 / 9イニング", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=y)

par(family="serif")
boxplot(K/IP*9 ~ lab, ylab="奪三振率", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=y)

par(family="serif")
boxplot(K/butters ~ lab, ylab="奪三振 / 打者", boxwex=.6, outpch=4, col=c("cyan2", "darkolivegreen3"), data=y)

日本の有配偶率・未婚率: Bar plot

z <- read.csv("20s.csv", as.is=T)

# change color if increased
flg <- rep(F, nrow(z))
flg[-1] <- z$never.married.rate[-1] <= z$never.married.rate[-nrow(z)]
col <- rep("darkolivegreen4", nrow(z))
col[flg] <- "darkolivegreen3"

barplot(z$married.rate, names.arg=z$year, border=1, space=.3, xlab="Year", col=col, density=60, main="Fraction married (20s)", family="serif")

# change color if decreased
flg <- rep(F, nrow(z))
flg[-1] <- z$married.rate[-1] >= z$married.rate[-nrow(z)]
col <- rep("burlywood4", nrow(z))
col[flg] <- "burlywood3"

barplot(z$never.married.rate, names.arg=z$year, border=1, space=.3, col=col, density=80, xlab="Year", main="Fraction never married (20s)", family="serif")

ヒストグラム

トップへ

日本の市町村人口の分布

y <- read.csv("city-data.csv", as.is=T)

y$pop <- as.numeric(y$pop.male.2010) + as.numeric(y$pop.female.2010)

# linear scale 
h <- hist(y$pop[!y$seirei.ku], breaks="Scott", freq=F, col="darkorchid", density=50, border=1, xlab=("population"), main="Distribution of Japanese City Population (2010)", family="Bookman")

# log scale
logp <- log10(y$pop[!y$seirei.ku])
h <- hist(logp, breaks="Scott", freq=F, col="darkcyan", density=seq(30, 80, by=2.5), border=1, xlab="population", main="Distribution of Japanese City Population (2010)", axes=F, family="Bookman")

# trick to make log scale hist 
Axis(side=2)
Axis(side=1, at=h$mids, labels=format(10^(h$mids), digits=1), lty=NULL, family="Bookman") 

# add density estimate
dens <- density(logp)
lines(dens, lwd=2)

その他いろいろ

トップへ

イチローのヒット数: 画像の上にグラフを描く

library(png)

h <- read.csv("ichiro-hits.csv", as.is=T)
ima <- readPNG("ichiro.png")

plot(h$year, h$H, type="n", ann=F, xlab="", ylab="", family="Times")

# fill by image
rasterImage(ima, par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4])

# add grid
grid()

# make it a bit blur
rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col = "gray", density=20)

par(new=T)
plot(h$year, h$H, type="b", lwd=7, pch=20, cex=1.2, col="white", axes=F, xlab="", ylab="")
par(new=T)
plot(h$year, h$H, type="b", lwd=5, pch=20, cex=1, col="darkolivegreen4", axes=F, xlab="", ylab="")
title(main="Ichiro's Hits", family="Times")

マーカーと線の設定

pchs <- 0:25
types <- c("p", "l", "b", "c", "o")

plot(c(min(pchs), max(pchs)), c(1, length(types)), type="n", xlab="pch", ylab="type", axes=F, family="mono")
Axis(side=2, at=1:length(types), labels=types, lty=0, family="mono")
Axis(side=1, at=pchs, labels=pchs, lty=0, family="mono")
rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col="azure2", density=90)
for (j in 1:length(types)) {
    points(pchs, rep(j, length(pchs)), pch=pchs, type=types[j])
}

フォント実験

font.names.png <- c("Short", "Canonical", "mono", "Courier", "sans", "Helvetica", "serif", "Times", "AvantGarde", "Bookman", "Helvetica-Narrow", "NewCenturySchoolbook", "Palatino", "URWGothic", "URWBookman", "NimbusMon", "URWHelvetica", "NimbusSan", "NimbusSanCond", "CenturySch", "URWPalladio", "URWTimes", "NimbusRom")
pos <- runif(length(font.names.png)) 
y.rng <- c(0, 1)  
x.rng <- c(-.2, 1.2)  

fun <- function(face) {
    plot(x.rng, y.rng, type="n", xlab="", ylab="", xaxt="n", yaxt="n", ann=F)
    rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col = "moccasin", density=60)
    grid(col="red")
    for (j in 1:length(font.names.png)) {
        f <- font.names.png[j]
        text(x=pos[j], y=(j / length(pos)), f, family=f, font=face)
    }
}

fun(1)
fun(2)
fun(3)
fun(4)