docx

advertisement
Creating ScatterPlots and BoxPlots in R
Courtney Saari (cnosac1@tigers.lsu.edu)
11/13/09
This is a step-by-step self-tutorial on how to create various plots in R. It examines the many uses
of the plot() function.
References used:
Crawley, Michael J. 2007. The R Book. John Wiley & Sons, U.K.
Venables, W. N., D.M. Smith, and the R Development Core Team. 1999-2009. An Introduction
to R: Notes on R: A Programming Environment for Data Analysis and Graphics. Version
2.10.0 (2009-10-26).
plot() is a generic function that can be used to produce a variety of plots, depending on the type
or class of the first argument. For the most part, we would use the plot function to create
scatterplots, but it can also be used to create a time-series plot, box-and-whisker plots, and
distributional plots.
plot(x,y) Scatterplot of y against x
plot(xy) This also produces a scatterplot of y against x, where xy is either a list with two elements x and y or a two-column matrix
plot(x) This can be used to plot a time-series plot
plot(x) This can also be used to plot the values in a numeric vector against their index in the
vector
plot(factor, y) box-and-whisker plot of y for each level of factor
plot(f) or plot(factor) bar plot of f
plot(df) distributional plot of the variables in a data frame
plot(~ expr) distributional plot of the variables of a number of named objects
plot(y ~ expr) plots y against every object named in expr
abline(lm(y~x)) adds a regression line to your scatterplot, uses the function for a linear model
The following pages have some step-by-step coding instructions with a sample
dataset (body.csv) and graphics.
# REMOVE ALL PREVIOUS VARIABLES
# (this clears every object in the workspace, including hidden ones)
rm(list = ls(all.names = TRUE))
# SET YOUR WORKING DIRECTORY TO READ IN FILES
setwd("/Users/Courtney/R_Project/")
# CLOSE ALL PREVIOUS GRAPHICS WINDOWS
graphics.off()
# CREATE AN OUTPUT DIRECTORY TO SAVE ALL PLOTS AND TABLES
# (this only stores the path as a string; it does not create the folder)
od <- "/Users/Courtney/R_Project/"
# IMPORT THE DATA SET
# header = TRUE: the first row of the file holds the column names
body <- read.csv("/Users/Courtney/R_Project/body_data.csv.dat", header = TRUE)
# CHECK TO MAKE SURE IT LOADED CORRECTLY
# (prints the first five rows)
body[1:5, ]
# MAKING A SIMPLE SCATTERPLOT OF Chest Depth vs Chest Diameter
# (first argument is the x axis, second argument is the y axis)
plot(body$chest_diam, body$chest_depth)
# ADDING A REGRESSION LINE TO THE SCATTERPLOT
# lm(y ~ x) fits the linear model; abline() draws the fitted line on the current plot
abline(lm(body$chest_depth ~ body$chest_diam))
# ADDING & EDITING THE AXIS LABELS
# (calling plot() again starts a fresh figure, so the regression line drawn
# above is not carried over to this one)
plot(body$chest_diam, body$chest_depth,
     main = "Chest Diameter vs Depth",
     xlab = "Chest Diameter", ylab = "Chest Depth")
# SAVING THE SCATTERPLOT
# dev.copy() copies the current figure to a png device; dev.off() closes that
# device so the file is written
dev.copy(png, "ChestDepthvDiam.png")
dev.off()
# MAKE A NEW SCATTERPLOT OF Knee Girth vs Knee Diameter
plot(body$knee_diam, body$knee_girth,
     main = "Scatterplot of Leg Measurements",
     xlab = "Diameter (cm)", ylab = "Girth (cm)")
# ADDING A REGRESSION LINE TO THE SCATTERPLOT
abline(lm(body$knee_girth ~ body$knee_diam))
# ADDING NEW POINTS TO THE SCATTERPLOT
# points() draws on the existing plot; it does not rescale the axes
points(body$ankle_diam, body$ankle_girth, col = "blue")
# FINDING THE X & Y AXIS VALUES OF BOTH SETS OF POINTS
# (the commented lines below are the console output recorded in the tutorial)
range(c(body$knee_diam, body$ankle_diam))
# [1] 9.9 24.3
range(c(body$knee_girth, body$ankle_girth))
# [1] 16.4 49.0
# READJUSTING THE AXES & COLOR-CODING THE 2 SETS OF POINTS
# x holds both diameters and y holds both girths; type = "n" sets up the axes
# without drawing any points, so each group can be added in its own colour
plot(c(body$knee_diam, body$ankle_diam), c(body$knee_girth, body$ankle_girth),
     xlim = c(5, 30), ylim = c(10, 60),
     main = "Scatterplot of Leg Measurements",
     xlab = "Diameter (cm)", ylab = "Girth (cm)", type = "n")
points(body$knee_diam, body$knee_girth, col = "turquoise2")
points(body$ankle_diam, body$ankle_girth, col = "hotpink2")
# ADDING THE 2 REGRESSION LINES, COLOR-CODING, & ADJUSTING LINE WIDTH
abline(lm(body$knee_girth ~ body$knee_diam), col = "turquoise2", lwd = 2)
abline(lm(body$ankle_girth ~ body$ankle_diam), col = "hotpink2", lwd = 2)
# CHANGING THE SHAPES OF THE 2 SETS OF POINTS
# (points() adds symbols on top of what is already drawn; it does not erase
# the earlier symbols)
points(body$knee_diam, body$knee_girth, col = "turquoise2", pch = 24) # triangle, point up
points(body$ankle_diam, body$ankle_girth, col = "hotpink2", pch = 22) # square
# CHANGING THE SHAPES OF THE 2 SETS OF POINTS AGAIN
points(body$knee_diam, body$knee_girth, col = "turquoise2", pch = 5) # open diamond
points(body$ankle_diam, body$ankle_girth, col = "hotpink2", pch = 8) # asterisk
# ADDING A LEGEND
# the locator function allows you to click where on the graph you want the legend
# (this needs an interactive graphics window)
legend(locator(1), c("knee", "ankle"),
       pch = c(5, 8), col = c("turquoise2", "hotpink2"))
# SAVING THE SCATTERPLOT
dev.copy(png, "LegGirthsvDiams.png")
dev.off()
# IDENTIFYING INDIVIDUALS IN SCATTERPLOTS USING as.numeric() TO CONVERT
# THE GROUPING FACTOR / SUBJECT IDENTIFIER INTO A COLOR AND/OR SYMBOL
# as.numeric() on a factor returns the integer level codes, which are then
# used as colour and plotting-symbol numbers
subject <- factor(body$height)
plot(body$knee_diam, body$knee_girth,
     col = as.numeric(subject), pch = as.numeric(subject),
     xlab = "diameter", ylab = "girth")
# note this does not work well with this dataset, because there isn't a subject or ID variable
# LABELING EACH POINT ON THE SCATTERPLOT WITH A 3RD VARIABLE
plot(body$knee_diam, body$knee_girth,
     xlab = "Diameter (cm)", ylab = "Girth (cm)",
     main = "Knee Dimensions by Age")
# pos = 1 places each label below its point, offset sets the gap to the point,
# and cex = 0.7 shrinks the label text
text(body$knee_diam, body$knee_girth,
     labels = round(body$age, 2), pos = 1, offset = 0.5, cex = 0.7)
# COLOR-CODING EACH POINT ON THE SCATTERPLOT WITH A 3RD VARIABLE
# points above the median age are drawn in red, the others in black
plot(body$knee_diam, body$knee_girth,
     xlab = "Diameter (cm)", ylab = "Girth (cm)",
     main = "Knee Dimensions by Age", pch = 16,
     col = ifelse(body$age > median(body$age), "red", "black"))
# ADDING TEXT TO A PLOT
# locator(1) waits for one mouse click on the plot and uses that position
text(locator(1), "part a.")
# you can also type in the exact location of where you would like the text to go,
# if you do not want to use the locator function
text(x = 16, y = 45, "test")
# ---- Shapes on a scatterplot ----
# Base figure that the shapes below are drawn onto.
plot(
  body$knee_diam, body$knee_girth,
  xlab = "Diameter (cm)",
  ylab = "Girth (cm)",
  main = "Scatterplot With Shapes"
)

# Rectangle: rect(xleft, ybottom, xright, ytop).
# This one outlines three outlying points.
rect(
  xleft = 21, ybottom = 45, xright = 24.5, ytop = 49.5,
  border = "dodgerblue"
)

# Rectangle filled with shading lines; density= and angle= control the hatching.
rect(
  xleft = 22, ybottom = 30, xright = 24, ytop = 34,
  border = "tomato2", density = 12, angle = 125, col = "tomato2"
)

# Single-headed arrow; by default the head sits at the second end (x1, y1).
arrows(
  x0 = 16.1, y0 = 28, x1 = 16.1, y1 = 29.9,
  col = "darkorchid2", lwd = 2
)

# Double-headed arrow: code = 3 puts a head on both ends.
arrows(
  x0 = 21.9, y0 = 42.9, x1 = 22.9, y1 = 42.2,
  col = "goldenrod3", code = 3
)

# Error bar: a vertical segment whose heads are flattened to straight caps
# by angle = 90.
arrows(x0 = 22.4, y0 = 38, x1 = 22.4, y1 = 40, code = 3, angle = 90)

# Polygon from six mouse clicks collected with locator().
locations <- locator(6) # click on 6 points
class(locations)
locations$x
locations$y
polygon(
  locations,
  density = 15, angle = 45,
  border = "seagreen2", col = "seagreen2"
)
# PLOT MULTIPLE GRAPHS AT ONE TIME
# mfrow = c(2, 2) splits the figure into a 2 x 2 grid that is filled row by row.
# par() returns the previous settings, which are kept so they can be restored.
old_par <- par(mfrow = c(2, 2))
plot(body$height, body$chest_depth, xlab = "Height (cm)", ylab = "Chest Depth (cm)")
plot(body$weight, body$chest_depth, xlab = "Weight (kg)", ylab = "Chest Depth (cm)")
plot(body$height, body$chest_diam, xlab = "Height (cm)", ylab = "Chest Diameter (cm)")
plot(body$weight, body$chest_diam, xlab = "Weight (kg)", ylab = "Chest Diameter (cm)")
# restore the previous layout so later plots are not squeezed into the 2 x 2 grid
par(old_par)
# BOXPLOTS
# first we need to declare the factors
gender <- factor(body$gender)
# now we can make a simple boxplot with the plot(f,y) function of y for each level of f
# you can use plot() or boxplot() they both do the same thing, as long as you have already
# declared the factors
plot(gender, body$height, xlab = "Gender (0=Female, 1=Male)", ylab = "Height")
# adding notches to indicate significant difference on the boxplot
# (notches that do not overlap suggest the two medians differ)
plot(body$height ~ gender,
     xlab = "Gender (0=Female, 1=Male)", ylab = "Height", notch = TRUE)
# here is a boxplot with more factors
AgeGroup <- factor(body$age_group)
plot(AgeGroup, body$chest_diam, xlab = "Age Group", ylab = "Chest Diameter")
# using boxplots for multiple comparisons with the function: plot(response~factor)
plot(body$chest_diam ~ factor(AgeGroup))
# note this gives you the same plot as using the code plot(AgeGroup,body$chest_diam)
Download