#R script for Ann Arbor ASA R graphics class, 16 Jan. 2012, Kyle Enger

#A few basics:
#Get help for an R function (should open in your web browser).
#Here 'FunctionName' is a placeholder; substitute a real name, e.g. ?plot.
?FunctionName
#Generate & store a sequence of values.
#Some people type '<-' (a left arrow) instead of the first '='.
z = seq(from=2, to=100, by=2)
print(z)    #Look at the sequence we just generated.
z[3]        #Retrieve a particular value by its index.
z[2:4]      #Retrieve a series of values.
#Remember that row & column index numbers go in square brackets.
#Functions use parentheses to enclose their arguments.

#Start by trying out a few handy functions.
#Scatterplot of 10 random sets of xy coordinates.
#plot() starts a new graph (opening a graphics device if none is open).
plot(x=runif(10), y=runif(10))
#Points on a sine wave.
plot(seq(from=0, to=10, by=0.1), sin(seq(from=0, to=10, by=0.1)))
#Graphing the sine wave function directly over a certain range.
curve(sin(x), xlim=c(0,10))
#Overlaying some points on the sine wave. points() does not start a new graph.
points(seq(from=0,to=10,by=0.1), sin(seq(from=0,to=10,by=0.1)))
#Another arbitrary function; curve() also starts a new graph.
#Note the automatic y axis label.
curve(1/x*sin(5*x), xlim=c(0,10))
#Can add another arbitrary function using lines(),
#but first we need to generate & store x values.
x = seq(from=1, to=9, by=0.01)
#The x and y axis settings set (implicitly) by curve() above are retained.
lines(x, cos(x), col='brown')
#Adding a fat horizontal line at 0 (hence 'h=0'). 'lwd'= line width.
abline(h=0, lwd=4)
#Adding a red line of intercept 0 and slope 1. 'col'= color.
abline(a=0, b=1, col='red')
#Adding an arrow.
arrows(x0=2.3, y0=-1/2, x1=1, y1=-1.1, angle=15)
#Adding a label at specified coordinates.
text(x=3.2, y=-1/2, 'Minimum')
#Formatting mathematical expressions is tricky; see ?plotmath or demo(plotmath).
text(x=1.3, y=4, expression(y == over(1,x)~'sin(5x)'))
#Note that the legend is completely independent of anything else.
#You have to make certain to specify it correctly.
legend('topright', title='The Legend', legend=c('y=0','y=x'),
       lty=c('solid','solid'), lwd=c(4,1), col=c('black','red'))
#Uses locator() to put a big X wherever you click on the graph.
#'pch'= print character, 'cex'= character expansion.
points(locator(1),pch=4,cex=3)

#Now let's make a two-part graph.
#'mfrow' divides up the plot area. You can access many options with par(),
#including 'lty', 'lwd', 'pch', 'cex', etc. See ?par.
par(mfrow=c(1,2),lwd=2,col='cyan')
curve(sin(x), main='Sine')
curve(cos(x), main='Cosine')    #Double paned graph.

#Any questions?
#Play around with these a little bit - see what you can do.
#Discuss with your neighbors, & raise your hand if you're stuck.

#Basic dataset exploration: R comes with many built-in datasets.
?ToothGrowth        #See what the heck this dataset is about. Guinea pigs, naturally.
tg = ToothGrowth    #Make a copy with a shorter name, for convenience.
str(tg)             #View the structure of the dataset.
summary(tg)         #Summarize the dataset (quantiles & mean).
head(tg)            #Look at the first few rows of the dataset.
tail(tg)            #Look at the last few rows of the dataset.
tg[13:19,]          #Look at an arbitrary section of the dataset.
#Look at some random rows of the dataset.
#nrow(tg) is used rather than a hard-coded row count.
tg[sample(nrow(tg),5),]

#Now we start graphing.
dev.new()    #Start a brand-new graphics window (AKA, 'device').
#Simple histogram. Good place to start.
hist(tg$len, main='Histogram of tooth length')
par(mfrow=c(1,2))    #Split graphics device into 1 row, 2 columns.
#Density plot. Depends on bandwidth - see next line.
plot(density(tg$len), main='Density plot of tooth length')
#Note '\n' for a new line.
plot(density(tg$len, bw=1), main='Density plot of tooth length,\nbandwidth=1')
dev.off()    #Turn off (i.e., close) the graphics device.
#Start a new one. Note that the 'mfrow' option in par() a few lines above
#has been cleared.
dev.new()
#Box plot, with the 2 treatments side-by-side. Standard model syntax.
boxplot(len ~ supp, data=tg, main='Box plot of tooth length by treatment')
par(mfrow=c(1,2))    #Split graphics device into 1 row, 2 columns (again).
#which() outputs index values that match the condition.
hist(tg$len[which(tg$supp=='OJ')],
     main='Histogram of tooth length,\norange juice group')
#Mark median.
points(x=median(tg$len[which(tg$supp=='OJ')]), y=0)
hist(tg$len[which(tg$supp=='VC')],
     main='Histogram of tooth length,\nvit. C group')
#Mark median with a large X.
points(x=median(tg$len[which(tg$supp=='VC')]), y=0, pch=4, cex=2)
#Axes don't match on these 2 histograms, which makes it hard to compare them.
par(mfrow=c(2,1))    #Now we'll have 1 histogram atop the other.
#Same breaks, y limits, and x label on both, so the panels are comparable.
hist(tg$len[which(tg$supp=='OJ')],
     main='Histogram of tooth length,\norange juice group',
     breaks=seq(from=0, to=35, by=5), ylim=c(0,10), xlab='Tooth length')
hist(tg$len[which(tg$supp=='VC')],
     main='Histogram of tooth length,\nvit. C group',
     breaks=seq(from=0, to=35, by=5), ylim=c(0,10), xlab='Tooth length')

#Now let's try a couple scatterplots.
plot(x=tg$dose, y=tg$len, main='Scatterplot of dose by length')
#What if points are on top of each other? jitter() addresses this by adding 'noise'.
plot(x=jitter(tg$dose), y=tg$len,
     main='Scatterplot of dose by length with jitter')
#Can use mtext() for multichart titles.
mtext('Same graph, with and without jitter()', outer=TRUE, padj=1, cex=1.5)

#Bar plots
table(tg$dose,tg$supp)    #How many observations do we have for each dose?
par(mfrow=c(1,2))         #Switch back to 2 graphs next to each other.
barplot(table(tg$dose), main='Number of observations\nfor each dose')
#Not terribly interesting, but it illustrates barplot().
barplot(table(tg$supp), main='Number of observations\nfor each treatment')

#Any questions?

#Now load your own data, like this:
#Read in your own .CSV file and examine/graph as above.
#The path below is a placeholder; replace it with a real file before running.
YourData = read.csv('X:/Path/To/Your/File/YourFile.csv',header=TRUE)
#or pick a dataset that looks interesting from the many datasets already in R.
#Browse the list of datasets in R (use arrow keys to scroll). Hit 'q' when done.
library(help='datasets')
#Learn more about some dataset or another, and examine/graph it as above.
#Here 'YourChoiceOfDataset' is a placeholder; substitute a real dataset name.
?YourChoiceOfDataset
#Explore your data with graphs. Don't worry about making a brilliant chart;
#just try out a few things and get a feel for the graphing functions.
#Take some time, try out some functions, discuss with your neighbors,
#raise your hand if you have any questions.

#Exporting graphs, to include them in manuscripts, presentations, etc.
#Find out what your working directory is.
#That's where R looks for files and saves files.
getwd()
#Set your working directory to whatever you like - maybe a flash drive.
#The path below is a placeholder; replace it with a real directory before running.
setwd('X:/Path/To/Your/Working/Directory')
dev.new()
#Making a plot to show possible plot characters.
plot(seq(from=1,to=16,by=1),rep(0,16),pch=seq(from=1,to=16,by=1), cex=2)
#Creates a new graphics device and copies the current plot window to it.
dev.copy(pdf, 'TestPlot.pdf')
#Turns off that new graphics device, saving the file.
dev.off()
#Now open your working directory and doubleclick the file, and see what it
#looks like. It often differs from the plot window.

#A more reliable way to export graphs: write them directly to the
#device/filetype you want: png(), tiff(), pdf(), etc.
#In addition to .PNG and .PDF, R supports many other graphics formats.
#.TIF is also useful.
png('TestPlot2.png')
hist(rnorm(1000, mean=0, sd=1), main='1000 standard normal variates')
dev.off()    #Now you can open the file in your working directory and view it.
#It is convenient to tinker with these graphs by continually modifying & re-running code,
#then repeatedly viewing the graph until it looks right.

#===========================================================================================================#
#Apply what you have learned ASAP (tomorrow!) when you return to work. Learning R requires a LOT of practice.
#===========================================================================================================#

#R graphics are very powerful, but great power often comes with great confusion.
#There are options you can set to do just about anything you want - but they are not always well documented.
#To search for general R info, try http://rseek.org or just Google it, prefixing your query with 'GNU R'.
?par    #Remember that these options can also be used within graphing functions.
#Get any color you want: http://research.stowers-institute.org/efg/R/Color/Chart/index.htm
#colors() is a function, but consider it a big vector.
#Use the names that it outputs with the 'col' option.
colors()[155]
#How to add special symbols: http://statisticsr.blogspot.com/2008/01/special-symbols-on-r-plot.html
#Very useful site for learning R in general (or Google 'Quick-R'): http://www.statmethods.net/
#R graph gallery (includes source code): http://addictedtor.free.fr/graphiques/

#There are other packages that further extend graphing capabilities.
#The 'lattice' package is powerful and widely used. 'ggplot2' is another popular package.
#Install packages like this (very easy): install.packages('YourPackageName')

#The standard 'R Editor' in Windows is pretty crummy. This free text editor is better: http://notepad-plus-plus.org/
#Better yet: Switch to Linux (try Ubuntu or Linux Mint) and use 'gedit' with the 'rgedit' plugin. R runs faster on Linux.