R Basics 2 exercise answers (64 KB pptx)

advertisement
Basics
1
Exercise 7 Answers
# 7.1. Load testdat.csv again, assigning it to object ex1df.
ex1df <- read.csv("PlotData/testdat.csv", header = TRUE,
                  stringsAsFactors = FALSE)

# 7.2. Display the dimensions, the column names, and the structure of ex1df.
dim(ex1df)
names(ex1df)
str(ex1df)

# 7.3. Display the first 6 rows of ex1df (tail() also shows the last 6 rows).
head(ex1df)
tail(ex1df)
# or ex1df[1:6, ]
# or ex1df[(nrow(ex1df) - 5):nrow(ex1df), ]

# 7.4. Display this table ordered by heights with the largest trees first and
# display the maximum height.
ex1df[order(ex1df$HT, decreasing = TRUE), ]
max(ex1df$HT)

# 7.5. Change name of column STATUSCD to 'ESTADO', DIA to 'DAP', and column HT
# to 'ALTURA'.
# Each rename matches on the current name, so it is a no-op if the column is
# absent and does not depend on column position.
names(ex1df)[names(ex1df) == "STATUSCD"] <- "ESTADO"
names(ex1df)[names(ex1df) == "DIA"] <- "DAP"
names(ex1df)[names(ex1df) == "HT"] <- "ALTURA"
2
Exercise 7 Answers cont..
# 7.6. Merge the sptab table we made on slide 18 to the ex1df table, using the
# SPCD column, and assign to ex1df2.
# Hint: use by.x and by.y for merging to columns with different names.
ex1df2 <- merge(ex1df, sptab, by.x = "SPCD", by.y = "SPECIES")

# 7.7. Display only rows with subalpine fir and DAP greater than 10.0
ex1df2[ex1df2$SPECIESNM == "subalpine fir" & ex1df2$DAP > 10.0, ]

# 7.8. Display the number of trees by ESTADO and SPECIESNM.
table(ex1df2$SPECIESNM, ex1df2$ESTADO)

# 7.9. Display the total basal area (BA) for lodgepole pine
sum(ex1df2[ex1df2$SPECIESNM == "lodgepole pine", "BA"])

# 7.10. Create a new object named 'aspen' with just the rows in ex1df2 that are
# aspen.
aspen <- ex1df2[ex1df2$SPECIESNM == "aspen", ]

# 7.11. The 2 columns, SPCD and SPECIESNM, are not important in the aspen table.
# Remove them and overwrite object 'aspen'.
aspen$SPCD <- NULL
aspen$SPECIESNM <- NULL
# or (equivalent alternative; a no-op if the two lines above already ran)
aspen <- aspen[, !(names(aspen) %in% c("SPCD", "SPECIESNM"))]

# 7.12. Display the mean ALTURA of live aspen trees
mean(aspen[aspen$ESTADO == 1, "ALTURA"])
3
Exercise 7 Answers cont..
# 7.13. Create a look up table for ESTADO and merge this table to ex1df2.
# Assign the merged table to ex1df3. Then order ex1df3 by PLT_CN.
# Hint:
#   1. Get vector of unique values of ESTADO
#   2. Define new vector called ESTADONM where 1 = Live and 2 = Dead
#   3. Create dataframe of vectors with names ESTADO and ESTADONM
#   4. Merge new dataframe to ex1df2
#   5. Order table by PLT_CN and TREE
ESTADO <- sort(unique(ex1df2$ESTADO))
# Look the label up by status code rather than by position, so the mapping
# (1 = Live, 2 = Dead) stays correct even when only one status is present.
ESTADONM <- c("Live", "Dead")[match(ESTADO, c(1, 2))]
# data.frame() keeps ESTADO numeric; cbind() would build a character matrix.
ESTADODF <- data.frame(ESTADO, ESTADONM, stringsAsFactors = FALSE)
ex1df3 <- merge(ex1df2, ESTADODF, by = "ESTADO")
# Ordered by PLT_CN as the exercise statement asks. Hint 5 also mentions TREE;
# add ex1df3$TREE as a second key to order() if that column is available.
ex1df3 <- ex1df3[order(ex1df3$PLT_CN), ]

## 7.14. Display the number of trees again, this time by ESTADONM and SPECIESNM.
table(ex1df3$SPECIESNM, ex1df3$ESTADONM)
4
Exercise 8 Answers
# 8.1 Use sapply to change the names of dat to all upper case letters.
names(dat) <- sapply(names(dat), toupper)

# 8.2 Use apply to get the range of values for DIA and HT in dat. Set the
# results to an object named 'ranges'. Is this object an array? What is the
# class of 'ranges'?
# MARGIN = 2 applies range() to each column; the result has one column per
# variable and two rows (min, max).
ranges <- apply(dat[, c("DIA", "HT")], 2, range)
is.array(ranges)
class(ranges)

# 8.3 Use tapply to get the minimum 'HT' by SPECIESNM and STATUSCD and set to
# an object named 'ht.min'. Change any NA values to 0.
ht.min <- tapply(dat$HT, dat[, c("SPECIESNM", "STATUSCD")], min)
# Combinations of species and status with no trees come back as NA.
ht.min[is.na(ht.min)] <- 0
ht.min

# 8.4 Use aggregate to get a sum of BA by SPECIESNM. Set this to an object
# named 'ba.sum'. Add names to ba.sum, SPECIESNM, SUMBA. What is the class of
# 'ba.sum'?
ba.sum <- aggregate(dat$BA, list(dat$SPECIESNM), sum)
names(ba.sum) <- c("SPECIESNM", "SUMBA")
class(ba.sum)
ba.sum

# R Built-in functions(numeric, character, statistical, etc..)
# http://www.statmethods.net/management/functions.html
5
Exercise 9 Answers
# 9.1 Use the sapply function to determine if any columns of dat are factors.
sapply(dat, is.factor)

# 9.2 Create a loop to check if any of the columns of dat is a factor. Print to
# screen the name of the columns that are factors.
for (name in names(dat)) {
  # [[ ]] extracts the column itself as a vector, whatever the table class.
  if (is.factor(dat[[name]])) {
    print(name)
  }
}

# 9.3 Append a column named HTCLASS to dat with 2 categories: "SHORT" or
# "TALL", use the mean of HT as the break point.
# So: for trees with HT less than the mean HT, set HTCLASS = "SHORT", and
# for trees with HT greater than or equal to the mean HT, set HTCLASS = "TALL".
# Check if this works using a table on HT and HTCLASS.
# First, get the mean HT value. Then create an ifelse statement with logical
# vector
ht.mean <- mean(dat$HT)
dat$HTCLASS <- ifelse(dat$HT < ht.mean, "SHORT", "TALL")
table(dat$HTCLASS, dat$HT)
6
Exercise 9 Answers cont..
# 9.4 Create a function named 'getrows', that will print out all the records (or trees) of dat
# for a specified SPECIES. Include one parameter named 'sp'. How many records have
# lodgepole trees (108)?
# Return the rows of the global table 'dat' whose SPECIES code matches 'sp'.
#
# sp: one species code, or a vector of codes, to keep.
# Returns a data frame with the matching rows (zero rows when nothing matches).
getrows <- function(sp) {
  # %in% never yields NA, so rows with a missing SPECIES are left out instead
  # of coming back as all-NA rows (which a plain == mask would produce).
  dat[dat$SPECIES %in% sp, ]
}
getrows(108)
# The number of lodgepole records is the row count of that result.
nrow(getrows(108))

## 9.5. Using the function you created in 9.4, how many records have aspen
## trees? What is the average HT for these records (trees)?
asp <- getrows(746)
nrow(asp)
mean(asp$HT)
# or
mean(getrows(746)$HT)
## 9.6 Create a function to use with sapply to add a "_1" to all the column names of dat.
# Append the suffix "_1" to each element of x and return the new strings.
add1fun <- function(x) {
  paste0(x, "_1")
}
# Run add1fun over every column name of dat; the result is displayed only,
# names(dat) itself is not modified.
sapply(X = names(dat), FUN = add1fun)
7
Exercise 10 Answers
# 10.1 Create a scatterplot of BA vs. DIA with proper labels. Are these
# variables correlated?
par(mfrow = c(1, 1))
plot(dat$BA, dat$DIA, main = "Basal area by diameter", xlab = "Basal area",
     ylab = "Diameter")

# 10.2 Add a regression line to the scatterplot in 10.1 and color it red.
# The model is DIA ~ BA so the line matches the plot's y (DIA) and x (BA) axes.
abline(lm(dat$DIA ~ dat$BA), col = "red")

# 10.3 Create a histogram of HT with 5 breaks and labels. What is the range of
# heights with the highest frequency?
hist(dat$HT, breaks = 5, main = "Histogram of heights", xlab = "Height")
# Range: 60 to 70

# 10.4 Create a barplot of maximum heights by species with labels. Which
# species has the maximum height?
spht <- tapply(dat$HT, dat$SPECIESNM, max)
# The y-axis label is kept on one line: a string literal split across source
# lines would embed a newline in the label.
barplot(spht, main = "Max Height by Species", xlab = "Species",
        ylab = "Max height", cex.names = 0.7)
# Max ht: lodgepole pine
8
Download