# Based on the code created by Vincent Vu
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 3.0 Unported License.
#
# golfdata.RData contains a single object, 'golfdata', that is a list with
# 3 components: 'leaderboard', 'scorecard', 'course'
load('golfdata.RData')

# ==============================================
# = Look at an individual player's performance =
# ==============================================

# Extract Tiger Woods' score card for the 4th round and calculate his running
# over/under par score
df <- subset(golfdata$scorecard, player == 'Tiger Woods' & round == 4)
tiger <- as.numeric(df[, 1:18])
tiger <- cumsum(tiger - golfdata$course$par)
plot(tiger, type = 'l')

# Extract Tiger Woods' score card and reorder the rows by the round (1 to 4)
df <- subset(golfdata$scorecard, player == 'Tiger Woods')
tiger <- as.matrix(df[order(df$round), 1:18])
# Flatten the round-by-hole matrix into a vector of length 18*4, row-wise:
# t() puts the holes in rows, and as.vector() then reads column by column,
# i.e. one full round after another
tiger <- as.vector(t(tiger))
# Calculate Tiger's running score across the 4 rounds of the tournament
tiger <- cumsum(tiger - rep(golfdata$course$par, 4))
plot(tiger, type = 'l')

# ==========================================================================
# = Abstract the analysis so that it can be easily repeated for any player =
# ==========================================================================

# Compute a player's running over/under par score across the rounds played.
#
# Args:
#   df:  score card rows for a single player; columns 1:18 are used as the
#        per-hole scores and the 'round' column gives the round number.
#   par: par value for each hole (defaults to golfdata$course$par, which is
#        looked up when the function is called).
#
# Returns:
#   A numeric vector of length 18 * nrow(df) holding the cumulative score
#   relative to par, in chronological order (round 1 hole 1, hole 2, ...).
runningTotal <- function(df, par = golfdata$course$par) {
  # Reorder the rows of the data frame by the round number
  # (so that the scores are in chronological order)
  # and extract the scores as a matrix
  x <- as.matrix(df[order(df$round), 1:18])
  n <- nrow(x)
  # Flatten the n x 18 matrix into a vector of length 18*n, row-wise
  x <- as.vector(t(x))
  # Calculate the running over/under score; par is repeated once per round
  cumsum(x - rep(par, n))
}

# Check out Charl Schwartzel
charl <- runningTotal(subset(golfdata$scorecard, player == 'Charl Schwartzel'))
plot(charl, xlab = 'hole', ylab = 'score', type = 'l')
# ==================================================================
# = Look at all players that made the cut (played in all 4 rounds) =
# ==================================================================
madecut <- subset(golfdata$leaderboard, position != 'CUT')$player
df <- subset(golfdata$scorecard, player %in% madecut)
# Drop factor levels left unused by the subset, so that levels(df$player)
# and split() below do not see players with no rows
df <- droplevels(df)

# Two approaches to computing runningTotal for each player.
# The results are stored row-wise in a 2-dimensional array

# =====================
# = For-loop approach =
# =====================
# Preallocate one row per player and one column per hole played (18 * 4)
scores <- matrix(nrow = nlevels(df$player), ncol = 18 * 4)
# seq_len() yields an empty sequence when there are no players,
# whereas 1:0 would iterate over c(1, 0)
for (i in seq_len(nlevels(df$player))) {
  x <- subset(df, player == levels(df$player)[i])
  scores[i, ] <- runningTotal(x)
}
rownames(scores) <- levels(df$player)

# =====================================
# = Split, apply, combine with base R =
# =====================================
x <- split(df, df$player)
x <- lapply(x, runningTotal)
# rbind() the per-player vectors; the row names come from the list names
scores <- do.call(rbind, x)

# ====================
# = Plot the results =
# ====================
# matplot() draws one line per column, so transpose to get one per player
matplot(t(scores), xlab = 'hole', ylab = 'total to par', type = 'l')
# Add the median running total
lines(seq_len(ncol(scores)), apply(scores, 2, median), lwd = 2)

# ==================================================================
# = Look at all players that made the cut (played in all 4 rounds) =
# ==================================================================
# NOTE(review): this repeats the setup at the top of this section and
# recomputes the same madecut / df; presumably it introduces code that
# follows -- confirm before removing
madecut <- subset(golfdata$leaderboard, position != 'CUT')$player
df <- subset(golfdata$scorecard, player %in% madecut)
df <- droplevels(df)