################################### # Code for: R for Marketing Research and Analytics, 2nd ed: Chapter 2 # # Authors: Chris Chapman Elea McDonnell Feit # cnchapman+rbook@gmail.com efeit@drexel.edu # # Copyright 2019, Springer # # Last update: April 7, 2019 # Version: 2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ################################################################# # BRIEF HOW TO USE # This file contains scripts used in Chapter 2 of Chapman & Feit (2019), # "R for Marketing Research and Analytics, 2nd edition", Springer. ################################################################# # chapter 2 x <- c(2, 4, 6, 8) x # a brief tour of R ### START TOUR # some setup install.packages(c("lavaan", "semPlot", "corrplot", "multcomp")) # Load the data # Long version: # satData <- read.csv("http://r-marketing.r-forge.r-project.org/data/rintro-chapter2.csv") satData <- read.csv("http://goo.gl/UDv12g") # convert the Segment to a factor (categorical) variable satData$Segment <- factor(satData$Segment) head(satData) summary(satData) # correlation plot library(corrplot) corrplot.mixed(cor(satData[, -3])) # product satisfaction by segment aggregate(iProdSAT ~ Segment, satData, mean) # ANOVA sat.anova <- aov(iProdSAT ~ -1 + Segment, satData) summary(sat.anova) # plot the ANOVA estimates library(multcomp) par(mar=c(4,8,4,2)) plot(glht(sat.anova)) # define a structural model satModel <- "SAT =~ iProdSAT + iSalesSAT REC =~ iProdREC + iSalesREC REC ~ SAT " # fit the structural model library(lavaan) sat.fit <- cfa(satModel, data=satData) summary(sat.fit, fit.m=TRUE) # plot the structural model library(semPlot) semPaths(sat.fit, what="est", residuals=FALSE, intercepts=FALSE, nCharNodes=9) ### END TOUR # Language Fundamentals x <- c(2, 4, 6, 8) x X x <- c(2, 4, 6, 8) # start a cheer # basic objects # vectors xNum <- c(1, 3.14159, 5, 7) xLog <- c(TRUE, FALSE, TRUE, TRUE) xChar <- c("foo", "bar", "boo", "far") xMix <- c(1, TRUE, 3, "Hello, world!") xNum x2 <- c(x, x) x2 summary(xNum) summary(xChar) xNum[2] x2 + 1 x2 * pi (x+cos(0.5)) * x2 length(x) length(x2) c(1, 2, 3.5) xMix xNum[1] xMix[1] xNum[1] + 1 xMix[1] + 1 # error as.numeric(xMix[1])+1 str(xNum) str(xChar) str(xMix) # more on vectors and indexing xSeq <- 1:10 xSeq 1:5*2 1:(5*2) xNum xNum[2:4] myStart <- 2 xNum[myStart:sqrt(myStart+7)] seq(from=-5, to=28, by=4) rep(c(1,2,3), each=3) rep(seq(from=-3, to=13, by=4), c(1, 2, 3, 2, 1)) xSeq xSeq[-5:-7] 1:300 1001:1300 xNum[2:4] xSub <- xNum[2:4] xSub xNum xNum[c(FALSE, TRUE, TRUE, TRUE)] xNum > 3 xNum[xNum > 3] # missing and interesting values my.test.scores <- c(91, NA, NA) mean(my.test.scores) max(my.test.scores) mean(my.test.scores, na.rm=TRUE) max(my.test.scores, na.rm=TRUE) mean(na.omit(my.test.scores)) is.na(my.test.scores) my.test.scores[!is.na(my.test.scores)] my.test.scores <- c(91, -999, -999) mean(my.test.scores) my.test.scores[my.test.scores < -900] <- NA mean(my.test.scores, na.rm=TRUE) log(c(-1,0,1)) # warning about NA # lists str(xNum) str(xChar) xList <- list(xNum, xChar) xList str(xList) summary(xList[[1]]) lapply(xList, summary) xList <- list(xNum, xChar) names(xList) <- c("itemnum", "itemchar") # method 1 xList <- list(itemnum=xNum, itemchar=xChar) # method 2 names(xList) xList[[1]] xList$itemnum xList[["itemnum"]] # data frames x.df <- data.frame(xNum, xLog, xChar) x.df x.df[2, 1] x.df[1, 3] x.df <- data.frame(xNum, xLog, xChar, stringsAsFactors=FALSE) x.df[1, 3] x.df[2, ] # all of row 2 x.df[ , 3] # all of column 3 x.df[2:3, ] x.df[ , 1:2] x.df[-3, ] # omit the third observation x.df[ , -2] # omit the second column str(x.df[2, 1]) str(x.df[ , 2]) str(x.df[c(1,3), ]) # use c() to get rows 1 and 3 only x.df$xNum # create more interesting data # warning!! rm(list=ls()) # caution, deletes all objects; see below store.num <- factor(c(3, 14, 21, 32, 54)) # store id store.rev <- c(543, 654, 345, 678, 234) # store revenue, $1000 store.visits <- c(45, 78, 32, 56, 34) # visits, 1000s store.manager <- c("Annie", "Bert", "Carla", "Dave", "Ella") (store.df <- data.frame(store.num, store.rev, store.visits, store.manager, stringsAsFactors=F)) # F = FALSE store.df$store.manager mean(store.df$store.rev) cor(store.df$store.rev, store.df$store.visits) summary(store.df) # loading and saving data save(store.df, file="store-df-backup.RData") rm(store.df) # caution, first ensure 'save' worked mean(store.df$store.rev) # error load("store-df-backup.RData") mean(store.df$store.rev) # works now save(list=c("store.df","store.visits"), file="store-df-backup.RData") store.df <- 5 store.df load("store-df-backup.RData") store.df # Works on Windows: save(store.df, file="C:\\Documents and Settings\\user\\My Documents\\R\\store-df-backup.RData") # Works on Mac OSX, Linux and Windows: save(store.df, file="~/Documents/R/store-df-backup.RData") getwd() setwd("~/Documents/R") # tilde is handled on UNIX-like systems getwd() save.image() # saves .RData save.image("mywork.RData") load("mywork.RData") list.files() # warning: dangerous! file.remove("mywork.RData", "store-df-backup.RData") # CSV Files write.csv(store.df, row.names=FALSE) write.csv(store.df, file="store-df.csv", row.names=FALSE) read.csv("store-df.csv") # "file=" is optional store.df2 <- read.csv("store-df.csv", stringsAsFactors=FALSE) # "file=" is optional store.df2$store.num <- factor(store.df2$store.num) store.df == store.df2 all.equal(store.df, store.df2) rm(store.df2) #### #### Functions se <- function(x) { sd(x) / sqrt(length(x)) } se(store.df$store.visits) mean(store.df$store.visits) + 1.96 * se(store.df$store.visits) se(store.df$store.manager) # warning se <- function(x) { # computes standard error of the mean tmp.sd <- sd(x) # standard deviation tmp.N <- length(x) # sample size tmp.se <- tmp.sd / sqrt(tmp.N) # std error of the mean return(tmp.se) } #### #### Language structures x <- -2:2 log(x) # warning, can't log() negative numbers ifelse(x > 0, x, NA) # replace non-positive values with NA log(ifelse(x > 0, x, NA)) # no warning now # Anonymous Functions my.data <- matrix(runif(100), ncol=5) # 100 random numbers in 5 columns apply(my.data, 2, median) / 2 halfmedian <- function (x) { median(x) / 2 } apply(my.data, 2, halfmedian) apply(my.data, 2, function(x) { median(x) / 2 } ) ls() rm(store.num) rm(list=c("store.rev","store.visits")) rm(list=ls(pattern="store")) # Warning!! putting this inside an "if (FALSE)" block on purpose if (FALSE) { rm(list=ls()) # warning! deletes all objects in memory (except hidden ones) }