###################################
# Code for: R for Marketing Research and Analytics, Chapter 13
#
# Authors:  Chris Chapman               Elea McDonnell Feit
#           cnchapman+rbook@gmail.com   efeit@drexel.edu
#
# Copyright 2015, Springer
#
# Last update: January 7, 2015
# Version: 1.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#################################################################
# BRIEF HOW TO USE
# This file contains scripts used in Chapter 13 of Chapman & Feit (2015),
#   "R for Marketing Research and Analytics", Springer.
#################################################################


# Chapter 13 -- Choice Models and Conjoint Analysis

##### Load data from website, if you prefer not to simulate it
cbc.df <- read.csv("http://goo.gl/5xQObB",
                   colClasses = c(seat = "factor", price = "factor"))
summary(cbc.df)
#####


## Simulate data
attrib <- list(seat  = c("6", "7", "8"),
               cargo = c("2ft", "3ft"),
               eng   = c("gas", "hyb", "elec"),
               price = c("30", "35", "40"))

coef.names <- NULL
for (a in seq_along(attrib)) {
  coef.names <- c(coef.names,
                  paste(names(attrib)[a], attrib[[a]][-1], sep=""))
}
coef.names

mu <- c(-1, -1, 0.5, -1, -2, -1, -2)
names(mu) <- coef.names
mu

Sigma <- diag(c(0.3, 1, 0.1, 0.3, 1, 0.2, 0.3))
dimnames(Sigma) <- list(coef.names, coef.names)
Sigma["enghyb", "engelec"] <- Sigma["engelec", "enghyb"] <- 0.3

set.seed(33040)
resp.id <- 1:200   # respondent ids
carpool <- sample(c("yes", "no"), size=length(resp.id), replace=TRUE,
                  prob=c(0.3, 0.7))

library(MASS)
coefs <- mvrnorm(length(resp.id), mu=mu, Sigma=Sigma)
colnames(coefs) <- coef.names

coefs[carpool=="yes", "seat8"] <- coefs[carpool=="yes", "seat8"] + 2
coefs[carpool=="yes", "seat7"] <- coefs[carpool=="yes", "seat7"] + 1.5

# Try it!: head(cbind(carpool, coefs))
# Try it!: by(coefs, carpool, colMeans)

nques <- 15
nalt <- 3

profiles <- expand.grid(attrib)
nrow(profiles)
head(profiles)

profiles.coded <- model.matrix(~ seat + cargo + eng + price, data=profiles)[,-1]
head(profiles.coded)

cbc.df <- data.frame(NULL)
for (i in seq_along(resp.id)) {
  profiles.i <- sample(1:nrow(profiles), size=nques*nalt)
  utility <- profiles.coded[profiles.i,] %*% coefs[i,]
  wide.util <- matrix(data=utility, ncol=nalt, byrow=TRUE)
  probs <- exp(wide.util) / rowSums(exp(wide.util))
  choice <- apply(probs, 1, function(x) sample(1:nalt, size=1, prob=x))
  choice <- rep(choice, each=nalt)==rep(1:nalt, nques)
  conjoint.i <- data.frame(resp.id=rep(i, nques),
                           ques = rep(1:nques, each=nalt),
                           alt = rep(1:nalt, nques),
                           carpool = rep(carpool[i], nques),
                           profiles[profiles.i,],
                           choice = as.numeric(choice))
  cbc.df <- rbind(cbc.df, conjoint.i)
}
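# Not in book: an optional sanity check on the simulated data (a sketch; run it
# before the cleanup below, while nques, nalt, and resp.id still exist). Each
# question should contribute nalt rows and exactly one chosen alternative.
stopifnot(nrow(cbc.df) == nques * nalt * length(resp.id))
stopifnot(all(aggregate(choice ~ resp.id + ques, data=cbc.df, FUN=sum)$choice == 1))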
# cleanup! -- not in book:
rm(a, i, resp.id, carpool, mu, Sigma, coefs, coef.names, conjoint.i, profiles,
   profiles.i, profiles.coded, utility, wide.util, probs, choice, nalt, nques)
# Try it!: ls()

## END data simulation


# Choice data descriptives
head(cbc.df)
summary(cbc.df)

# write.csv(cbc.df, "rintro-chapter13conjoint.csv", row.names=FALSE)

xtabs(choice ~ price, data=cbc.df)
xtabs(choice ~ cargo, data=cbc.df)


# Fitting a choice model with mlogit
library(mlogit)
cbc.mlogit <- mlogit.data(data=cbc.df, choice="choice", shape="long",
                          varying=3:6, alt.levels=paste("pos", 1:3),
                          id.var="resp.id")

m1 <- mlogit(choice ~ 0 + seat + cargo + eng + price, data = cbc.mlogit)
summary(m1)

m2 <- mlogit(choice ~ seat + cargo + eng + price, data = cbc.mlogit)
summary(m2)
lrtest(m1, m2)

m3 <- mlogit(choice ~ 0 + seat + cargo + eng
                      + as.numeric(as.character(price)),
             data = cbc.mlogit)
summary(m3)
lrtest(m1, m3)


# Willingness to pay
coef(m3)["cargo3ft"] / (-coef(m3)["as.numeric(as.character(price))"] / 1000)


# Predicting shares
predict.mnl <- function(model, data) {
  # Function for predicting shares from a multinomial logit model
  # model: mlogit object returned by mlogit()
  # data: a data frame containing the set of designs for which you want to
  #       predict shares. Same format as the data used to estimate the model.
  data.model <- model.matrix(update(model$formula, 0 ~ .), data = data)[,-1]
  utility <- data.model %*% model$coef
  share <- exp(utility) / sum(exp(utility))
  cbind(share, data)
}

(new.data <- expand.grid(attrib)[c(8, 1, 3, 41, 49, 26), ])
predict.mnl(m3, new.data)
predict.mnl(m1, new.data)


# Share sensitivity
sensitivity.mnl <- function(model, attrib, base.data, competitor.data) {
  # Function for creating data for a share-sensitivity chart
  # model: mlogit object returned by mlogit() function
  # attrib: list of vectors with attribute levels to be used in sensitivity
  # base.data: data frame containing baseline design of target product
  # competitor.data: data frame containing design of competitive set
  data <- rbind(base.data, competitor.data)
  base.share <- predict.mnl(model, data)[1,1]
  share <- NULL
  for (a in seq_along(attrib)) {
    for (i in attrib[[a]]) {
      data[1,] <- base.data
      data[1,a] <- i
      share <- c(share, predict.mnl(model, data)[1,1])
    }
  }
  data.frame(level=unlist(attrib), share=share, increase=share-base.share)
}

base.data <- expand.grid(attrib)[c(8), ]
competitor.data <- expand.grid(attrib)[c(1, 3, 41, 49, 26), ]
(tradeoff <- sensitivity.mnl(m1, attrib, base.data, competitor.data))

barplot(tradeoff$increase, horiz=FALSE, names.arg=tradeoff$level,
        ylab="Change in Share for Baseline Product")


# Share predictions for two identical vehicles
new.data.2 <- expand.grid(attrib)[c(8, 8, 1, 3, 41, 49, 26), ]
predict.mnl(m1, new.data.2)


# Prediction uncertainty & conjoint design
small.cbc <- mlogit.data(data=cbc.df[1:(25*15*3), ],
                         choice="choice", shape="long", varying=3:6,
                         alt.levels=paste("pos", 1:3), id.var="resp.id")
m4 <- mlogit(choice ~ 0 + seat + cargo + eng + price, data = small.cbc)
summary(m4)   # larger standard errors
cbind(predict.mnl(m4, new.data), predict.mnl(m1, new.data))


# ------- Mixed logit model ------------------------------
# Model estimation
m1.rpar <- rep("n", length=length(m1$coef))
names(m1.rpar) <- names(m1$coef)
m1.rpar

m1.hier <- mlogit(choice ~ 0 + seat + eng + cargo + price,
                  data = cbc.mlogit,
                  panel=TRUE, rpar = m1.rpar, correlation = FALSE)
summary(m1.hier)

m2.hier <- update(m1.hier, correlation = TRUE)
summary(m2.hier)

cov2cor(cov.mlogit(m2.hier))
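# Not in book: a brief sketch of what the estimated heterogeneity implies.
# Assuming the random coefficients are normal (as specified in m1.rpar), the
# estimated mean and covariance from m2.hier give the share of respondents
# whose seat8 coefficient is positive, i.e., who prefer 8 seats over the
# 6-seat baseline, all else equal. (Indexing cov.mlogit() output by parameter
# name assumes it carries the coefficient names, as in the cov2cor() call
# above.)
seat8.mean <- coef(m2.hier)["seat8"]
seat8.sd   <- sqrt(cov.mlogit(m2.hier)["seat8", "seat8"])
pnorm(0, mean=seat8.mean, sd=seat8.sd, lower.tail=FALSE)   # P(seat8 coef > 0)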
# Simulating shares
predict.hier.mnl <- function(model, data, nresp=1000) {
  # Function for predicting shares from a hierarchical multinomial logit model
  # model: mlogit object returned by mlogit()
  # data: a data frame containing the set of designs for which you want to
  #       predict shares. Same format as the data used to estimate the model.
  # Note that this code assumes all model parameters are random
  data.model <- model.matrix(update(model$formula, 0 ~ .), data = data)[,-1]
  coef.Sigma <- cov.mlogit(model)
  coef.mu <- model$coef[1:dim(coef.Sigma)[1]]
  draws <- mvrnorm(n=nresp, coef.mu, coef.Sigma)
  shares <- matrix(NA, nrow=nresp, ncol=nrow(data))
  for (i in 1:nresp) {
    utility <- data.model %*% draws[i,]
    share <- exp(utility) / sum(exp(utility))
    shares[i,] <- share
  }
  cbind(colMeans(shares), data)
}

predict.hier.mnl(m2.hier, data=new.data)


# ------- Hierarchical Bayes multinomial logit -----------
choice <- rep(0, nrow(cbc.df))
choice[cbc.df[,"alt"]==1] <- cbc.df[cbc.df[,"choice"]==1, "alt"]
head(choice)

cbc.coded <- model.matrix(~ seat + eng + cargo + price, data = cbc.df)
cbc.coded <- cbc.coded[, -1]   # remove the intercept

choicemodelr.data <- cbind(cbc.df[, 1:3], cbc.coded, choice)
head(choicemodelr.data)

carpool <- cbc.df$carpool[cbc.df$ques==1 & cbc.df$alt==1]=="yes"
carpool <- as.numeric(carpool)
choicemodelr.demos <- as.matrix(carpool, nrow=length(carpool))
str(choicemodelr.demos)

library(ChoiceModelR)
hb.post <- choicemodelr(data=choicemodelr.data, xcoding=rep(1, 7),
                        demos=choicemodelr.demos,
                        mcmc=list(R=20000, use=10000),
                        options=list(save=TRUE))

names(hb.post)

# Model parameters
hb.post$compdraw[[567]]$mu
hb.post$deltadraw[567,]

hb.post$compdraw[[567]]$rooti
crossprod(hb.post$compdraw[[567]]$rooti)

# Individual-level betas
head(hb.post$betadraw[,,567])
str(hb.post$betadraw)

beta.post.mean <- apply(hb.post$betadraw, 1:2, mean)
head(beta.post.mean)

beta.post.q05 <- apply(hb.post$betadraw, 1:2, quantile, probs=c(0.05))
beta.post.q95 <- apply(hb.post$betadraw, 1:2, quantile, probs=c(0.95))
rbind(q05=beta.post.q05[1,], mean=beta.post.mean[1,], q95=beta.post.q95[1,])


# Prediction using individual-level draws
predict.hb.mnl <- function(betadraws, data) {
  # Function for predicting shares from a hierarchical multinomial logit model
  # betadraws: array of individual-level coefficient draws
  #            (respondent x coefficient x draw), e.g. hb.post$betadraw
  # data: a data frame containing the set of designs for which you want to
  #       predict shares. Same format as the data used to estimate the model.
  data.model <- model.matrix(~ seat + eng + cargo + price, data = data)
  data.model <- data.model[,-1]   # remove the intercept
  nresp <- dim(betadraws)[1]
  ndraws <- dim(betadraws)[3]
  shares <- array(dim=c(nresp, nrow(data), ndraws))
  for (d in 1:ndraws) {
    for (i in 1:nresp) {
      utility <- data.model %*% betadraws[i,,d]
      shares[i,,d] <- exp(utility) / sum(exp(utility))
    }
  }
  shares.agg <- apply(shares, 2:3, mean)
  cbind(share=apply(shares.agg, 1, mean),
        pct=t(apply(shares.agg, 1, quantile, probs=c(0.05, 0.95))),
        data)
}

predict.hb.mnl(hb.post$betadraw, new.data)

# Try it!
# new.data[2,1] <- 7
# predict.hb.mnl(hb.post$betadraw, new.data)
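# Not in book: a sketch extending the individual-level prediction to segments.
# It reuses predict.hb.mnl() and the 0/1 carpool indicator built above to
# compare predicted shares among carpoolers and non-carpoolers; any other
# grouping of respondents could be substituted the same way. drop=FALSE keeps
# the subset a 3-dimensional array, as predict.hb.mnl() expects.
predict.hb.mnl(hb.post$betadraw[carpool==1, , , drop=FALSE], new.data)
predict.hb.mnl(hb.post$betadraw[carpool==0, , , drop=FALSE], new.data)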