#Replication Code in R for "Do Insurers Compete on the Federal Heath Insurance Exchange" # Jesse N. Cohen, M.D., M.P.H., Alexander Coppock, M.P.A, Arnab Ghosh, M.D., M.A., Benjamin P. Geisler, M.D., M.P.H rm(list=ls()) setwd("~/Google Drive/health exchange prices/analysis") library(maps) library(plyr) library(ggplot2) # Bring in Data and Recode data(county.fips) health <- read.csv(file="Healthplanwebsite.csv", stringsAsFactors=FALSE) health <- within(health, { state.fullname <- state.name[match(State, state.abb)] state_county <- paste0(tolower(state.fullname), ",", tolower(County)) uniqueplan <- paste(State,Metal.Level,Plan.Type,Plan.Marketing.Name,sep="_") uniqueplan_price <- paste(State,Metal.Level,Plan.Type,Plan.Marketing.Name, Rating.Area,Premium.Family ,sep="_") state_ratingarea <- paste0(state.fullname,Rating.Area) }) planlevel <- health[, c("uniqueplan", "uniqueplan_price", "Premium.Family", "Rating.Area","Issuer.Name", "state.fullname", "Metal.Level","Plan.Type","state_ratingarea")] planlevel_ratingarea <- planlevel[!duplicated(planlevel$uniqueplan_price),] planlevel_ratingarea <-within(planlevel_ratingarea,{ price <- as.numeric(Premium.Family) }) planlevel_ratingarea <- planlevel_ratingarea[!is.na(planlevel_ratingarea$price),] num_issuers_rating_area <- ddply(planlevel_ratingarea, c("state_ratingarea"), summarize, num_issuers = length(unique(Issuer.Name))) planlevel_ratingarea <- join(planlevel_ratingarea, num_issuers_rating_area, by="state_ratingarea") ## Statistics refered to in text: # number of unique plans length(unique(planlevel_ratingarea$uniqueplan)) # number of unique insurers length(unique(planlevel_ratingarea$Issuer.Name)) # number of unique state length(unique(planlevel_ratingarea$state.fullname)) # number of unique rating areas length(unique(planlevel_ratingarea$state_ratingarea)) # number of total offerings nrow(planlevel_ratingarea) # Ranges maxdif <- function(x){ max_x <- max(x, na.rm=TRUE) min_x <- min(x, na.rm=TRUE) dif <- max_x - min_x return(dif) } maxdifs <- ddply(planlevel_ratingarea, "uniqueplan", summarize, maxdif = maxdif(price)) max(maxdifs$maxdif) mean(maxdifs$maxdif) mean(maxdifs$maxdif < 500) ## Main models #fit.1 <- lm(price ~ num_issuers, data= planlevel_ratingarea) #fit.2 <- lm(price ~ num_issuers + uniqueplan + uniqueplan:num_issuers , data= planlevel_ratingarea) #save(fit.1, fit.2, file="ACAsavedfits.rdata") load("ACAsavedfits.rdata") coefs <- c(0, fit.2$coefficients[c(2278:4552)]) + fit.2$coefficients[2] t.test(coefs) ### Make Figure 3 pdf("Figure3.pdf") hist(coefs, breaks=50, main="Distribution of Slopes",xlab="Change of Premium ($) per Additional Insurer in Rating Area, by Plan", ylab="Number of Plans") dev.off() ### Make Figure 4 forfigure4<- ddply(planlevel_ratingarea, c("state.fullname"), summarize, count = length(unique(state_ratingarea))) pdf("Figure4.pdf") with(forfigure4,{ hist(count, main="Distribution of Rating Areas per State", ylab="Number of States", xlab="Rating Areas per State", col="white", breaks=20) }) dev.off() ### Make Figure 5 forfigure5<- ddply(planlevel_ratingarea, c("num_issuers"), summarize, count = length(unique(state_ratingarea)), percentage = count/395) pdf("Figure5.pdf") with(forfigure5,{ barplot(height=count, names.arg=num_issuers, main="Number of Insurers in Rating Area", ylab="Number of Rating Areas", xlab="Number of Insurers in Rating Area", col="white") }) dev.off() ### Demonstration of the model plans <- c(rep("Insurance Plan A", 4), rep("Insurance Plan B", 4), rep("Insurance Plan C", 4)) premiums <- c(950, 1000, 1050, 1060, 800, 700, 750, 700, 850, 800, 650, 625) insurers_per_rating_area <- c(2,2,3,5,1,1,2,4,5,5,7,8) demonstration <- data.frame(plans, premiums, insurers_per_rating_area) g <- ggplot(demonstration, aes(y=premiums, x=insurers_per_rating_area, color=plans, shape=plans)) g <- g + geom_point(size=5,guide = guide_legend(title = NULL)) g <- g + xlim(0,10) + ylim(500, 1200) g <- g + xlab("Number of Insurers in Rating Area") + ylab("Premiums ($)") + ggtitle("Premiums vs Number of Insurers in Rating Area") g <- g+ theme_bw() g <- g + theme(legend.position="bottom") + theme(legend.title=element_blank()) g1 <- g + stat_smooth(method = "lm",se = FALSE, aes(group=1), color="black",fullrange = TRUE) g2 <- g + stat_smooth(method = "lm",se = FALSE,fullrange = TRUE) pdf("Figure1.pdf") print(g1) dev.off() pdf("Figure2.pdf") print(g2) dev.off()