Hi, I'm trying to run a panel model (of N counties over T years) with contemporaneous/spatial correlation in the errors. So this is a "spatial error model" in this literature's terminology.
In order to explore this function I'm simulating spatially correlated data and errors and comparing it to the results from the plm() function with some Monte Carlo analysis. However, I'm getting /exactly/ the same (wrong) results when specifying "b" or "kkp" or "none" in the spatial.error option. On the other hand, while the empirical distributions I get with spml() are more efficient (narrower) the standard errors I get are virtually identical to the "naive" standard errors returned by the plm canned routine. You will find my reproducible example below, although you need to download the US shape file: http://dl.dropbox.com/u/45311184/spatial/co99_d00.shp So, my very broad question is, what's wrong? More specifically, - Is it a matter of spatial weights? - Am I specifying the call for spml incorrectly? - Why does the "p" "kkp" and "none" options return the same result? - Why is there a "rho" estimated in the case in which spatial.error="none"? - Why is the "rho" reported in the "coefficients" table with my beta and not in a "Error variance parameters" table as it is shown here (page 9): www.jstatsoft.org/v47/i01/paper Any help, hints, suggestions would be greatly appreciated. Let's hope my question and code are clear enough! Thanks, Ariel ---------------------------------- ----- CODE FROM HERE ON ---- ---------------------------------- # Preliminary #~~~~~~~~ library(rgdal) library(spdep) library(plm) library(splm) # download US shape file: http://dl.dropbox.com/u/45311184/spatial/co99_d00.shp counties <- readOGR("co99_d00.shp","co99_d00") counties <- counties[counties$STATE %in% c("17"),] # only keep Illinois for the example plot(counties) n <- length(counties@polygons) # number of counties # Neigbhors and weights nb <- poly2nb(counties) nbw <- nb2listw(nb, style="W") # Row-standardised weights # Parameters beta = matrix(1, nrow=1, ncol=1) sig2 = 25 # creating spatial correlation distmatrix <- spDists(counties, longlat = F) SD <- max(distmatrix)/5 # standard deviation in m Sigma1.nor <- distmatrix Sigma1.nor <- dnorm(Sigma1.nor, mean=0,sd=SD) / dnorm(0,mean=0,sd=SD) Sigma1.nor <- sig2 * Sigma1.nor Sigma1.nor[1:10,1:10] rm(SD) # Ability to generate multivariate normals vs.nor <- svd(Sigma1.nor) vcsqrt.nor <- t(vs.nor$v %*% (t(vs.nor$u) * sqrt(vs.nor$d))) # function to generate multivariate normals mrnorm.nor <- function(num.vals, mu.vec) { k <- ncol(Sigma1.nor) ans.mat <- sweep(matrix(rnorm(num.vals*k), nrow = num.vals) %*% vcsqrt.nor,2,mu.vec,"+") # 1 sec c(t(ans.mat)) } # Artificial data: #~~~~~~~~~ # Panel structure years <- 1980:2010 # time periods t <- length(years) # number of time periods dataset <- data.frame( fips = as.numeric(paste(counties@data$STATE, counties@data$COUNTY, sep="")), year=rep(years, each=n) ) # The Xs set.seed(1) dataset$Xcor <- mrnorm.nor(t , rep(1,times=n) ) # generate contemporaneously correlated Xs over space dataset$alpha<- rep(rnorm(n,mean=5,sd=sqrt(sig2)), times=t) # county effect/intercept head(dataset) # Mapping function choropleth <- function(dataframe, var, years, title,colors){ df <- data.frame(matrix( dataframe[,var][dataframe$year %in% years] , ncol=length(years))) names(df) <- years temp <- SpatialPolygonsDataFrame(counties, data.frame(counties@data, df)) arrow = list("SpatialPolygonsRescale", layout.north.arrow(), offset = c(-76,34), scale = 0.5, which = 2) spplot(temp, zcol= paste("X",years,sep=""), par.settings = list(axis.line = list(col = 'transparent')), # removes lines around plot names.attr = paste(years), # names on top of each panel e.g. year col.regions = colorRampPalette(colors)(100), #cm.colors(100), #, heat.colors(), topo.colors() colorkey=list(space="right"), scales = list(draw = F), # location of legend main = title, sp.layout = list(arrow), as.table = TRUE) } # Visualize Xs for year 1995 for example choropleth(dataset, "Xcor", 1995, "Correlated X for 1995",colors=c("blue","cyan","red")) # Same thing for errors (note Xs and Errors are not correlated ) dataset$Ecor <- mrnorm.nor(t, rep(0,times=n)) choropleth(dataset, "Ecor", 1995, "Spatially correlated errors - 1995", colors=c("red","white","black")) # Compute theoretical standard errors for beta Sigma.nor <- bdiag(lapply(1:t, function(x) Sigma1.nor) ) # block diagonal covariance matrix D <- outer(dataset$fips, unique(dataset$fips), "==") + 0 # fixed-effect dummy matrix colnames(D) <- paste(unique(dataset$fips)) Z <- cbind(dataset$Xcor, D) # the regressor matrix ZZ_inv <- solve(t(Z) %*% Z) vcov <- ZZ_inv %*% t(Z) %*% Sigma.nor %*% Z %*% ZZ_inv rm(Z,D) sqrt(vcov[1,1]) # teh correct standard error of beta # Monte carlo analysis: #~~~~~~~~~~~~~~ ndraws <- 200 fitfe.bsave = matrix(0, nrow=ndraws, ncol=4) fitfe.sesave = matrix(0, nrow=ndraws, ncol=4) # Monte Carlo experiment system.time({ # takes ~120 secs for 200 draws in my 3-year old computer for (i in 1:ndraws) { print(i) # Generate vector of errors Ecor <- mrnorm.nor(t, rep(0,times=n)) # Compute the implied y variable dataset$y <- as.matrix(dataset[,c("Xcor")]) %*% beta + dataset$alpha + Ecor # Run regressions fitfe <- plm( y ~ Xcor, data = dataset, model="within", index = c("fips","year")) fitfe.b <- spml(formula = y ~ Xcor, data = dataset, model="within", index = c("fips","year"), listw = nbw, lag = FALSE, spatial.error = "b") fitfe.kkp <- spml(formula = y ~ Xcor, data = dataset, model="within", index = c("fips","year"), listw = nbw, lag = FALSE, spatial.error = "kkp") fitfe.none <- spml(formula = y ~ Xcor, data = dataset, model="within", index = c("fips","year"), listw = nbw, lag = FALSE, spatial.error = "none") # Save fitfe.bsave[i,1] <- coefficients(fitfe) fitfe.sesave[i,1] <- sqrt(diag(vcov(fitfe))) fitfe.bsave[i,2] <- fitfe.b$coefficients[2] fitfe.sesave[i,2] <- sqrt(fitfe.b$vcov[2,2]) fitfe.bsave[i,3] <- fitfe.kkp$coefficients[2] fitfe.sesave[i,3] <- sqrt(fitfe.kkp$vcov[2,2]) fitfe.bsave[i,4] <- fitfe.none$coefficients[2] fitfe.sesave[i,4] <- sqrt(fitfe.none$vcov[2,2]) # Clean up dataset$y <- NULL } }) # Something I dont understand: why is there a "rho" estimated in the model where spatial.error="none"? # Also, why is this rho in the coefficients table and not in a error parameters table by itself? summary(fitfe.none) # Visualize: #~~~~~~~~~~~ # Initial plot: distribution of beta with Xcor and Ecor plot( density(fitfe.bsave[,1]), ylim=c(0,10), xlim=c(0.8,1.3), type="l", lwd=2, col= "black", xlab="coefficient", ylab="density", main=paste("Monte Carlo analysis - ",ndraws,"draws")) # Theoretical distribution of beta lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001), mean=beta, sd= sqrt(vcov[1,1])) ,lty="dashed", col="black",lwd=2) # Average "naive" distributions from canned FE routine lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001), mean=mean(fitfe.bsave[,1]), sd= mean(fitfe.sesave[,1] )),lty="solid", col="red",lwd=2) # Empirical Spatial Error Model (SER) distribution of beta lines(density(fitfe.bsave[,2]),lty="solid", col="blue",lwd=2) # option "b" lines(density(fitfe.bsave[,3]),lty="solid", col="blue",lwd=2) # option "kkp" lines(density(fitfe.bsave[,4]),lty="solid", col="blue",lwd=2) # option "none" # Average Spatial Error Model (SER) distribution of beta from canned routine lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001), mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,2] )),lty="solid", col="cyan",lwd=2) # option "b" lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001), mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,3] )),lty="solid", col="cyan",lwd=2) # option "kkp" lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001), mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,4] )),lty="solid", col="cyan",lwd=2) # option "none" # Legend and reference line legend("topright", cex=0.7, title= "Distribution of beta", legend=c("Empirical FE", "Theoretical FE", "Avg. naive FE", "Empirical SER (3 lines)", "Avg. reported SER (3 lines)"), lty=c("solid","dashed","solid","solid","solid"), lwd=2,col=c("black","black","red","blue","cyan")) abline(v=beta, lty=3) ----- Ariel Ortiz-Bobea PhD Candidate in Agricultural & Resource Economics University of Maryland - College Park -- View this message in context: http://r-sig-geo.2731867.n2.nabble.com/spml-function-and-the-spatial-error-option-tp7522051.html Sent from the R-sig-geo mailing list archive at Nabble.com. _______________________________________________ R-sig-Geo mailing list [email protected] https://stat.ethz.ch/mailman/listinfo/r-sig-geo
