Dear all,

Below is my attempt at a function to fit Alternate Least Squares
Optimal Scaling models, as described in Young (1981) "Quantitative
Analysis of Qualitative Data" and Jacoby (1999) "Levels of Measurement
and Political Research: An Optimistic View".

I would welcome any comments on coding style, tips & tricks etc.

I also have a specific problem: the output tends to give very small
coefficients, and very large fitted values for specific factor levels.
Am I doing the normalization right?

Cheers
David


library(car) # for "recode"

alsos.fit = function (y, x, tol = 1e-7) {
        # y is a vector or factor or ordered factor
        # x is a data frame of vectors/factors/ordereds

        # we treat the y's as the first column of the matrix
        x = cbind(y, x)
        x = x[complete.cases(x),]
        ox = x
        x.facs = sapply(x, is.factor)
        x.ords = sapply(x, is.ordered)

        # start with our numeric values whatever they are
        x = sapply(x, as.numeric)

        old.SSE = Inf
        while(T) {
                # least squares regression with an intercept
                ols = lm.fit(cbind(rep(1,nrow(x)), x[,-1]) , x[,1])
                b = ols$coefficients
                SSE = drop(ols$residuals %*% ols$residuals)
                if (old.SSE-SSE<tol) {
                        factor.scores=list()
                        for (i in (1:ncol(x))[x.facs]) {
                                nm = colnames(x)[i]
                                factor.scores[[nm]] = tapply(x[,i], ox[,i],
                                                function (foo) {return(foo[1])})
                                names(factor.scores[[nm]]) = levels(ox[,i])
                        }

                        return(list(
                                factor.scores=factor.scores,
                                scaled.y=x[,1], scaled.x=x[,-1],
                                coefficients=b, SSE=SSE,
                                        ))
                }
                old.SSE=SSE
                        
                mx = nx = x
                mx[] = 0
                nx[] = 0
                for (i in (1:ncol(x))[x.facs]) {

                        # optimal scaling
                        if (i==1) nx[,i] = ols$fitted.values
                        else nx[,i] = (x[,1] - cbind(rep(1,nrow(x)), 
x[,c(-1,-i)]) %*% b[-i])/b[i]
                        
                        # create within-category means
                        tmpfac = factor(ox[,i], labels=1:nlevels(ox[,i]))
                        catmeans = tapply(nx[,i], tmpfac, mean)

                        # ensure ordinal values are correctly ordered
                        if (x.ords[i]) {
                                tmp = kruskal.ordering(nx[,i], tmpfac)
                                tmpfac = tmp$tmpfac
                                catmeans = tmp$catmeans
                        }

                        # set values to within-category means
                        mx[,i] = catmeans[tmpfac]

                        # normalize according to Young (1981)
                        mx[,i] = mx[,i] * (nx[,i] %*% nx[,i]) / (mx[,i] %*% 
mx[,i])
                        
                }
                x[,x.facs] = mx[,x.facs]
        }
}

# as described in Kruskal (1964)
kruskal.ordering = function(numeric.data, tmpfac) {
        j = 1
        upact = T
        while(T) {
                catmeans = tapply(numeric.data, tmpfac, mean) # vector w as many
items as tmpfac cats
                # have we finished?
                if (j>nlevels(tmpfac)) return 
(list(catmeans=catmeans,tmpfac=tmpfac))
                if ((j==nlevels(tmpfac) || catmeans[j] <= catmeans[j+1]) &&
                                (j==1 || catmeans[j] >= catmeans[j-1])) {
                        j=j+1
                        upact=T
                        next
                }
                if (upact) {
                        if (j < nlevels(tmpfac) && catmeans[j] > catmeans[j+1]) 
{
                                tmpfac = recode(tmpfac, paste(j, ":", j+1,"='", 
j+1, "'", sep=""))
                                levels(tmpfac) = 1:nlevels(tmpfac)
                        }
                        upact=F
                }
                else {
                        if (j > 1 && catmeans[j] < catmeans[j-1]) {
                                tmpfac = recode(tmpfac, paste(j-1, ":", j, 
"='", j, "'", sep=""))
                                levels(tmpfac) = 1:nlevels(tmpfac)
                                j=j-1
                        }
                        upact=T
                }
        }
}

______________________________________________
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html

Reply via email to