########################################################################### # This is the function for end users. It computes score test statistic at # each locus for given IBD information, pedigree and phenotype values. # # ARGUMENTS: # # ibddata: the file name of IBD output from GENEHUNTER, say IBD_DIST.out # # phenotype: the name of an ASCII file with six columns, say PHENO.txt # the first row of this file label each column. # # Note: these labels are not used in the calculation. # they help to clarify the meaning of each column. # # the order of these six columns are: # family id, person id, father id, mother id, gender and phenotypic value # # VALUE: # # a matrix with each row containing the locus information and the # associated value of the test statistic ########################################################################### comp.score <- function(ibddata = "IBD_DIST.out", phenotype = "PHENO.txt") { ibd <<- read.table(ibddata, skip = 1, col.names = c("pos", "pedigree", "pair", "prior.Z0", "prior.Z1", "prior.Z2", "Z0", "Z1", "Z2")) pedigreepheno <- read.table(phenotype, header = T) person <- paste(pedigreepheno$family, pedigreepheno$person, sep = ".") pedigreepheno$father <- paste(pedigreepheno$family, pedigreepheno$father, sep = ".") pedigreepheno$mother <- paste(pedigreepheno$family, pedigreepheno$mother, sep = ".") parent <- paste(pedigreepheno$father, pedigreepheno$mother, sep = ",") pheno.value <- structure(pedigreepheno$phenotype, .Names = person) chromo.pos <- unique(ibd$pos) val <- NULL nval <- NULL for (i in chromo.pos) { cat(i) val <- c(val, score(i, person, parent, pheno.value)) nval <- c(nval, i) } cbind(pos = nval, score = val, p.value = 0.5-0.5*pchisq(val, df = 1)) } score <- function(chromo.pos = 0, person, parent, pheno.value) { ibddata <- ibd[ibd$pos == chromo.pos & ibd$prior.Z0 == 0.25 & ibd$prior.Z1 == 0.5 & ibd$prior.Z2 == 0.25, c("pedigree", "pair", "Z1", "Z2")] pairs <- do.call("rbind", strsplit(as.character(ibddata[, "pair"]), ",")) # we add family id to the person id. pairs[, 1] <- paste(ibddata$pedigree, pairs[, 1], sep = ".") pairs[, 2] <- paste(ibddata$pedigree, pairs[, 2], sep = ".") # get all persons in all sibships at chromosome position = pos all.persons <- unique(unlist(pairs)) # get mu. mu <- mean(pheno.value[all.persons], na.rm = T) sigma <- sqrt(var(pheno.value[all.persons], na.rm = T)) # for S+ #sigma <- sqrt(var(pheno.value[all.persons], na.method = "available")) rho0 <- cor((pheno.value[pairs[, 1]]-mu)/sigma, (pheno.value[pairs[, 2]]-mu)/sigma, use = "complete.obs") #(pheno.value[pairs[, 2]]-mu)/sigma, na.method = "available") # for S+ pi <- ibddata[, "Z1"]/2 + ibddata[, "Z2"] Epi <- mean(pi) Vpi <- var(pi) # find out mean and variance of v using families of same size allvi <- NULL parents <- parent[match(pairs[, 1], person)] parentsKept <- rep(TRUE, length(parents)) for (j in unique(parents)) { member <- pairs[parents==j, , drop=FALSE] yi <- (pheno.value[unique(member)]-mu)/sigma if (any(is.na(yi))) { parentsKept <- parentsKept & parents!=j next } ni <- length(yi) ri <- rho0/(1+(ni-1)*rho0) wi <- (yi-ni*ri*mean(yi))/(1-rho0) vi <- apply(member, 1, function(x, wi)prod(wi[x]), wi) allvi <- rbind(allvi, cbind(vi = vi, ni = rep(ni, length(vi)))) } Evi <- tapply(allvi[, "vi"], as.factor(allvi[, "ni"]), mean) Vvi <- tapply(allvi[, "vi"], as.factor(allvi[, "ni"]), var) parents <- parents[parentsKept] ibddata <- ibddata[parentsKept, ] pairs <- pairs[parentsKept, ] # get score statistic b <- vb <- 0 for (j in unique(parents)) { ni <- length(unique(pairs[parents==j, , drop=FALSE])) pi <- ibddata[parents==j, "Z1"]/2 + ibddata[parents==j, "Z2"] vi <- allvi[parents==j, "vi"] newb <- as.vector((pi-Epi) %*% (vi-Evi[as.character(ni)])) newvb <- ni*(ni-1)/2*Vpi*Vvi[as.character(ni)] b <- b + newb vb <- vb + newvb } ifelse (b>0, b^2/vb, 0) }