# SUMMARY # ------- # This file defines some functions that I use for QTL mapping in # GEMMA. Here is an overview of the functions defined in this file: # # prepare.pheno(pheno) # remove.outliers(pheno,phenotype,covariates,outliers,verbose) # read.pheno.pl(file) # computemaf(geno) # none.missing.row(x) # center.columns(X) # repmat(A,m,n) # FUNCTION DEFINITIONS # ---------------------------------------------------------------------- # This function prepares the phenotype data for QTL mapping. prepare.pheno <- function (pheno) { # Remove rows marked as "discard" and as possible sample mixups. pheno <- subset(pheno,discard == "no" & mixup == "no") #Scale Centroid size skull to mean zero and std 1. pheno <- transform(pheno,sCS = (sCS - mean(sCS, na.rm=TRUE))/sqrt(var(sCS, na.rm=TRUE))) #Scale Centroid size mandible new phenotypes to mean zero and std 1. pheno <- transform(pheno,mCS = (mCS - mean(mCS, na.rm=TRUE))/sqrt(var(mCS, na.rm=TRUE))) # Return the updated phenotype data table. return(pheno) } # ---------------------------------------------------------------------- # Remove outliers from the phenotype data for the given phenotype, # optionally conditioning on covariates. If covariates are specified, # outliers are determined according to the residual of the phenotype # after regressing on the covariates. remove.outliers <- function (pheno, phenotype, covariates, outliers, verbose = TRUE) { # Specify the function for removing the outliers. is.outlier <- function (x) { y <- outliers(x) y[is.na(y)] <- FALSE return(y) } # If we are conditioning on one or more covariates, get the # residuals of the phenotype conditioned on these covariates. if (length(covariates) > 0) { f <- formula(paste(phenotype,"~",paste(covariates,collapse="+"))) r <- resid(lm(f,pheno,na.action = na.exclude)) } else r <- pheno[[phenotype]] # If requested, report how many outlying data points are removed. if (verbose) { n <- sum(is.outlier(r)) if (n == 0) cat("No outliers for",phenotype) else cat(n,"outliers for",phenotype) if (length(covariates) == 0) cat(" are removed.\n") else cat(" conditioned on",paste(covariates,collapse=" + "),"are removed.\n") } # Remove the outlying data points. pheno[is.outlier(r),phenotype] <- NA # Return the updated phenotype data table. return(pheno) } #------------------------------------------------------------------------ # Returns a data frame containing the Ploen phenotype stored in a CSV file read.pheno.pl <- function (file) { pheno <- read.csv(file,header = TRUE,quote = "",check.names = FALSE, stringsAsFactors = FALSE,skip = 5) # Convert several columns to factors. pheno <- transform(pheno, id = as.character(id), #round = factor(round,paste0("SW",1:25)), discard = factor(discard), mixup = factor(mixup)) return(pheno) } # ---------------------------------------------------------------------- # Returns the minor allele frequency given a vector of genotypes # encoded as allele counts. computemaf <- function (geno) { f <- mean(geno,na.rm = TRUE)/2 return(min(f,1-f)) } # ---------------------------------------------------------------------- # For each row of the matrix or data frame, returns true if all the # entries in the row are provided (not missing). none.missing.row <- function (x) rowSums(is.na(x)) == 0 # ---------------------------------------------------------------------- # Centers the columns of matrix X so that the entries in each column # of X add up to zero. center.columns <- function (X) { mu <- matrix(colMeans(X),1,ncol(X)) X <- X - repmat(mu,nrow(X),1) return(X) } # ---------------------------------------------------------------------- # Does the same thing as repmat(A,m,n) in MATLAB. repmat <- function (A,m,n) { return(kronecker(matrix(1,m,n),A)) }