# $Id: combinations.R 1083 2007-03-23 22:53:00Z warnes $
#

##
## From email by Brian D Ripley <ripley@stats.ox.ac.uk> to r-help
## dated Tue, 14 Dec 1999 11:14:04 +0000 (GMT) in response to
## Alex Ahgarin <datamanagement@email.com>.  Original version was
## named "subsets" and was Written by Bill Venables.  
##

combinations <- function(n, r, v = 1:n, set = TRUE, repeats.allowed=FALSE)
{
  if(mode(n) != "numeric" || length(n) != 1 
     || n < 1 || (n %% 1) != 0) stop("bad value of n") 
  if(mode(r) != "numeric" || length(r) != 1 
     || r < 1 || (r %% 1) != 0) stop("bad value of r") 
  if(!is.atomic(v) || length(v) < n) 
    stop("v is either non-atomic or too short")
  if( (r > n) & repeats.allowed==FALSE)
    stop("r > n and repeats.allowed=FALSE")
  if(set) {
    v <- unique(sort(v))
    if (length(v) < n) stop("too few different elements")
  }
  v0 <- vector(mode(v), 0)
  ## Inner workhorse
  if(repeats.allowed)
    sub <- function(n, r, v)
      { 
        if(r == 0) v0 else
        if(r == 1) matrix(v, n, 1) else
        if(n == 1) matrix(v, 1, r) else
        rbind( cbind(v[1], Recall(n, r-1, v)),
              Recall(n-1, r, v[-1]))
      }
  else
    sub <- function(n, r, v)
      { 
        if(r == 0) v0 else
        if(r == 1) matrix(v, n, 1) else
        if(r == n) matrix(v, 1, n) else
        rbind(cbind(v[1], Recall(n-1, r-1, v[-1])),
              Recall(n-1, r, v[-1]))
      }
  sub(n, r, v[1:n])
}

##
## Original version by Bill Venables and cited by by Matthew
## Wiener (mcw@ln.nimh.nih.gov) in an email to R-help dated
## Tue, 14 Dec 1999 09:11:32 -0500 (EST) in response to
## Alex Ahgarin <datamanagement@email.com>
##
##


permutations <- function(n, r, v = 1:n, set = TRUE, repeats.allowed=FALSE)
{
  if(mode(n) != "numeric" || length(n) != 1 
     || n < 1 || (n %% 1) != 0) stop("bad value of n") 
  if(mode(r) != "numeric" || length(r) != 1 
     || r < 1 || (r %% 1) != 0) stop("bad value of r") 
  if(!is.atomic(v) || length(v) < n) 
    stop("v is either non-atomic or too short")
  if( (r > n) & repeats.allowed==FALSE)
    stop("r > n and repeats.allowed=FALSE")
  if(set) {
    v <- unique(sort(v))
    if (length(v) < n) stop("too few different elements")
  }
  v0 <- vector(mode(v), 0)
  ## Inner workhorse
  if(repeats.allowed)
    sub <- function(n, r, v)
      {
        if(r==1) matrix(v,n,1) else
        if(n==1) matrix(v,1,r) else
        {
          inner  <-  Recall(n, r-1, v)
          cbind( rep( v, rep(nrow(inner),n)  ),
                 matrix( t(inner), ncol=ncol(inner), nrow=nrow(inner) * n ,
                        byrow=TRUE )
                )
        }
      }
  else
    sub <- function(n, r, v)
      {
        if(r==1) matrix(v,n,1) else
        if(n==1) matrix(v,1,r) else
        {
        X  <-  NULL
        for(i in 1:n)
          X  <-  rbind( X, cbind( v[i], Recall(n-1, r - 1, v[-i])))
        X
        }
      }

  sub(n, r, v[1:n])
}
# Clear the workspace of all objects whose names don't start with a full stop

clear <- function()  {
  env <- as.environment(1)
  to.go <- ls(env, all.names=FALSE)
  continue <- TRUE
  while (continue) {
    nxt <- search()[[2]]
    # bit of a botch
    if (substr(nxt, 1, 8)!="package:") 
      detach()
    else
      continue <- FALSE
    }
  remove(list=to.go, envir=env)
}

# Simple logistic regression wrapper

logistic <- function(formula, weights) {
  if (missing(weights))
    res <- glm(formula, family=binomial)
  else {
    wt <- weights
    res <- glm(formula, family=binomial, weights=wt)
  }
  class(res) <- c("logistic", "glm", "lm")
  res$call$formula <- formula
  res
}

print.logistic <- function(x) {
  sx <- summary(x)
  beta <- sx$coefficients
  intcp <- match("(Intercept)", rownames(beta))
  if (!is.na(intcp)) {
    beta <- beta[-intcp,,drop=FALSE]
  }
  or <- matrix(nrow=nrow(beta), ncol=5)
  rownames(or) <- rownames(beta)
  colnames(or) <- c("OR", "Lower",
                    "Upper", "z-test", "P-value")
  or[,1] <- exp(beta[,1])
  or[,2] <- exp(beta[,1]-1.96*beta[,2])
  or[,3] <- exp(beta[,1]+1.96*beta[,2])
  or[,4] <- beta[,3]
  or[,5] <- beta[,4]
  cat("Logistic regression: ", deparse(sx$call$formula), "\n\n")
  cat("Odds ratios (1 unit change), lower and upper confidence limits, and tests:\n\n")
  print(or)
}
  
# Find most representative member of each cluster in a hierarchical clustering
# clusters: object calculated either by rect.hclust or by cutree
# In case of ties, select member with highest value for tie.break

representative <- function(clusters, distance, minimize="mean", tie.break){
  cri <- match(minimize, c("mean", "maximum"))
  if (is.na(cri))
    stop("minimize argument should be mean or  minimum") 
  if (class(distance)=="dist") {
    distance <- as.matrix(distance)
  }
  else if (!is.matrix(distance)) 
    stop("distance argument must be a distance object or a matrix")
  N <- nrow(distance)
  if (!is.list(clusters)) {
    if (length(clusters)!=N)
      stop("Incorrect length for clusters argument")
    ncl <- max(clusters)
    nms <- names(clusters)
    new <- vector("list", ncl)
    ord <- 1:N
    for (i in 1:ncl) {
      cli <- ord[clusters==i]
      names(cli) <- nms[clusters==i]
      new[[i]] <- cli
    }
    clusters <- new
  }   
  ncl <- length(clusters)
  if (missing(tie.break))
    tie.break <- runif(N)
  else if (length(tie.break)!=N)
    stop("tie.break argument has the wrong length")
  diag(distance) <- NA
  reps <- numeric(ncl)
  nreps <- character(ncl)
  dreps <- numeric(ncl)
  for (i in 1:ncl) {
    cli <- clusters[[i]]
    nmi <- names(cli)
    size <- length(cli)
    if (size==1) {
      reps[i] <- cli[1]
      nreps[i] <- nmi[1]
      dreps[i] <- 0
    }
    else {
      di <- distance[cli, cli]
      pri <- tie.break[cli]
      if (cri==1) 
        ds <- apply(di, 1, mean, na.rm=TRUE)
      else
        ds <- apply(di, 1, max, na.rm=TRUE)
      best <- min(ds)
      cand <- best==ds
      if (sum(cand)==1) {
        reps[i] <- cli[cand]
        nreps[i] <- nmi[cand]
      }
      else {
        maxpr <- max(pri[cand])
        ch <- (1:size)[cand]
        which <- ch[match(maxpr, pri[cand])]
        reps[i] <- cli[which]
        nreps[i] <- nmi[which]
      }
      dreps[i] <- best
    }
  }
  names(reps) <- nreps
  names(dreps) <- nreps
  isr <- rep(FALSE, N)
  isr[reps] <- TRUE
  list(rep=reps, is.rep=isr, dist=dreps)
}
  
# Weighted table

wtable <- function(..., weights) {
  by <- list(...)
  if (missing(weights))
    table(by)
  else {
    tw <- tapply(weights, by, sum, na.rm=TRUE)
    tw[is.na(tw)] <- 0
    tw
  }
}
    
# Weighted means

wmean <- function(x, weights, na.rm=TRUE) {
  if (missing(weights))
    mean(x, na.rm=na.rm)
  else 
    sum(x*weights, na.rm=na.rm)/sum(weights, na.rm=na.rm)
}
    
# Table of means

mean.table <- function(x, ..., weights) {
  by <- list(...)
  if (missing(weights)) {
    num <- tapply(x, by, sum, na.rm=TRUE)
    den <- table(by)
    tab <- num/den
    class(tab) <- "table"
    tab
  }
  else {
    num <- tapply(x*weights, by, sum, na.rm=TRUE)
    den <- tapply(weights, by, sum, na.rm=TRUE)
    num/den
  }
}

# Cochran-Armitage test - with Mantel extension
# See Clayton & Hills 20.3 pp 201-203

Cochran.Armitage.test <- function(exposure, cc, stratum=rep(1,length(cc))) {
  N <- length(cc)
  if (is.factor(exposure)) {
    if (is.ordered(exposure))
      exposure <- as.numeric(exposure)
    else
      stop("exposure argument must be numeric or an ordered factor")
  }
  cl <- match.call()
  narg <- length(cl) - 1
  arguments <- character(narg)
  for (i in 1:narg) {
    argi <- as.character(cl[[i+1]])
    arguments[i] <- paste(argi, collapse=".")
  }
  use <- !(is.na(cc) | is.na(exposure) | is.na(stratum))
  if (any(!use)) {
    cc <- cc[use]
    exposure <- exposure[use]
    stratum <- stratum[use]
  }
  dh <- table(stratum, cc)
  if (ncol(dh)!=2)
    stop("cc argument must have two levels")
  nt <- table(stratum)
  zm <- tapply(exposure, list(stratum, cc), mean)
  ut <- dh[,1]*dh[,2]*(zm[,1] - zm[,2])/nt
  zv <- tapply(exposure, stratum, var)
  vt <-  dh[,1]*dh[,2]*zv/nt
  x2 <- sum(ut, na.rm=TRUE)^2/sum(vt, na.rm=TRUE)
  names(x2) <- "Chi-squared"
  df <- 1
  names(df) <- "df"
  res <- list(statistic=x2, parameter=df,
              p.value=pchisq(x2, 1, lower.tail=FALSE),
              method="Cochran-Armitage test with Mantel's extension",
              data.name=paste(arguments, paste=" "),
              score=ut,
              score.variance=vt)
  class(res) <- "htest"
  res
}

# expectedGenotypes.R
#--------------------------------------------------------------------------
# What: Construct expected genotypes according to known allele variants
# Time-stamp: <2007-07-19 12:06:26 ggorjan>
#--------------------------------------------------------------------------

expectedGenotypes <- function(x, alleles=allele.names(x), ploidy=2,
                              sort=TRUE, haplotype=FALSE)
{
    ## Checks
    if (missing(x) && missing(alleles))
        stop("at least one of 'x' or 'alleles' must be given")
    if (!(missing(x) && !missing(alleles))) {
        if (!is.genotype(x)) stop("x must be of class 'genotype'")
    }

    nA <- length(alleles)
    if(nA==0)
      stop("Zero length alleles vector.")

    ## Add numbers to allele names, so we get result sorted by
    ## given allele names
    if (sort) {
        allelesOrig <- alleles
        alleles <- paste(1:nA, alleles, sep="")
    }

    ## Find possible genotypes according to allele variants
    if(!haplotype) {
      comb <- combinations(n=nA, r=ploidy, v=alleles, repeats.allowed=T)
    } else {
      comb <- permutations(n=nA, r=ploidy, v=alleles, repeats.allowed=T)
    }

    if (sort) {
        for (i in 1:nA) comb[comb == alleles[i]] <- allelesOrig[i]
    }

    ## Create a nice character vector of expected genotypes
    nC <- dim(comb)
    ret <- vector(mode="character", length=nC[1])
    ret <- paste(comb[, 1], comb[, 2], sep="/")
    if (ploidy > 2) {
        for (i in 3:nC[2]) {
            ret <- paste(ret, comb[, i], sep="/")
        }
    }
    return(ret)
}

expectedHaplotypes <- function(x, alleles=allele.names(x), ploidy=2,
                              sort=TRUE, haplotype=TRUE)
{
  expectedGenotypes(x=x, alleles=alleles, ploidy=ploidy,
                    sort=sort, haplotype=haplotype)
}

#--------------------------------------------------------------------------
# expectedGenotypes.R ends here
# $Id: genotype.R 1329 2007-11-20 20:20:51Z warnes $

genotype  <- function(a1, a2=NULL, alleles=NULL, sep="/",
                      remove.spaces=TRUE,
                      reorder=c("yes", "no", "default", "ascii", "freq"),
                      allow.partial.missing=FALSE,
                      locus=NULL, genotypeOrder=NULL)
{
    if(missing(reorder))
      reorder  <- "freq"
    else
      reorder <- match.arg(reorder)

    if(is.genotype(a1)){
        a1  <-  as.character(a1)
        ## ignore a2
        a2 <- NULL
    }
    else
      {
        a1.d <- dim(a1)
        a1 <- as.character(a1)
        dim(a1) <- a1.d
        a1[is.na(a1)] <- ""   # necessary because of bug in grep & friends,
                              # will be fixed in 1.7.1
      }

    if(!is.null(a2))
      {
        a2.d <- dim(a2)
        a2 <- as.character(a2)
        dim(a2) <- a2.d
        a1[is.na(a1)] <- ""  # necessary because of bugs in grep & friends
                             # will be fixed in 1.7.1
      }

    if(remove.spaces)
    {
        a1dim <- dim(a1)
        a1  <-  gsub("[ \t]", "", a1)
        dim(a1) <- a1dim
        if(!is.null(a2))
            a2  <-  gsub("[ \t]", "", a2)
    }

    if(!is.null(dim(a1)) && ncol(a1) > 1)
        parts <- a1[,1:2]
    else if(!is.null(a2))
        parts  <- cbind(a1,a2)
    else
      {
        # if sep is empty, assume allele names are single characters
        # pasted together
        if(sep=="")
          sep  <- 1

        # Based on the value of sep, reformat into our standard
        # name-slash-name format
        if (is.character(sep) )
          {
            part.list   <- strsplit(a1,sep)
            part.list[ sapply(part.list, length)==0] <- NA

            ## Handle missing / empty values correctly. 
            ## Without this, empty elements are silently dropped
            ## and/or cause errors

            # only first field was given
            half.empties  <- lapply(part.list, length)==1
            part.list[half.empties]  <-  lapply(part.list[half.empties],c,NA)

            # neither field was given
            empties  <- is.na(a1) | lapply(part.list, length)==0
            part.list[empties]  <- list(c(NA,NA))

            parts <- matrix(unlist(part.list),ncol=2,byrow=TRUE)

          }
        else if (is.numeric(sep))
          parts  <- cbind( substring(a1,1,sep), substring(a1,sep+1,9999))
        else
          stop(paste("I don't know how to handle sep=",sep))
      }

    mode(parts) <- "character"  # needed for bare NA's o

    # convert entirely whitespace alleles to NAs
    temp  <- grep("^[ \t]*$", parts)
    parts[temp]  <-  NA

    #parts[parts=="NA"]  <-  NA

    if(missing(alleles) || is.null(alleles))
      alleles <- unique(c(na.omit(parts)))
    else
      {
        which.alleles  <- !(parts %in% alleles)
        ## Skipping NA's
        which.alleles <- which.alleles & !is.na(parts)
        if(any(which.alleles))
          {
            warning("Found data values not matching specified alleles. ",
                    "Converting to NA.")
            parts[which.alleles] <- NA
          }
      }

    if(!allow.partial.missing)
      parts[is.na(parts[,1]) | is.na(parts[,2]),]  <- c(NA,NA)

    if(reorder!="no")
    {
        if(reorder=="ascii")
        {
            alleles <-  sort(alleles)
        }
        else if(reorder=="freq")
        {
            ## get reordering of alleles by frequency
            tmp  <- names(rev(sort(table(parts))))
            alleles  <- unique(c(tmp,alleles))
        }

        reorder  <- function( x, alleles)
        {
            tmp <- match( x, alleles )
            x[order(tmp)]
        }

        parts  <- t(apply(parts,1, reorder, alleles))

      }

    tmp  <-  ifelse( is.na(parts[,1]) & is.na(parts[,2]),
                    NA,
                    apply(parts,1,paste,collapse="/") )

    object  <- factor( tmp )

    # force "NA" not to be a factor level
    ll  <- levels(object)  <-  na.omit(levels(object))

    class(object)  <-  c("genotype","factor")
    attr(object,"allele.names")  <- alleles
    attr(object,"allele.map")  <- do.call("rbind", strsplit(ll, "/"))

    genotypeOrder(object) <- genotypeOrder

    if(is.null(locus) || is.locus(locus)  )
      attr(object,"locus")  <- locus
    else
      stop("parameter locus must be of class locus")
    return(object)
  }

is.genotype  <- function(x)
    inherits(x, "genotype")

is.haplotype  <- function(x)
    inherits(x, "haplotype")

###
### Haplotype -- differs only in that order of a1,a2 is considered siginificant
###
haplotype <- function (a1, a2 = NULL, alleles = NULL, sep = "/",
                       remove.spaces = TRUE, reorder = "no",
                       allow.partial.missing = FALSE, locus = NULL,
                       genotypeOrder=NULL) 
{
    retval <- genotype(a1 = a1, a2 = a2, alleles = alleles, sep = sep, 
                       remove.spaces = remove.spaces, reorder = reorder,
                       allow.partial.missing = allow.partial.missing, 
                       locus = locus, genotypeOrder=genotypeOrder)
    class(retval) <- c("haplotype", "genotype", "factor")
    retval
}


as.haplotype  <- function(x,...)
{
    retval <- as.genotype(x,...,reorder="no")
    class(retval)  <- c("haplotype","genotype","factor")
    retval
}

###
### Display by giving values plus list of alleles
###

print.genotype  <-  function(x,...)
  {
    if(!is.null(attr(x,"locus")))
        print(attr(x,"locus"))
    print(as.character(x))
    cat("Alleles:", allele.names(x), "\n" )
    invisible(x)
  }

###
### Conversion Functions
###

as.genotype  <- function (x,...) 
  UseMethod("as.genotype")

# Do we want to do this?
as.genotype.default  <-  function(x,...)
  genotype(x,...)

#  stop("No method to convert this object to a genotype")

# for characters, and factors, just do the standard thing (factors get
# implicitly converted to characters so both have the same effect.
as.genotype.character  <-  function(x,...)
  genotype(x,...)

as.genotype.factor  <-  function(x,...)
  genotype(as.character(x),...)

as.genotype.genotype  <- function(x,...)
  return(x)

as.genotype.haplotype  <- function(x,...)
  return(x)


## genotype.allele.counts give the count of each allele type as a
## matrix.  Collapse back into the form we need

as.genotype.allele.count  <- function(x, alleles=c("A","B"), ...)
  {
    if(!is.matrix(x) & !is.data.frame(x) )
      {
        x  <- cbind(x, 2-x)
        colnames(x)  <- alleles
      }

    if(any(x > 2, na.rm=TRUE) || any( x < 0, na.rm=TRUE ) )
      stop("Allele counts must be in {0,1,2}")

    allele.names  <-  colnames(x)
    tmp  <-  apply(x, 1, function(y)
                    rep( colnames(x), ifelse(is.na(y), 0, y) ))

    if(!is.matrix(tmp))
      retval  <-  genotype(sapply(tmp,paste,collapse="/"), alleles=alleles, ...)
    else
      retval  <- genotype(a1=tmp[1,], a2=tmp[2,], ... )
    return(retval)
  }

allele.count.2.genotype  <-  function(...)
  as.genotype.allele.count(...)



as.genotype.table <- function(x, alleles, ...)
  {
    #if(missing(alleles)) alleles <- unique(unlist(dimnames(x)))
    tmp <- outer( rownames(x), colnames(x), paste, sep="/")
    retval <- genotype( rep(tmp,x), alleles=alleles )
    retval
  }


###
### Equality test for genotype, assumes allele order is _not_ significant
###
"==.genotype"  <-  function(x,y)
  {
    if(!is.genotype(y))
      y <- as.genotype(y)

    x.a1  <- allele(x,1)
    x.a2  <- allele(x,2)

    y.a1  <- allele(y,1)
    y.a2  <- allele(y,2)

    return( (x.a1==y.a1 & x.a2==y.a2) | (x.a1==y.a2 & x.a2==y.a1) )
  }

###
### Equality test for haplotype, assumes allele order _is_ significant
###
"==.haplotype"  <-  function(x,y)
  {
    if(!is.genotype(y))
      y <- as.haplotype(y)

    x.a1  <- allele(x,1)
    x.a2  <- allele(x,2)

    y.a1  <- allele(y,1)
    y.a2  <- allele(y,2)

    return( x.a1==y.a1 & x.a2==y.a2 )
  }

###
### is.element i.e. %in%
###

"%in%" <- function(x, table)
  UseMethod("%in%")

## Get default method for %in% from base package
"%in%.default" <- get("%in%", pos="package:base")

"%in%.genotype" <- function(x, table)
{
  xA1  <- allele(x, 1)
  xA2  <- allele(x, 2)

  x1 <- paste(xA1, xA2, sep="/")
  x2 <- paste(xA2, xA1, sep="/")

  ## Return
  ((x1 %in% table) | (x2 %in% table))
}

"%in%.haplotype" <- function(x, table)
  as.character(x) %in% as.character(table)

###
### Extract the first and/or second allele.
###
### By default, return a 2 column matrix containing both alleles
###

#allele  <- function (x,...) 
#  UseMethod("allele")


#allele.genotype  <-  function(x, which=c(1,2) )
allele  <-  function(x, which=c(1,2) )
  {
    alleles.x  <- attr(x,"allele.map")
    retval  <- alleles.x[as.integer(x),which]
    attr(retval,"locus")  <- attr(x,"locus")
    attr(retval,"which")  <- which
    attr(retval,"allele.names")  <- allele.names(x)    
    #class(retval)  <- c("allele.genotype", class(retval))
    return( retval)
  }

as.factor  <- function(x, ...)
  UseMethod("as.factor")

as.factor.default  <- get("as.factor",pos="package:base")
formals(as.factor.default) <- c(formals(as.factor.default),alist(...= ))

as.factor.genotype <- function(x, ...)
  {
    attr(x,"class") <- "factor"
    attr(x,"allele.names") <- NULL
    attr(x,"allele.map") <- NULL
    attr(x,"locus") <- NULL
    attr(x,"genotypeOrder") <- NULL
    x
  }

as.factor.allele.genotype  <-  function(x,...)
  factor(x,levels=allele.names(x))

print.allele.genotype  <- function(x,...)
  {
    if(!is.null(attr(x,"locus")))
      print(attr(x,"locus"))
    cat("Allele(s):", attr(x,"which"), "\n")
    attr(x, "which")  <-  attr(x, "class") <- attr(x,"locus") <- attr(x,"allele.names")  <- NULL
    NextMethod("print",x)
  }


###
### Obtain the count of the number of copies of alleles for each individual
###
### By default, return a matrix containing the counts for all possible allele values.
###

#allele.count  <- function (x,...) 
#  UseMethod("allele.count")

#allele.count.default <- function (x, ... )
#  {
#    x <- as.genotype(x)
#    allele.count(x, ...)
#  }

#allele.count.genotype  <- function(x, allele.name=allele.names(x),

allele.count  <- function(x, allele.name=allele.names(x),
                          any=!missing(allele.name), na.rm=FALSE)
{
  if(!missing(allele.name) && length(allele.name)==1)
    {
      a.1  <- allele(x,1)
      a.2  <- allele(x,2)

      retval  <- ifelse(is.na(a.1) | is.na(a.2),
                        ifelse(na.rm, 0, NA),
                        (a.1==allele.name) + (a.2==allele.name) )
#      class(retval)  <- "allele.count"
      attr(retval,"allele") <- allele.name
      attr(retval,"locus")  <- attr(x,"locus")
      return(retval)
    }
  else
    {
      retval  <- sapply( allele.name, function(y) allele.count(x,y))
      if(any==TRUE && is.matrix(retval)  )
      retval  <- apply(retval,1,sum,na.rm=na.rm)
      if(na.rm) retval[is.na(retval)]  <- 0
#      class(retval)  <- "allele.count"
      attr(retval,"locus")  <- attr(x,"locus")
      return(retval)
    }

}


#print.allele.count  <- function(x,...)
#  { 
#    if(!is.null(attr(x,"locus")))
#        print(attr(x,"locus"))
#    
#    if(is.null(attr(x,"allele")))
#      cat("Allele Counts:\n")
#    else
#      cat("Allele Count (", attr(x,"allele"), " allele):\n", sep="")
#    val  <- x
#    attr(val,"class")  <- NULL
#    attr(val,"allele")  <- NULL
#    print(val)
#    invisible(x)
#  }

###
### Check for the presence of alleles for each individual
###
### By default, return a matrix containing indicators for all possible
### allele values except the last.
###
#
#allele.ind  <-  function(x,allele)
#  {
##    if(missing(allele))
##      stop("Alleles to test must be specified")
##    if(length(allele)==1)
#      retval  <- allele.count(x,allele) > 0
##    else
##      retval  <- apply(allele.count(x,allele) ,1,sum) > 0
#
#      if(missing(allele))
#          allele  <-  colnames(retval)
#      attr(retval,"allele")  <- allele
#      attr(retval,"locus")  <- attr(x,"locus")
#      class(retval)  <-  "allele.ind"
#      return(retval)
#  }

#print.allele.ind  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Indicator(s) for allele(s):", attr(x,"allele"), "\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",x)
#  }

###
### Methods for creating subsets based on a genotype
###

homozygote  <- function (x,allele.name,...) 
  UseMethod("homozygote")

homozygote.genotype  <-  function(x,allele.name,...)
  {
    a1  <- allele(x,1)
    a2  <- allele(x,2)
    if(missing(allele.name))
      retval  <- ifelse( is.na(a1) | is.na(a2), NA, a1==a2 )
    else
      retval  <- ifelse( is.na(a1) | is.na(a2), NA,
                         a1==allele.name & a2==allele.name )
    attr(retval,"locus")  <-  attr(x,"locus")
#    class(retval)  <-  "homozygote"
    return(retval)
  }

#print.homozygote  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Homozygote Indicators:\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",x)
#  }


heterozygote  <- function (x,allele.name,...) 
  UseMethod("heterozygote")

heterozygote.genotype  <-  function(x,allele.name,...)
  {
  {
    a1  <- allele(x,1)
    a2  <- allele(x,2)
    if(missing(allele.name))
      retval  <- ifelse( is.na(a1) | is.na(a2), NA, !a1==a2 )
    else
      retval  <- ((a1 %in% allele.name) | (a2 %in% allele.name)) &
                 (a1 != a2)
    attr(retval,"locus")  <-  attr(x,"locus")
#    class(retval)  <-  "homozygote"
    return(retval)
  }
  }

#print.heterozygote  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Heterozygote Indicators:\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",x)
#  }

carrier <- function (x,allele.name,...) 
  UseMethod("carrier")

carrier.genotype  <-  function(x, allele.name=allele.names(x),
                                   any=!missing(allele.name), na.rm=FALSE, ...)
{
  retval  <- allele.count(x,allele.name=allele.name,any=any,na.rm=na.rm) > 0

  attr(retval,"allele")  <- retval$allele
  attr(retval,"locus")  <-  attr(x,"locus")
#  class(retval)  <- "carrier"
  return(retval)
}


#print.carrier  <- function(x,...)
#  {
#    if(!is.null(attr(x,"locus")))
#      print(attr(x,"locus"))
#    
#    cat("Carrier Indicator(s) for allele(s):", attr(x,"allele"), "\n")
#    attr(x,"locus")  <-  attr(x,"class")  <- attr(x,"allele")  <-  NULL
#    NextMethod("print",unclass(x))
#  }


###
###
###

allele.names<- function(x)
  {
    retval  <- attr(x,"allele.names")
    if(is.null(retval))
      retval  <- x$allele.names
    return(retval)
  }

###
### Subset method
###

"[.genotype"  <-  function(x, i, drop=FALSE)
  {
    allelesOld <- attr(x, "allele.names")
    retval  <- NextMethod("[")

    # force "NA" not to be a factor level
    ll  <- levels(retval)  <-  na.omit(levels(retval))

    class(retval)  <-  c("genotype","factor")

    if(drop) {
      alleles <- unique( unlist(strsplit(ll, "/") ) )
    } else {
      alleles <- attr(x, "allele.names")
    }
    attr(retval,"allele.names")  <- alleles
    attr(retval,"allele.map")  <- do.call("rbind", strsplit(ll, "/"))
    attr(retval,"locus")  <- attr(x,"locus")
    attr(retval,"label")  <-  attr(x,"label")
    goCur <- attr(x, "genotypeOrder")
    if(drop) {
      ## Removing genotype names having dropped alleles
      allelesOld <- allelesOld[!(allelesOld %in% alleles)]
      tmp <- allele(as.haplotype(goCur))
      test <- tmp %in% allelesOld
      test <- rowSums(matrix(test, ncol=2)) > 0
      attr(retval, "genotypeOrder") <- goCur[!test]
    } else {
      attr(retval, "genotypeOrder") <- goCur
    }
    return(retval)
  }

"[.haplotype"  <-  function(x, i, drop=FALSE)
  {
    retval  <- NextMethod("[")
    class(retval) <- c("haplotype","genotype","factor")
    retval
  }

###
### Subset Assigment method
###

"[<-.genotype"  <-  function(x, i, value)
  {
    ## Special case for insertion of NA and "" values
    if(all( is.na(value) | as.character(value)<="" ) )
      {
        x.class <- class(x)
        x <- unclass(x)
        x[i] <- NA
        class(x) <- x.class
        return(x)
      }
      
    if(!is.genotype(value))
      {
        value <- genotype(value)
      }

    lx <- levels(x)
    lv <- levels(value)
    ax <- allele.names(x)
    av <- allele.names(value)

    m  <- is.na(match(av,ax) )
    if( any( m  )  )
       warning(paste("Adding new allele name:", av[m], "\n"))

    la <- unique(c(lx,lv))
    aa <- unique(c(ax,av))

    cx <- class(x)
    nas <- is.na(x)

    data  <-  match(levels(value)[value],la)

    class(x) <- NULL
    x[i] <- data
    attr(x, "levels") <- la
    map  <- attr(x, "allele.map")  <- do.call("rbind", strsplit(la, "/"))
    attr(x, "allele.names")  <- aa
    goCur <- attr(x, "genotypeOrder")
    goAll <- expectedGenotypes(alleles=aa, haplotype=TRUE)
    attr(x, "genotypeOrder") <- c(goCur, goAll[!(goAll %in% goCur)])
    class(x) <- cx
    x
  }

"[<-.haplotype"  <-  function(x, i, value)
  {
    if(!is.haplotype(value))
      stop("Assigned value must be of class haplotype.")
    NextMethod("[<-")
  }

nallele <- function(x)
  length(allele.names(x))

genotypeOrder <- function(x)
  attr(x, "genotypeOrder")

"genotypeOrder<-" <- function(x, value)
{
  if(!is.genotype(x)) stop("'x' must be of a genotype class")
  alleles <- allele.names(x)

  goAll <- expectedGenotypes(alleles=alleles, haplotype=TRUE)
  goDef <- unique(sort(as.character(x)))

  if(is.null(value)) {
    attr(x, "genotypeOrder") <- goAll
  } else {
    value <- unique(value)

    ## Stop msg says all
    parts <- strsplit(x=value, split="/")
    parts <- sapply(parts, c)
    test <- !(parts %in% alleles)
    if(any(test))
      stop("adding genotype names with alleles that are not in the data")

    ## Any genotypes in the data that are not in value?
    test <- !(goDef %in% value)
    if(any(test)) {

      ## These values are in all possible genotypes/haplotypes
      testDefinAll <- goDef[test] %in% goAll
      ## but not in value
      testDefinAllnotVa <- !(goDef[testDefinAll] %in% value)
      goPos <- goDef[testDefinAllnotVa]

      ## We could simply add goPos to value now. However, A/B in goPos
      ## should also match B/A, since genotype() allows reordering of
      ## original data and additionally we want this to work also for
      ## haplotype.

      ## Extend value first. We do not do this before, since one
      ## might not necessarily like to have B/A together with A/B in
      ## first place.
      value <- genetics:::.genotype2Haplotype(x=value)
      ## Remove heterozygos matches in goPos
      test <- !(goPos %in% value)
      goPos <- goPos[test]

      ## Add goPos to the end of value
      if(any(test)) value <- c(value, goPos)

      ## If there are still some values in all, but not in value
      ## now, we just add them at the end
      testGOnotAll <- !(goAll %in% value)
      if(any(testGOnotAll)) value <- c(value, goAll[testGOnotAll])
    } else {
      value <- genetics:::.genotype2Haplotype(x=value)
    }
    attr(x, "genotypeOrder") <- value
  }
  x
}

.genotype2Haplotype <- function(x)
{
  ## Internal function
  ##
  ## Returns a character vector of possible haplotypes for given genotypes
  ## in such a way that for say c("A/A", "A/B", "B/B") you get c("A/A",
  ## "A/B", "B/A", "B/B") i.e. "B/A" comes directly after "A/B"!
  ##
  ## x - character, vector of genotype values in form allele1/allele2
  ##
  ## Details
  ## Unique values of x are taken i.e. first occurrence prevails
  ##
  ## Example
  ## genetics:::.genotype2Haplotype(c("A/A", "A/B", "B/B"))
  ## "A/A" "A/B" "B/A" "B/B"
  ## genetics:::.genotype2Haplotype(c("B/B", "A/B", "A/A"))
  ## "B/B" "A/B" "B/A" "A/A"

  x <- unique(x)
  N <- length(x)
  parts <- genetics:::.genotype2Allele(x=x)
  parts <- rbind(parts, parts[, 2:1])
  ind <- rep(1:N, each=2) + c(0, N)
  parts <- parts[ind, ]
  parts <- unique(paste(parts[, 1], parts[, 2], sep="/"))
  parts
}

.genotype2Allele <- function(x)
{
  ## Internal function
  ##
  ## Returns a matrix of alleles from a character vector of genotype names
  ##
  ## x - character, vector of genotype values in form allele1/allele2
  ##
  ## Details:
  ## Coercing to character is done for x.
  ##
  ## Example
  ## genetics:::.genotype2Allele(c("A/A", "A/B", "B/B"))
  ##      [,1] [,2]
  ## [1,] "A"  "A"
  ## [2,] "A"  "B"
  ## [3,] "B"  "B"

  parts <- strsplit(x=as.character(x), split="/")
  parts <- t(sapply(parts, c))
  parts
}
is.qna <- function(x) {is.na(x) | x=="NA"}

allele.pairs <- function(names, postfixes = c(".1", ".2")) {
  if (!is.character(names))
    stop("illegal names argument")
  if (!is.character(postfixes) || length(postfixes)!=2)
    stop("illegal postfixes argument")
  N <- length(names)
  s1 <- sub(paste(postfixes[1],"$",sep=""), "", names)
  s2 <- sub(paste(postfixes[2],"$",sep=""), "", names)
  res <- list()
  nres <- NULL
  for (i in 1:N) {
    if (s1[i]!=names[i]) {
      for (j in 1:N) {
        if (j!=i && s2[i]!=names[j] && s2[j]==s1[i]) {
          new <- list(c(names[i], names[j]))
          names(new) <- s1[i]
          res <- c(res, new) 
        }
      }
    }
  }
  res
}


# This is very basic at present and only loads marker position

"annotation<-" <- function(data, value, reorder=FALSE, ...) {
  if (!(is.data.frame(data) && is.data.frame(value)))
    stop("Arguments must be dataframes")
  varnames <- names(data)
  nv <- length(varnames)
  n.an <- nrow(value)
  gtypes <- NULL
  positions <- NULL
  others <- NULL
  for (i in 1:nv) {
    var <- data[[i]]
    if (is.genotype(var)) {
      lname <- varnames[i]
      m <- (value$name == lname)
      sm <- sum(m)
      if (sm==0)
        warning(paste("No annotation found for", lname))
      else {
        if (sm>1) {
          warning(paste("Duplicate annotations found for", lname,
                        "- using first"))
        }
        line <- min((1:n.an)[m])
        type <- value$type[line]
        if (is.null(type))
          type <- "SNP"
        bps <- value$bp.start[line]
        bpe <- value$bp.end[line]
        lg <- ""
        mark <-  marker(lname, type, lg, bps, bpe)
        attr(data[[i]], "locus") <- mark
        gtypes <- c(gtypes, i)
        positions <- c(positions, bps)
      }
    }
    else 
      others <- c(others, i)
  }
  if (reorder) {
    reord <- c(others, gtypes[order(positions)])
    out <- vector("list", nv)
    for (i in 1:nv) {
      out[[i]] <- data[[reord[i]]]
    }
    names(out) <- varnames[reord]
    return(as.data.frame(out))
  }
  else {
    return(data)
  }
}
        
# Contrast matrix for additive, dominance, and parent-of-origin,
# genotype, and haplotype effects


genetic.contrasts <- function(g, mode="additive", base) {
  if (!is.genotype(g)) {
    stop("Argument must be of class genotype")
  }
  if (!is.character(mode))
    stop("Illegal contrast specification")
  modes <- c("genotype", "haplotype",
                  "additive", "dominance",
                  "additive.origin", "dominance.origin", 
                  "parent.of.origin") # For backward compatibility
  mode <- match(mode, modes)
  if (is.na(mode))
    stop("Illegal contrast specification")
  if (mode>4 && !is.haplotype(g))
    stop("Parent-of-origin contrasts  only legal for haplotype variables")
  if (mode==2 && !is.haplotype(g))
    stop("Haplotype contrasts only legal for haplotype variables")
  if (mode==7)
    mode <- 6
  alleles <- attr(g, "allele.names")
  na <- length(alleles)
  nd <- (na*(na-1))/2
  cnames <- character(nd) 
  levs <- attr(g,"levels")
  if (length(levs)<2) {
    stop("Genotype is not polymorphic")
  }
  if (mode > 2) {
    if (missing(base)) {
      af <- allele.table(g)
      base <- order(alleles)[match(max(af), af)]
    }
    if (is.numeric(base)) {
      if (base<0 || base>na) {
        stop("base out of range")
      }
    }
    else if (is.character(base)) {
      bi <- match(base, alleles)
      if (is.na(bi))
        stop("Base allele not found")
      base <- bi
    }
    else {
      stop("Illegal base argument")
    }
    if (base!=0)
      alladd <- alleles[-base]
    else
      alladd <- alleles
    add <- matrix(nrow=length(levs), ncol=length(alladd),
                  dimnames=list(levs, alladd))
    storage.mode(add) <- "double"
    map <- attr(g, "allele.map")
    for (all in alladd) 
      add[, all] <- ((map[,1]==all) + (map[,2]==all))
    prefix <- ":a"
    colnames(add) <- paste(prefix, alladd, sep=":")
    cmat <- add

    if (mode==4 || mode==6){
      dom <- matrix(nrow = length(levs), ncol=nd)
      storage.mode(dom) <- "double"
      prefix <- ":d"
      ij <- 0
      for (i in 1:(na-1)) {
        ai <- alleles[i]
        for (j in (i+1):na) {
          aj <- alleles[j]
          ij <- ij+1
          cnames[ij] <- paste(prefix, ai, aj, sep=":")
          dom[,ij] <- ((map[,1]==ai & map[,2]==aj)|(map[,1]==aj & map[,2]==ai))
        }
      }
      rownames(dom) <- levs
      colnames(dom) <- cnames
      cmat <- cbind(cmat, dom)
    }

    if (mode==5 || mode==6) {
      poo <- matrix(nrow = length(levs), ncol=nd)
      storage.mode(poo) <- "double"
      prefix <- ":p"
      ij <- 0
      for (i in 1:(na-1)) {
        ai <- alleles[i]
        for (j in (i+1):na) {
          aj <- alleles[j]
          ij <- ij+1
          cnames[ij] <- paste(prefix, ai, aj, sep=":")
          poo[,ij] <- ((map[,1]==ai & map[,2]==aj) -
                       (map[,1]==aj & map[,2]==ai))/2
        }
      }
      rownames(poo) <- levs
      colnames(poo) <- cnames
      cmat <- cbind(cmat, poo)
    }
  }
  else {
    if (missing(base)) {
      gf <- table(g)
      base <- order(levs)[match(max(gf), gf)]
    }
    if (is.character(base))
      base <- match(base, levels(g))
    if (!is.haplotype(g) || mode==2) {
      cmat <- contr.treatment(levels(g), base=base)
    }
    else {
      nl <- length(levs)
      ng <- (na*(na+1))/2
      cmat <- matrix(nrow=nl, ncol=ng)
      cnames <- character(ng)
      am <- attr(g, "allele.map")
      ig <- 0
      for (i in 1:na) {
        for (j in 1:i) {
          ig <- ig+1
          cnames[ig] <- paste(alleles[j], alleles[i], sep="/")
          cmat[,ig] <- (am[,1]==alleles[i] & am[,2]==alleles[j]) |
                       (am[,2]==alleles[i] & am[,1]==alleles[j])
        }
      }
      keep <- !cmat[base,]
      cmat <- matrix(as.numeric(cmat[,keep]), nrow=nl)
      dimnames(cmat) <- list(levs, cnames[keep])
    }
  } 
  attr(cmat, "mode") = modes[mode]
  cmat
}

# Functions to use in model formula

additive <- function(g, base) {
  attr(g, "contrasts") <- genetic.contrasts(g, "additive", base)
  g
}

dominance <- function(g, base) {
  attr(g, "contrasts") <- genetic.contrasts(g, "dominance", base)
  g
}

additive.origin <- function(g, base) {
  attr(g, "contrasts") <- genetic.contrasts(g, "additive.origin", base)
  g
}

dominance.origin <- function(g, base) {
  attr(g, "contrasts") <- genetic.contrasts(g, "dominance.origin", base)
  g
}


porigin <- function(g, base) {
  attr(g, "contrasts") <- genetic.contrasts(g, "parent.of.origin", base)
  g
}

# Interrogate genotype contrasts

gcontrasts <- function(g) {
  contrasts(g)
}

# Set mode of a genotype 

"gcontrasts<-" <- function(g, base, value) {
  if (!is.genotype(g))
    stop("argument must be a genotype")
  modes <- c("genotype", "haplotype",
             "additive", "dominance",
             "additive.origin", "dominance.origin", 
             "parent.of.origin") # For backward compatibility
  if (is.character(value)) {
    if (is.na(match(value, modes)))
      stop("Unrecognized contrast spec")
    cntr <- genetic.contrasts(g, mode=value, base=base)
  }
  else if (is.matrix(value)) {
    if (any(sort(levels(g))!= sort(rownames(value))))
      stop("Row names of contrast matrix don't match genotypes/haplotypes")
    cntr <- value
  }
  else
    stop("Illegal contrast spec")
  how.many <- ncol(cntr)
  contrasts(g, how.many) <- cntr
  g
}

# And of all genotypes in a dataframe

"default.mode<-" <- function(data, value) {
  if (!is.data.frame(data))
    stop("argument must be a dataframe")
  if (!is.character(value))
    stop("Illegal mode")
  m <- match(value, c("additive", "dominance", "parent.origin", "genotype",
                      "haplotype")) 
  if (is.na(m))
    stop("Illegal mode")
  nv <- length(data)
  for (i in 1:nv) {
    if (is.genotype(data[[i]])) {
      if (!is.haplotype(data[[i]])) {
        if (m==3) 
          m <- 2
        if (m==5)
          m <- 4
      }
      gcontrasts(data[[i]]) <-
          c("additive", "dominance", "parent.origin", "genotype",
            "haplotype")[m]
    }
  }
  data
}

# Interrogate contrasts for mode

inheritance.mode <- function(g) {
  cntr <- attr(g, "contrasts")
  if (is.null(cntr))
    NULL
  else {
    pre <- substr(colnames(cntr), 1, 3)
    comp <- match(pre, c(":a:", ":d:", ":p:"))
    if (any(is.na(comp)))
      NULL
    else
      c("additive", "dominance", "parent.of.origin")[max(comp)]
  }
}

# Merge minor alleles if MAF less than a threshold.
# Can be applied to a single genotype (haplotype) variable or a dataframe
# Variable is dropped if merging can't solve the problem

mamerge <- function(object, maf=0.05, others="*") {
  df <- is.data.frame(object)
  if (!df) {
    if (is.genotype(object)) 
      object <- list(genotype=object)
    else
      stop("argument is not a dataframe or a genotype")
  }
  if (!is.character(others) || nchar(others)!=1)
    stop("others argument must be a single character")
  obj.names <- names(object)
  nobj <- length(object)
  status <- rep(0, nobj)
  for (i in 1:nobj) {
    ob <- object[[i]]
    if (is.genotype(ob)) {
      af <- apply(allele.count(ob), 2, mean, na.rm=TRUE)/2
      if (min(af)<maf) {
        na <- length(af)
        att <- attributes(ob)
        an <- att$allele.names
        map <- att$allele.map
        lab <- others
        while (min(af)<maf && na>2) {
          asc <- (1:na)[order(af)]
          one <- asc[1]
          two <- asc[2]
          name.one <- an[one]
          name.two <- an[two]
          af[two] <- af[two]+af[one]
          if (substr(name.one, 1, 1)=="others")
            an[two] <- name.one
          else if(substr(name.two, 1, 1)!="others") {
            an[two] <- lab
            lab <- paste(lab, others, sep="")
          }
          af <- af[-one]
          an <- an[-one]
          map <- ifelse(map==name.one | map==name.two, others, map)
          na <- na - 1
        }
        if (min(af)<maf) {
          status[i] <- -1 # drop
          warning(paste(obj.names[i], "dropped, MAF <", maf))
        }
        else {
          status[i] <- 1 # changed
          if (is.haplotype(ob))
            object[[i]] <- as.haplotype(map[as.numeric(ob),])
          else 
            object[[i]] <- as.genotype(map[as.numeric(ob),])
          warning(paste("Rare alleles of", obj.names[i], "merged")) 
        }
      }
    }
  }
  if (df) {
    for (i in nobj:1) {
      if (status[i]<0)
        object[[i]] <- NULL
    }
    return(object)
  }
  else {
    if (status[1]<0) 
      return(NULL)
    else
      return(object[[1]])
  }
}

# Eliminate genetically identical SNPs from a dataset
# Optionally amalgamate identical variables to fill in missing data,
# but phase is lost in the process
# Only works with diallelic loci

gielim <- function(data, amalgamate=TRUE) {
  if (!is.data.frame(data)) {
    stop("Argument must be a dataframe")
  }
  nv <- ncol(data)
  gts <- NULL
  for (i in 1:nv) {
    if (is.genotype(data[[i]]) && nallele(data[[i]])==2) {
      gts <- c(gts, i)
    }
  }
  gnames <- names(data)[gts]
  ng <- length(gts)
  if (ng==0) {
    stop("Dataframe contains no genotype variables")
  }
  status <- rep(0,ng) # unchanged
  N <- nrow(data)
  change <- FALSE
  if (ng > 1) {
    count <- matrix(nrow=N, ncol=ng)
    for (i in 1:ng) {
      ig <- gts[i]
      gi <- data[[ig]]
      count[,i] <- allele.count(gi, allele.names(gi)[1])
    }
    for (i in 1:(ng-1)) {
      ci <- count[,i]
      for (j in (i+1):ng) {
        cj <- count[,j]
        identical <- all(ci==cj, na.rm=TRUE)
        reversed <- all((ci+cj)==2, na.rm=TRUE)
        if (identical || reversed) {
          change <- TRUE
          if (amalgamate) {
            status[i] <- -1 # delete
            if (any((is.na(ci)&!is.na(cj))|(is.na(cj)&!is.na(ci)))) {
              status[j] <- 1 # to be changed
              if (reversed) ci <- 2-ci
              cj <- ifelse(is.na(cj), ci, cj)
              count[,j] <- cj
            }
            warning(paste(gnames[i], "is genetically identical to",
                        gnames[j], "and has been omitted"))
          }
          else {
            if (sum(is.na(cj))>sum(is.na(ci))){
              c[,j] <- ci
              status[j] <- -1 # delete
              warning(paste(gnames[j], "is genetically identical to",
                        gnames[i], "and has been omitted"))
              gnames[j] <- gnames[i]
            }
            else {
              status[i] <- -1 # delete
              warning(paste(gnames[i], "is genetically identical to",
                            gnames[j], "and has been omitted"))
            }
          }
          break
        }
      }
    }
  }
  if (change) {
    for (i in 1:ng){
      if (status[i]==1) {
        ci <- count[,i]
        att <- attributes(data[[gts[i]]])
        anames <-  att$allele.names
        contr <- att$contrasts
        wopt <- options()$warn
        options(warn=-1)
        new <- as.genotype.allele.count(ci, anames)
        if (!is.null(contr)) 
          attr(new, "contrasts") <- contr 
        options(warn=wopt)
        data[[gts[i]]] <- new
      }
    }
    for (i in ng:1) {
      if (status[i]==-1) {
        data[[gts[i]]] <- NULL
      }
    }
  }
  data
}

# Minor allele frequency, data frame or single genotype
# When applied to a data frame, returns values only for genotype variables
# if simplify=T; otherwise returns NA's for non-genotype variables

MAF <- function(g, simplify=TRUE) {
  if (is.data.frame(g)) {
    nv <- length(g)
    gt <- logical(nv)
    mf <- as.numeric(rep(NA, nv))
    names(mf) <- names(g)
    for (i in 1:nv) {
      gi <- g[[i]]
      if (is.genotype(gi)) {
        gt[i] <- TRUE
        fr <- table(allele(gi))
        mf[i] <- min(fr)/sum(fr)
      }
      else
        gt[i] <- FALSE
    }
    if (simplify)
      mf[gt]
    else
      mf
  }
  else if (is.genotype(g)) {
    fr <- table(allele(g))
    min(fr)/sum(fr)
  }
}

# Partial correlations (with sign reversed)

partial.LD <- function(object) {
  if (class(object)[1]=="LD.data.frame")
    R <- object$r
  else if (is.data.frame(object))
    R <- LD(object)$r
  else
    stop("Argument must be a dataframe or an LD dataframe")
  dn <- dimnames(R)
  p <- nrow(R)
  R <- ifelse(is.na(R), t(R), R)
  diag(R) <- 1
  if (any(is.na(R)))
    stop("NAs in correlation matrix")
  Ri <- solve(R)
  if (any(diag(Ri)<=0))
    stop("Matrix not positive semidefinite")
  D <- diag(1/sqrt(diag(Ri)))
  Ri <- D %*% Ri %*% D
  dimnames(Ri) <- dn
  Ri
}
      
#
# Identify trios of affected offspring and both parents
#

trios <- function(pedigree, id, id.father, id.mother, sex, affected, 
                  codes=2, first=FALSE,
                  data = sys.frame(sys.parent())) {
  cl <- match.call()
  if (missing(pedigree))
    pedigree <- eval(as.symbol("pedigree"), data)
  else
    pedigree <- eval(cl$pedigree, data)
  N <- length(pedigree)
  if (missing(id))
    id <- eval(as.symbol("id"), data)
  else
    id <- eval(cl$id, data)
  if (missing(id.father))
    id.father <- eval(as.symbol("id.father"), data)
  else
    id.father <- eval(cl$id.father, data)
  if (missing(id.mother))
    id.mother <- eval(as.symbol("id.mother"), data)
  else
    id.mother <- eval(cl$id.mother, data)
  if (missing(sex))
    sex <- eval(as.symbol("sex"), data)
  else
    sex <- eval(cl$sex, data)
  if (missing(affected))
    affected <- eval(as.symbol("affected"), data)
  else
    affected <- eval(cl$affected, data)
  if (N==0 || length(id)==0 || length(id.father)==0 || length(id.mother)==0
      || length(sex)==0 || length(affected)==0)
    stop("missing or inconsistent arguments")
  index <- (1:N)
  if (is.factor(affected)) {
    if (is.numeric(codes))
      is.affected <- !is.na(match(as.numeric(affected), codes))
    else 
      is.affected <- !is.na(match(affected, codes))
  }
  else
    is.affected <- !is.na(match(affected, codes))
  use <- index[is.affected & !is.na(id.father) & !is.na(id.mother)]
  if (first) {
    nuc <- paste(pedigree,id.mother,id.father,sep=":")[use]
    fst <- match(unique(nuc), nuc)
    use <- use[fst]
  }
  lid <- paste(pedigree, id, sep=":")
  ma <- match(paste(pedigree, id.mother, sep=":")[use], lid)
  pa <- match(paste(pedigree, id.father, sep=":")[use], lid)
  comp <- !(is.na(id.mother[use]) | is.na(ma) |
            is.na(id.father[use]) | is.na(pa))
  if (any(!comp)) {
    use <- use[comp]
    ma <- ma[comp]
    pa <- pa[comp]
  }
  wrong <- sum(as.numeric(sex[ma])==1, na.rm=TRUE) +
    sum(as.numeric(sex[pa])==2, na.rm=TRUE)
  if (wrong>0)
    warning(paste("sex of mothers and/or fathers incorrectly coded", wrong,
                  "times"))
  data.frame(case=use, father=pa, mother=ma, pedigree=pedigree[use],
       id=id[use], id.father=id.father[use], id.mother=id.mother[use])
}


# Resolve a single genotype for trios into transmitted and untransmitted
# haplotypes
# If one.parent true, allows partial resolution when one parent is missing
# If one.parent.all true, only resolves these if resolution possible for
# either transmission from other parent
# If allow.ambiguous, resolutions which are ambiguous for parent of origin
# are allowed

phase.resolve <- function(offspring, mother, father, allow.ambiguous=FALSE,
                          one.parent=FALSE, one.parent.all=FALSE,
                          as.allele.pair=FALSE) {
  if(!(is.genotype(offspring) && is.genotype(mother) && is.genotype(father)))
    stop("Arguments must be genotype variables")
  alleles <- allele.names(offspring)
  a.o <- allele(offspring)
  a.m <- allele(mother)
  a.f <- allele(father)
  full <- !(is.na(offspring) | is.na(mother) | is.na(father))
  asis <- is.na(offspring) | 
          ((is.na(mother) | a.o[,1]==a.m[,1] | a.o[,1]==a.m[,2]) &
           (is.na(father) | a.o[,2]==a.f[,1] | a.o[,2]==a.f[,2]))
  swap <- is.na(offspring) |
          ((is.na(father) | a.o[,1]==a.f[,1] | a.o[,1]==a.f[,2]) &
           (is.na(mother) | a.o[,2]==a.m[,1] | a.o[,2]==a.m[,2]))
  asis <- ifelse(is.na(asis), TRUE, asis)
  swap <- ifelse(is.na(swap), TRUE, swap)
  misinherits <- (!asis) & (!swap)
  if (any(misinherits))
    warning(sum(misinherits), " misinheritances")
  homo <- homozygote(offspring)
  ambiguous <- asis & swap & !homo
  mat.t <- ifelse(swap, a.o[,2], a.o[,1])
  pat.t <- ifelse(swap, a.o[,1], a.o[,2])
  mat.u <- ifelse(mat.t==a.m[,1], a.m[,2], a.m[,1])
  pat.u <- ifelse(pat.t==a.f[,1], a.f[,2], a.f[,1])
  resolved <- full & (allow.ambiguous | !ambiguous)
  # Partial resolution if only one parent typed
  # (only carried out if possible for either transmission)
  if (one.parent) {
    m.only <- is.na(father) & !(is.na(offspring) | is.na(mother))
    f.only <- is.na(mother) & !(is.na(offspring) | is.na(father))
    if (one.parent.all)
      resolved <- resolved | ((m.only | f.only) & !ambiguous)
    else
      resolved <- resolved | ((m.only | f.only) & !ambiguous & !homo)
  }
  resolved <- resolved & !misinherits & (allow.ambiguous | !ambiguous)
  mat.t[!resolved] <- NA
  pat.t[!resolved] <- NA
  mat.u[!resolved] <- NA
  pat.u[!resolved] <- NA
  mlist <- (1:length(misinherits))[misinherits]
  if (as.allele.pair) {
    tr <- cbind(mat.t, pat.t)
    un <- cbind(mat.u, pat.u)
    if (allow.ambiguous) {
      colnames(tr) <- NULL
      colnames(un) <- NULL
      list(trans=tr, untrans=un, misinherits=mlist, ambiguous=ambiguous)
    }
    else {
      colnames(tr) <- c("Maternal", "Paternal")
      colnames(un) <- c("Maternal", "Paternal")
      list(trans=tr, untrans=un, misinherits=mlist)
    }
  }
  else {
    if (allow.ambiguous) 
      list(
           trans=haplotype(mat.t, pat.t, allow.partial.missing=one.parent),
           untrans=haplotype(mat.u, pat.u, allow.partial.missing=one.parent),
           misinherits= mlist, ambiguous=ambiguous
           )
    else
      list(
           trans=haplotype(mat.t, pat.t, allow.partial.missing=one.parent),
           untrans=haplotype(mat.u, pat.u, allow.partial.missing=one.parent),
           misinherits= mlist
           )
  }
}

tdt <- function(genotype, data = sys.frame(sys.parent()),
                pedigree, id, id.father, id.mother, sex, affected,
                codes = 2, parent="both", robust=FALSE, first=FALSE) {
  if (is.na(match(parent, c("both", "mother", "father"))))
    stop("Unrecognized parent= argument")
  if (missing(genotype))
    stop("Missing genotype argument with no default")
  cl <- match.call()
  g <- eval(cl$genotype, data)
  gname <- deparse(cl$genotype)
  if (missing(pedigree))
    ped <- evalq(pedigree, data)
  else
    ped <- eval(cl$pedigree, data)
  anames <- allele.names(g)
  nall <- length(anames)
  tr <- trios(pedigree, id, id.father, id.mother, sex, affected, codes=codes,
              data=data, first=first)
  g.cs <- g[tr$case]
  g.fr <- g[tr$father]
  g.mr <- g[tr$mother]
  ped.cs <- ped[tr$case]
  res <- phase.resolve(g.cs, g.mr, g.fr, as.allele.pair = TRUE,
                       allow.ambiguous=(parent=="both"),
                       one.parent=TRUE)
  if (parent=="both") {
    tr <- rbind(res$trans[,1], res$trans[,2])
    un <- rbind(res$untrans[,1], res$untrans[,2])
    pe <- rbind(ped.cs, ped.cs)
  }
  else if (parent=="mother") {
    tr <- res$trans[,1]
    un <- res$untrans[,1]
    pe <- ped.cs
  }
  else if (parent=="father") {
    tr <- res$trans[,2]
    un <- res$untrans[,2]
    pe <- ped.cs
  }
  af <- table(un, dnn="Allele frequency")
  af <- af/sum(af)
  infmtv <- !is.na(tr) & !is.na(un) & (tr!=un)
  ninf <- sum(infmtv)
  tr <- tr[infmtv]
  un <- un[infmtv]
  pe <- pe[infmtv]
  trtab <- table(c(tr, un), rep(c(0,1), c(ninf, ninf)))
  anames.old <- anames
  anames <- rownames(trtab)
  anames.omitted <- setdiff(anames.old, anames)
  if (length(anames.omitted)>0) {
    warning("No informative transmissions for alleles: ",
            paste(anames.omitted, collapse=","))
  }
  nall <- length(anames)
  dimnames(trtab) <- list(anames, c("Transmitted", "Untransmitted"))
  Ia <- diag(nall)
  rownames(Ia) <- anames
  U <- (Ia[tr,] - Ia[un,])
  if (robust) {
    peds <- unique(pe)
    if (length(peds)<length(pe)) {
      U <- mtxgrp(U, rgroup=pe)
    }
  }
  qrU <- qr(U)
  X2 <- sum(qr.fitted(qrU, rep(1, nrow(U))))
  names(X2) <- "Chi-squared test"
  df <- qrU$rank
  names(df) <- "DF"
  exact <- numeric(nall)
  names(exact) <- anames
  for (a in anames) {
    if (robust) {
      ua <- U[, a]
      c2 <- (sum(ua))^2/sum(ua^2)
      exact[a] <- pchisq(c2, 1, lower.tail=FALSE)
    }
    else {
      bt <- binom.test(trtab[a,], alternative="two.sided")
      exact[a] <- bt$p.value
    }
  }
  result <- list(statistic=X2, parameter=df, exact=!robust,
                 p.value=c(pchisq(X2, df, lower.tail=FALSE), exact),
                 method="Transmission/disequilibrium test",
                 data.name=gname,
                 allele.frequencies=af,
                 informative.transmissions=trtab
               )
  class(result) <- "tdt.htest"
  result
}

# Collapse matrix by grouping over rows and/or columns

mtxgrp <-
function(mat, rgroup=1:nrow(mat), cgroup=1:ncol(mat)) {
  rgroup <- factor(rgroup)
  cgroup <- factor(cgroup)
  # collapse a matrix by adding over rows and/or columns within groups
  nc <- ncol(mat)
  nr <- nrow(mat)
  if (nr != length(rgroup) || nc != length(cgroup))
    stop("wrong length grouping vector")
  res <- tapply(mat, list(rep(rgroup, nc), rep(cgroup, rep(nr, nc))), "sum")
  res
}

# Print a tdt.htest object

print.tdt.htest <- function(x) {
  cat("\t", x$method, "\nData:\t", x$data.name,"\n\n", sep="")
  cat("Untransmitted allele frequencies, informative transmissions\n")
  if (x$exact)
    cat("and exact P-values\n\n")
  else
    cat("and asymptotic P-values\n\n")
  colwidth <- 13
  head <- format(c("Allele", "Frequency", "Transmitted", "Untransmitted",
                   "P-value"), width=colwidth)
  cat(head, "\n")
#  anames <- names(x$allele.frequencies)
  anames <- rownames(x$informative.transmissions) # From Chris 15/02/06
  na <- length(anames)
  tr <- format(as.character(x$informative.transmissions[,1]),
                    width=colwidth)
  un <- format(as.character(x$informative.transmissions[,2]),
                    width=colwidth)
#  freq <- formatC(x$allele.frequencies, width=colwidth, format="f",
#                  digits=4, flag=flag)
  freq <- format(x$allele.frequencies, width=colwidth, nsmall=4, digits=4)
  pvals <- format(format.pval(x$p.value, digits=3), width=colwidth)
  if (na<=2) {
    i <- order(x$allele.frequencies)[1]
    fa <- format(anames[i], width=colwidth, flag=flag)
    cat(fa, freq[i], tr[i], un[i], pvals[1+i], "\n")   
  }
  else {
    for (i in 1:na) {
      fa <- format(anames[i], width=colwidth, flag=flag)
      cat(fa, freq[i], tr[i], un[i], pvals[1+i], "\n")
    }
  }
  if (x$parameter>1) {
    cat("\nGlobal chi-squared test =", x$statistic, "on", x$parameter,
        "df. Asymptotic P-value =", format.pval(x$p.value[1], digits=3),"\n")
  }
}

# long.phase.resolve resolves phase of transmitted and untransmitted
#    haplotypes
# falk.rubinstein simply computes (multilocus) transmitted and untransmitted
#    genotypes
#
# offspring, mother, father are lists of genotype variables


long.phase.resolve <- function(offspring, mother, father,
                               parent.of.origin=FALSE,
                               as.allele.pair=FALSE) {
  if (!(is.list(offspring) && is.list(mother) && is.list(father)))
    stop("Arguments should be lists of genotypes")
  nloci <- length(offspring)
  if (!(length(mother)==nloci && length(father)==nloci)) 
    stop("Arguments should be lists of same length")
  tr <- vector("list", nloci)
  un <- vector("list", nloci)
  locnames <- names(offspring)
  names(tr) <- locnames
  names(un) <- locnames
  ntr <- length(offspring[[1]])
  resolved <- rep(TRUE, ntr)
  if (!parent.of.origin) {
    n.ambig<- rep(0, ntr)
    n.het.tr <- rep(0, ntr)
    n.het.un <- rep(0, ntr)
  }
  for (i in 1:nloci) {
    phi <- phase.resolve(offspring[[i]], mother[[i]], father[[i]],
                         allow.ambiguous = !parent.of.origin,
                         as.allele.pair = as.allele.pair)
    tr[[i]] <- phi$trans
    un[[i]] <- phi$untrans
    if (as.allele.pair) {
      resolved <- resolved & !is.na(phi$trans[,1]) & !is.na(phi$untrans[,1]) & 
                             !is.na(phi$trans[,2]) & !is.na(phi$untrans[,2]) 
      if (!parent.of.origin) {
        resolved <- resolved & !phi$ambiguous
        n.ambig <- n.ambig + phi$ambiguous
        n.het.tr <- n.het.tr + (phi$trans[,1]!= phi$trans[,2])
        n.het.un <- n.het.un + (phi$untrans[,1]!= phi$untrans[,2])
      }
    }
    else {
      resolved <- resolved & !is.na(phi$trans) & !is.na(phi$untrans)
      if (!parent.of.origin) {
        resolved <- resolved & !phi$ambiguous
        n.ambig <- n.ambig + phi$ambiguous
        n.het.tr <- n.het.tr + heterozygote(phi$trans)
        n.het.un <- n.het.un + heterozygote(phi$untrans)
      }
    }
  }
  if (!parent.of.origin)
    resolved <- resolved | n.ambig==0 |
                             (n.ambig==1 & !(n.het.tr>1 | n.het.un>1))
  resolved[is.na(resolved)] <- FALSE
  for (i in 1:nloci) {
    if (as.allele.pair) {
      tr[[i]][!resolved,] <- NA
      un[[i]][!resolved,] <- NA
    }
    else {
      tr[[i]][!resolved] <- as.haplotype(NA)
      un[[i]][!resolved] <- as.haplotype(NA)
    }
  }
  list(trans=tr, untrans=un, resolved=resolved)
}

falk.rubinstein <- function(offspring, mother, father, as.allele.pair=FALSE,
                            allow.partial.missing=TRUE) {
  if (!(is.list(offspring) && is.list(mother) && is.list(father)))
    stop("Arguments should be lists of genotypes")
  nloci <- length(offspring)
  if (!(length(mother)==nloci && length(father)==nloci)) 
    stop("Arguments should be lists of same length")
  tr <- vector("list", nloci)
  un <- vector("list", nloci)
  locnames <- names(offspring)
  names(tr) <- locnames
  names(un) <- locnames
  ntr <- length(offspring[[1]])
  misinherits <- rep(FALSE, ntr)
  any.missing <- FALSE
  for (i in 1:nloci) {
    a.m <- allele(mother[[i]])
    a.f <- allele(father[[i]])
    a.o <- allele(offspring[[i]])
    asis <- is.na(offspring[[i]]) | 
          ((is.na(mother[[i]]) | a.o[,1]==a.m[,1] | a.o[,1]==a.m[,2]) &
           (is.na(father[[i]]) | a.o[,2]==a.f[,1] | a.o[,2]==a.f[,2]))
    swap <- is.na(offspring[[i]]) |
          ((is.na(father[[i]]) | a.o[,1]==a.f[,1] | a.o[,1]==a.f[,2]) &
           (is.na(mother[[i]]) | a.o[,2]==a.m[,1] | a.o[,2]==a.m[,2]))
    asis <- ifelse(is.na(asis), TRUE, asis)
    swap <- ifelse(is.na(swap), TRUE, swap)
    cant <-  (!asis) & (!swap)
    misinherits <- misinherits | cant
    uni <- cbind(ifelse(asis,
                        ifelse(a.o[,1]==a.m[,1], a.m[,2], a.m[,1]),
                        ifelse(a.o[,1]==a.f[,1], a.f[,2], a.f[,1])),
                 ifelse(asis,
                        ifelse(a.o[,2]==a.f[,1], a.f[,2], a.f[,1]),
                        ifelse(a.o[,2]==a.m[,1], a.m[,2], a.m[,1]))
                 )
    any.missing <- any.missing | is.na(uni[,1]) | is.na(uni[,2]) |
       is.na(a.o[,1]) | is.na(a.o[,2])
    un[[i]] <- uni
    tr[[i]] <- a.o
  }
  if (any(misinherits))
    warning(sum(misinherits), " misinheritances")
  if (!allow.partial.missing)
    misinherits <- misinherits | any.missing
  if (any(misinherits)) {
    for (i in 1:nloci) {
      un[[i]][misinherits,1] <- NA
      un[[i]][misinherits,2] <- NA
      tr[[i]][misinherits,1] <- NA
      tr[[i]][misinherits,2] <- NA
    }
  }
  if (!as.allele.pair) {
    for (i in 1:nloci) {
      un[[i]] <- genotype(un[[i]], allow.partial.missing=allow.partial.missing)
      tr[[i]] <- genotype(tr[[i]], allow.partial.missing=allow.partial.missing)
    }
  }
  list(trans=tr, untrans=un)
}
    
# Bind multiple haplotype variables  together 

hbind <- function(..., sep=":", as.allele.pair=FALSE) {
  args <- list(...)
  if (length(args)==1 && is.list(args[[1]]))
    args <- args[[1]]
  nl <- length(args)
  if (nl<=1)
    stop("Too few arguments or list elements")
  first <- args[[1]]
  if (is.haplotype(first)) {
    as.allele.pair.in <- FALSE
    hap <- allele(first)
  }
  else if (is.character(first) && is.matrix(first) && ncol(first)==2) {
    as.allele.pair.in <- TRUE
    hap <- first
  }
  else
    stop("Illegal haplotype argument or list element")
  for (i in 2:nl) {
    ht <- args[[i]]
    if (as.allele.pair.in) {
      if (!(is.character(ht) && is.matrix(ht) && ncol(ht)==2))
        stop("Illegal haplotype argument or list element")
    }
    else {
      if (!is.haplotype(ht))
        stop("Illegal haplotype argument or list element")
      ht <- allele(ht)
    }
    hap[,1] <- ifelse(is.na(hap[,1])|is.na(ht[,1]), NA,
                      paste(hap[,1], ht[,1], sep=sep))
    hap[,2] <- ifelse(is.na(hap[,2])|is.na(ht[,2]), NA,
                      paste(hap[,2], ht[,2], sep=sep))
  }
  if (as.allele.pair)
    hap
  else
    as.haplotype(hap)
}

# Make dataframe of cases and matched pseudo-controls

pseudocc <- function(...,  data = sys.frame(sys.parent()), 
                     pedigree, id, id.father, id.mother, sex, affected,
                     codes = 2, phase=TRUE, first=FALSE,
                     parent.of.origin=exchangeable,
                     exchangeable = FALSE, allow.partial.missing=FALSE) {
  if (exchangeable && !parent.of.origin)
    stop("Incompatible exchangeable and parent.of.origin options")
  if (parent.of.origin && !phase)
    stop("Incompatible phase and parent.of.origin options")
  if (allow.partial.missing && phase)
    stop("Incompatible allow.partial.missing and phase options")
  cl <- match.call()
  arg.names <- names(cl)
  narg <- length(arg.names)
  ng <- sum(arg.names=="")-1
  gnames <- character(ng)
  gts <- vector("list", ng)
  for (i in 2:narg) {
    if (arg.names[i]=="") {
      gnames[i-1] <- deparse(cl[[i]])
      gtsi<- eval(cl[[i]], data)
      if (!is.genotype(gtsi))
        stop(gnames[i-1], " is not a genotype variable")
      gts[[i-1]] <- gtsi
    }
  }
  names(gts) <- gnames
  if (missing(pedigree))
    pedigree <- evalq(pedigree, data)
  else
    pedigree <- eval(cl$pedigree, data)
  if (missing(id))
    id <- evalq(id, data)
  else
    id <- eval(cl$id, data)
  if (missing(id.father))
    id.father <- evalq(id.father, data)
  else
    id.father <- eval(cl$id.father, data)
  if (missing(id.mother))
    id.mother <- evalq(id.mother, data)
  else
    id.mother <- eval(cl$id.mother, data)
  tr <- trios(pedigree, id, id.father, id.mother, sex, affected, codes,
              data=data, first=first)
  pedigree <- pedigree[tr$case]
  id <- id[tr$case]
  id.father <- id.father[tr$case]
  id.mother <- id.mother[tr$case]
  father <- mother <- offspring <- vector("list", ng)
  for (i in 1:ng) {
    offspring[[i]] <- gts[[i]][tr$case]
    father[[i]] <- gts[[i]][tr$father]
    mother[[i]] <- gts[[i]][tr$mother]
  }
  if (phase && ng>1) {
    extra <- 7
    out <- vector("list", 3*ng+7)
    hapname <- paste(gnames, sep="", collapse=":")
    if (nchar(hapname)>10)
      hapname <- "haplotype"
    names(out) <- c("set", "cc", "pedigree", "id", "id.father", "id.mother",
                    hapname, gnames, paste(gnames,"mother", sep=":"),
                    paste(gnames, "father", sep=":"))
  }
  else {
    extra <- 6
    out <- vector("list", 3*ng+6)
    names(out) <- c("set", "cc", "pedigree", "id", "id.father", "id.mother",
                    gnames, paste(gnames,"mother", sep=":"),
                    paste(gnames, "father", sep=":"))
  }    
  if (phase) {
    res <- long.phase.resolve(offspring, mother, father,
                              parent.of.origin=parent.of.origin,
                              as.allele.pair=TRUE)
    case <- res$trans
    untrans <- res$untrans
    res.case <- res$resolved
    off <- vector("list", ng)
    for (i in 1:ng) 
      off[[i]] <- genotype(untrans[[i]][,1], case[[i]][,2])
    res <- long.phase.resolve(off, mother, father,
                              parent.of.origin=parent.of.origin,
                              as.allele.pair=TRUE)
    control1 <- res$trans
    res.control1 <- res$resolved
    for (i in 1:ng) 
      off[[i]] <- genotype(case[[i]][,1], untrans[[i]][,2])
    res <- long.phase.resolve(off, mother, father,
                              parent.of.origin=parent.of.origin,
                              as.allele.pair=TRUE)
    control2 <- res$trans
    res.control2 <- res$resolved
    for (i in 1:ng) 
      off[[i]] <- genotype(untrans[[i]][,1], untrans[[i]][,2])
    res <- long.phase.resolve(off, mother, father,
                              parent.of.origin=parent.of.origin,
                              as.allele.pair=TRUE)
    control3 <- res$trans
    res.control3 <- res$resolved
    res.case <- res.case & (res.control1 | res.control2 | res.control3)
    set <- 1:length(res.case)
    n.case <- sum(res.case)
    n.control <- sum(res.control1) + sum(res.control2) + sum(res.control3)
    set <- c(set[res.case], set[res.control1], set[res.control2],
                 set[res.control3])
    cc <- c(rep(1, n.case), rep(0, n.control))
    ped <- c(pedigree[res.case], pedigree[res.control1],
                      pedigree[res.control2], pedigree[res.control3])
    id <- c(id[res.case], id[res.control1],
                id[res.control2], id[res.control3])
    fid <- c(id.father[res.case], id.father[res.control1],
                id.father[res.control2], id.father[res.control3])
    mid <- c(id.mother[res.case], id.mother[res.control1],
                id.mother[res.control2], id.mother[res.control3])
    if (exchangeable) {
      set1 <- set
      set <- rep(set, 2)
      cc <- c(cc, rep(0, length(cc)))
      ped <- rep(ped, 2)
      id <- rep(id, 2)
      fid <- c(fid, mid)
      mid <- c(mid, fid)
    }
    setord <- order(set)
    out$set <- set[setord]
    out$cc <- cc[setord]
    out$pedigree <- ped[setord]
    out$id <- id[setord]
    out$id.father <- fid[setord]
    out$id.mother <- mid[setord]
    for (i in 1:ng) {
      hapi <- rbind(case[[i]][res.case,],
                    control1[[i]][res.control1,],
                    control2[[i]][res.control2,],
                    control3[[i]][res.control3,])
      if (exchangeable)
        hapi <- rbind(hapi, cbind(hapi[,2], hapi[,1]))
      if (ng==1 && !parent.of.origin)
        out[[i+extra]] <- genotype(hapi[setord,])
      else 
        out[[i+extra]] <- haplotype(hapi[setord,])
    }
    if (phase && ng>1) {
      if (parent.of.origin)
        out[[extra]] <- hbind(out[extra +(1:ng)])
      else
        out[[extra]] <- as.genotype(hbind(out[extra +(1:ng)]))
    }
    for (i in 1:ng)  {
      if (exchangeable) 
        pg <- genotype(rbind(allele(mother[[i]][set1]),
                             allele(father[[i]][set1])))
      else
        pg <- mother[[i]][set]
      out[[i+ng+extra]] <- pg[setord]
    }
    for (i in 1:ng) {
      if (exchangeable)
        pg <- genotype(rbind(allele(father[[i]][set1]),
                             allele(mother[[i]][set1])))
      else
        pg <- father[[i]][set]
      out[[i+2*ng+extra]] <- pg[setord]
    }
  }
  else {
    res <- falk.rubinstein(offspring, mother, father, as.allele.pair=TRUE,
                           allow.partial.missing=allow.partial.missing)
    n.case <- nrow(res$trans[[1]])
    set <- rep(1:n.case, rep(2, n.case))
    out$set <- set
    out$cc <- rep(c(1,0), n.case)
    out$pedigree <- pedigree[set]
    out$id <- id[set]
    out$id.father <- id.father[set]
    out$id.mother <- id.mother[set]
    setord <- order(rep(1:n.case,2))
    for (i in 1:ng) 
      out[[i+extra]] <- genotype(
                rbind(res$trans[[i]], res$untrans[[i]])[setord,],
                allow.partial.missing = allow.partial.missing)
    for (i in 1:ng)  
      out[[i+ng+extra]] <- mother[[i]][set]
    for (i in 1:ng) 
      out[[i+2*ng+extra]] <- father[[i]][set]
  }
  as.data.frame(out)
}

  
# Trio types for parent-of-origin analyses

trio.types <- function(genotype, data = sys.frame(sys.parent()), 
                pedigree, id, id.father, id.mother, sex, affected,
                codes = 2, all=FALSE, first=FALSE, parent.of.origin=FALSE) {
  if (missing(genotype))
    stop("Missing genotype argument with no default")
  cl <- match.call()
  g <- eval(cl$genotype, data)
  gname <- deparse(cl$genotype) 
  tr <- trios(pedigree, id, id.father, id.mother, sex, affected, codes=codes,
              data=data, first=first)
  g.cs <- g[tr$case]
  g.fr <- g[tr$father]
  g.mr <- g[tr$mother]
  df <- as.data.frame(table(g.cs, g.mr, g.fr))
  df.swap <- as.data.frame(table(g.cs, g.fr, g.mr))
  for (i in 1:3)
    df[[i]] <- as.genotype(df[[i]])
  wl <- options()$warn
  options(warn=-1)
  pr <- phase.resolve(df[[1]], df[[2]], df[[3]], allow.ambiguous=TRUE)
  options(warn=wl)
  mendelian <- rep(TRUE, length(df[[1]]))
  mendelian[pr$misinherits] <- FALSE
  if (!all & any(!mendelian & (df[[4]]!=0)))
    warning("Non-Mendelian inheritances were observed")
  i.cs <- as.numeric(df[[1]])
  i.fr <- as.numeric(df[[2]])
  i.mr <- as.numeric(df[[3]])
  i.p1 <- ifelse(i.fr<i.mr, i.fr, i.mr)
  i.p2 <- ifelse(i.fr<i.mr, i.mr, i.fr)
  ord <- order(!mendelian, i.p1, i.p2, i.cs, i.mr)
  mendelian <- mendelian[ord]
  if (parent.of.origin) 
    pneq <- (i.fr!=i.mr)[ord]
  else
    pneq <- TRUE
  if (!all) {
    ord <- ord[mendelian & pneq]
    df.sort <- vector("list", 4)
    for (i in 1:4) {
      df.sort[[i]] <- df[[i]][ord]
    }
    names(df.sort) <- c("affected.offspring", "mother", "father", "frequency")
  }
  else {
    df.sort <- vector("list", 5)
    for (i in 1:4) {
      df.sort[[i]] <- df[[i]][ord]
    }
    df.sort[[5]] <- mendelian
    names(df.sort) <- c("affected.offspring", "mother", "father", "frequency",
                        "Mendelian")
  }
  as.data.frame(df.sort)            
} 

allele.table <- function(x, ...) {
  if (!is.genotype(x))
    stop("First argument must be a genotype variable")
  call <- match.call()
  len <- length(call) -1
  fn <- character(len)
  for (i in 1:len)
    fn[i] <- deparse(call[[i+1]])
  all <- allele(x)
  rst <- list(...)
  if (length(rst)>0) {
    df1 <- data.frame(x=all[,1], rst)
    names(df1) <- fn
    df2 <- data.frame(x=all[,2], rst)
    names(df2) <- fn
    table(rbind(df1, df2))
  }
  else
    table(c(all[,1], all[,2]))
}

origin <- function(genotype, data = sys.frame(sys.parent()),
                pedigree, id, id.father, id.mother, sex, affected,
                codes = 2, first=TRUE, maternal.effect=FALSE, base=1) {
  cl <- match.call()
  g <- eval(cl$genotype, data)
  if (missing(pedigree))
    pedigree <- eval(as.symbol("pedigree"), data)
  else
    pedigree <- eval(cl$pedigree, data)
  N <- length(pedigree)
  if (missing(id))
    id <- eval(as.symbol("id"), data)
  else
    id <- eval(cl$id, data)
  if (missing(id.father))
    id.father <- eval(as.symbol("id.father"), data)
  else
    id.father <- eval(cl$id.father, data)
  if (missing(id.mother))
    id.mother <- eval(as.symbol("id.mother"), data)
  else
    id.mother <- eval(cl$id.mother, data)
  if (missing(sex))
    sex <- eval(as.symbol("sex"), data)
  else
    sex <- eval(cl$sex, data)
  if (missing(affected))
    affected <- eval(as.symbol("affected"), data)
  else
    affected <- eval(cl$affected, data)
  tr <- trio.types(g, codes=codes, first=first, parent.of.origin=TRUE)
  n.pairs <- length(tr$affected.offspring)/2
  one <- rep(c(TRUE, FALSE), n.pairs)
  r <- tr$frequency[one]
  n <- r + tr$frequency[!one]
  p <- r/n
  ac <- allele.count(tr$affected.offspring)
  mc <- allele.count(tr$mother)
  fc <- allele.count(tr$father)
  maternal.origin <- matrix(as.numeric((ac==2) | ((ac==1) & (mc>fc))), ncol=2)
  mother.1 <- matrix(as.numeric(mc==1), ncol=2)
  mother.2 <- matrix(as.numeric(mc==2), ncol=2)
  colnames(maternal.origin) <- allele.names(tr$affected.offspring)
  colnames(mother.1) <- allele.names(tr$affected.offspring)
  colnames(mother.2) <- allele.names(tr$affected.offspring)
  maternal.origin <- maternal.origin[, -base, drop=FALSE]
  mother.1 <- mother.1[, -base, drop=FALSE]
  mother.2 <- mother.2[, -base, drop=FALSE]
  mother <- cbind(mother.1, mother.2)
  maternal.origin <- maternal.origin[one,] - maternal.origin[!one,]
  mother <- mother[one,] - mother[!one,]
  if (!maternal.effect) {
    g <- glm(p ~ -1 + maternal.origin, family=binomial,
             weights=n)
  }
  else {
    g <- glm(p ~ -1 + mother + maternal.origin, family=binomial,
             weights=n)
  }
  anova(g)
}
