## HW 05 question1 : explain this code
## I have removed most of my comments so you can add in yours
# Function: mateVectors
# Args:
# - x, y = Two vectors each representing a "genotype". The vectors should be
# atomic vectors, but this is not enforced. These vectors are the
# two parent genotypes and must be of the same length and type.
# - r = The per-locus recombination rate. A rate between 0 and 1 which
# indicates the probability of a crossover event between any two
# adjacent genes (vector elements).
# Returns:
# - vector of the same length and type as the parents.
mateVectors1 <- function(x, y, r) {
stopifnot(length(x)==length(y))
child <- y
usingX <- runif(1) <= 0.5
for (i in 1:length(x)) {
if(usingX) {
child[i] <- x[i]
}
if(runif(1) <= r) {
usingX <- !usingX
}
}
return(child)
}
# vectorized implementation of above without explicit loops
mateVectors <- function(x, y, r) {
stopifnot(length(x) == length(y))
crossovers <- runif(length(x)) <= r
useFirstParent <- cumsum(crossovers) %% 2 == 0
if(runif(1) <= 0.5) {
child <- ifelse(useFirstParent,x,y)
} else {
child <- ifelse(useFirstParent,y,x)
}
return(child)
}
############################
## Examples and Tests ##
############################
mother <- rep(0,100)
father <- rep(1,100)
mateVectors(mother,father, 0.05)
## it works with vectors of single char strings.
alph <- unlist(strsplit("abcdefghijklmnopqrstuvwxyz",split=""))
v1 <- alph
v2 <- toupper(alph)
child <-mateVectors(v1, v2, 0.15)
## print the child vector as a string rather than a vector:
paste(child,collapse="")
## test if distribution of run lengths looks like bernoulli process (geometric
## distribution). As length of vectors gos to infitiny, this distribution of
## parental run lengths should apporach a geometric distribution. So mean
## should be 1 / p and variance should be (1 - p) / p^2
mother <- rep(0,10000)
father <- rep(1,10000)
child <- mateVectors(mother, father, 0.05)
runs <- rle(child)
runlengths <- runs$lengths
# when r is 0.05 then
# Mean should be 1 / r = 20
# Variance should be (1 - r) / r^2 = 380
mean(runlengths)
var(runlengths)
# Hm hard to tell on variance. Try this a bunch and check the mean?
runLengthStats <- function(v) {
runs <- rle(v)
return(c(mean = mean(runs$lengths), var = var(runs$lengths)))
}
children <- matrix(replicate(10000, mateVectors(mother, father, 0.05)), nrow=10000)
# should be 50% 0s vs 1s so easy test:
means <- apply(children, MARGIN=2, mean)
mean(means) # should be 0.5
rl_stats <- apply(children, MARGIN=2, runLengthStats)
rowMeans(rl_stats) # should be 20 and 380