## HW 05 question1 : explain this code
## I have removed most of my comments so you can add in yours. You only need to
## add them to the functions, not the test code.
# Function: randomProb
# Args:
# - x = a vector of probabilities (each value in range 0-1)
# Returns:
# - A boolean vector of same length as x indicating successes
randomChance <- function(x) {
return(runif(length(x)) <= x)
}
# Function: mateVectors
# Args:
# - x, y = Two vectors each representing a "genotype". The vectors should be
# atomic vectors, but this is not enforced. These vectors are the
# two parent genotypes and must be of the same length and type.
# - r = The per-locus recombination rate (scalar). A rate between 0 and 0.5
# which indicates the probability of a crossover event between any
# two adjacent genes (vector elements).
# Returns
# - vector of the same length and type as the parents.
mateVectors1 <- function(x, y, r) {
stopifnot(length(x)==length(y))
child <- y
usingX <- randomChance(05)
for (i in 1:length(x)) {
if(usingX) {
child[i] <- x[i]
}
if(randomChance(r)) {
usingX <- !usingX
}
}
return(child)
}
# vectorized implementation of the above without explicit loops
mateVectors <- function(x, y, r) {
stopifnot(length(x) == length(y))
crossovers <- randomChance(rep(r, length(x)))
useFirstParent <- cumsum(crossovers) %% 2 == 0
if(randomChance(0.5)) {
child <- ifelse(useFirstParent,x,y)
} else {
child <- ifelse(useFirstParent,y,x)
}
return(child)
}
## END region to which students must add comments ##
############################
## Examples and Tests ##
############################
mother <- rep(0, 100)
father <- rep(1, 100)
mateVectors(mother, father, 0.05)
## it works with vectors of single char strings.
alph <- unlist(strsplit("abcdefghijklmnopqrstuvwxyz", split=""))
v1 <- alph
v2 <- toupper(alph)
child <- mateVectors(v1, v2, 0.15)
## print the child vector as a string rather than a vector:
paste(child,collapse="")
## test if distribution of run lengths looks like bernoulli process (geometric
## distribution). As length of vectors gos to infitiny, this distribution of
## parental run lengths should apporach a geometric distribution. So mean
## should be 1 / p and variance should be (1 - p) / p^2
mother <- rep(0,10000)
father <- rep(1,10000)
child <- mateVectors(mother, father, 0.05)
runs <- rle(child)
runlengths <- runs$lengths
# when r is 0.05 then
# Mean should be 1 / r = 20
# Variance should be (1 - r) / r^2 = 380
mean(runlengths)
var(runlengths)
# Hm hard to tell on variance. Try this a bunch and check the mean?
runLengthStats <- function(v) {
runs <- rle(v)
return(c(mean = mean(runs$lengths), var = var(runs$lengths)))
}
children <- matrix(replicate(10000, mateVectors(mother, father, 0.05)), nrow=10000)
# should be 50% 0s vs 1s so easy test:
means <- apply(children, MARGIN=2, mean)
mean(means) # should be 0.5
rl_stats <- apply(children, MARGIN=2, runLengthStats)
rowMeans(rl_stats) # should be 20 and 380