## R as a Research Tool
## HW 05: Genetic algorithm functions II
## Due Oct 2 2018
# Question 1 : explain the code in these functions:
## I have removed most of my comments so you can add in yours. You only need to
## add them to the functions, not the test code.
# Function: randomProb
# Args:
# - x = a vector of probabilities (each value in range 0-1)
# Returns:
# - A boolean vector of same length as x indicating successes
randomChance <- function(x) {
return(runif(length(x)) <= x)
}
# Function: mateVectors
# Args:
# - x, y = Two vectors each representing a "genotype". The vectors should be
# atomic vectors, but this is not enforced. These vectors are the
# two parent genotypes and must be of the same length and type.
# - r = The per-locus recombination rate (scalar). A rate between 0 and 0.5
# which indicates the probability of a crossover event between any
# two adjacent genes (vector elements).
# Returns
# - vector of the same length and type as the parents.
mateVectors1 <- function(x, y, r) {
stopifnot(length(x)==length(y))
child <- y
usingX <- randomChance(0.5)
for (i in seq_along(x)) {
if(usingX) {
child[i] <- x[i]
}
if(randomChance(r)) {
usingX <- !usingX
}
}
return(child)
}
# vectorized implementation of the above without explicit loops
mateVectors <- function(x, y, r) {
stopifnot(length(x) == length(y))
crossovers <- randomChance(rep(r, length(x)))
useFirstParent <- cumsum(crossovers) %% 2 == 0
if(randomChance(0.5)) {
child <- ifelse(useFirstParent, x, y)
} else {
child <- ifelse(useFirstParent, y, x)
}
return(child)
}
## END region to which students must add comments ##
############################
## Examples and Tests ##
############################
# examples
mother <- rep(0,100)
father <- rep(1,100)
child1 <-mateVectors(mother,father, 0.05)
child2 <-mateVectors1(mother,father, 0.05)
child1
child2
## it works with vectors of single char strings.
alph <- unlist(strsplit("abcdefghijklmnopqrstuvwxyz",split=""))
mother <- alph #sample(alph,100,replace=TRUE)
father <- sample(alph,length(alph),replace=TRUE)
child1 <-mateVectors(mother,father, 0.05)
child2 <-mateVectors1(mother,father, 0.05)
## print the child vector as a string rather than a vector:
paste(child1,collapse="")
paste(child2,collapse="")
# Using vectors of words as the genotype:
words <- "It was the best of times, it was the worst
of times, it was the age of wisdom, it was the age of foolishness, it was the
epoch of belief, it was the epoch of incredulity, it was the season of Light,
it was the season of Darkness, it was the spring of hope, it was the winter of
despair, we had everything before us, we had nothing before us, we were all
going direct to heaven, we were all going direct the other way - in short, the
period was so far like the present period, that some of its noisiest
authorities insisted on its being received, for good or for evil, in the
superlative degree of comparison only."
words <- unlist(strsplit(words, split="[[:space:][:punct:]]+"))
mother <- words
father <- rep("____", length(words))
child1 <-mateVectors(mother,father, 0.05)
child2 <-mateVectors1(mother,father, 0.05)
paste(child1,collapse=" ")
paste(child2,collapse=" ")
##
mother <- unlist(strsplit("I am the mother", split=""))
father <- unlist(strsplit("i AM THE FATHER", split=""))
child <- mateVectors(mother,father, 0.05)
paste(child,collapse="")
# tests:
## test if distribution of run lengths looks like bernoulli process (geometric
## distribution). As length of vectors goes to infinity, this distribution of
## parental run lengths should approach a geometric distribution. So mean
## should be 1 / p and variance should be (1 - p) / p^2
set.seed(100)
N_ALLELES <- 1000
mother <- rep(0,N_ALLELES)
father <- rep(1,N_ALLELES)
child <- mateVectors(mother, father, 0.05)
runs <- rle(child)
runlengths <- runs$lengths
# when r is 0.05 then
# Mean should be 1 / r = 20
# Variance should be (1 - r) / r^2 = 380
mean(runlengths)
var(runlengths)
# Hm hard to tell for a single trial. Try this a bunch and check the mean?
runLengthStats <- function(v) {
runs <- rle(v)
return(c(mean = mean(runs$lengths), var = var(runs$lengths)))
}
children <- matrix(replicate(10000, mateVectors(mother, father, 0.05)), nrow=N_ALLELES)
rl_stats <- apply(children, MARGIN=2, runLengthStats)
rowMeans(rl_stats) # should be 20 and 380
children <- matrix(replicate(10000, mateVectors(mother, father, 0.1)), nrow=N_ALLELES)
# should be 50% 0s vs 1s so easy test:
rl_stats <- apply(children, MARGIN=2, runLengthStats)
rowMeans(rl_stats) # should be 10 and 90