## R as a Research Tool
## HW05: Genetic algorithm functions II
## Due Oct 9 2018
## Instructions
## ------------
## This file contains several complete functions you've seen before:
## mateNumericVectors() and testFitness(). In addition, I've written a new
## function called simpleEvolve(), which creates a haploid population, creates
## a random optimal genotype, and then simulates evolution over a set number of
## generations. This new function depends upon two other functions. One simple
## one I've supplied (getRandChrom), but the other you will write yourself:
## getNextGeneration.
## Your tasks:
## 1. Write the getNextGeneration function (see comment header I supply). Hint:
## look up the "prob" argument to the sample() function to see how to choose
## among a list or vector according to weighted probabilities (fitnesses).
## 2. Comment all of the new code (e.g. everything except mateNumericVectors()
## and testFitness().
## 3. Write code at the end of the script to search through a reasonable number
## of values of mu and r and report which values result in the most rapid
## convergence of the population mean fitness to > 0.9. You will extract the
## num.generations element from the result of each run. Report your findings.
## How do mu and r influence time to convergence? To explore a wide parameter
## space could be time consuming, you will have to decide how many combinations
## to test. I'd suggest looking at r values between 0 and 0.3 and mu values
## between 0 and 0.03.
###############################################################################
## simple-evolve.R:
## A simple simulation of haploid evolution.
## global constants:
ALLELES <- 1:10 # possible allele values
CHROM_LENGTH <- 10 # length of "chomosome" vectors
POP_SIZE <- 100 # population size
FITNESS_CRITERION <- 0.9 # default mean population fitness at which to end
# simulation
# returns a random chromosome vector with alleles sampled uniformly among the
# allowed alleles
getRandChrom <- function(lngth=CHROM_LENGTH){
return(sample(ALLELES, lngth, replace=TRUE))
}
# mateNumericVectors(x, y, r, mu=0):
# Returns a child vector created by "mating" two parent
# vectors x and y and supplying a per-adjacent-locus recombination rate and a per-locus
# mutation rate. During mutation, an allele is randomly assigned a value between 1 and
# 100, inclusive.
#
# Args:
# - x, y = Two vectors each representing a "genotype". The vectors should be
# atomic vectors containing integer values between 1 and 100 inclusive.
# These vectors are the two parent genotypes and must be of the same
# length.
# - r = The per-locus recombination rate. A rate between 0 and 1 which
# indicates the probability of a crossover event between any two
# adjacent genes (vector elements). Rates between 0 and 0.5 make the
# most sense biologically.
# - mu The per-locus mutation rate.
#
# Returns:
# - A vector of the same length as the parents
mateNumericVectors <- function(x, y, r, mu) {
n <- length(x)
stopifnot(n==length(y))
crossovers <- runif(n) <= r
useFirstParent <- cumsum(crossovers) %% 2 == 0
if(runif(1) < 0.5) useFirstParent <- !useFirstParent
child <- ifelse(useFirstParent,x,y)
mutations <- runif(n) <= mu
child[mutations] <- sample(ALLELES, sum(mutations), replace=TRUE)
return(child)
}
# testFitness(genotype, targetGenotype):
# Returns a scalar number representing how well the genotype (a vector of
# numbers) matches the target genotype. The matching is measured as the
# recipricol sum of the squared deviations from the target (1 / (sse+1) ).
#
# Args:
# - genotype = the chromosome to be tested.
# - targetGenotype = The optimum genotype. These must be numeric
# vectors of the same length.
# Returns:
# - A scalar: the reciprocal of the sum of the squared deviations
# of the genotype from the target (1 / (sse+1) ).
testFitness <- function(genotype, targetGenotype){
stopifnot(length(genotype)==length(targetGenotype))
return(1.0 / (sum((genotype-targetGenotype)^2) + 1.0 ) )
}
# getNextGeneration(population, r, mu, fitnesses):
#
# Accepts a list of genotypes ("population") and returns the new population
# replacing each individual with the result of mating two vectors chosen
# according to their fitnesses. Note that these are non-overlapping
# generations: the parents should be chosen from the existing generation and
# not from any newly selected offspring. The basic algorithm: 1) for each spot
# in the new population select two parents from the current population. These
# are chosen with weighted probability based on their fitnesses (see fitnesses
# argument). 2) Use mateVectors to produce an offspring and add the offspring
# to the new population 4) When we have a new population of the same size as
# the original, return the new population.
#
# Args:
# - population = a list of chromosomes (a list of numeric vectors)
# - r = per-adjacent-locus reocmbination rate
# - mu = a per locus mutation rate
# - fitnesses = a vector of real numbers corresponding to the fitness
# values of each individual in the populations. This must
# be of the same length as population.
#
# Returns:
# - A list of vectors: the new population
getNextGeneration <- function(population, r, mu, fitnesses) {
# Your code here!
}
# Add your own comment header here. Figure out what this function does.
simpleEvolve <- function(generations, r, mu, break.fit = FITNESS_CRITERION,
do.hist=FALSE) {
optimum <- getRandChrom()
population <- replicate(POP_SIZE, getRandChrom(), simplify=FALSE)
for(gen in 1:generations){
allfits <- sapply(population, testFitness, targetGenotype=optimum)
meanfit <- mean(allfits)
# print(paste("generation", gen, "mean fitness", meanfit))
if(do.hist) {
hist(sqrt(allfits),
freq=FALSE, breaks=seq(0,1,0.02),col="gray",
xlab="Square root of fitness")
}
if ( meanfit > break.fit) break
population <- getNextGeneration(population, r, mu, allfits)
}
return(list("optimal.genotype" = optimum, "mean.fit"=meanfit,
"num.generations"=gen))
}
### examples:
simpleEvolve(500, r=0.1, mu=0.0001, do.hist=TRUE)
simpleEvolve(500, r=0.1, mu=0.001, do.hist=TRUE)
simpleEvolve(500, r=0.3, mu=0.0001, do.hist= TRUE)
simpleEvolve(500, r=0.3, mu=0.001, do.hist= TRUE)