% Stanhope, "Occupancy Modeling, Maximum Contig Size Probabilities and
% Designing Metagenomics Experiments", PLOS ONE, published 2010-07-29.
%
% NOTE(review): the original export stored the publication date (2010-07-29)
% in the address field and the placeholder text "TEXTUAL" in the number field.
% The date is now expressed through year/month (classic BibTeX has no day
% field, so the day is recorded only in this comment) and the placeholder
% issue number has been removed.
% The citation key is left as exported so existing citations keep resolving;
% presumably it is also an export placeholder -- consider renaming it to
% something like stanhope2010occupancy together with all of its citations.
% TODO: volume, issue/article number and DOI are absent from the exported
% record; add them from the publisher's page once confirmed.
@article{TEXTUAL,
      recid    = {10689},
      author   = {Stanhope, Stephen A.},
      title    = {Occupancy Modeling, Maximum Contig Size Probabilities and Designing Metagenomics Experiments},
      journal  = {PLOS ONE},
      year     = {2010},
      month    = jul,
      abstract = {Mathematical aspects of coverage and gaps in genome assembly have received substantial attention by bioinformaticians. Typical problems under consideration suppose that reads can be experimentally obtained from a single genome and that the number of reads will be set to cover a large percentage of that genome at a desired depth. In metagenomics experiments genomes from multiple species are simultaneously analyzed and obtaining large numbers of reads per genome is unlikely. We propose the probability of obtaining at least one contig of a desired minimum size from each novel genome in the pool without restriction based on depth of coverage as a metric for metagenomic experimental design. We derive an approximation to the distribution of maximum contig size for single genome assemblies using relatively few reads. This approximation is verified in simulation studies and applied to a number of different metagenomic experimental design problems, ranging in difficulty from detecting a single novel genome in a pool of known species to detecting each of a random number of novel genomes collectively sized and with abundances corresponding to given distributions in a single pool.},
      url      = {http://knowledge.uchicago.edu/record/10689},
}