@comment{Journal article with a repository record at knowledge.uchicago.edu (see the recid and url fields).
The original export stored the date 2008-05-14 in the address field and the placeholder TEXTUAL in the number field.
The date is now recorded as year and month (presumably the publication date -- TODO confirm) and the placeholder issue number is dropped.
The citation key is left unchanged so that existing citations keep resolving.
TODO: add volume, number, pages (article id) and doi from the publisher record; they are not present in the export.}
@article{TEXTUAL,
  author   = {Antezana, Marcos A. and Jordan, I. King},
  title    = {Highly Conserved Regimes of Neighbor-Base-Dependent Mutation Generated the Background Primary-Structural Heterogeneities along Vertebrate Chromosomes},
  journal  = {PLOS ONE},
  year     = {2008},
  month    = may,
  abstract = {The content of guanine+cytosine varies markedly along the chromosomes of homeotherms and great effort has been devoted to studying this heterogeneity and its biological implications. Already before the DNA-sequencing era, however, it was established that the dinucleotides in the DNA of mammals in particular, and of most organisms in general, show striking over- and under-representations that cannot be explained by the base composition. Here we show that in the coding regions of vertebrates both GC content and codon occurrences are strongly correlated with such ``motif preferences'' even though we quantify the latter using an index that is not affected by the base composition, codon usage, and protein-sequence encoding. These correlations are likely to be the result of the long-term shaping of the primary structure of genic and non-genic DNA by a regime of mutation of which central features have been maintained by natural selection. We find indeed that these preferences are conserved in vertebrates even more rigidly than codon occurrences and we show that the occurrence-preference correlations are stronger in intronic and non-genic DNA, with the $R^2$s reaching 99\% when GC content is ${\sim}0.5$. The mutation regime appears to be characterized by rates that depend markedly on the bases present at the site preceding and at that following each mutating site, because when we estimate such rates of neighbor-base-dependent mutation (NBDM) from substitutions retrieved from alignments of coding, intronic, and non-genic mammalian DNA sorted and grouped by GC content, they suffice to simulate DNA sequences in which motif occurrences and preferences as well as the correlations of motif preferences with GC content and with motif occurrences, are very similar to the mammalian ones.
The best fit, however, is obtained with NBDM regimes lacking strand effects, which indicates that over the long term NBDM switches strands in the germline as one would expect for effects due to loosely contained background transcription. Finally, we show that human coding regions are less mutable under the estimated NBDM regimes than under matched context-independent mutation and that this entails marked differences between the spectra of amino-acid mutations that either mutation regime should generate. In the Discussion we examine the mechanisms likely to underlie NBDM heterogeneity along chromosomes and propose that it reflects how the diversity and activity of lesion-bypass polymerases (LBPs) track the landscapes of scheduled and non-scheduled genome repair, replication, and transcription during the cell cycle. We conclude that the primary structure of vertebrate genic DNA at and below the trinucleotide level has been governed over the long term by highly conserved regimes of NBDM which should be under direct natural selection because they alter drastically missense-mutation rates and hence the somatic and the germline mutational loads. Therefore, the non-coding DNA of vertebrates may have been shaped by NBDM only epiphenomenally, with non-genic DNA being affected mainly when found in the proximity of genes.},
  url      = {http://knowledge.uchicago.edu/record/8230},
  recid    = {8230},
}