% Journal article record (repository record id 8220, see `recid`/`url`).
% The citation key is kept unchanged so existing \cite{TEXTUAL} commands still resolve.
% The publication date 2005-12-09 was exported in the `address` field; it is
% now stored as year/month/day (`day` is ignored by the standard BibTeX styles).
% NOTE(review): the exported record carried no usable volume, issue,
% article number, or DOI (its `number` field only repeated the citation key);
% add them from the publisher's record when available.
@article{TEXTUAL,
  author   = {Rosenberg, Noah A. and Mahajan, Saurabh and Ramachandran, Sohini
              and Zhao, Chengfeng and Pritchard, Jonathan K. and Feldman, Marcus W.},
  title    = {Clines, Clusters, and the Effect of Study Design on the Inference
              of Human Population Structure},
  journal  = {{PLOS} Genetics},
  year     = {2005},
  month    = dec,
  day      = {9},
  url      = {http://knowledge.uchicago.edu/record/8220},
  recid    = {8220},
  abstract = {Previously, we observed that without using prior information
              about individual sampling locations, a clustering algorithm
              applied to multilocus genotypes from worldwide human populations
              produced genetic clusters largely coincident with major
              geographic regions. It has been argued, however, that the degree
              of clustering is diminished by use of samples with greater
              uniformity in geographic distribution, and that the clusters we
              identified were a consequence of uneven sampling along genetic
              clines. Expanding our earlier dataset from 377 to 993 markers, we
              systematically examine the influence of several study design
              variables---sample size, number of loci, number of clusters,
              assumptions about correlations in allele frequencies across
              populations, and the geographic dispersion of the sample---on the
              ``clusteredness'' of individuals. With all other variables held
              constant, geographic dispersion is seen to have comparatively
              little effect on the degree of clustering. Examination of the
              relationship between genetic and geographic distance supports a
              view in which the clusters arise not as an artifact of the
              sampling scheme, but from small discontinuous jumps in genetic
              distance for most population pairs on opposite sides of
              geographic barriers, in comparison with genetic distance for
              pairs on the same side. Thus, analysis of the 993-locus dataset
              corroborates our earlier results: if enough markers are used with
              a sufficiently large worldwide sample, individuals can be
              partitioned into genetic clusters that match major geographic
              subdivisions of the globe, with some individuals from
              intermediate geographic locations having mixed membership in the
              clusters that correspond to neighboring regions.},
}