% Doctoral thesis record from an institutional-repository export.
% Field mapping was normalised: the institution is stored in `school`,
% the 2024-08 date in `year`/`month`, and `doi` holds the bare DOI
% (no resolver prefix). `recid` is the repository record id and is
% ignored by bibliography styles.
@phdthesis{THESIS,
  recid    = {12912},
  author   = {Katsianos, Vasileios},
  title    = {Genetic Association Analysis of Phenotypes Jointly Influenced by a Pair of Interacting Organisms},
  school   = {University of Chicago},
  year     = {2024},
  month    = aug,
  abstract = {The virulence of infectious diseases is usually affected by a combination of a host and at least one pathogen organism. Previous experiments have revealed that combining genetic information from different organisms has enabled the identification of more relevant genetic variants than just individually performing an association analysis on each organism. Hence, we are interested in performing a joint association analysis to test for the interaction effect of each possible pair of a host and pathogen genetic variant on the phenotypic trait relating to the infectious disease. Three main issues may arise when performing this joint association analysis. First, the presence of a non-trivial interaction effect between one of the genetic variants being tested and some unaccounted factor - either observed or unobserved - can lead to heteroscedasticity in the phenotypic trait. Failure to account for this heteroscedasticity may lead to overinflated type I error rates when testing for an interaction effect between this genetic variant and any genetic variant from the other organism. We compare different methods to test and account for the potential heteroscedasticity in the phenotypic trait in the case where the genotype of the pathogen organism is a binary variable. Secondly, the fact that the phenotypic trait is held fixed while the interacting genotypes vary across different interaction tests in a joint genome-wide association analysis means that the collection of interaction test statistics corresponding to a fixed pathogen genetic variant may often display a tangible departure from the known distribution of the interaction test statistic. Under the global null hypothesis of no interaction, the collection of interaction p-values corresponding to a given pathogen genetic variant might turn out to be consistently smaller than uniform, leading to a phenomenon which has been called the "feast" effect, since we end up with excess false discoveries. Similarly, the collection of interaction p-values corresponding to another fixed pathogen genetic variant might turn out to be consistently larger than uniform, leading to a phenomenon which has been called the "famine" effect, since it limits our ability to make any important discoveries. This "feast or famine" effect has been shown to result from improper conditioning in the construction of the interaction test statistic in a joint association analysis. The ordinary interaction test statistic conditions on the pair of genetic variants being tested for interaction. Instead, we take the approach of conditioning on the phenotypic trait and a fixed pathogen genetic variant in order to construct a corrected host-pathogen interaction test statistic which alleviates the feast or famine effect. We focus our efforts on the case of diploid host organisms where an appropriate discrete correction might be required to account for the binomially distributed host genotype. We present a diagnostic tool to predict the prevalence of the feast or famine effect given only the information about a phenotypic trait and a fixed pathogen genetic variant and demonstrate its relationship with the commonly used genomic control inflation factor. Lastly, accounting for population structure among patients infected with related strains of the same pathogen presents a significant challenge, owing to the presence of genetic variants with differing number of alleles within the pathogen genome. As the number of alleles in a genetic variant increases, some of the alleles may be associated with excessively small observed allele frequencies, which introduce numerical instabilities in the existing methods of constructing a pathogen genetic relatedness matrix (GRM). We build upon previous work to develop a novel pathogen GRM for organisms with multiallelic genetic variants which avoids filtering out genetic variants with exceedingly small observed allele frequencies by introducing an adjusted weighting for rare alleles. We validate the type I error control and rectification of the feast or famine effect by our correction framework through a host of simulation studies. We demonstrate the applicability of our proposed pathogen GRM and our correction framework by testing for interaction effects between human SNPs and hepatitis C viral genetic variants on pre-treatment viral load in a cohort of HCV infected patients from the BOSON clinical trial.},
  url      = {http://knowledge.uchicago.edu/record/12912},
  doi      = {10.6082/uchicago.12912},
}