% Journal article record exported from an institutional repository (recid 6591).
% Clean-up notes:
%   - The export stored the date 2023-02-06 in the address field; it is now
%     recorded as year/month (address is meant for a publisher city).
%   - The export's number field held the placeholder string TEXTUAL (same as
%     the citation key), not an issue number, so it has been dropped.
%   - The citation key is left unchanged so existing citations keep resolving.
%   - TODO(review): volume, pages/article number and doi are not present in the
%     export; add them from the publisher record.
@article{TEXTUAL,
  recid    = {6591},
  author   = {Clyde, Austin and Liu, Xuefeng and Brettin, Thomas and
              Yoo, Hyunseung and Partin, Alexander and Babuji, Yadu and
              Blaiszik, Ben and Mohd-Yusof, Jamaludin and Merzky, Andre and
              Turilli, Matteo and Jha, Shantenu and Ramanathan, Arvind and
              Stevens, Rick},
  title    = {{AI}-accelerated protein-ligand docking for {SARS-CoV-2} is
              100-fold faster with no significant change in detection},
  journal  = {Scientific Reports},
  year     = {2023},
  month    = feb,
  abstract = {Protein-ligand docking is a computational method for
              identifying drug leads. The method is capable of narrowing a
              vast library of compounds down to a tractable size for
              downstream simulation or experimental testing and is widely
              used in drug discovery. While there has been progress in
              accelerating scoring of compounds with artificial
              intelligence, few works have bridged these successes back to
              the virtual screening community in terms of utility and
              forward-looking development. We demonstrate the power of
              high-speed ML models by scoring 1 billion molecules in under a
              day (50 k predictions per GPU seconds). We showcase a workflow
              for docking utilizing surrogate AI-based models as a
              pre-filter to a standard docking workflow. Our workflow is ten
              times faster at screening a library of compounds than the
              standard technique, with an error rate less than 0.01\% of
              detecting the underlying best scoring 0.1\% of compounds. Our
              analysis of the speedup explains that another order of
              magnitude speedup must come from model accuracy rather than
              computing speed. In order to drive another order of magnitude
              of acceleration, we share a benchmark dataset consisting of
              200 million 3D complex structures and 2D structure scores
              across a consistent set of 13 million ``in-stock'' molecules
              over 15 receptors, or binding sites, across the SARS-CoV-2
              proteome. We believe this is strong evidence for the community
              to begin focusing on improving the accuracy of surrogate
              models to improve the ability to screen massive compound
              libraries 100~$\times$ or even 1000~$\times$ faster than
              current techniques and reduce missing top hits. The technique
              outlined aims to be a fast drop-in replacement for docking for
              screening billion-scale molecular libraries.},
  url      = {http://knowledge.uchicago.edu/record/6591},
}