@comment{
  Journal article describing NERO, a biomedical named-entity recognition
  ontology with an annotated corpus (npj Systems Biology and Applications).
  Repository record: recid 7704 at knowledge.uchicago.edu.

  Review notes:
  - The citation key TEXTUAL looks like an export placeholder. It is kept
    unchanged so that existing citations of this key keep resolving.
    TODO: consider a descriptive key once all citing documents are known.
  - The original record stored the date 2021-10-20 in the address field.
    address is the publisher's city, so the date now lives in year/month;
    the day (20) has no classic BibTeX field and is recorded only here.
  - The original record had number = TEXTUAL, a placeholder rather than an
    issue number; it was removed. Volume, article number and DOI are not
    present in this record. TODO: add them from the publisher's page.
  - Accented characters in author names use LaTeX escapes so the entry
    sorts and labels correctly under classic 8-bit BibTeX as well as Biber.
  - The abstract read ImageNet4 in the original record; the trailing 4
    appears to be a fused citation superscript and was dropped.
}
@article{TEXTUAL,
  recid    = {7704},
  author   = {Wang, Kanix and Stevens, Robert and Alachram, Halima and
              Li, Yu and Soldatova, Larisa and King, Ross and
              Ananiadou, Sophia and Schoene, Annika M. and Li, Maolin and
              Christopoulou, Fenia and Ambite, Jos{\'e} Luis and
              Matthew, Joel and Garg, Sahil and Hermjakob, Ulf and
              Marcu, Daniel and Sheng, Emily and Bei{\ss}barth, Tim and
              Wingender, Edgar and Galstyan, Aram and Gao, Xin and
              Chambers, Brendan and Pan, Weidi and
              Khomtchouk, Bohdan B. and Evans, James A. and
              Rzhetsky, Andrey},
  title    = {{NERO}: A biomedical named-entity (recognition) ontology
              with a large, annotated corpus reveals meaningful
              associations through text embedding},
  journal  = {npj Systems Biology and Applications},
  year     = {2021},
  month    = oct,
  abstract = {Machine reading (MR) is essential for unlocking valuable
              knowledge contained in millions of existing biomedical
              documents. Over the last two decades, the most dramatic
              advances in MR have followed in the wake of critical corpus
              development. Large, well-annotated corpora have been
              associated with punctuated advances in MR methodology and
              automated knowledge extraction systems in the same way that
              ImageNet was fundamental for developing machine vision
              techniques. This study contributes six components to an
              advanced, named entity analysis tool for biomedicine: (a) a
              new, Named Entity Recognition Ontology (NERO) developed
              specifically for describing textual entities in biomedical
              texts, which accounts for diverse levels of ambiguity,
              bridging the scientific sublanguages of molecular biology,
              genetics, biochemistry, and medicine; (b) detailed
              guidelines for human experts annotating hundreds of named
              entity classes; (c) pictographs for all named entities, to
              simplify the burden of annotation for curators; (d) an
              original, annotated corpus comprising 35,865 sentences,
              which encapsulate 190,679 named entities and 43,438 events
              connecting two or more entities; (e) validated,
              off-the-shelf, named entity recognition (NER) automated
              extraction, and; (f) embedding models that demonstrate the
              promise of biomedical associations embedded within this
              corpus.},
  url      = {http://knowledge.uchicago.edu/record/7704},
}