% Journal article record exported from the repository at the `url` below
% (`recid` is the repository's record number; it is not a standard BibTeX
% field and is ignored by bibliography styles).
%
% Clean-up applied to the raw export:
%   - The export stored the date 2009-05-22 in `address`. `address` is meant
%     for a publisher location, so the date now lives in `year`/`month`
%     (`day` is a non-standard field kept only so the full date is not lost).
%   - The export had `number = {TEXTUAL}`, a placeholder identical to the
%     citation key rather than an issue number; it has been dropped.
%   - HTML <p> tags and doubled spaces were removed from `abstract`.
%
% NOTE(review): the citation key is a generic export placeholder; it is left
% unchanged so existing \cite{TEXTUAL} references keep working.
% TODO: fill in volume, number, pages/article id and doi from the publisher's
% record; they are not present in the export.
@article{TEXTUAL,
  author   = {Rzhetsky, Andrey and Shatkay, Hagit and Wilbur, W. John},
  title    = {How to Get the Most out of Your Curation Effort},
  journal  = {PLOS Computational Biology},
  year     = {2009},
  month    = may,
  day      = {22},
  abstract = {Large-scale annotation efforts typically involve several
              experts who may disagree with each other. We propose an
              approach for modeling disagreements among experts that allows
              providing each annotation with a confidence value (i.e., the
              posterior probability that it is correct). Our approach allows
              computing certainty-level for individual annotations, given
              annotator-specific parameters estimated from data. We
              developed two probabilistic models for performing this
              analysis, compared these models using computer simulation, and
              tested each model's actual performance, based on a large data
              set generated by human annotators specifically for this study.
              We show that even in the worst-case scenario, when all
              annotators disagree, our approach allows us to significantly
              increase the probability of choosing the correct annotation.
              Along with this publication we make publicly available a
              corpus of 10,000 sentences annotated according to several
              cardinal dimensions that we have introduced in earlier work.
              The 10,000 sentences were all 3-fold annotated by a group of
              eight experts, while a 1,000-sentence subset was further
              5-fold annotated by five new experts. While the presented data
              represent a specialized curation task, our modeling approach
              is general; most data annotation studies could benefit from
              our methodology.},
  url      = {http://knowledge.uchicago.edu/record/10217},
  recid    = {10217},
}