% Journal article exported from the repository record at
% knowledge.uchicago.edu/record/13599.
% The export carried the date 2024-09-27 in the address field; it is stored
% below as year/month (presumably the publication date -- TODO confirm
% against the journal). The exported number field only repeated the
% placeholder TEXTUAL rather than an issue number, so it was dropped.
% The citation key is kept unchanged so existing citations still resolve.
@article{TEXTUAL,
      recid    = {13599},
      author   = {Nguyen, Tung Tho and Brownstein, Korey J.},
      title    = {Utilization of a natural language processing-based approach to determine the composition of artifact residues},
      journal  = {BMC Bioinformatics},
      year     = {2024},
      month    = sep,
      abstract = {Background: Determining the composition of artifact residues is a central problem in ancient residue metabolomics. This is done by comparing mass spectral features in common with an experimental artifact and an ancient artifact (standard method). While this method is simple and straightforward, we sought to increase the accuracy of predicting which plant species had been used in which artifacts.
                  Results: Here, we introduce an algorithm (new method) based on ideas from the field of natural language processing (NLP) to solve this problem. We tested our strategy on a set of modern clay pipes. To limit biases, we were not provided information on which plant species had been smoked in which clay pipes. The results indicate that our new method performed 12.5\% better than the standard method in predicting the plant species smoked in each artifact.
                  Conclusions: Utilizing an NLP-based approach, we developed a robust algorithm for characterizing the composition of artifact residues. This work also discusses other general applications in which our algorithm could be used in the field of metabolomics, such as datasets where there are a limited number of replicates.},
      url      = {http://knowledge.uchicago.edu/record/13599},
}