% Repository record 130 (see url). The year was previously stored in `address`;
% it now lives in `year`. NOTE(review): `journal` is required for @article but
% is not recorded in this export -- TODO fill it in from the record page.
@article{Classification:130,
      recid    = {130},
      author   = {Omar, A. A.},
      title    = {Addressing Subjectivity and Replicability in Thematic Classification of Literary Texts: Using Cluster Analysis to Derive Taxonomies of Thematic Concepts in the {Thomas} {Hardy’s} Prose Fiction},
      year     = {2010},
      abstract = {Thematic classification of Thomas Hardy’s work has traditionally been based partly on textual content and partly on biographical considerations. These analyses and criticisms have been generated by what will henceforth be referred to as ‘the philological method’, that is, by individual researcher’s reading of printed materials and the intuitive abstraction of generalizations from that reading. A major problem with studies in this tradition is that they are not objective or replicable. With the advent of electronic text, however, a large number of literary works, including the works of Thomas Hardy, have become available, and this electronic format now permits computational data analysis concepts and procedures to be applied to them. This makes it possible for thematic classifications of literary texts to be based to some degree on objective computational methods. In order to address issues of objectivity and replicability, this paper proposes an automated text clustering of the prose fiction works of Thomas Hardy using cluster analysis based on a vector space model (VSM) representation of the lexical content of the selected texts. The results reported here indicate that the proposed clustering structures yield usable results in understanding the thematic structure of Hardy’s prose fiction texts and that they do so in an objective and replicable way.},
      url      = {http://knowledge.uchicago.edu/record/130},
}