@phdthesis{Operational:7591,
      recid    = {7591},
      author   = {Keceli, Cagla},
      title    = {Learning in Operational Settings},
      school   = {University of Chicago},
      type     = {{PhD} dissertation},
      year     = {2023},
      month    = aug,
      pages    = {165},
      abstract = {This thesis consists of three papers in the field of  learning, in operational settings. 

The first paper  (Chapter 2) is titled Thompson Sampling for  Infinite-Horizon Discounted Decision Processes. We model a  Markov decision process, parametrized by an unknown  parameter, and study the asymptotic behavior of a  sampling-based algorithm, called Thompson sampling. The  standard definition of regret is not always suitable to  evaluate a policy, especially when the underlying chain  structure is general. We show that the standard (expected)  regret can grow (super-)linearly and fails to capture the  notion of learning in realistic settings with non-trivial  state evolution. By decomposing the standard (expected)  regret, we develop a new metric, called the expected  residual regret, which forgets the immutable consequences  of past actions. Instead, it measures regret against the  optimal reward moving forward from the current period. We  show that the expected residual regret of the Thompson  sampling algorithm is upper bounded by a term which  converges exponentially fast to 0. We present conditions  under which the posterior sampling error of Thompson  sampling converges to 0 almost surely. We then introduce  the probabilistic version of the expected residual regret  and present conditions under which it converges to 0 almost  surely. Thus, we provide a viable concept of learning for  sampling algorithms which will serve useful in broader  settings than had been considered previously. 

The second  paper (Chapter 3) is titled Estimating the Mean and  Variance of Heterogeneous Tasks. The third paper (Chapter  4) is titled Equitable Data-Driven Assignments of Workers  to Tasks. These two chapters are tightly related to each  other. In Chapter 4, we put forward a simple yet effective  method of predicting completion times of tasks, assuming  that the completion time is a function of worker-task  familiarity. Our prediction algorithm requires estimating  the mean and standard deviation of time and familiarity  across tasks. Yet, in many settings it is not  straightforward to estimate the mean and variance of a task  that does not appear frequently. This constitutes the  motivation for Chapter 3. To estimate the mean and variance  of surgical encounters, we adapt two statistical methods  into this novel setting. 

In Chapter 4, we develop a  practical algorithm to predict task completion times, which  obscures workers' performance information and only uses the  task familiarity of individual workers, i.e., the equitable  policy. We bound the algorithm's steady-state predictions  when used in a sequential assignment framework against the  policy that doesn't obscure worker-specific performance,  i.e., the performance-aware policy. We study the  ramifications of using the equitable policy on the total  task completion times in the long-run. Under certain  assumptions, we uncover that the egalitarian policy  performs the worst, i.e., a policy which imposes that (i)  each worker has the same propensity to be assigned to a  task and (ii) each task has the same propensity to be  assigned to a worker, yields the worst outcome. This  constitutes an upper bound on the penalty of making  equitable assignments. },
      url      = {http://knowledge.uchicago.edu/record/7591},
      doi      = {10.6082/uchicago.7591},
}