% FiSSC conference paper (ACM-BCB 2024). The doi field holds the bare DOI (styles add the
% resolver prefix themselves); the proceedings title is stored once, in booktitle; braces in
% the title protect the tool name and the acronym from style-driven lowercasing.
@inproceedings{f4f4f351ea274823a82fee98ae5cc64b,
title = "{FiSSC}: Finding smallest sequence covers to sets of degenerate reads with applications to {RNA} editing",
abstract = "High-throughput sequencing (HTS) is the most established technique to measure transcript abundance. HTS reads often contain uncertain or low-quality base calls that introduce ambiguity in determining the underlying sequence. In many applications, these unresolved nucleotides are handled by looking at the consensus sequence of all HTS reads. However, this approach is not applicable where sequence heterogeneity is of biological relevance. To gauge the biological complexity of a set of HTS reads in face of unresolved base calls, one may apply the parsimony principle, i.e., find a smallest set of sequences that cover all ambiguous reads. But, no method to date solves this problem optimally. Here, we present FiSSC, a new method to find a smallest sequence cover of a set of ambiguous reads. We first prove that the problem is NP-hard. We then present filtering steps that preserve optimal solution size, and an integer-linear-programming formulation, which together form FiSSC. We tested FiSSC on A-to-I RNA editing datasets with binary ambiguities. FiSSC outperformed all baseline methods and achieved optimal results in all but one dataset. We expect FiSSC to advance the study of sequence variation and biological complexity of ambiguous reads in various biological domains.",
keywords = "ILP, NP-hard, independent set, sequence cover",
author = "Ido Tziony and Jonathan Mandl and Kobi Shapira and Eli Eisenberg and Ely Porat and Yaron Orenstein",
note = "Publisher Copyright: {\textcopyright} 2024 Copyright held by the owner/author(s).; 15th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics, ACM-BCB 2024 ; Conference date: 22-11-2024 Through 25-11-2024",
year = "2024",
month = dec,
day = "16",
doi = "10.1145/3698587.3701363",
language = "American English",
booktitle = "ACM-BCB 2024 - 15th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics",
}