% Conference paper presenting the FABRIC framework (ISBRA 2020 proceedings,
% Lecture Notes in Computer Science series).
% The opaque citation key is kept unchanged so existing citations still resolve.
% LaTeX-special characters in the abstract (percent sign, tilde) are escaped so
% that styles which print the abstract do not break or silently drop them.
% NOTE(review): no LNCS volume number is recorded for this entry -- TODO confirm
% against the publisher page and add a `volume` field.
@inproceedings{4cf4d252b5254098af3326dc64cae5a0,
title = "Functional Evolutionary Modeling Exposes Overlooked Protein-Coding Genes Involved in Cancer",
abstract = "Numerous computational methods have been developed to screening the genome for candidate driver genes based on genomic data of somatic mutations in tumors. Compiling a catalog of cancer genes has profound implications for the understanding and treatment of the disease. Existing methods make many implicit and explicit assumptions about the distribution of random mutations. We present FABRIC, a new framework for quantifying the evolutionary selection of genes by assessing the functional effects of mutations on protein-coding genes using a pre-trained machine-learning model. The framework compares the estimated effects of observed genetic variations against all possible single-nucleotide mutations in the coding human genome. Compared to existing methods, FABRIC makes minimal assumptions about the distribution of random mutations. To demonstrate its wide applicability, we applied FABRIC on both naturally occurring human variants and somatic mutations in cancer. In the context of cancer, {\textasciitilde}3 M somatic mutations were extracted from over 10,000 cancerous human samples. Of the entire human proteome, 593 protein-coding genes show statistically significant bias towards harmful mutations. These genes, discovered without any prior knowledge, show an overwhelming overlap with contemporary cancer gene catalogs. Notably, the majority of these genes (426) are unlisted in these catalogs, but a substantial fraction of them is supported by literature. In the context of normal human evolution, we analyzed {\textasciitilde}5 M common and rare variants from {\textasciitilde}60 K individuals, discovering 6,288 significant genes. Over 98\% of them are dominated by negative selection, supporting the notion of a strong purifying selection during the evolution of the healthy human population. We present the FABRIC framework as an open-source project with a simple command-line interface.",
keywords = "Cancer evolution, Driver genes, ExAC, Machine learning, Positive selection, Single nucleotide variants, TCGA",
author = "Brandes, Nadav and Linial, Nathan and Linial, Michal",
note = "Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 16th International Symposium on Bioinformatics Research and Applications, ISBRA 2020 ; Conference date: 01-12-2020 Through 04-12-2020",
year = "2020",
doi = "10.1007/978-3-030-57821-3_11",
language = "English",
isbn = "9783030578206",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer",
pages = "119--126",
editor = "Cai, Zhipeng and Mandoiu, Ion and Narasimhan, Giri and Skums, Pavel and Guo, Xuan",
booktitle = "Bioinformatics Research and Applications - 16th International Symposium, {ISBRA} 2020, Proceedings",
}