2019 |
William R. Blevins, Jorge Ruiz-Orera, Xavier Messeguer, Bernat Blasco-Moreno, José Luis Villanueva-Cañas, Lorena Espinar, Juana Díez, Lucas B. Carey, M. Mar Albà Frequent birth of de novo genes in the compact yeast genome (Article) bioRxiv, March 13, 2019. (Abstract | Links | BibTeX | Tags: de novo gene, RNA-Seq, Saccharomyces cerevisiae, yeast)
@comment{Blevins2019: bioRxiv entry. Names are stored as "Last, First" joined by " and " so each of the nine authors parses separately; "journal" holds only the venue name, the full date lives in "date"; "doi" is the bare identifier and "url" its resolver form. LaTeX specials in the abstract are escaped so the field can be typeset.}
@article{Blevins2019,
  title    = {Frequent birth of de novo genes in the compact yeast genome},
  author   = {Blevins, William R. and Ruiz-Orera, Jorge and Messeguer, Xavier and Blasco-Moreno, Bernat and Villanueva-Cañas, José Luis and Espinar, Lorena and Díez, Juana and Carey, Lucas B. and Albà, M. Mar},
  journal  = {bioRxiv},
  year     = {2019},
  date     = {2019-03-13},
  doi      = {10.1101/575837},
  url      = {https://doi.org/10.1101/575837},
  abstract = {Evidence has accumulated that some genes originate directly from previously non-genic sequences, or de novo, rather than by the duplication or fusion of existing genes. However, how de novo genes emerge and eventually become functional is largely unknown. Here we perform the first study on de novo genes that uses transcriptomics data from eleven different yeast species, all grown identically in both rich media and in oxidative stress conditions. The genomes of these species are densely-packed with functional elements, leaving little room for the co-option of genomic sequences into new transcribed loci. Despite this, we find that at least 213 transcripts ($\sim$5\%) have arisen de novo in the past 20 million years of evolution of baker’s yeast-or approximately 10 new transcripts every million years. Nearly half of the total newly expressed sequences are generated from regions in which both DNA strands are used as templates for transcription, explaining the apparent contradiction between the limited ‘empty’ genomic space and high rate of de novo gene birth. In addition, we find that 40\% of these de novo transcripts are actively translated and that at least a fraction of the encoded proteins are likely to be under purifying selection. This study shows that even in very highly compact genomes, de novo transcripts are continuously generated and can give rise to new functional protein-coding genes.},
  keywords = {de novo gene, RNA-Seq, Saccharomyces cerevisiae, yeast},
}
Evidence has accumulated that some genes originate directly from previously non-genic sequences, or de novo, rather than by the duplication or fusion of existing genes. However, how de novo genes emerge and eventually become functional is largely unknown. Here we perform the first study on de novo genes that uses transcriptomics data from eleven different yeast species, all grown identically in both rich media and in oxidative stress conditions. The genomes of these species are densely-packed with functional elements, leaving little room for the co-option of genomic sequences into new transcribed loci. Despite this, we find that at least 213 transcripts (~5%) have arisen de novo in the past 20 million years of evolution of baker’s yeast-or approximately 10 new transcripts every million years. Nearly half of the total newly expressed sequences are generated from regions in which both DNA strands are used as templates for transcription, explaining the apparent contradiction between the limited ‘empty’ genomic space and high rate of de novo gene birth. In addition, we find that 40% of these de novo transcripts are actively translated and that at least a fraction of the encoded proteins are likely to be under purifying selection. This study shows that even in very highly compact genomes, de novo transcripts are continuously generated and can give rise to new functional protein-coding genes. |
2002 |
Albà, M Mar, Laskowski, Roman A, Hancock, John M Detecting cryptically simple protein sequences using the SIMPLE algorithm. (Article) Bioinformatics (Oxford, England), 18 (5), pp. 672–8, 2002, ISSN: 1367-4803. (Abstract | Links | BibTeX | Tags: Algorithms, Amino Acid, Amino Acid Sequence, Amino Acid: genetics, Databases, Genetic, Genetic Variation, Internet, Minisatellite Repeats, Minisatellite Repeats: genetics, Models, Molecular Sequence Data, Protein, Protein: methods, Proteins, Proteins: chemistry, Repetitive Sequences, Saccharomyces cerevisiae, Saccharomyces cerevisiae: genetics, Sensitivity and Specificity, Sequence Analysis, Sequence Homology, Software, Statistical)
@comment{Alba2002: journal article. The title carries no final period (styles add their own punctuation) and SIMPLE is braced so sentence-casing styles keep it capitalised; the page range is written in full; "date" is year-only to agree with "year". NOTE(review): the keywords look like MeSH headings split on their internal commas - confirm against the source record before relying on them.}
@article{Alba2002,
  title    = {Detecting cryptically simple protein sequences using the {SIMPLE} algorithm},
  author   = {Albà, M. Mar and Laskowski, Roman A. and Hancock, John M.},
  journal  = {Bioinformatics (Oxford, England)},
  volume   = {18},
  number   = {5},
  pages    = {672--678},
  year     = {2002},
  date     = {2002},
  issn     = {1367-4803},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/12050063},
  abstract = {Low-complexity or cryptically simple sequences are widespread in protein sequences but their evolution and function are poorly understood. To date methods for the detection of low complexity in proteins have been directed towards the filtering of such regions prior to sequence homology searches but not to the analysis of the regions per se. However, many of these regions are encoded by non-repetitive DNA sequences and may therefore result from selection acting on protein structure and/or function.},
  keywords = {Algorithms, Amino Acid, Amino Acid Sequence, Amino Acid: genetics, Databases, Genetic, Genetic Variation, Internet, Minisatellite Repeats, Minisatellite Repeats: genetics, Models, Molecular Sequence Data, Protein, Protein: methods, Proteins, Proteins: chemistry, Repetitive Sequences, Saccharomyces cerevisiae, Saccharomyces cerevisiae: genetics, Sensitivity and Specificity, Sequence Analysis, Sequence Homology, Software, Statistical},
}
Low-complexity or cryptically simple sequences are widespread in protein sequences but their evolution and function are poorly understood. To date methods for the detection of low complexity in proteins have been directed towards the filtering of such regions prior to sequence homology searches but not to the analysis of the regions per se. However, many of these regions are encoded by non-repetitive DNA sequences and may therefore result from selection acting on protein structure and/or function. |
Publication List
Amino Acid Animals Computational Biology Databases de novo gene Evolution Genetic Genome Humans lncRNA Mice Molecular Molecular Sequence Data Nucleic Acid Proteins Proteins: chemistry Proteins: genetics Repetitive Sequences ribosome profiling RNA-Seq Selection Sequence Analysis Sequence Homology transcriptomics yeast
2019 |
Frequent birth of de novo genes in the compact yeast genome (Article) bioRxiv, March 13, 2019. |
2002 |
Detecting cryptically simple protein sequences using the SIMPLE algorithm. (Article) Bioinformatics (Oxford, England), 18 (5), pp. 672–8, 2002, ISSN: 1367-4803. |