2020 |
Shin-Young Hong, Bin Sun, Daniel Straub, Anko Blaakmeer, Lorenzo Mineri, Jonas Koch, Henrik Brinch-Pedersen, Inge B. Holme, Meike Burow, Hans Jørgen Lyngs Jørgensen, M.Mar Albà, Stephan Wenkel Proceedings of the National Academy of Sciences USA, Online ahead of print. 2020. (Links | BibTeX | Tags: dominant negative, microprotein, plants, protein domain)
@comment{The publication status lives in the note field because volume may only hold a bound volume number. NOTE(review): the date field says 2020-10-10 while the url path says 2020/10/01; confirm against the publisher record.}
@article{SY2020,
  author   = {Shin-Young Hong and Bin Sun and Daniel Straub and Anko Blaakmeer and Lorenzo Mineri and Jonas Koch and Henrik Brinch-Pedersen and Inge B. Holme and Meike Burow and Hans Jørgen Lyngs Jørgensen and M. Mar Albà and Stephan Wenkel},
  title    = {Heterologous {microProtein} expression identifies {LITTLE NINJA}, a dominant regulator of jasmonic acid signaling},
  journal  = {Proceedings of the National Academy of Sciences USA},
  note     = {Online ahead of print},
  year     = {2020},
  date     = {2020-10-10},
  url      = {https://www.pnas.org/content/early/2020/10/01/2005198117},
  keywords = {dominant negative, microprotein, plants, protein domain},
}
|
Jorge Ruiz-Orera, José Luis Villanueva-Cañas, M.Mar Albà Evolution of New Proteins From Translated sORFs in Long Non-Coding RNAs (Article) Experimental Cell Research, 391 (1), pp. 111940, 2020. (BibTeX | Tags: de novo gene, lncRNA)
@comment{NOTE(review): this record describes the same article as the entry keyed Ruiz-Orera2020 (same title, authors, journal, volume, issue and pages). The key is kept so existing citations still resolve; prefer Ruiz-Orera2020 for new citations.}
@article{Ruiz-Orera2020b,
  author   = {Jorge Ruiz-Orera and José Luis Villanueva-Cañas and M. Mar Albà},
  title    = {Evolution of New Proteins From Translated {sORFs} in Long Non-Coding {RNAs}},
  journal  = {Experimental Cell Research},
  volume   = {391},
  number   = {1},
  pages    = {111940},
  year     = {2020},
  date     = {2020-06-01},
  keywords = {de novo gene, lncRNA},
}
|
Marina Reixachs-Solé, Jorge Ruiz-Orera, M.Mar Albà, Eduardo Eyras Nature Communications, 11 (1), pp. 1768, 2020, DOI: 10.1038/s41467-020-15634-w. (Abstract | Links | BibTeX | Tags: cancer, de novo gene, isoform, lncRNA, microexon, sORF)
@comment{The identifier 10.1038/s41467-020-15634-w is a DOI, so it is stored in the doi field rather than isbn.}
@article{Reixachs-Solé2020,
  author   = {Marina Reixachs-Solé and Jorge Ruiz-Orera and M. Mar Albà and Eduardo Eyras},
  title    = {Ribosome Profiling at Isoform Level Reveals Evolutionary Conserved Impacts of Differential Splicing on the Proteome},
  journal  = {Nature Communications},
  volume   = {11},
  number   = {1},
  pages    = {1768},
  year     = {2020},
  date     = {2020-04-14},
  doi      = {10.1038/s41467-020-15634-w},
  url      = {https://www.nature.com/articles/s41467-020-15634-w},
  abstract = {The differential production of transcript isoforms from gene loci is a key cellular mechanism. Yet, its impact in protein production remains an open question. Here, we describe ORQAS (ORF quantification pipeline for alternative splicing), a pipeline for the translation quantification of individual transcript isoforms using ribosome-protected mRNA fragments (ribosome profiling). We find evidence of translation for 40-50% of the expressed isoforms in human and mouse, with 53% of the expressed genes having more than one translated isoform in human, and 33% in mouse. Differential splicing analysis revealed that about 40% of the splicing changes at RNA level are concordant with changes in translation. Furthermore, orthologous cassette exons between human and mouse preserve the directionality of the change, and are enriched in microexons in a comparison between glia and glioma. ORQAS leverages ribosome profiling to uncover a widespread and evolutionarily conserved impact of differential splicing on translation, particularly of microexon-containing isoforms.},
  keywords = {cancer, de novo gene, isoform, lncRNA, microexon, sORF},
}
The differential production of transcript isoforms from gene loci is a key cellular mechanism. Yet, its impact in protein production remains an open question. Here, we describe ORQAS (ORF quantification pipeline for alternative splicing), a pipeline for the translation quantification of individual transcript isoforms using ribosome-protected mRNA fragments (ribosome profiling). We find evidence of translation for 40-50% of the expressed isoforms in human and mouse, with 53% of the expressed genes having more than one translated isoform in human, and 33% in mouse. Differential splicing analysis revealed that about 40% of the splicing changes at RNA level are concordant with changes in translation. Furthermore, orthologous cassette exons between human and mouse preserve the directionality of the change, and are enriched in microexons in a comparison between glia and glioma. ORQAS leverages ribosome profiling to uncover a widespread and evolutionarily conserved impact of differential splicing on translation, particularly of microexon-containing isoforms. |
Jorge Ruiz-Orera, José Luis Villanueva-Cañas, M.Mar Albà Evolution of New Proteins From Translated sORFs in Long Non-Coding RNAs (Article) Experimental Cell Research, 391 (1), pp. 111940, 2020. (Abstract | Links | BibTeX | Tags: de novo gene, lncRNA, microprotein, ribosome profiling, sORF)
@article{Ruiz-Orera2020,
  author   = {Jorge Ruiz-Orera and José Luis Villanueva-Cañas and M. Mar Albà},
  title    = {Evolution of New Proteins From Translated {sORFs} in Long Non-Coding {RNAs}},
  journal  = {Experimental Cell Research},
  volume   = {391},
  number   = {1},
  pages    = {111940},
  year     = {2020},
  date     = {2020-03-07},
  url      = {https://www.sciencedirect.com/science/article/abs/pii/S0014482720301452?via%3Dihub},
  abstract = {High throughput RNA sequencing techniques have revealed that a large fraction of the genome is transcribed into long non-coding RNAs (lncRNAs). Unlike canonical protein-coding genes, lncRNAs do not contain long open reading frames (ORFs) and tend to be poorly conserved across species. However, many of them contain small ORFs (sORFs) that exhibit translation signatures according to ribosome profiling or proteomics data. These sORFs are a source of putative novel proteins; some of them may confer a selective advantage and be maintained over time, a process known as de novo gene birth. Here we review the mechanisms by which randomly occurring sORFs in lncRNAs can become new functional proteins.},
  keywords = {de novo gene, lncRNA, microprotein, ribosome profiling, sORF},
}
High throughput RNA sequencing techniques have revealed that a large fraction of the genome is transcribed into long non-coding RNAs (lncRNAs). Unlike canonical protein-coding genes, lncRNAs do not contain long open reading frames (ORFs) and tend to be poorly conserved across species. However, many of them contain small ORFs (sORFs) that exhibit translation signatures according to ribosome profiling or proteomics data. These sORFs are a source of putative novel proteins; some of them may confer a selective advantage and be maintained over time, a process known as de novo gene birth. Here we review the mechanisms by which randomly occurring sORFs in lncRNAs can become new functional proteins. |
Ivan de la Rubia, Joel A Indi, Silvia Carbonell, Julien Lagarde, M.Mar Albà, Eduardo Eyras Reference-free reconstruction and quantification of transcriptomes from long-read sequencing (Article) bioRxiv, 2020. (Abstract | Links | BibTeX | Tags: Long read, Nanopore, transcriptome)
@comment{The doi value is the identifier embedded in the url, without the v1 version suffix.}
@article{delaRubia2020,
  author   = {Ivan de la Rubia and Joel A Indi and Silvia Carbonell and Julien Lagarde and M. Mar Albà and Eduardo Eyras},
  title    = {Reference-free reconstruction and quantification of transcriptomes from long-read sequencing},
  journal  = {bioRxiv},
  year     = {2020},
  date     = {2020-02-09},
  doi      = {10.1101/2020.02.08.939942},
  url      = {https://www.biorxiv.org/content/10.1101/2020.02.08.939942v1},
  abstract = {Single-molecule long-read sequencing provides an unprecedented opportunity to measure the transcriptome from any sample. However, current methods for the analysis of transcriptomes from long reads rely on the comparison with a genome or transcriptome reference, or use multiple sequencing technologies. These approaches preclude the cost-effective study of species with no reference available, and the discovery of new genes and transcripts in individuals underrepresented in the reference. Methods for the assembly of DNA long-reads cannot be directly transferred to transcriptomes since their consensus sequences lack the interpretability as genes with multiple transcript isoforms. To address these challenges, we have developed RATTLE, the first method for the reference-free reconstruction and quantification of transcripts from long reads. Using simulated data, transcript isoform spike-ins, and sequencing data from human and mouse tissues, we demonstrate that RATTLE accurately performs read clustering and error-correction. Furthermore, RATTLE predicts transcript sequences and their abundances with accuracy comparable to reference-based methods. RATTLE enables rapid and cost-effective long-read transcriptomics in any sample and any species, without the need of a genome or annotation reference and without using additional technologies.},
  keywords = {Long read, Nanopore, transcriptome},
}
Single-molecule long-read sequencing provides an unprecedented opportunity to measure the transcriptome from any sample. However, current methods for the analysis of transcriptomes from long reads rely on the comparison with a genome or transcriptome reference, or use multiple sequencing technologies. These approaches preclude the cost-effective study of species with no reference available, and the discovery of new genes and transcripts in individuals underrepresented in the reference. Methods for the assembly of DNA long-reads cannot be directly transferred to transcriptomes since their consensus sequences lack the interpretability as genes with multiple transcript isoforms. To address these challenges, we have developed RATTLE, the first method for the reference-free reconstruction and quantification of transcripts from long reads. Using simulated data, transcript isoform spike-ins, and sequencing data from human and mouse tissues, we demonstrate that RATTLE accurately performs read clustering and error-correction. Furthermore, RATTLE predicts transcript sequences and their abundances with accuracy comparable to reference-based methods. RATTLE enables rapid and cost-effective long-read transcriptomics in any sample and any species, without the need of a genome or annotation reference and without using additional technologies. |
2019 |
William R. Blevins, Teresa Tavella, Simone G. Moro, Bernat Blasco-Moreno, Adrià Closa-Mosquera, Juana Díez, Lucas B. Carey, M.Mar Albà Scientific Reports, 9 pp. 11005, 2019. (Links | BibTeX | Tags: oxidative stress, proteomics, ribosome profiling, RNA-Seq, translation regulation, yeast)
@article{Blevins2019_2,
  author   = {William R. Blevins and Teresa Tavella and Simone G. Moro and Bernat Blasco-Moreno and Adrià Closa-Mosquera and Juana Díez and Lucas B. Carey and M. Mar Albà},
  title    = {Extensive post-transcriptional buffering of gene expression in the response to severe oxidative stress in baker's yeast},
  journal  = {Scientific Reports},
  volume   = {9},
  pages    = {11005},
  year     = {2019},
  date     = {2019-07-29},
  url      = {https://www.nature.com/articles/s41598-019-47424-w},
  keywords = {oxidative stress, proteomics, ribosome profiling, RNA-Seq, translation regulation, yeast},
}
|
Jorge Ruiz-Orera, M.Mar Albà Nucleic Acids Research Genomics and Bioinformatics, 1 pp. e2, 2019. (Links | BibTeX | Tags: lncRNA, micropeptide, protein-RNA interaction, RBP, sORF)
@article{Ruiz-Orera2019_2,
  author   = {Jorge Ruiz-Orera and M. Mar Albà},
  title    = {Conserved regions in long non-coding {RNAs} contain abundant translation and {protein–RNA} interaction signatures},
  journal  = {Nucleic Acids Research Genomics and Bioinformatics},
  volume   = {1},
  pages    = {e2},
  year     = {2019},
  date     = {2019-07-05},
  url      = {https://academic.oup.com/nargab/article/1/1/e2/5528612},
  keywords = {lncRNA, micropeptide, protein-RNA interaction, RBP, sORF},
}
|
William R. Blevins, Lucas B. Carey, M.Mar Albà BMC Research Notes, 12 pp. 250, 2019. (Links | BibTeX | Tags: oxidative stress, RNA-Seq, transcriptomics, yeast)
@comment{The doi value is the identifier that appears at the end of the url.}
@article{Blevins2019b,
  author   = {William R. Blevins and Lucas B. Carey and M. Mar Albà},
  title    = {Transcriptomics data of 11 species of yeast identically grown in rich media and oxidative stress conditions},
  journal  = {BMC Research Notes},
  volume   = {12},
  pages    = {250},
  year     = {2019},
  date     = {2019-05-03},
  doi      = {10.1186/s13104-019-4286-0},
  url      = {https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-019-4286-0},
  keywords = {oxidative stress, RNA-Seq, transcriptomics, yeast},
}
|
Marina Reixachs-Sole, Jorge Ruiz-Orera, M.Mar Albà, Eduardo Eyras bioRxiv, March 19, 2019. (Abstract | Links | BibTeX | Tags: human, isoform, mouse, nervous system, ribosome profiling)
@comment{Preprint record: the posting day is carried by the date field, so journal holds only the server name. NOTE(review): the title closely matches the entry keyed Reixachs-Solé2020, which looks like the journal version; confirm before citing both.}
@article{Reixachs-Sole2019,
  author   = {Marina Reixachs-Sole and Jorge Ruiz-Orera and M. Mar Albà and Eduardo Eyras},
  title    = {Ribosome profiling at isoform level reveals an evolutionary conserved impact of differential splicing on the proteome},
  journal  = {bioRxiv},
  year     = {2019},
  date     = {2019-03-19},
  doi      = {10.1101/582031},
  url      = {https://doi.org/10.1101/582031},
  abstract = {The differential production of transcript isoforms from gene loci is a key mechanism in multiple biological processes and pathologies. Although this has been exhaustively shown at RNA level, it remains elusive at protein level. Here, we describe a new pipeline ORQAS (ORF quantification pipeline for alternative splicing) for the translation quantification of individual transcript isoforms using ribosome-protected mRNA fragments (Ribosome profiling). We found evidence of translation for 40-50% of the expressed transcript isoforms in human and 50% in mouse, with 53% of the expressed genes having more than one translated isoform in human, and 33% in mouse. Differential analysis revealed that about 40% of the splicing changes measured at RNA level in human were concordant with changes in translation; and that 21.7% of changes measured at RNA level, and 17.8% at translation level, were conserved between human and mouse. Furthermore, orthologous cassette exons preserving the directionality of the change were found enriched in microexons in a comparison between glia and glioma in both, and were conserved between human and mouse. In summary, we established a moderate but widespread impact of differential splicing in the translation of isoforms and found evidence of an impact on the translation of microexons as a consequence of differential splicing. ORQAS is available at https://github.com/comprna/orqas .},
  keywords = {human, isoform, mouse, nervous system, ribosome profiling},
}
The differential production of transcript isoforms from gene loci is a key mechanism in multiple biological processes and pathologies. Although this has been exhaustively shown at RNA level, it remains elusive at protein level. Here, we describe a new pipeline ORQAS (ORF quantification pipeline for alternative splicing) for the translation quantification of individual transcript isoforms using ribosome-protected mRNA fragments (Ribosome profiling). We found evidence of translation for 40-50% of the expressed transcript isoforms in human and 50% in mouse, with 53% of the expressed genes having more than one translated isoform in human, and 33% in mouse. Differential analysis revealed that about 40% of the splicing changes measured at RNA level in human were concordant with changes in translation; and that 21.7% of changes measured at RNA level, and 17.8% at translation level, were conserved between human and mouse. Furthermore, orthologous cassette exons preserving the directionality of the change were found enriched in microexons in a comparison between glia and glioma in both, and were conserved between human and mouse. In summary, we established a moderate but widespread impact of differential splicing in the translation of isoforms and found evidence of an impact on the translation of microexons as a consequence of differential splicing. ORQAS is available at https://github.com/comprna/orqas . |
William R. Blevins, Jorge Ruiz-Orera, Xavier Messeguer, Bernat Blasco-Moreno, José Luis Villanueva-Cañas, Lorena Espinar, Juana Díez, Lucas B. Carey, M. Mar Albà Frequent birth of de novo genes in the compact yeast genome (Article) bioRxiv, March 13, 2019. (Abstract | Links | BibTeX | Tags: de novo gene, RNA-Seq, Saccharomyces cerevisiae, yeast)
@comment{Preprint record: the posting day is carried by the date field, so journal holds only the server name.}
@article{Blevins2019,
  author   = {William R. Blevins and Jorge Ruiz-Orera and Xavier Messeguer and Bernat Blasco-Moreno and José Luis Villanueva-Cañas and Lorena Espinar and Juana Díez and Lucas B. Carey and M. Mar Albà},
  title    = {Frequent birth of de novo genes in the compact yeast genome},
  journal  = {bioRxiv},
  year     = {2019},
  date     = {2019-03-13},
  doi      = {10.1101/575837},
  url      = {https://doi.org/10.1101/575837},
  abstract = {Evidence has accumulated that some genes originate directly from previously non-genic sequences, or de novo, rather than by the duplication or fusion of existing genes. However, how de novo genes emerge and eventually become functional is largely unknown. Here we perform the first study on de novo genes that uses transcriptomics data from eleven different yeast species, all grown identically in both rich media and in oxidative stress conditions. The genomes of these species are densely-packed with functional elements, leaving little room for the co-option of genomic sequences into new transcribed loci. Despite this, we find that at least 213 transcripts (~5%) have arisen de novo in the past 20 million years of evolution of baker’s yeast—or approximately 10 new transcripts every million years. Nearly half of the total newly expressed sequences are generated from regions in which both DNA strands are used as templates for transcription, explaining the apparent contradiction between the limited ‘empty’ genomic space and high rate of de novo gene birth. In addition, we find that 40% of these de novo transcripts are actively translated and that at least a fraction of the encoded proteins are likely to be under purifying selection. This study shows that even in very highly compact genomes, de novo transcripts are continuously generated and can give rise to new functional protein-coding genes.},
  keywords = {de novo gene, RNA-Seq, Saccharomyces cerevisiae, yeast},
}
Evidence has accumulated that some genes originate directly from previously non-genic sequences, or de novo, rather than by the duplication or fusion of existing genes. However, how de novo genes emerge and eventually become functional is largely unknown. Here we perform the first study on de novo genes that uses transcriptomics data from eleven different yeast species, all grown identically in both rich media and in oxidative stress conditions. The genomes of these species are densely-packed with functional elements, leaving little room for the co-option of genomic sequences into new transcribed loci. Despite this, we find that at least 213 transcripts (~5%) have arisen de novo in the past 20 million years of evolution of baker’s yeast—or approximately 10 new transcripts every million years. Nearly half of the total newly expressed sequences are generated from regions in which both DNA strands are used as templates for transcription, explaining the apparent contradiction between the limited ‘empty’ genomic space and high rate of de novo gene birth. In addition, we find that 40% of these de novo transcripts are actively translated and that at least a fraction of the encoded proteins are likely to be under purifying selection. This study shows that even in very highly compact genomes, de novo transcripts are continuously generated and can give rise to new functional protein-coding genes. |
Jorge Ruiz-Orera, M.Mar Albà Translation of Small Open Reading Frames: Roles in Regulation and Evolutionary Innovation (Article) Trends in Genetics, 35 pp. 186-198, 2019. (Links | BibTeX | Tags: micropeptide, ribosome profiling, sORF, translation)
@comment{Two authors: without an explicit separator the author field would be parsed as one person.}
@article{Albà2019,
  author   = {Jorge Ruiz-Orera and M. Mar Albà},
  title    = {Translation of Small Open Reading Frames: Roles in Regulation and Evolutionary Innovation},
  journal  = {Trends in Genetics},
  volume   = {35},
  pages    = {186--198},
  year     = {2019},
  date     = {2019-03-01},
  url      = {https://www.sciencedirect.com/science/article/pii/S0168952518302221},
  keywords = {micropeptide, ribosome profiling, sORF, translation},
}
|
2018 |
William R Blevins, Teresa Tavella, Simone G Moro, Bernat Blasco-Moreno, Adrià Closa-Mosquera, Juana Díez, Lucas B Carey, M. Mar Albà bioRxiv, Dec 19, 2018. (Abstract | Links | BibTeX | Tags: Ribo-Seq)
@comment{Preprint record: the posting day is carried by the date field, so journal holds only the server name, and there is no volume.}
@article{Blevins2018,
  author   = {William R Blevins and Teresa Tavella and Simone G Moro and Bernat Blasco-Moreno and Adrià Closa-Mosquera and Juana Díez and Lucas B Carey and M. Mar Albà},
  title    = {Using ribosome profiling to quantify differences in protein expression: a case study in {Saccharomyces} cerevisiae oxidative stress conditions},
  journal  = {bioRxiv},
  year     = {2018},
  date     = {2018-12-19},
  doi      = {10.1101/501478},
  url      = {https://doi.org/10.1101/501478},
  abstract = {Cells respond to changes in the environment by modifying the concentration of specific proteins. Paradoxically, the cellular response is usually examined by measuring variations in transcript abundance by high throughput RNA sequencing (RNA-Seq), instead of directly measuring protein concentrations. This happens because RNA-Seq-based methods provide better quantitative estimates, and more extensive gene coverage, than proteomics-based ones. However, variations in transcript abundance do not necessarily reflect changes in the corresponding protein abundance. How can we close this gap? Here we explore the use of ribosome profiling (Ribo-Seq) to perform differentially gene expression analysis in a relatively well-characterized system, oxidative stress in baker yeast. Ribo-Seq is an RNA sequencing method that specifically targets ribosome-protected RNA fragments, and thus is expected to provide a more accurate view of changes at the protein level than classical RNA-Seq. We show that gene quantification by Ribo-Seq is indeed more highly correlated with protein abundance, as measured from mass spectrometry data, than quantification by RNA-Seq. The analysis indicates that, whereas a subset of genes involved in oxidation-reduction processes is detected by both types of data, the majority of the genes that happen to be significant in the RNA-Seq-based analysis are not significant in the Ribo-Seq analysis, suggesting that they do not result in protein level changes. The results illustrate the advantages of Ribo-Seq to make inferences about changes in protein abundance in comparison with RNA-Seq.},
  keywords = {Ribo-Seq},
}
Cells respond to changes in the environment by modifying the concentration of specific proteins. Paradoxically, the cellular response is usually examined by measuring variations in transcript abundance by high throughput RNA sequencing (RNA-Seq), instead of directly measuring protein concentrations. This happens because RNA-Seq-based methods provide better quantitative estimates, and more extensive gene coverage, than proteomics-based ones. However, variations in transcript abundance do not necessarily reflect changes in the corresponding protein abundance. How can we close this gap? Here we explore the use of ribosome profiling (Ribo-Seq) to perform differentially gene expression analysis in a relatively well-characterized system, oxidative stress in baker yeast. Ribo-Seq is an RNA sequencing method that specifically targets ribosome-protected RNA fragments, and thus is expected to provide a more accurate view of changes at the protein level than classical RNA-Seq. We show that gene quantification by Ribo-Seq is indeed more highly correlated with protein abundance, as measured from mass spectrometry data, than quantification by RNA-Seq. The analysis indicates that, whereas a subset of genes involved in oxidation-reduction processes is detected by both types of data, the majority of the genes that happen to be significant in the RNA-Seq-based analysis are not significant in the Ribo-Seq analysis, suggesting that they do not result in protein level changes. The results illustrate the advantages of Ribo-Seq to make inferences about changes in protein abundance in comparison with RNA-Seq. |
Jorge Ruiz-Orera, M.Mar Albà Frequent translation of small open reading frames in evolutionary conserved lncRNA regions (Article) bioRxiv, June 16 , 2018. (Abstract | Links | BibTeX | Tags: )
@comment{Preprint record: the posting day is carried by the date field, so journal holds only the server name. No keywords were assigned to this record.}
@article{Ruiz-Orera2018b,
  author   = {Jorge Ruiz-Orera and M. Mar Albà},
  title    = {Frequent translation of small open reading frames in evolutionary conserved {lncRNA} regions},
  journal  = {bioRxiv},
  year     = {2018},
  date     = {2018-06-16},
  doi      = {10.1101/348326},
  url      = {https://doi.org/10.1101/348326},
  abstract = {The mammalian transcriptome includes thousands of transcripts that do not correspond to annotated protein-coding genes. Although many of these transcripts show homology between human and mouse, only a small proportion of them have been functionally characterized. Here we use ribosome profiling data to identify translated open reading frames, as well as non-ribosomal protein-RNA interactions, in evolutionary conserved and non-conserved transcripts. We find that conserved regions are subject to significant evolutionary constraints and are enriched in translated open reading frames, as well as non-ribosomal protein-RNA interaction signatures, when compared to non-conserved regions. Translated ORFs can be divided in two classes, those encoding functional micropeptides and those that show no evidence of protein functionality. This study underscores the importance of combining evolutionary and biochemical measurements to advance in a more complete understanding of the transcriptome.},
}
The mammalian transcriptome includes thousands of transcripts that do not correspond to annotated protein-coding genes. Although many of these transcripts show homology between human and mouse, only a small proportion of them have been functionally characterized. Here we use ribosome profiling data to identify translated open reading frames, as well as non-ribosomal protein-RNA interactions, in evolutionary conserved and non-conserved transcripts. We find that conserved regions are subject to significant evolutionary constraints and are enriched in translated open reading frames, as well as non-ribosomal protein-RNA interaction signatures, when compared to non-conserved regions. Translated ORFs can be divided in two classes, those encoding functional micropeptides and those that show no evidence of protein functionality. This study underscores the importance of combining evolutionary and biochemical measurements to advance in a more complete understanding of the transcriptome. |
Jorge Ruiz-Orera, Pol Grau-Verdaguer, José Luis Villanueva-Cañas, Xavier Messeguer, M.Mar Albà Translation of neutrally evolving peptides provides a basis for de novo gene evolution (Article) Nature Ecology and Evolution, 2 pp. 890–896, 2018. (Abstract | Links | BibTeX | Tags: codon usage bias, de novo gene, natural selection, ribosome profiling)
@article{Ruiz-Orera2018,
  author   = {Jorge Ruiz-Orera and Pol Grau-Verdaguer and José Luis Villanueva-Cañas and Xavier Messeguer and M. Mar Albà},
  title    = {Translation of neutrally evolving peptides provides a basis for de novo gene evolution},
  journal  = {Nature Ecology and Evolution},
  volume   = {2},
  pages    = {890--896},
  year     = {2018},
  date     = {2018-03-19},
  url      = {https://www.nature.com/articles/s41559-018-0506-6},
  abstract = {Accumulating evidence indicates that some protein-coding genes have originated de novo from previously non-coding genomic sequences. However, the processes underlying de novo gene birth are still enigmatic. In particular, the appearance of a new functional protein seems highly improbable unless there is already a pool of neutrally evolving peptides that are translated at significant levels and that can at some point acquire new functions. Here, we use deep ribosome-profiling sequencing data, together with proteomics and single nucleotide polymorphism information, to search for these peptides. We find hundreds of open reading frames that are translated and that show no evolutionary conservation or selective constraints. These data suggest that the translation of these neutrally evolving peptides may be facilitated by the chance occurrence of open reading frames with a favourable codon composition. We conclude that the pervasive translation of the transcriptome provides plenty of material for the evolution of new functional proteins.},
  keywords = {codon usage bias, de novo gene, natural selection, ribosome profiling},
}
Accumulating evidence indicates that some protein-coding genes have originated de novo from previously non-coding genomic sequences. However, the processes underlying de novo gene birth are still enigmatic. In particular, the appearance of a new functional protein seems highly improbable unless there is already a pool of neutrally evolving peptides that are translated at significant levels and that can at some point acquire new functions. Here, we use deep ribosome-profiling sequencing data, together with proteomics and single nucleotide polymorphism information, to search for these peptides. We find hundreds of open reading frames that are translated and that show no evolutionary conservation or selective constraints. These data suggest that the translation of these neutrally evolving peptides may be facilitated by the chance occurrence of open reading frames with a favourable codon composition. We conclude that the pervasive translation of the transcriptome provides plenty of material for the evolution of new functional proteins. |
Sheena L. Faherty S#*, José Luis Villanueva-Cañas#, Marina B. Blanco, M.Mar Albà*, Anne D. Yoder. Transcriptomics in the wild: Hibernation physiology in free-ranging dwarf lemurs. (Article) Molecular Ecology, 27 (3), pp. 709-722, 2018. (Abstract | Links | BibTeX | Tags: hibernation, lemur, metabolism, RNA-Seq, torpor, transcriptomics)
@comment{The author field holds plain names only. The hash and asterisk footnote markers shown in the listing line above are not part of any name and would corrupt name parsing and sorting. NOTE(review): the meaning of those markers is not stated in this file; they are preserved only in the listing line.}
@article{SL2018,
  author   = {Sheena L. Faherty and José Luis Villanueva-Cañas and Marina B. Blanco and M. Mar Albà and Anne D. Yoder},
  title    = {Transcriptomics in the wild: Hibernation physiology in free-ranging dwarf lemurs},
  journal  = {Molecular Ecology},
  volume   = {27},
  number   = {3},
  pages    = {709--722},
  year     = {2018},
  date     = {2018-01-29},
  doi      = {10.1111/mec.14483},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1111/mec.14483},
  abstract = {Hibernation is an adaptive strategy some mammals use to survive highly seasonal or unpredictable environments. We present the first investigation on the transcriptomics of hibernation in a natural population of primate hibernators: Crossley's dwarf lemurs (Cheirogaleus crossleyi). Using capture-mark-recapture techniques to track the same animals over a period of 7 months in Madagascar, we used RNA-seq to compare gene expression profiles in white adipose tissue (WAT) during three distinct physiological states. We focus on pathway analysis to assess the biological significance of transcriptional changes in dwarf lemur WAT and, by comparing and contrasting what is known in other model hibernating species, contribute to a broader understanding of genomic contributions of hibernation across Mammalia. The hibernation signature is characterized by a suppression of lipid biosynthesis, pyruvate metabolism and mitochondrial-associated functions, and an accumulation of transcripts encoding ribosomal components and iron-storage proteins. The data support a key role of pyruvate dehydrogenase kinase isoenzyme 4 (PDK4) in regulating the shift in fuel economy during periods of severe food deprivation. This pattern of PDK4 holds true across representative hibernating species from disparate mammalian groups, suggesting that the genetic underpinnings of hibernation may be ancestral to mammals.},
  keywords = {hibernation, lemur, metabolism, RNA-Seq, torpor, transcriptomics},
}
Hibernation is an adaptive strategy some mammals use to survive highly seasonal or unpredictable environments. We present the first investigation on the transcriptomics of hibernation in a natural population of primate hibernators: Crossley's dwarf lemurs (Cheirogaleus crossleyi). Using capture-mark-recapture techniques to track the same animals over a period of 7 months in Madagascar, we used RNA-seq to compare gene expression profiles in white adipose tissue (WAT) during three distinct physiological states. We focus on pathway analysis to assess the biological significance of transcriptional changes in dwarf lemur WAT and, by comparing and contrasting what is known in other model hibernating species, contribute to a broader understanding of genomic contributions of hibernation across Mammalia. The hibernation signature is characterized by a suppression of lipid biosynthesis, pyruvate metabolism and mitochondrial-associated functions, and an accumulation of transcripts encoding ribosomal components and iron-storage proteins. The data support a key role of pyruvate dehydrogenase kinase isoenzyme 4 (PDK4) in regulating the shift in fuel economy during periods of severe food deprivation. This pattern of PDK4 holds true across representative hibernating species from disparate mammalian groups, suggesting that the genetic underpinnings of hibernation may be ancestral to mammals. |
2017 |
Subirana JA, Messeguer X Genes, 8 (12), pp. E351, 2017. (Links | BibTeX | Tags: C.elegans, Minisatellite Repeats) @article{X2017, title = {Evolution of Tandem Repeat Satellite Sequences in Two Closely Related Caenorhabditis Species. Diminution of Satellites in Hermaphrodites.}, author = {Subirana JA, Messeguer X}, abstract = {The availability of the genome sequence of the unisexual (male-female) Caenorhabditis nigoni offers an opportunity to compare its non-coding features with the related hermaphroditic species Caenorhabditis briggsae; to understand the evolutionary dynamics of their tandem repeat sequences (satellites), as a result of evolution from the unisexual ancestor. We take advantage of the previously developed SATFIND program to build satellite families defined by a consensus sequence. The relative number of satellites (satellites/Mb) in C. nigoni is 24.6% larger than in C. briggsae. Some satellites in C. nigoni have developed from a proto-repeat present in the ancestor species and are conserved as an isolated sequence in C. briggsae. We also identify unique satellites which occur only once and joint satellite families with a related sequence in both species. Some of these families are only found in C. nigoni, which indicates a recent appearance; they contain conserved adjacent 5' and 3' regions, which may favor transposition. Our results show that the number, length and turnover of satellites are restricted in the hermaphrodite C. briggsae when compared with the unisexual C. nigoni. We hypothesize that this results from differences in unequal recombination during meiotic chromosome pairing, which limits satellite turnover in hermaphrodites.}, year = {2017}, date = {2017-11-28}, journal = {Genes}, volume = {8}, number = {12}, pages = {E351}, keywords = {C.elegans, Minisatellite Repeats} } |
M.Mar Albà Zinc-finger domains in metazoans: evolution gone wild (Article) Genome Biology, 18 pp. 168, 2017. (Abstract | Links | BibTeX | Tags: Evolution, Zinc Fingers) @article{Albà2017b, title = {Zinc-finger domains in metazoans: evolution gone wild}, author = {M.Mar Albà}, url = {http://evolutionarygenomics.imim.es/group/wp-content/uploads/2017/10/Alba20172.pdf}, year = {2017}, date = {2017-09-06}, journal = {Genome Biology}, volume = {18}, pages = {168}, abstract = {A new study uncovers a potential mechanism that may allow zinc-finger domains in metazoans to recognize and bind virtually any DNA sequence.}, keywords = {Evolution, Zinc Fingers} } A new study uncovers a potential mechanism that may allow zinc-finger domains in metazoans to recognize and bind virtually any DNA sequence. |
José Luis Villanueva-Cañas, Jorge Ruiz-Orera, M.Isabel Agea, Maria Gallo, David Andreu, M.Mar Albà New genes and functional innovation in mammals (Article) Genome Biology and Evolution, 9 pp. 1886–1900, 2017. (Abstract | Links | BibTeX | Tags: de novo gene, innovation, mammal, protein function) @article{Albà2017, title = {New genes and functional innovation in mammals}, author = { José Luis Villanueva-Cañas, Jorge Ruiz-Orera, M.Isabel Agea, Maria Gallo, David Andreu, M.Mar Albà}, url = {https://academic.oup.com/gbe/article/doi/10.1093/gbe/evx136/3983271/New-genes-and-functional-innovation-in-mammals}, year = {2017}, date = {2017-07-21}, journal = {Genome Biology and Evolution}, volume = {9}, pages = { 1886–1900}, abstract = {The birth of genes that encode new protein sequences is a major source of evolutionary innovation. However, we still understand relatively little about how these genes come into being and which functions they are selected for. To address these questions, we have obtained a large collection of mammalian-specific gene families that lack homologues in other eukaryotic groups. We have combined gene annotations and de novo transcript assemblies from 30 different mammalian species, obtaining ∼6,000 gene families. In general, the proteins in mammalian-specific gene families tend to be short and depleted in aromatic and negatively charged residues. Proteins which arose early in mammalian evolution include milk and skin polypeptides, immune response components, and proteins involved in reproduction. In contrast, the functions of proteins which have a more recent origin remain largely unknown, despite the fact that these proteins also have extensive proteomics support. We identify several previously described cases of genes originated de novo from noncoding genomic regions, supporting the idea that this mechanism frequently underlies the evolution of new protein-coding genes in mammals. 
Finally, we show that most young mammalian genes are preferentially expressed in testis, suggesting that sexual selection plays an important role in the emergence of new functional genes.}, keywords = {de novo gene, innovation, mammal, protein function} } The birth of genes that encode new protein sequences is a major source of evolutionary innovation. However, we still understand relatively little about how these genes come into being and which functions they are selected for. To address these questions, we have obtained a large collection of mammalian-specific gene families that lack homologues in other eukaryotic groups. We have combined gene annotations and de novo transcript assemblies from 30 different mammalian species, obtaining ∼6,000 gene families. In general, the proteins in mammalian-specific gene families tend to be short and depleted in aromatic and negatively charged residues. Proteins which arose early in mammalian evolution include milk and skin polypeptides, immune response components, and proteins involved in reproduction. In contrast, the functions of proteins which have a more recent origin remain largely unknown, despite the fact that these proteins also have extensive proteomics support. We identify several previously described cases of genes originated de novo from noncoding genomic regions, supporting the idea that this mechanism frequently underlies the evolution of new protein-coding genes in mammals. Finally, we show that most young mammalian genes are preferentially expressed in testis, suggesting that sexual selection plays an important role in the emergence of new functional genes. |
Jorge Ruiz-Orera, José Luis Villanueva-Cañas, William Blevins, M.Mar Albà De novo gene evolution: How do we transition from non-coding to coding? (Conference) PeerJ preprints 5 (e3031v2), 2017, (The SMBE 2017 Collection). (Abstract | Links | BibTeX | Tags: de novo gene, long non-coding RNA, Ribo-Seq, ribosome profiling) @conference{Ruiz-Orera2017, title = {De novo gene evolution: How do we transition from non-coding to coding?}, author = {Jorge Ruiz-Orera, José Luis Villanueva-Cañas, William Blevins, M.Mar Albà}, url = {https://doi.org/10.7287/peerj.preprints.3031v2}, year = {2017}, date = {2017-06-28}, journal = {PeerJ Preprints}, volume = {PeerJ preprints 5}, number = {e3031v2}, abstract = {Recent years have witnessed the discovery of protein–coding genes which appear to have evolved de novo from previously non-coding sequences. This has changed the long-standing view that coding sequences can only evolve from other coding sequences. However, there are still many open questions regarding how new protein-coding sequences can arise from non-genic DNA. Two prerequisites for the birth of a new functional protein-coding gene are that the corresponding DNA fragment is transcribed and that it is also translated. Transcription is known to be pervasive in the genome, producing a large number of transcripts that do not correspond to conserved protein-coding genes, and which are usually annotated as long non-coding RNAs (lncRNA). Recently, sequencing of ribosome protected fragments (Ribo-Seq) has provided evidence that many of these transcripts actually translate small proteins. We have used mouse non-synonymous and synonymous variation data to estimate the strength of purifying selection acting on the translated open reading frames (ORFs). Whereas a subset of the lncRNAs are likely to actually be true protein-coding genes (and thus previously misclassified), the bulk of lncRNAs code for proteins which show variation patterns consistent with neutral evolution. 
We also show that the ORFs that have a more favorable, coding-like, sequence composition are more likely to be translated than other ORFs in lncRNAs. This study provides strong evidence that there is a large and ever-changing reservoir of lowly abundant proteins; some of these peptides may become useful and act as seeds for de novo gene evolution.}, note = {The SMBE 2017 Collection}, keywords = {de novo gene, long non-coding RNA, Ribo-Seq, ribosome profiling} } Recent years have witnessed the discovery of protein–coding genes which appear to have evolved de novo from previously non-coding sequences. This has changed the long-standing view that coding sequences can only evolve from other coding sequences. However, there are still many open questions regarding how new protein-coding sequences can arise from non-genic DNA. Two prerequisites for the birth of a new functional protein-coding gene are that the corresponding DNA fragment is transcribed and that it is also translated. Transcription is known to be pervasive in the genome, producing a large number of transcripts that do not correspond to conserved protein-coding genes, and which are usually annotated as long non-coding RNAs (lncRNA). Recently, sequencing of ribosome protected fragments (Ribo-Seq) has provided evidence that many of these transcripts actually translate small proteins. We have used mouse non-synonymous and synonymous variation data to estimate the strength of purifying selection acting on the translated open reading frames (ORFs). Whereas a subset of the lncRNAs are likely to actually be true protein-coding genes (and thus previously misclassified), the bulk of lncRNAs code for proteins which show variation patterns consistent with neutral evolution. We also show that the ORFs that have a more favorable, coding-like, sequence composition are more likely to be translated than other ORFs in lncRNAs. 
This study provides strong evidence that there is a large and ever-changing reservoir of lowly abundant proteins; some of these peptides may become useful and act as seeds for de novo gene evolution. |
William Blevins, M.Mar Albà, Lucas Carey Comparative transcriptomics and ribo-seq: Looking at de novo gene emergence in Saccharomycotina (Conference) PeerJ preprints 5 (e3030v1), 2017, (The SMBE 2017 Collection). (Abstract | BibTeX | Tags: de novo gene, yeast) @conference{Blevins2017, title = {Comparative transcriptomics and ribo-seq: Looking at de novo gene emergence in Saccharomycotina}, author = {William Blevins, M.Mar Albà, Lucas Carey}, year = {2017}, date = {2017-06-20}, journal = {PeerJ Preprints}, volume = {PeerJ preprints 5}, number = {e3030v1}, abstract = { In de novo gene emergence, a segment of non-coding DNA undergoes a series of changes which enables transcription, potentially leading to a new protein that could eventually acquire a novel function. Due to their recent origins, young de novo genes have no homology with other genes. Furthermore, de novo genes may not initially be under the same selective constraints as other genes. Dozens of de novo genes have recently been identified in many diverse species; however, the mechanisms leading to their appearance are not yet well understood. To study this phenomenon, we have performed deep RNA sequencing (RNA-seq) on 11 species of yeast from the phylum of Ascomycota in both rich media and oxidative stress conditions. Furthermore, we performed ribosome profiling (Ribo-seq) experiments in both conditions with S. cerevisiae. These data have been used to classify the conservation of genes at different depths in the yeast phylogeny. Hundreds of genes in each species were novel (unannotated), and many were identified as putative de novo genes; these candidates were then tested for signals of translation using our Ribo-seq data. We show that putative de novo genes have different properties relative to phylogenetically conserved genes. This comparative phylotranscriptomic analysis advances our understanding of de novo gene origins. 
}, note = {The SMBE 2017 Collection}, keywords = {de novo gene, yeast} } In de novo gene emergence, a segment of non-coding DNA undergoes a series of changes which enables transcription, potentially leading to a new protein that could eventually acquire a novel function. Due to their recent origins, young de novo genes have no homology with other genes. Furthermore, de novo genes may not initially be under the same selective constraints as other genes. Dozens of de novo genes have recently been identified in many diverse species; however, the mechanisms leading to their appearance are not yet well understood. To study this phenomenon, we have performed deep RNA sequencing (RNA-seq) on 11 species of yeast from the phylum of Ascomycota in both rich media and oxidative stress conditions. Furthermore, we performed ribosome profiling (Ribo-seq) experiments in both conditions with S. cerevisiae. These data have been used to classify the conservation of genes at different depths in the yeast phylogeny. Hundreds of genes in each species were novel (unannotated), and many were identified as putative de novo genes; these candidates were then tested for signals of translation using our Ribo-seq data. We show that putative de novo genes have different properties relative to phylogenetically conserved genes. This comparative phylotranscriptomic analysis advances our understanding of de novo gene origins. |
Tomislav Domazet-Lošo, Anne-Ruxandra Carvunis, M.Mar Albà, Martin Sebastijan Šestak, Robert Bakarić, Rafik Neme, Diethard Tautz Molecular Biology and Evolution, doi: 10.1093/molbev/msw284 2017. (Abstract | Links | BibTeX | Tags: BLAST, de novo gene, Homology, Sequence Analysis) @article{Domazet-Lošo2017, title = {No evidence for phylostratigraphic bias impacting inferences on patterns of gene emergence and evolution}, author = {Tomislav Domazet-Lošo, Anne-Ruxandra Carvunis, M.Mar Albà, Martin Sebastijan Šestak, Robert Bakarić, Rafik Neme, Diethard Tautz}, url = {http://mbe.oxfordjournals.org/content/early/2017/01/10/molbev.msw284.abstract}, year = {2017}, date = {2017-01-12}, journal = {Molecular Biology and Evolution}, volume = {doi: 10.1093/molbev/msw284}, abstract = {Phylostratigraphy is a computational framework for dating the emergence of DNA and protein sequences in a phylogeny. It has been extensively applied to make inferences on patterns of genome evolution, including patterns of disease gene evolution, ontogeny and de novo gene origination. Phylostratigraphy typically relies on BLAST searches along a species tree, but new simulation studies have raised concerns about the ability of BLAST to detect remote homologues and its impact on phylostratigraphic inferences. Here, we re-assessed these simulations. We found that, even with a possible overall BLAST false negative rate between 11-15%, the large majority of sequences assigned to a recent evolutionary origin by phylostratigraphy is unaffected by technical concerns about BLAST. Where the results of the simulations did cast doubt on previously reported findings, we repeated the original analyses but now excluded all questionable sequences. The originally described patterns remained essentially unchanged. 
These new analyses strongly support phylostratigraphic inferences, including: genes that emerged after the origin of eukaryotes are more likely to be expressed in the ectoderm than in the endoderm or mesoderm in Drosophila, and the de novo emergence of protein-coding genes from non-genic sequences occurs through proto-gene intermediates in yeast. We conclude that BLAST is an appropriate and sufficiently sensitive tool in phylostratigraphic analysis that does not appear to introduce significant biases into evolutionary pattern inferences. }, keywords = {BLAST, de novo gene, Homology, Sequence Analysis} } Phylostratigraphy is a computational framework for dating the emergence of DNA and protein sequences in a phylogeny. It has been extensively applied to make inferences on patterns of genome evolution, including patterns of disease gene evolution, ontogeny and de novo gene origination. Phylostratigraphy typically relies on BLAST searches along a species tree, but new simulation studies have raised concerns about the ability of BLAST to detect remote homologues and its impact on phylostratigraphic inferences. Here, we re-assessed these simulations. We found that, even with a possible overall BLAST false negative rate between 11-15%, the large majority of sequences assigned to a recent evolutionary origin by phylostratigraphy is unaffected by technical concerns about BLAST. Where the results of the simulations did cast doubt on previously reported findings, we repeated the original analyses but now excluded all questionable sequences. The originally described patterns remained essentially unchanged. These new analyses strongly support phylostratigraphic inferences, including: genes that emerged after the origin of eukaryotes are more likely to be expressed in the ectoderm than in the endoderm or mesoderm in Drosophila, and the de novo emergence of protein-coding genes from non-genic sequences occurs through proto-gene intermediates in yeast. 
We conclude that BLAST is an appropriate and sufficiently sensitive tool in phylostratigraphic analysis that does not appear to introduce significant biases into evolutionary pattern inferences. |
2016 |
F Abascal*, A Corvelo*, F Cruz*, J L Villanueva-Cañas, A Vlasova, M Marcet-Houben, B Martínez-Cruz, JY Cheng, P Prieto, V Quesada, J Quilez, G Li, F García, M Rubio-Camarillo, L Frias, P Ribeca, S Capella-Gutiérrez, JM Rodríguez, F Câmara, E Lowy, L Cozzuto, I Erb, ML Tress, JL Rodriguez-Ales, J Ruiz-Orera, (11 more), A Valencia, I Gut, JL García, R Guigó, WJ Murphy, A Ruiz-Herrera, T Marques-Bonet, G Roma, C Notredame, T Mailund, MM Albà, T Gabaldón, T Alioto, JA Godoy Genome Biology, 17 pp. 251, 2016. (Abstract | Links | BibTeX | Tags: adaptive evolution, lynx) @article{Abascal*2016, title = {Extreme genomic erosion after recurrent demographic bottlenecks in the highly endangered Iberian lynx}, author = {F Abascal*, A Corvelo*, F Cruz*, J L Villanueva-Cañas, A Vlasova, M Marcet-Houben, B Martínez-Cruz, JY Cheng, P Prieto, V Quesada, J Quilez, G Li, F García, M Rubio-Camarillo, L Frias, P Ribeca, S Capella-Gutiérrez, JM Rodríguez, F Câmara, E Lowy, L Cozzuto, I Erb, ML Tress, JL Rodriguez-Ales, J Ruiz-Orera, (11 more), A Valencia, I Gut, JL García, R Guigó, WJ Murphy, A Ruiz-Herrera, T Marques-Bonet, G Roma, C Notredame, T Mailund, MM Albà, T Gabaldón, T Alioto and JA Godoy}, url = {http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1090-1}, year = {2016}, date = {2016-12-14}, journal = {Genome Biology}, volume = {17}, pages = {251}, abstract = {Background Genomic studies of endangered species provide insights into their evolution and demographic history, reveal patterns of genomic erosion that might limit their viability, and offer tools for their effective conservation. The Iberian lynx (Lynx pardinus) is the most endangered felid and a unique example of a species on the brink of extinction. Results We generate the first annotated draft of the Iberian lynx genome and carry out genome-based analyses of lynx demography, evolution, and population genetics. 
We identify a series of severe population bottlenecks in the history of the Iberian lynx that predate its known demographic decline during the 20th century and have greatly impacted its genome evolution. We observe drastically reduced rates of weak-to-strong substitutions associated with GC-biased gene conversion and increased rates of fixation of transposable elements. We also find multiple signatures of genetic erosion in the two remnant Iberian lynx populations, including a high frequency of potentially deleterious variants and substitutions, as well as the lowest genome-wide genetic diversity reported so far in any species. Conclusions The genomic features observed in the Iberian lynx genome may hamper short- and long-term viability through reduced fitness and adaptive potential. The knowledge and resources developed in this study will boost the research on felid evolution and conservation genomics and will benefit the ongoing conservation and management of this emblematic species. }, keywords = {adaptive evolution, lynx} } Background Genomic studies of endangered species provide insights into their evolution and demographic history, reveal patterns of genomic erosion that might limit their viability, and offer tools for their effective conservation. The Iberian lynx (Lynx pardinus) is the most endangered felid and a unique example of a species on the brink of extinction. Results We generate the first annotated draft of the Iberian lynx genome and carry out genome-based analyses of lynx demography, evolution, and population genetics. We identify a series of severe population bottlenecks in the history of the Iberian lynx that predate its known demographic decline during the 20th century and have greatly impacted its genome evolution. We observe drastically reduced rates of weak-to-strong substitutions associated with GC-biased gene conversion and increased rates of fixation of transposable elements. 
We also find multiple signatures of genetic erosion in the two remnant Iberian lynx populations, including a high frequency of potentially deleterious variants and substitutions, as well as the lowest genome-wide genetic diversity reported so far in any species. Conclusions The genomic features observed in the Iberian lynx genome may hamper short- and long-term viability through reduced fitness and adaptive potential. The knowledge and resources developed in this study will boost the research on felid evolution and conservation genomics and will benefit the ongoing conservation and management of this emblematic species. |
José Luis Villanueva-Cañas, Jorge Ruiz-Orera, Isabel Agea, Maria Gallo, David Andreu, M.Mar Albà New genes and functional innovation in mammals (Article) bioRxiv, 2016. (Abstract | Links | BibTeX | Tags: adaptation, de novo genes, Evolution, gene family, mammal) @article{Alba2016, title = {New genes and functional innovation in mammals}, author = {José Luis Villanueva-Cañas, Jorge Ruiz-Orera, Isabel Agea, Maria Gallo, David Andreu, M.Mar Albà}, url = {http://dx.doi.org/10.1101/090860}, year = {2016}, date = {2016-12-02}, journal = {bioRxiv}, abstract = {The birth of genes that encode new proteins is a major source of evolutionary innovation. However, we still understand relatively little about how these genes come into being and which functions they are selected for. Here we address this question by generating a comprehensive list of mammalian-specific gene families originated at different times during mammalian evolution. We combine gene annotations and de novo transcript assemblies from 30 mammalian species, obtaining about 6,000 families with different species composition. We show that the families which arose early in mammalian evolution (basal) are relatively well-characterized. They are enriched in secreted proteins and include milk and skin polypeptides, immune response components and, proteins involved in spermatogenesis. In contrast, there is a severe lack of knowledge about the functions of proteins which have a more recent origin in certain mammalian groups (young), despite the fact that they have extensive proteomics support. Interestingly, we find that both young and basal mammalian-specific gene families show similar gene expression biases, with a marked enrichment in testis. Proteins from both groups tend to be short and depleted in aromatic and negatively charged residues. This indicates shared mechanisms of formation and suggests that the youngest proteins may have been retained for similar kinds of functions as the oldest ones. 
We identify several previously described cases of genes originated de novo from non-coding genomic regions, supporting the idea that this mechanism frequently underlies the evolution of new protein-coding genes. The catalogue of gene families generated here provides a unique resource for studies on the role of new genes in mammalian-specific adaptations.}, keywords = {adaptation, de novo genes, Evolution, gene family, mammal} } The birth of genes that encode new proteins is a major source of evolutionary innovation. However, we still understand relatively little about how these genes come into being and which functions they are selected for. Here we address this question by generating a comprehensive list of mammalian-specific gene families originated at different times during mammalian evolution. We combine gene annotations and de novo transcript assemblies from 30 mammalian species, obtaining about 6,000 families with different species composition. We show that the families which arose early in mammalian evolution (basal) are relatively well-characterized. They are enriched in secreted proteins and include milk and skin polypeptides, immune response components and, proteins involved in spermatogenesis. In contrast, there is a severe lack of knowledge about the functions of proteins which have a more recent origin in certain mammalian groups (young), despite the fact that they have extensive proteomics support. Interestingly, we find that both young and basal mammalian-specific gene families show similar gene expression biases, with a marked enrichment in testis. Proteins from both groups tend to be short and depleted in aromatic and negatively charged residues. This indicates shared mechanisms of formation and suggests that the youngest proteins may have been retained for similar kinds of functions as the oldest ones. 
We identify several previously described cases of genes originated de novo from non-coding genomic regions, supporting the idea that this mechanism frequently underlies the evolution of new protein-coding genes. The catalogue of gene families generated here provides a unique resource for studies on the role of new genes in mammalian-specific adaptations. |
Jorge Ruiz-Orera, Pol Verdaguer-Grau, José Luis Villanueva-Cañas, Xavier Messeguer, M Mar Albà Functional and non-functional classes of peptides produced by long non-coding RNAs (Article) bioRxiv, 2016, ISBN: http://dx.doi.org/10.1101/064915 . (Abstract | Links | BibTeX | Tags: long non-coding RNA, micropeptide, mouse, ribosome profiling, smORF, translation) @article{Ruiz-Orera2016, title = {Functional and non-functional classes of peptides produced by long non-coding RNAs}, author = {Jorge Ruiz-Orera, Pol Verdaguer-Grau, José Luis Villanueva-Cañas, Xavier Messeguer, M Mar Albà}, url = {http://biorxiv.org/content/early/2016/07/21/064915}, isbn = {http://dx.doi.org/10.1101/064915 }, year = {2016}, date = {2016-07-21}, journal = {bioRxiv}, abstract = {Cells express thousands of transcripts that show weak coding potential. Known as long non-coding RNAs (lncRNAs), they typically contain short open reading frames (ORFs) having no homology with known proteins. Recent studies have reported that a significant proportion of lncRNAs are translated, challenging the view that they are essentially non-coding. These results are based on the selective sequencing of ribosome-protected fragments, or ribosome profiling. The present study used ribosome profiling data from eight mouse tissues and cell types, combined with ~330,000 synonymous and non-synonymous single nucleotide variants, to dissect the biological implications of lncRNA translation. Using the three-nucleotide read periodicity that characterizes actively translated regions, we found that about 23% of the transcribed lncRNAs was translated (1,365 out of 6,390). About one fourth of the translated sequences (350 lncRNAs) showed conservation in humans; this is likely to produce functional micropeptides, including the recently discovered myoregulin. For other lncRNAs, the ORF codon usage bias distinguishes between two classes. 
The first has significant coding scores and contains functional proteins which are not conserved in humans. The second large class, comprising >500 lncRNAs, produces proteins that show no significant purifying selection signatures. We showed that the neutral translation of these lncRNAs depends on the transcript expression level and the chance occurrence of ORFs with a favorable codon composition. This provides the first evidence to date that many lncRNAs produce non-functional proteins.}, keywords = {long non-coding RNA, micropeptide, mouse, ribosome profiling, smORF, translation} } Cells express thousands of transcripts that show weak coding potential. Known as long non-coding RNAs (lncRNAs), they typically contain short open reading frames (ORFs) having no homology with known proteins. Recent studies have reported that a significant proportion of lncRNAs are translated, challenging the view that they are essentially non-coding. These results are based on the selective sequencing of ribosome-protected fragments, or ribosome profiling. The present study used ribosome profiling data from eight mouse tissues and cell types, combined with ~330,000 synonymous and non-synonymous single nucleotide variants, to dissect the biological implications of lncRNA translation. Using the three-nucleotide read periodicity that characterizes actively translated regions, we found that about 23% of the transcribed lncRNAs was translated (1,365 out of 6,390). About one fourth of the translated sequences (350 lncRNAs) showed conservation in humans; this is likely to produce functional micropeptides, including the recently discovered myoregulin. For other lncRNAs, the ORF codon usage bias distinguishes between two classes. The first has significant coding scores and contains functional proteins which are not conserved in humans. The second large class, comprising >500 lncRNAs, produces proteins that show no significant purifying selection signatures. 
We showed that the neutral translation of these lncRNAs depends on the transcript expression level and the chance occurrence of ORFs with a favorable codon composition. This provides the first evidence to date that many lncRNAs produce non-functional proteins. |
Sheena L. Faherty, José Luis Villanueva-Cañas, Peter H. Klopfer, M. Mar Albà, Anne D. Yoder Gene expression profiling in the hibernating primate, Cheirogaleus medius (Article) Genome Biology and Evolution, advance access (doi: 10.1093/gbe/evw163) 2016. (Links | BibTeX | Tags: differential gene expression, hibernation, primate, RNA-Seq, transcriptomics) @article{Faherty2016, title = {Gene expression profiling in the hibernating primate, Cheirogaleus medius}, author = {Sheena L. Faherty, José Luis Villanueva-Cañas, Peter H. Klopfer, M. Mar Albà and Anne D. Yoder}, url = {http://gbe.oxfordjournals.org/content/early/2016/07/11/gbe.evw163.abstract}, year = {2016}, date = {2016-07-12}, journal = {Genome Biology and Evolution}, volume = {advance access (doi: 10.1093/gbe/evw163)}, keywords = {differential gene expression, hibernation, primate, RNA-Seq, transcriptomics} } |
Vartia S, Villanueva-Cañas JL, Finarelli J, Farrell ED, Collins PC, Hughes GM, Carlsson JE, Gauthier DT, McGinnity P, Cross TF, FitzGerald RD, Mirimin L, Crispie F, Cotter PD, Carlsson J. A novel method of microsatellite genotyping-by-sequencing using individual combinatorial barcoding (Article) R Soc Open Sci, 3 (1), pp. 150565, 2016, ISBN: 10.1098/rsos.150565. (Abstract | Links | BibTeX | Tags: barcoding, Evolution, microsatellite, sequencing) @article{S2016, title = {A novel method of microsatellite genotyping-by-sequencing using individual combinatorial barcoding}, author = {Vartia S, Villanueva-Cañas JL, Finarelli J, Farrell ED, Collins PC, Hughes GM, Carlsson JE, Gauthier DT, McGinnity P, Cross TF, FitzGerald RD, Mirimin L, Crispie F, Cotter PD, Carlsson J.}, url = {http://www.ncbi.nlm.nih.gov/pubmed/26909185}, isbn = {10.1098/rsos.150565}, year = {2016}, date = {2016-01-20}, journal = {R Soc Open Sci}, volume = {3}, number = {1}, pages = {150565}, abstract = {This study examines the potential of next-generation sequencing based 'genotyping-by-sequencing' (GBS) of microsatellite loci for rapid and cost-effective genotyping in large-scale population genetic studies. The recovery of individual genotypes from large sequence pools was achieved by PCR-incorporated combinatorial barcoding using universal primers. Three experimental conditions were employed to explore the possibility of using this approach with existing and novel multiplex marker panels and weighted amplicon mixture. The GBS approach was validated against microsatellite data generated by capillary electrophoresis. GBS allows access to the underlying nucleotide sequences that can reveal homoplasy, even in large datasets and facilitates cross laboratory transfer. GBS of microsatellites, using individual combinatorial barcoding, is potentially faster and cheaper than current microsatellite approaches and offers better and more data. 
}, keywords = {barcoding, Evolution, microsatellite, sequencing} } This study examines the potential of next-generation sequencing based 'genotyping-by-sequencing' (GBS) of microsatellite loci for rapid and cost-effective genotyping in large-scale population genetic studies. The recovery of individual genotypes from large sequence pools was achieved by PCR-incorporated combinatorial barcoding using universal primers. Three experimental conditions were employed to explore the possibility of using this approach with existing and novel multiplex marker panels and weighted amplicon mixture. The GBS approach was validated against microsatellite data generated by capillary electrophoresis. GBS allows access to the underlying nucleotide sequences that can reveal homoplasy, even in large datasets and facilitates cross laboratory transfer. GBS of microsatellites, using individual combinatorial barcoding, is potentially faster and cheaper than current microsatellite approaches and offers better and more data. |
2015 |
Ruiz-Orera, Jorge, Hernandez-Rodriguez, Jessica, Chiva, Cristina, Sabidó, Eduard, Kondova, Ivanela, Bontrop, Ronald, Marqués-Bonet, Tomàs, Albà, M.Mar Origins of de novo genes in human and chimpanzee (Article) Plos Genetics, 11 (12), pp. e1005721, 2015. (Links | BibTeX | Tags: chimpanzee, de novo gene, Evolution, Humans, lncRNA, Promoter, proteomics, ribosome profiling, RNA-Seq, transcription factor binding site, transcriptomics) @article{Ruiz-Orera2015b, title = {Origins of de novo genes in human and chimpanzee}, author = {Ruiz-Orera, Jorge and Hernandez-Rodriguez, Jessica and Chiva, Cristina and Sabidó, Eduard and Kondova, Ivanela and Bontrop, Ronald and Marqués-Bonet, Tomàs and Albà, M. Mar}, url = {http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1005721}, doi = {10.1371/journal.pgen.1005721}, year = {2015}, date = {2015-12-31}, journal = {PLOS Genetics}, volume = {11}, number = {12}, pages = {e1005721}, keywords = {chimpanzee, de novo gene, Evolution, Humans, lncRNA, Promoter, proteomics, ribosome profiling, RNA-Seq, transcription factor binding site, transcriptomics} } |
van Dijk, David, Dhar, Riddhiman, Missarova, Alsu M, Espinar, Lorena, Blevins, William R, Lehner, Ben, Carey, Lucas B Nature Communications, 6 pp. 7972, 2015, ISBN: 10.1038/ncomms8972. (Links | BibTeX | Tags: RNA polymerase error rate, transcriptomics, yeast) @article{vanD2015, title = {Slow-growing cells within isogenic populations have increased {RNA} polymerase error rates and {DNA} damage}, author = {van Dijk, David and Dhar, Riddhiman and Missarova, Alsu M and Espinar, Lorena and Blevins, William R and Lehner, Ben and Carey, Lucas B}, url = {http://www.nature.com/ncomms/2015/150813/ncomms8972/full/ncomms8972.html}, doi = {10.1038/ncomms8972}, year = {2015}, date = {2015-08-13}, journal = {Nature Communications}, volume = {6}, pages = {7972}, keywords = {RNA polymerase error rate, transcriptomics, yeast} } |
Baldo, Laura, Riera, Joan Lluís, Tooming-Klunderud, Ave, Albà, M Mar, Salzburger, Walter Gut Microbiota Dynamics during Dietary Shift in Eastern African Cichlid Fishes. (Article) PloS one, 10 (5), pp. e0127462, 2015, ISSN: 1932-6203. (Abstract | Links | BibTeX | Tags: cichlid fishes, diet, transcriptomics) @article{Baldo2015, title = {Gut Microbiota Dynamics during Dietary Shift in {Eastern African} Cichlid Fishes}, author = {Baldo, Laura and Riera, Joan Lluís and Tooming-Klunderud, Ave and Albà, M Mar and Salzburger, Walter}, url = {http://www.ncbi.nlm.nih.gov/pubmed/25978452}, issn = {1932-6203}, year = {2015}, date = {2015-01-01}, journal = {PLoS ONE}, volume = {10}, number = {5}, pages = {e0127462}, abstract = {The gut microbiota structure reflects both a host phylogenetic history and a signature of adaptation to the host ecological, mainly trophic niches. African cichlid fishes, with their array of closely related species that underwent a rapid dietary niche radiation, offer a particularly interesting system to explore the relative contribution of these two factors in nature. Here we surveyed the host intra- and interspecific natural variation of the gut microbiota of five cichlid species from the monophyletic tribe Perissodini of lake Tanganyika, whose members transitioned from being zooplanktivorous to feeding primarily on fish scales. The outgroup riverine species Astatotilapia burtoni, largely omnivorous, was also included in the study. Fusobacteria, Firmicutes and Proteobacteria represented the dominant components in the gut microbiota of all 30 specimens analysed according to two distinct 16S rRNA markers. All members of the Perissodini tribe showed a homogenous pattern of microbial alpha and beta diversities, with no significant qualitative differences, despite changes in diet. 
The recent diet shift between zooplantkon- and scale-eaters simply reflects on a significant enrichment of Clostridium taxa in scale-eaters where they might be involved in the scale metabolism. Comparison with the omnivorous species A. burtoni suggests that, with increased host phylogenetic distance and/or increasing herbivory, the gut microbiota begins differentiating also at qualitative level. The cichlids show presence of a large conserved core of taxa and a small set of core OTUs (average 13-15\%), remarkably stable also in captivity, and putatively favoured by both restricted microbial transmission among related hosts (putatively enhanced by mouthbrooding behavior) and common host constraints. This study sets the basis for a future large-scale investigation of the gut microbiota of cichlids and its adaptation in the process of the host adaptive radiation.}, keywords = {cichlid fishes, diet, transcriptomics} } The gut microbiota structure reflects both a host phylogenetic history and a signature of adaptation to the host ecological, mainly trophic niches. African cichlid fishes, with their array of closely related species that underwent a rapid dietary niche radiation, offer a particularly interesting system to explore the relative contribution of these two factors in nature. Here we surveyed the host intra- and interspecific natural variation of the gut microbiota of five cichlid species from the monophyletic tribe Perissodini of lake Tanganyika, whose members transitioned from being zooplanktivorous to feeding primarily on fish scales. The outgroup riverine species Astatotilapia burtoni, largely omnivorous, was also included in the study. Fusobacteria, Firmicutes and Proteobacteria represented the dominant components in the gut microbiota of all 30 specimens analysed according to two distinct 16S rRNA markers. 
All members of the Perissodini tribe showed a homogenous pattern of microbial alpha and beta diversities, with no significant qualitative differences, despite changes in diet. The recent diet shift between zooplantkon- and scale-eaters simply reflects on a significant enrichment of Clostridium taxa in scale-eaters where they might be involved in the scale metabolism. Comparison with the omnivorous species A. burtoni suggests that, with increased host phylogenetic distance and/or increasing herbivory, the gut microbiota begins differentiating also at qualitative level. The cichlids show presence of a large conserved core of taxa and a small set of core OTUs (average 13-15%), remarkably stable also in captivity, and putatively favoured by both restricted microbial transmission among related hosts (putatively enhanced by mouthbrooding behavior) and common host constraints. This study sets the basis for a future large-scale investigation of the gut microbiota of cichlids and its adaptation in the process of the host adaptive radiation. |
Radó-Trilla, Núria, Arató, Krisztina, Pegueroles, Cinta, Raya, Alicia, de la Luna, Susana, Albà, M Mar Molecular biology and evolution, 2015, ISSN: 1537-1719. (Abstract | Links | BibTeX | Tags: amino acid tandem repeat, Evolution, Gene Duplication, polyalanine, transcription factor, vertebrates) @article{Rado-Trilla2015, title = {Key role of amino acid repeat expansions in the functional diversification of duplicated transcription factors}, author = {Radó-Trilla, Núria and Arató, Krisztina and Pegueroles, Cinta and Raya, Alicia and de la Luna, Susana and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/25931513}, issn = {1537-1719}, year = {2015}, date = {2015-01-01}, journal = {Molecular Biology and Evolution}, abstract = {The high regulatory complexity of vertebrates has been related to two closely spaced whole genome duplications (2R-WGD) that occurred before the divergence of the major vertebrate groups. Following these events, many developmental transcription factors (TFs) were retained in multiple copies and subsequently specialized in diverse functions, whereas others reverted to their singleton state. TFs are known to be generally rich in amino acid repeats or low-complexity regions (LCRs), such as polyalanine or polyglutamine runs, which can evolve rapidly and potentially influence the transcriptional activity of the protein. Here we test the hypothesis that LCRs have played a major role in the diversification of TF gene duplicates. We find that nearly half of the TF gene families originated during the 2R-WGD contain LCRs. The number of gene duplicates with LCRs is 155 out of 550 analyzed (28\%), about twice as many as the number of single copy genes with LCRs (15 out of 115, 13\%). In addition, duplicated TFs preferentially accumulate certain LCR types, the most prominent of which are alanine repeats. We experimentally test the role of alanine-rich LCRs in two different TF gene families, PHOX2A/PHOX2B and LHX2/LHX9. 
In both cases, the presence of the alanine-rich LCR in one of the copies (PHOX2B and LHX2) significantly increases the capacity of the TF to activate transcription. Taken together, the results provide strong evidence that LCRs are important driving forces of evolutionary change in duplicated genes.}, keywords = {amino acid tandem repeat, Evolution, Gene Duplication, polyalanine, transcription factor, vertebrates} } The high regulatory complexity of vertebrates has been related to two closely spaced whole genome duplications (2R-WGD) that occurred before the divergence of the major vertebrate groups. Following these events, many developmental transcription factors (TFs) were retained in multiple copies and subsequently specialized in diverse functions, whereas others reverted to their singleton state. TFs are known to be generally rich in amino acid repeats or low-complexity regions (LCRs), such as polyalanine or polyglutamine runs, which can evolve rapidly and potentially influence the transcriptional activity of the protein. Here we test the hypothesis that LCRs have played a major role in the diversification of TF gene duplicates. We find that nearly half of the TF gene families originated during the 2R-WGD contain LCRs. The number of gene duplicates with LCRs is 155 out of 550 analyzed (28%), about twice as many as the number of single copy genes with LCRs (15 out of 115, 13%). In addition, duplicated TFs preferentially accumulate certain LCR types, the most prominent of which are alanine repeats. We experimentally test the role of alanine-rich LCRs in two different TF gene families, PHOX2A/PHOX2B and LHX2/LHX9. In both cases, the presence of the alanine-rich LCR in one of the copies (PHOX2B and LHX2) significantly increases the capacity of the TF to activate transcription. Taken together, the results provide strong evidence that LCRs are important driving forces of evolutionary change in duplicated genes. |
Subirana, Juan A., Albà, M. Mar, Messeguer, Xavier High evolutionary turnover of satellite families in Caenorhabditis (Article) BMC Evolutionary Biology, 15 (1), pp. 218, 2015, ISSN: 1471-2148. (Abstract | Links | BibTeX | Tags: Evolution, Repeats, satellite) @article{Subirana2015, title = {High evolutionary turnover of satellite families in {Caenorhabditis}}, author = {Subirana, Juan A. and Albà, M. Mar and Messeguer, Xavier}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4595182&tool=pmcentrez&rendertype=abstract}, issn = {1471-2148}, year = {2015}, date = {2015-01-01}, journal = {BMC Evolutionary Biology}, volume = {15}, number = {1}, pages = {218}, abstract = {BACKGROUND: The high density of tandem repeat sequences (satellites) in nematode genomes and the availability of genome sequences from several species in the group offer a unique opportunity to better understand the evolutionary dynamics and the functional role of these sequences. We take advantage of the previously developed SATFIND program to study the satellites in four Caenorhabditis species and investigate these questions. METHODS: The identification and comparison of satellites is carried out in three steps. First we find all the satellites present in each species with the SATFIND program. Each satellite is defined by its length, number of repeats, and repeat sequence. Only satellites with at least ten repeats are considered. In the second step we build satellite families with a newly developed alignment program. Satellite families are defined by a consensus sequence and the number of satellites in the family. Finally we compare the consensus sequence of satellite families in different species. RESULTS: We give a catalog of individual satellites in each species. We have also identified satellite families with a related sequence and compare them in different species. We analyze the turnover of satellites: they increased in size through duplications of fragments of 100--300 bases. 
It appears that in many cases they have undergone an explosive expansion. In C. elegans we have identified a subset of large satellites that have strong affinity for the centromere protein CENP-A. We have also compared our results with those obtained from other species, including one nematode and three mammals. CONCLUSIONS: Most satellite families found in Caenorhabditis are species-specific; in particular those with long repeats. A subset of these satellites may facilitate the formation of kinetochores in mitosis. Other satellite families in C. elegans are either related to Helitron transposons or to meiotic pairing centers.}, keywords = {Evolution, Repeats, satellite} } BACKGROUND: The high density of tandem repeat sequences (satellites) in nematode genomes and the availability of genome sequences from several species in the group offer a unique opportunity to better understand the evolutionary dynamics and the functional role of these sequences. We take advantage of the previously developed SATFIND program to study the satellites in four Caenorhabditis species and investigate these questions. METHODS: The identification and comparison of satellites is carried out in three steps. First we find all the satellites present in each species with the SATFIND program. Each satellite is defined by its length, number of repeats, and repeat sequence. Only satellites with at least ten repeats are considered. In the second step we build satellite families with a newly developed alignment program. Satellite families are defined by a consensus sequence and the number of satellites in the family. Finally we compare the consensus sequence of satellite families in different species. RESULTS: We give a catalog of individual satellites in each species. We have also identified satellite families with a related sequence and compare them in different species. We analyze the turnover of satellites: they increased in size through duplications of fragments of 100-300 bases. 
It appears that in many cases they have undergone an explosive expansion. In C. elegans we have identified a subset of large satellites that have strong affinity for the centromere protein CENP-A. We have also compared our results with those obtained from other species, including one nematode and three mammals. CONCLUSIONS: Most satellite families found in Caenorhabditis are species-specific; in particular those with long repeats. A subset of these satellites may facilitate the formation of kinetochores in mitosis. Other satellite families in C. elegans are either related to Helitron transposons or to meiotic pairing centers. |
2014 |
Villanueva-Cañas, José Luis, Faherty, Sheena L, Yoder, Anne D, Albà, M Mar Comparative Genomics of Mammalian Hibernators Using Gene Networks. (Article) Integrative and comparative biology, 2014, ISSN: 1557-7023. (Abstract | Links | BibTeX | Tags: ) @article{Villanueva-Canas2014, title = {Comparative Genomics of Mammalian Hibernators Using Gene Networks}, author = {Villanueva-Cañas, José Luis and Faherty, Sheena L and Yoder, Anne D and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/24881044}, issn = {1557-7023}, year = {2014}, date = {2014-01-01}, journal = {Integrative and Comparative Biology}, abstract = {In recent years, the study of the molecular processes involved in mammalian hibernation has shifted from investigating a few carefully selected candidate genes to large-scale analysis of differential gene expression. The availability of high-throughput data provides an unprecedented opportunity to ask whether phylogenetically distant species show similar mechanisms of genetic control, and how these relate to particular genes and pathways involved in the hibernation phenotype. In order to address these questions, we compare 11 datasets of differentially expressed (DE) genes from two ground squirrel species, one bat species, and the American black bear, as well as a list of genes extracted from the literature that previously have been correlated with the drastic physiological changes associated with hibernation. We identify several genes that are DE in different species, indicating either ancestral adaptations or evolutionary convergence. When we use a network approach to expand the original datasets of DE genes to large gene networks using available interactome data, a higher agreement between datasets is achieved. This indicates that the same key pathways are important for activating and maintaining the hibernation phenotype. 
Functional-term-enrichment analysis identifies several important metabolic and mitochondrial processes that are critical for hibernation, such as fatty acid beta-oxidation and mitochondrial transport. We do not detect any enrichment of positive selection signatures in the coding sequences of genes from the networks of hibernation-associated genes, supporting the hypothesis that the genetic processes shaping the hibernation phenotype are driven primarily by changes in gene regulation.} } In recent years, the study of the molecular processes involved in mammalian hibernation has shifted from investigating a few carefully selected candidate genes to large-scale analysis of differential gene expression. The availability of high-throughput data provides an unprecedented opportunity to ask whether phylogenetically distant species show similar mechanisms of genetic control, and how these relate to particular genes and pathways involved in the hibernation phenotype. In order to address these questions, we compare 11 datasets of differentially expressed (DE) genes from two ground squirrel species, one bat species, and the American black bear, as well as a list of genes extracted from the literature that previously have been correlated with the drastic physiological changes associated with hibernation. We identify several genes that are DE in different species, indicating either ancestral adaptations or evolutionary convergence. When we use a network approach to expand the original datasets of DE genes to large gene networks using available interactome data, a higher agreement between datasets is achieved. This indicates that the same key pathways are important for activating and maintaining the hibernation phenotype. Functional-term-enrichment analysis identifies several important metabolic and mitochondrial processes that are critical for hibernation, such as fatty acid beta-oxidation and mitochondrial transport. 
We do not detect any enrichment of positive selection signatures in the coding sequences of genes from the networks of hibernation-associated genes, supporting the hypothesis that the genetic processes shaping the hibernation phenotype are driven primarily by changes in gene regulation. |
Ruiz-Orera, Jorge, Messeguer, Xavier, Subirana, Juan Antonio, Alba, M Mar Long non-coding RNAs as a source of new peptides (Article) eLife, 3 2014, ISSN: 2050-084X. (Abstract | Links | BibTeX | Tags: ) @article{Ruiz-Orera2014, title = {Long non-coding {RNAs} as a source of new peptides}, author = {Ruiz-Orera, Jorge and Messeguer, Xavier and Subirana, Juan Antonio and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/25233276}, issn = {2050-084X}, year = {2014}, date = {2014-01-01}, journal = {eLife}, volume = {3}, abstract = {Deep transcriptome sequencing has revealed the existence of many transcripts that lack long or conserved open reading frames (ORFs) and which have been termed long non-coding RNAs (lncRNAs). The vast majority of lncRNAs are lineage-specific and do not yet have a known function. In this study, we test the hypothesis that they may act as a repository for the synthesis of new peptides. We find that a large fraction of the lncRNAs expressed in cells from six different species is associated with ribosomes. The patterns of ribosome protection are consistent with the translation of short peptides. lncRNAs show similar coding potential and sequence constraints than evolutionary young protein coding sequences, indicating that they play an important role in de novo protein evolution.} } Deep transcriptome sequencing has revealed the existence of many transcripts that lack long or conserved open reading frames (ORFs) and which have been termed long non-coding RNAs (lncRNAs). The vast majority of lncRNAs are lineage-specific and do not yet have a known function. In this study, we test the hypothesis that they may act as a repository for the synthesis of new peptides. We find that a large fraction of the lncRNAs expressed in cells from six different species is associated with ribosomes. The patterns of ribosome protection are consistent with the translation of short peptides. 
lncRNAs show similar coding potential and sequence constraints than evolutionary young protein coding sequences, indicating that they play an important role in de novo protein evolution. |
Santpere, Gabriel, Darre, Fleur, Blanco, Soledad, Alcami, Antonio, Villoslada, Pablo, Mar Albà, M, Navarro, Arcadi Genome biology and evolution, 6 (4), pp. 846–60, 2014, ISSN: 1759-6653. (Abstract | Links | BibTeX | Tags: ) @article{Santpere2014, title = {Genome-wide analysis of wild-type {Epstein-Barr} virus genomes derived from healthy individuals of the {1,000 Genomes Project}}, author = {Santpere, Gabriel and Darre, Fleur and Blanco, Soledad and Alcami, Antonio and Villoslada, Pablo and Albà, M Mar and Navarro, Arcadi}, url = {http://www.ncbi.nlm.nih.gov/pubmed/24682154}, issn = {1759-6653}, year = {2014}, date = {2014-01-01}, journal = {Genome Biology and Evolution}, volume = {6}, number = {4}, pages = {846--60}, abstract = {Most people in the world ($\sim$90\%) are infected by the Epstein-Barr virus (EBV), which establishes itself permanently in B cells. Infection by EBV is related to a number of diseases including infectious mononucleosis, multiple sclerosis, and different types of cancer. So far, only seven complete EBV strains have been described, all of them coming from donors presenting EBV-related diseases. To perform a detailed comparative genomic analysis of EBV including, for the first time, EBV strains derived from healthy individuals, we reconstructed EBV sequences infecting lymphoblastoid cell lines (LCLs) from the 1000 Genomes Project. As strain B95-8 was used to transform B cells to obtain LCLs, it is always present, but a specific deletion in its genome sets it apart from natural EBV strains. After studying hundreds of individuals, we determined the presence of natural EBV in at least 10 of them and obtained a set of variants specific to wild-type EBV. By mapping the natural EBV reads into the EBV reference genome (NC007605), we constructed nearly complete wild-type viral genomes from three individuals. Adding them to the five disease-derived EBV genomic sequences available in the literature, we performed an in-depth comparative genomic analysis. 
We found that latency genes harbor more nucleotide diversity than lytic genes and that six out of nine latency-related genes, as well as other genes involved in viral attachment and entry into host cells, packaging, and the capsid, present the molecular signature of accelerated protein evolution rates, suggesting rapid host-parasite coevolution.} } Most people in the world (∼90%) are infected by the Epstein-Barr virus (EBV), which establishes itself permanently in B cells. Infection by EBV is related to a number of diseases including infectious mononucleosis, multiple sclerosis, and different types of cancer. So far, only seven complete EBV strains have been described, all of them coming from donors presenting EBV-related diseases. To perform a detailed comparative genomic analysis of EBV including, for the first time, EBV strains derived from healthy individuals, we reconstructed EBV sequences infecting lymphoblastoid cell lines (LCLs) from the 1000 Genomes Project. As strain B95-8 was used to transform B cells to obtain LCLs, it is always present, but a specific deletion in its genome sets it apart from natural EBV strains. After studying hundreds of individuals, we determined the presence of natural EBV in at least 10 of them and obtained a set of variants specific to wild-type EBV. By mapping the natural EBV reads into the EBV reference genome (NC007605), we constructed nearly complete wild-type viral genomes from three individuals. Adding them to the five disease-derived EBV genomic sequences available in the literature, we performed an in-depth comparative genomic analysis. We found that latency genes harbor more nucleotide diversity than lytic genes and that six out of nine latency-related genes, as well as other genes involved in viral attachment and entry into host cells, packaging, and the capsid, present the molecular signature of accelerated protein evolution rates, suggesting rapid host-parasite coevolution. |
Gayà-Vidal, Magdalena, Albà, M Mar Uncovering adaptive evolution in the human lineage (Article) BMC Genomics, 15 (1), pp. 599, 2014, ISSN: 1471-2164. (Abstract | Links | BibTeX | Tags: ) @article{Gaya-Vidal2014, title = {Uncovering adaptive evolution in the human lineage}, author = {Gayà-Vidal, Magdalena and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/25030307}, issn = {1471-2164}, year = {2014}, date = {2014-01-01}, journal = {BMC Genomics}, volume = {15}, number = {1}, pages = {599}, abstract = {BACKGROUND: The recent increase in human polymorphism data, together with the availability of genome sequences from several primate species, provides an unprecedented opportunity to investigate how natural selection has shaped human evolution. RESULTS: We compared human branch-specific substitutions with variation data in the current human population to measure the impact of adaptive evolution on human protein coding genes. The use of single nucleotide polymorphisms (SNPs) with high derived allele frequencies (DAFs) minimized the influence of segregating slightly deleterious mutations and improved the estimation of the number of adaptive sites. Using DAF $\geq$ 60\% we showed that the proportion of adaptive substitutions is 0.2\% in the complete gene set. However, the percentage rose to 40\% when we focused on genes that are specifically accelerated in the human branch with respect to the chimpanzee branch, or on genes that show signatures of adaptive selection at the codon level by the maximum likelihood based branch-site test. In general, neural genes are enriched in positive selection signatures. Genes with multiple lines of evidence of positive selection include taxilin beta, which is involved in motor nerve regeneration and syntabulin, and is required for the formation of new presynaptic boutons. CONCLUSIONS: We combined several methods to detect adaptive evolution in human coding sequences at a genome-wide level. 
The use of variation data, in addition to sequence divergence information, uncovered previously undetected positive selection signatures in neural genes.} } BACKGROUND: The recent increase in human polymorphism data, together with the availability of genome sequences from several primate species, provides an unprecedented opportunity to investigate how natural selection has shaped human evolution. RESULTS: We compared human branch-specific substitutions with variation data in the current human population to measure the impact of adaptive evolution on human protein coding genes. The use of single nucleotide polymorphisms (SNPs) with high derived allele frequencies (DAFs) minimized the influence of segregating slightly deleterious mutations and improved the estimation of the number of adaptive sites. Using DAF >= 60% we showed that the proportion of adaptive substitutions is 0.2% in the complete gene set. However, the percentage rose to 40% when we focused on genes that are specifically accelerated in the human branch with respect to the chimpanzee branch, or on genes that show signatures of adaptive selection at the codon level by the maximum likelihood based branch-site test. In general, neural genes are enriched in positive selection signatures. Genes with multiple lines of evidence of positive selection include taxilin beta, which is involved in motor nerve regeneration and syntabulin, and is required for the formation of new presynaptic boutons. CONCLUSIONS: We combined several methods to detect adaptive evolution in human coding sequences at a genome-wide level. The use of variation data, in addition to sequence divergence information, uncovered previously undetected positive selection signatures in neural genes. |
2013 |
Bornberg-Bauer, Erich, Albà, M Mar Dynamics and adaptive benefits of modular protein evolution (Article) Current Opinion in Structural Biology, 2013, ISSN: 0959440X. (Abstract | Links | BibTeX | Tags: ) @article{Bornberg-Bauer2013, title = {Dynamics and adaptive benefits of modular protein evolution}, author = {Bornberg-Bauer, Erich and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23562500}, issn = {0959-440X}, year = {2013}, date = {2013-01-01}, journal = {Current Opinion in Structural Biology}, abstract = {During protein evolution, novel domain arrangements are continuously formed. Rearrangements are important for the creation of molecular biodiversity and for functional molecular changes which underlie developmental shifts in the bauplan of organisms. Here we review the mechanisms by which new arrangements arise and the potential benefits of rearrangements. We concentrate on how new domains emerge and why they rapidly spread across genomes, gaining higher copy numbers than older, more established domains. This spread is most likely a consequence of their high adaptive potential but is unlikely to make up on its own for the drastic loss of domains, which is observed across different taxa. We show that a significant portion of the recently emerged domains, especially those in multidomain families, are highly disordered and speculate about the significance of these findings for the evolvability of novel genetic material.} } During protein evolution, novel domain arrangements are continuously formed. Rearrangements are important for the creation of molecular biodiversity and for functional molecular changes which underlie developmental shifts in the bauplan of organisms. Here we review the mechanisms by which new arrangements arise and the potential benefits of rearrangements. We concentrate on how new domains emerge and why they rapidly spread across genomes, gaining higher copy numbers than older, more established domains. 
This spread is most likely a consequence of their high adaptive potential but is unlikely to make up on its own for the drastic loss of domains, which is observed across different taxa. We show that a significant portion of the recently emerged domains, especially those in multidomain families, are highly disordered and speculate about the significance of these findings for the evolvability of novel genetic material. |
Villanueva-Cañas, José Luis, Laurie, Steve, Albà, M Mar Improving genome-wide scans of positive selection by using protein isoforms of similar length. (Article) Genome biology and evolution, 5 (2), pp. 457–67, 2013, ISSN: 1759-6653. (Abstract | Links | BibTeX | Tags: ) @article{Villanueva-Canas2013, title = {Improving genome-wide scans of positive selection by using protein isoforms of similar length.}, author = {Villanueva-Cañas, José Luis and Laurie, Steve and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23377868}, issn = {1759-6653}, year = {2013}, date = {2013-01-01}, journal = {Genome biology and evolution}, volume = {5}, number = {2}, pages = {457--67}, abstract = {Large-scale evolutionary studies often require the automated construction of alignments of a large number of homologous gene families. The majority of eukaryotic genes can produce different transcripts due to alternative splicing or transcription initiation, and many such transcripts encode different protein isoforms. As analyses tend to be gene centered, one single-protein isoform per gene is selected for the alignment, with the de facto approach being to use the longest protein isoform per gene (Longest), presumably to avoid including partial sequences and to maximize sequence information. Here, we show that this approach is problematic because it increases the number of indels in the alignments due to the inclusion of nonhomologous regions, such as those derived from species-specific exons, increasing the number of misaligned positions. With the aim of ameliorating this problem, we have developed a novel heuristic, Protein ALignment Optimizer (PALO), which, for each gene family, selects the combination of protein isoforms that are most similar in length. 
We examine several evolutionary parameters inferred from alignments in which the only difference is the method used to select the protein isoform combination: Longest, PALO, the combination that results in the highest sequence conservation, and a randomly selected combination. We observe that Longest tends to overestimate both nonsynonymous and synonymous substitution rates when compared with PALO, which is most likely due to an excess of misaligned positions. The estimation of the fraction of genes that have experienced positive selection by maximum likelihood is very sensitive to the method of isoform selection employed, both when alignments are constructed with MAFFT and with Prank(+F). Longest performs better than a random combination but still estimates up to 3 times more positively selected genes than the combination showing the highest conservation, indicating the presence of many false positives. We show that PALO can eliminate the majority of such false positives and thus that it is a more appropriate approach for large-scale analyses than Longest. A web server has been set up to facilitate the use of PALO given a user-defined set of gene families; it is available at http://evolutionarygenomics.imim.es/palo.}, keywords = {} } Large-scale evolutionary studies often require the automated construction of alignments of a large number of homologous gene families. The majority of eukaryotic genes can produce different transcripts due to alternative splicing or transcription initiation, and many such transcripts encode different protein isoforms. As analyses tend to be gene centered, one single-protein isoform per gene is selected for the alignment, with the de facto approach being to use the longest protein isoform per gene (Longest), presumably to avoid including partial sequences and to maximize sequence information. 
Here, we show that this approach is problematic because it increases the number of indels in the alignments due to the inclusion of nonhomologous regions, such as those derived from species-specific exons, increasing the number of misaligned positions. With the aim of ameliorating this problem, we have developed a novel heuristic, Protein ALignment Optimizer (PALO), which, for each gene family, selects the combination of protein isoforms that are most similar in length. We examine several evolutionary parameters inferred from alignments in which the only difference is the method used to select the protein isoform combination: Longest, PALO, the combination that results in the highest sequence conservation, and a randomly selected combination. We observe that Longest tends to overestimate both nonsynonymous and synonymous substitution rates when compared with PALO, which is most likely due to an excess of misaligned positions. The estimation of the fraction of genes that have experienced positive selection by maximum likelihood is very sensitive to the method of isoform selection employed, both when alignments are constructed with MAFFT and with Prank(+F). Longest performs better than a random combination but still estimates up to 3 times more positively selected genes than the combination showing the highest conservation, indicating the presence of many false positives. We show that PALO can eliminate the majority of such false positives and thus that it is a more appropriate approach for large-scale analyses than Longest. A web server has been set up to facilitate the use of PALO given a user-defined set of gene families; it is available at http://evolutionarygenomics.imim.es/palo. |
Mulero, María Carmen, Ferres-Marco, Dolors, Islam, Abul, Margalef, Pol, Pecoraro, Matteo, Toll, Agustí, Drechsel, Nils, Charneco, Cristina, Davis, Shelly, Bellora, Nicolás, Gallardo, Fernando, López-Arribillaga, Erika, Asensio-Juan, Elena, Rodilla, Verónica, González, Jessica, Iglesias, Mar, Shih, Vincent, Albà, M Mar, Di Croce, Luciano, Hoffmann, Alexander, Miyamoto, Shigeki, Villà-Freixa, Jordi, López-Bigas, Nuria, Keyes, Wil Cancer cell, 24 (2), pp. 151–66, 2013, ISSN: 1878-3686. (Abstract | Links | BibTeX | Tags: ) @article{Mulero2013, title = {Chromatin-Bound I$\kappa$B$\alpha$ Regulates a Subset of Polycomb Target Genes in Differentiation and Cancer.}, author = {Mulero, María Carmen and Ferres-Marco, Dolors and Islam, Abul and Margalef, Pol and Pecoraro, Matteo and Toll, Agustí and Drechsel, Nils and Charneco, Cristina and Davis, Shelly and Bellora, Nicolás and Gallardo, Fernando and López-Arribillaga, Erika and Asensio-Juan, Elena and Rodilla, Verónica and González, Jessica and Iglesias, Mar and Shih, Vincent and Albà, M Mar and Di Croce, Luciano and Hoffmann, Alexander and Miyamoto, Shigeki and Villà-Freixa, Jordi and López-Bigas, Nuria and Keyes, Wil}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23850221}, issn = {1878-3686}, year = {2013}, date = {2013-01-01}, journal = {Cancer cell}, volume = {24}, number = {2}, pages = {151--66}, abstract = {I$\kappa$B proteins are the primary inhibitors of NF-$\kappa$B. Here, we demonstrate that sumoylated and phosphorylated I$\kappa$B$\alpha$ accumulates in the nucleus of keratinocytes and interacts with histones H2A and H4 at the regulatory region of HOX and IRX genes. Chromatin-bound I$\kappa$B$\alpha$ modulates Polycomb recruitment and imparts their competence to be activated by TNF$\alpha$. Mutations in the Drosophila I$\kappa$B$\alpha$ gene cactus enhance the homeotic phenotype of Polycomb mutants, which is not counteracted by mutations in dorsal/NF-$\kappa$B. 
Oncogenic transformation of keratinocytes results in cytoplasmic I$\kappa$B$\alpha$ translocation associated with a massive activation of Hox. Accumulation of cytoplasmic I$\kappa$B$\alpha$ was found in squamous cell carcinoma (SCC) associated with IKK activation and HOX upregulation.}, keywords = {} } I$\kappa$B proteins are the primary inhibitors of NF-$\kappa$B. Here, we demonstrate that sumoylated and phosphorylated I$\kappa$B$\alpha$ accumulates in the nucleus of keratinocytes and interacts with histones H2A and H4 at the regulatory region of HOX and IRX genes. Chromatin-bound I$\kappa$B$\alpha$ modulates Polycomb recruitment and imparts their competence to be activated by TNF$\alpha$. Mutations in the Drosophila I$\kappa$B$\alpha$ gene cactus enhance the homeotic phenotype of Polycomb mutants, which is not counteracted by mutations in dorsal/NF-$\kappa$B. Oncogenic transformation of keratinocytes results in cytoplasmic I$\kappa$B$\alpha$ translocation associated with a massive activation of Hox. Accumulation of cytoplasmic I$\kappa$B$\alpha$ was found in squamous cell carcinoma (SCC) associated with IKK activation and HOX upregulation. |
Pegueroles, Cinta, Laurie, Steve, Albà, M Mar Accelerated evolution after gene duplication: a time-dependent process affecting just one copy. (Article) Molecular biology and evolution, 2013, ISSN: 1537-1719. (Abstract | Links | BibTeX | Tags: ) @article{Pegueroles2013, title = {Accelerated evolution after gene duplication: a time-dependent process affecting just one copy.}, author = {Pegueroles, Cinta and Laurie, Steve and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23625888}, issn = {1537-1719}, year = {2013}, date = {2013-01-01}, journal = {Molecular biology and evolution}, abstract = {Gene duplication is widely regarded as a major mechanism modelling genome evolution and function. However, the mechanisms that drive the evolution of the two, initially redundant, gene copies are still ill-defined. Many gene duplicates experience evolutionary rate acceleration, but the relative contribution of positive selection and random drift to the retention and subsequent evolution of gene duplicates, and for how long the molecular clock may be distorted by these processes, remains unclear. Focusing on rodent genes which duplicated before and after the mouse and rat split, we find significantly increased sequence divergence after duplication in only one of the copies, which in nearly all cases corresponds to the novel daughter copy, independent of the mechanism of duplication. We observe that the evolutionary rate of the accelerated copy, measured as the ratio of non-synonymous to synonymous substitutions, is on average 5 fold higher in the period spanning 4 to 12 My after the duplication than it was before the duplication. This increase can be explained, at least in part, by the action of positive selection according to the results of the maximum likelihood based branch-site test. Subsequently, the rate decelerates until purifying selection completely returns to preduplication levels. 
Reversion to the original rates has already been accomplished 40.5 My after the duplication event, corresponding to a genetic distance of about 0.28 synonymous substitutions per site. Differences in tissue gene expression patterns parallel those of substitution rates, reinforcing the role of neofunctionalization in explaining the evolution of young gene duplicates.}, keywords = {} } Gene duplication is widely regarded as a major mechanism modelling genome evolution and function. However, the mechanisms that drive the evolution of the two, initially redundant, gene copies are still ill-defined. Many gene duplicates experience evolutionary rate acceleration, but the relative contribution of positive selection and random drift to the retention and subsequent evolution of gene duplicates, and for how long the molecular clock may be distorted by these processes, remains unclear. Focusing on rodent genes which duplicated before and after the mouse and rat split, we find significantly increased sequence divergence after duplication in only one of the copies, which in nearly all cases corresponds to the novel daughter copy, independent of the mechanism of duplication. We observe that the evolutionary rate of the accelerated copy, measured as the ratio of non-synonymous to synonymous substitutions, is on average 5 fold higher in the period spanning 4 to 12 My after the duplication than it was before the duplication. This increase can be explained, at least in part, by the action of positive selection according to the results of the maximum likelihood based branch-site test. Subsequently, the rate decelerates until purifying selection completely returns to preduplication levels. Reversion to the original rates has already been accomplished 40.5 My after the duplication event, corresponding to a genetic distance of about 0.28 synonymous substitutions per site. 
Differences in tissue gene expression patterns parallel those of substitution rates, reinforcing the role of neofunctionalization in explaining the evolution of young gene duplicates. |
Toll-Riera, Macarena, Albà, M Mar Emergence of novel domains in proteins. (Article) BMC evolutionary biology, 13 (1), pp. 47, 2013, ISSN: 1471-2148. (Abstract | Links | BibTeX | Tags: ) @article{Toll-Riera2013, title = {Emergence of novel domains in proteins.}, author = {Toll-Riera, Macarena and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23425224}, issn = {1471-2148}, year = {2013}, date = {2013-01-01}, journal = {BMC evolutionary biology}, volume = {13}, number = {1}, pages = {47}, abstract = {ABSTRACT: BACKGROUND: Proteins are composed of a combination of discrete, well-defined, sequence domains, associated with specific functions that have arisen at different times during evolutionary history. The emergence of novel domains is related to protein functional diversification and adaptation. But currently little is known about how novel domains arise and how they subsequently evolve. RESULTS: To gain insights into the impact of recently emerged domains in protein evolution we have identified all human young protein domains that have emerged in approximately the past 550 million years. We have classified them into vertebrate-specific and mammalian-specific groups, and compared them to older domains. We have found 426 different annotated young domains, totalling 995 domain occurrences, which represent about 12.3% of all human domains. We have observed that 61.3% of them arose in newly formed genes, while the remaining 38.7% are found combined with older domains, and have very likely emerged in the context of a previously existing protein. Young domains are preferentially located at the N-terminus of the protein, indicating that, at least in vertebrates, novel functional sequences often emerge there. Furthermore, young domains show significantly higher non-synonymous to synonymous substitution rates than older domains using human and mouse orthologous sequence comparisons. 
This is also true when we compare young and old domains located in the same protein, suggesting that recently arisen domains tend to evolve in a less constrained manner than older domains. CONCLUSIONS: We conclude that proteins tend to gain domains over time, becoming progressively longer. We show that many proteins are made of domains of different age, and that the fastest evolving parts correspond to the domains that have been acquired more recently.}, keywords = {} } ABSTRACT: BACKGROUND: Proteins are composed of a combination of discrete, well-defined, sequence domains, associated with specific functions that have arisen at different times during evolutionary history. The emergence of novel domains is related to protein functional diversification and adaptation. But currently little is known about how novel domains arise and how they subsequently evolve. RESULTS: To gain insights into the impact of recently emerged domains in protein evolution we have identified all human young protein domains that have emerged in approximately the past 550 million years. We have classified them into vertebrate-specific and mammalian-specific groups, and compared them to older domains. We have found 426 different annotated young domains, totalling 995 domain occurrences, which represent about 12.3% of all human domains. We have observed that 61.3% of them arose in newly formed genes, while the remaining 38.7% are found combined with older domains, and have very likely emerged in the context of a previously existing protein. Young domains are preferentially located at the N-terminus of the protein, indicating that, at least in vertebrates, novel functional sequences often emerge there. Furthermore, young domains show significantly higher non-synonymous to synonymous substitution rates than older domains using human and mouse orthologous sequence comparisons. 
This is also true when we compare young and old domains located in the same protein, suggesting that recently arisen domains tend to evolve in a less constrained manner than older domains. CONCLUSIONS: We conclude that proteins tend to gain domains over time, becoming progressively longer. We show that many proteins are made of domains of different age, and that the fastest evolving parts correspond to the domains that have been acquired more recently. |
Trilling, Mirko, Bellora, Nicolás, Rutkowski, Andrzej J, de Graaf, Miranda, Dickinson, Paul, Robertson, Kevin, Prazeres da Costa, Olivia, Ghazal, Peter, Friedel, Caroline C, Albà, M Mar, Dölken, Lars Nucleic acids research, 2013, ISSN: 1362-4962. (Abstract | Links | BibTeX | Tags: ) @article{Trilling2013, title = {Deciphering the modulation of gene expression by type I and II interferons combining 4sU-tagging, translational arrest and in silico promoter analysis.}, author = {Trilling, Mirko and Bellora, Nicolás and Rutkowski, Andrzej J and de Graaf, Miranda and Dickinson, Paul and Robertson, Kevin and Prazeres da Costa, Olivia and Ghazal, Peter and Friedel, Caroline C and Albà, M Mar and Dölken, Lars}, url = {http://www.ncbi.nlm.nih.gov/pubmed/23832230}, issn = {1362-4962}, year = {2013}, date = {2013-01-01}, journal = {Nucleic acids research}, abstract = {Interferons (IFN) play a pivotal role in innate immunity, orchestrating a cell-intrinsic anti-pathogenic state and stimulating adaptive immune responses. The complex interplay between the primary response to IFNs and its modulation by positive and negative feedback loops is incompletely understood. Here, we implement the combination of high-resolution gene-expression profiling of nascent RNA with translational inhibition of secondary feedback by cycloheximide. Unexpectedly, this approach revealed a prominent role of negative feedback mechanisms during the immediate (≤60 min) IFN$\alpha$ response. In contrast, a more complex picture involving both negative and positive feedback loops was observed on IFN$\gamma$ treatment. IFN$\gamma$-induced repression of genes associated with regulation of gene expression, cellular development, apoptosis and cell growth resulted from cycloheximide-resistant primary IFN$\gamma$ signalling. In silico promoter analysis revealed significant overrepresentation of SP1/SP3-binding sites and/or GC-rich stretches. 
Although signal transducer and activator of transcription 1 (STAT1)-binding sites were not overrepresented, repression was lost in absence of STAT1. Interestingly, basal expression of the majority of these IFN$\gamma$-repressed genes was dependent on STAT1 in IFN-naïve fibroblasts. Finally, IFN$\gamma$-mediated repression was also found to be evident in primary murine macrophages. IFN-repressed genes include negative regulators of innate and stress response, and their decrease may thus aid the establishment of a signalling perceptive milieu.}, keywords = {} } Interferons (IFN) play a pivotal role in innate immunity, orchestrating a cell-intrinsic anti-pathogenic state and stimulating adaptive immune responses. The complex interplay between the primary response to IFNs and its modulation by positive and negative feedback loops is incompletely understood. Here, we implement the combination of high-resolution gene-expression profiling of nascent RNA with translational inhibition of secondary feedback by cycloheximide. Unexpectedly, this approach revealed a prominent role of negative feedback mechanisms during the immediate (≤60 min) IFN$\alpha$ response. In contrast, a more complex picture involving both negative and positive feedback loops was observed on IFN$\gamma$ treatment. IFN$\gamma$-induced repression of genes associated with regulation of gene expression, cellular development, apoptosis and cell growth resulted from cycloheximide-resistant primary IFN$\gamma$ signalling. In silico promoter analysis revealed significant overrepresentation of SP1/SP3-binding sites and/or GC-rich stretches. Although signal transducer and activator of transcription 1 (STAT1)-binding sites were not overrepresented, repression was lost in absence of STAT1. Interestingly, basal expression of the majority of these IFN$\gamma$-repressed genes was dependent on STAT1 in IFN-naïve fibroblasts. Finally, IFN$\gamma$-mediated repression was also found to be evident in primary murine macrophages. 
IFN-repressed genes include negative regulators of innate and stress response, and their decrease may thus aid the establishment of a signalling perceptive milieu. |
2012 |
Laurie, Steve, Toll-Riera, Macarena, Radó-Trilla, Núria, Albà, M Mar Sequence shortening in the rodent ancestor. (Article) Genome research, 22 (3), pp. 478–85, 2012, ISSN: 1549-5469. (Abstract | Links | BibTeX | Tags: ) @article{Laurie2012, title = {Sequence shortening in the rodent ancestor.}, author = {Laurie, Steve and Toll-Riera, Macarena and Radó-Trilla, Núria and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22128134}, issn = {1549-5469}, year = {2012}, date = {2012-01-01}, journal = {Genome research}, volume = {22}, number = {3}, pages = {478--85}, abstract = {Insertions and deletions (indels), together with nucleotide substitutions, are major drivers of sequence evolution. An excess of deletions over insertions in genomic sequences---the so-called deletional bias---has been reported in a wide range of species, including mammals. However, this bias has not been found in the coding sequences of some mammalian species, such as human and mouse. To determine the strength of the deletional bias in mammals, and the influence of mutation and selection, we have quantified indels in both neutrally evolving noncoding sequences and protein-coding sequences, in six mammalian branches: human, macaque, ancestral primate, mouse, rat, and ancestral rodent. The results obtained with an improved algorithm for the placement of insertions in multiple alignments, Prank(+F), indicate that contrary to previous results, the only mammalian branch with a strong deletional bias is the rodent ancestral branch. We estimate that such a bias has resulted in an ∼2.5% sequence loss of mammalian syntenic region in the ancestor of the mouse and rat. Further, a comparison of coding and noncoding sequences shows that negative selection is acting more strongly against mutations generating amino acid insertions than against mutations resulting in amino acid deletions. 
The strength of selection against indels is found to be higher in the rodent branches than in the primate branches, consistent with the larger effective population sizes of the rodents.}, keywords = {} } Insertions and deletions (indels), together with nucleotide substitutions, are major drivers of sequence evolution. An excess of deletions over insertions in genomic sequences—the so-called deletional bias—has been reported in a wide range of species, including mammals. However, this bias has not been found in the coding sequences of some mammalian species, such as human and mouse. To determine the strength of the deletional bias in mammals, and the influence of mutation and selection, we have quantified indels in both neutrally evolving noncoding sequences and protein-coding sequences, in six mammalian branches: human, macaque, ancestral primate, mouse, rat, and ancestral rodent. The results obtained with an improved algorithm for the placement of insertions in multiple alignments, Prank(+F), indicate that contrary to previous results, the only mammalian branch with a strong deletional bias is the rodent ancestral branch. We estimate that such a bias has resulted in an ∼2.5% sequence loss of mammalian syntenic region in the ancestor of the mouse and rat. Further, a comparison of coding and noncoding sequences shows that negative selection is acting more strongly against mutations generating amino acid insertions than against mutations resulting in amino acid deletions. The strength of selection against indels is found to be higher in the rodent branches than in the primate branches, consistent with the larger effective population sizes of the rodents. |
Radó-Trilla, Núria, Albà, M Mar Dissecting the role of low-complexity regions in the evolution of vertebrate proteins. (Article) BMC evolutionary biology, 12 (1), pp. 155, 2012, ISSN: 1471-2148. (Abstract | Links | BibTeX | Tags: ) @article{Rado-Trilla2012, title = {Dissecting the role of low-complexity regions in the evolution of vertebrate proteins.}, author = {Radó-Trilla, Núria and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22920595}, issn = {1471-2148}, year = {2012}, date = {2012-01-01}, journal = {BMC evolutionary biology}, volume = {12}, number = {1}, pages = {155}, abstract = {ABSTRACT: BACKGROUND: Low-complexity regions (LCRs) in proteins are tracts that are highly enriched in one or a few amino acids. Given their high abundance, and their capacity to expand in relatively short periods of time through replication slippage, they can greatly contribute to increase protein sequence space and generate novel protein functions. However, little is known about the global impact of LCRs on protein evolution. RESULTS: We have traced back the evolutionary history of 2,802 LCRs from a large set of homologous protein families from H.sapiens, M.musculus, G.gallus, D.rerio and C.intestinalis. Transcriptional factors and other regulatory functions are overrepresented in proteins containing LCRs. We have found that the gain of novel LCRs is frequently associated with repeat expansion whereas the loss of LCRs is more often due to accumulation of amino acid substitutions as opposed to deletions. This dichotomy results in net protein sequence gain over time. We have detected a significant increase in the rate of accumulation of novel LCRs in the ancestral Amniota and mammalian branches, and a reduction in the chicken branch. Alanine and/or glycine-rich LCRs are overrepresented in recently emerged LCR sets from all branches, suggesting that their expansion is better tolerated than for other LCR types. 
LCRs enriched in positively charged amino acids show the contrary pattern, indicating an important effect of purifying selection in their maintenance. CONCLUSION: We have performed the first large-scale study on the evolutionary dynamics of LCRs in protein families. The study has shown that the composition of an LCR is an important determinant of its evolutionary pattern.}, keywords = {} } ABSTRACT: BACKGROUND: Low-complexity regions (LCRs) in proteins are tracts that are highly enriched in one or a few amino acids. Given their high abundance, and their capacity to expand in relatively short periods of time through replication slippage, they can greatly contribute to increase protein sequence space and generate novel protein functions. However, little is known about the global impact of LCRs on protein evolution. RESULTS: We have traced back the evolutionary history of 2,802 LCRs from a large set of homologous protein families from H.sapiens, M.musculus, G.gallus, D.rerio and C.intestinalis. Transcriptional factors and other regulatory functions are overrepresented in proteins containing LCRs. We have found that the gain of novel LCRs is frequently associated with repeat expansion whereas the loss of LCRs is more often due to accumulation of amino acid substitutions as opposed to deletions. This dichotomy results in net protein sequence gain over time. We have detected a significant increase in the rate of accumulation of novel LCRs in the ancestral Amniota and mammalian branches, and a reduction in the chicken branch. Alanine and/or glycine-rich LCRs are overrepresented in recently emerged LCR sets from all branches, suggesting that their expansion is better tolerated than for other LCR types. LCRs enriched in positively charged amino acids show the contrary pattern, indicating an important effect of purifying selection in their maintenance. CONCLUSION: We have performed the first large-scale study on the evolutionary dynamics of LCRs in protein families. 
The study has shown that the composition of an LCR is an important determinant of its evolutionary pattern. |
Shikhagaie, Medya, Mercé-Maldonado, Eva, Isern, Elena, Muntasell, Aura, Albà, M Mar, López-Botet, Miguel, Hengel, Hartmut, Angulo, Ana The HCMV-specific UL1 gene encodes a late phase glycoprotein incorporated in the virion envelope. (Article) Journal of virology, 2012, ISSN: 1098-5514. (Abstract | Links | BibTeX | Tags: ) @article{Shikhagaie2012, title = {The HCMV-specific UL1 gene encodes a late phase glycoprotein incorporated in the virion envelope.}, author = {Shikhagaie, Medya and Mercé-Maldonado, Eva and Isern, Elena and Muntasell, Aura and Albà, M Mar and López-Botet, Miguel and Hengel, Hartmut and Angulo, Ana}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22345456}, issn = {1098-5514}, year = {2012}, date = {2012-01-01}, journal = {Journal of virology}, abstract = {We have investigated the previously uncharacterized human cytomegalovirus (HCMV) UL1 open reading frame (ORF), a member of the rapidly evolving HCMV RL11 family. UL1 is HCMV specific; absence of UL1 in chimpanzee CMV (CCMV) and sequence analysis studies suggest that UL1 may have originated by duplication of an ancestor gene from the RL11-TRL-cluster (TRL11, TRL12 and TRL13). Sequence similarity searches against human immunoglobulin (Ig) containing proteins revealed that HCMV pUL1 shows significant similarity to the cellular carcinoembryonic antigen related (CEA) protein family N-terminal Ig domain, which is responsible for CEA ligand recognition. Northern blot analysis revealed that UL1 is transcribed during the late phase of the viral replication cycle, both in fibroblast-adapted and endotheliotropic strains of HCMV. We characterized the protein encoded by an HA-tagged UL1 in the AD169-derived HB5 background. UL1 is expressed as a 224 amino acid type I transmembrane glycoprotein which becomes detectable 48 h post infection. 
In infected human fibroblasts, pUL1 co-localized at the cytoplasmic site of virion assembly and secondary envelopment together with TGN-46, a marker for the trans-Golgi network, and viral structural proteins including envelope glycoprotein B and tegument phosphoprotein pp28. Furthermore, analysis of highly purified AD169 UL1-HA epitope tagged virions revealed that pUL1 is a novel constituent of the HCMV envelope. Importantly, deletion of UL1 in HCMV TB40/E resulted in a reduced growth in a cell type-specific manner, suggesting that pUL1 may be implicated in regulating HCMV cell tropism.}, keywords = {} } We have investigated the previously uncharacterized human cytomegalovirus (HCMV) UL1 open reading frame (ORF), a member of the rapidly evolving HCMV RL11 family. UL1 is HCMV specific; absence of UL1 in chimpanzee CMV (CCMV) and sequence analysis studies suggest that UL1 may have originated by duplication of an ancestor gene from the RL11-TRL-cluster (TRL11, TRL12 and TRL13). Sequence similarity searches against human immunoglobulin (Ig) containing proteins revealed that HCMV pUL1 shows significant similarity to the cellular carcinoembryonic antigen related (CEA) protein family N-terminal Ig domain, which is responsible for CEA ligand recognition. Northern blot analysis revealed that UL1 is transcribed during the late phase of the viral replication cycle, both in fibroblast-adapted and endotheliotropic strains of HCMV. We characterized the protein encoded by an HA-tagged UL1 in the AD169-derived HB5 background. UL1 is expressed as a 224 amino acid type I transmembrane glycoprotein which becomes detectable 48 h post infection. In infected human fibroblasts, pUL1 co-localized at the cytoplasmic site of virion assembly and secondary envelopment together with TGN-46, a marker for the trans-Golgi network, and viral structural proteins including envelope glycoprotein B and tegument phosphoprotein pp28. 
Furthermore, analysis of highly purified AD169 UL1-HA epitope tagged virions revealed that pUL1 is a novel constituent of the HCMV envelope. Importantly, deletion of UL1 in HCMV TB40/E resulted in a reduced growth in a cell type-specific manner, suggesting that pUL1 may be implicated in regulating HCMV cell tropism. |
Toll-Riera, Macarena, Bostick, David, Albà, M Mar, Plotkin, Joshua B Structure and age jointly influence rates of protein evolution. (Article) PLoS computational biology, 8 (5), pp. e1002542, 2012, ISSN: 1553-7358. (Abstract | Links | BibTeX | Tags: Animals, Binding Sites, Computational Biology, Eukaryota, Evolution, Humans, Mice, Molecular, Protein Conformation, Protein Stability, Proteins, Proteins: chemistry, Proteins: genetics, Proteins: metabolism, Solvents) @article{Toll-Riera2012a, title = {Structure and age jointly influence rates of protein evolution.}, author = {Toll-Riera, Macarena and Bostick, David and Albà, M Mar and Plotkin, Joshua B}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3364943&tool=pmcentrez&rendertype=abstract}, issn = {1553-7358}, year = {2012}, date = {2012-01-01}, journal = {PLoS computational biology}, volume = {8}, number = {5}, pages = {e1002542}, abstract = {What factors determine a protein's rate of evolution are actively debated. Especially unclear is the relative role of intrinsic factors of present-day proteins versus historical factors such as protein age. Here we study the interplay of structural properties and evolutionary age, as determinants of protein evolutionary rate. We use a large set of one-to-one orthologs between human and mouse proteins, with mapped PDB structures. We report that previously observed structural correlations also hold within each age group - including relationships between solvent accessibility, designability, and evolutionary rates. However, age also plays a crucial role: age modulates the relationship between solvent accessibility and rate. Additionally, younger proteins, despite being less designable, tend to evolve faster than older proteins. We show that previously reported relationships between age and rate cannot be explained by structural biases among age groups. 
Finally, we introduce a knowledge-based potential function to study the stability of proteins through large-scale computation. We find that older proteins are more stable for their native structure, and more robust to mutations, than younger ones. Our results underscore that several determinants, both intrinsic and historical, can interact to determine rates of protein evolution.}, keywords = {Animals, Binding Sites, Computational Biology, Eukaryota, Evolution, Humans, Mice, Molecular, Protein Conformation, Protein Stability, Proteins, Proteins: chemistry, Proteins: genetics, Proteins: metabolism, Solvents} } What factors determine a protein's rate of evolution are actively debated. Especially unclear is the relative role of intrinsic factors of present-day proteins versus historical factors such as protein age. Here we study the interplay of structural properties and evolutionary age, as determinants of protein evolutionary rate. We use a large set of one-to-one orthologs between human and mouse proteins, with mapped PDB structures. We report that previously observed structural correlations also hold within each age group - including relationships between solvent accessibility, designability, and evolutionary rates. However, age also plays a crucial role: age modulates the relationship between solvent accessibility and rate. Additionally, younger proteins, despite being less designable, tend to evolve faster than older proteins. We show that previously reported relationships between age and rate cannot be explained by structural biases among age groups. Finally, we introduce a knowledge-based potential function to study the stability of proteins through large-scale computation. We find that older proteins are more stable for their native structure, and more robust to mutations, than younger ones. Our results underscore that several determinants, both intrinsic and historical, can interact to determine rates of protein evolution. |
Toll-Riera, Macarena, Radó-Trilla, Núria, Martys, Florian, Albà, M Mar Role of Low-Complexity Sequences in the Formation of Novel Protein Coding Sequences. (Article) Molecular biology and evolution, 29 (3), pp. 883–6, 2012, ISSN: 1537-1719. (Abstract | Links | BibTeX | Tags: ) @article{Toll-Riera2012, title = {Role of Low-Complexity Sequences in the Formation of Novel Protein Coding Sequences.}, author = {Toll-Riera, Macarena and Radó-Trilla, Núria and Martys, Florian and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/22045997}, issn = {1537-1719}, year = {2012}, date = {2012-01-01}, journal = {Molecular biology and evolution}, volume = {29}, number = {3}, pages = {883--6}, abstract = {Low-complexity sequences are extremely abundant in eukaryotic proteins for reasons that remain unclear. One hypothesis is that they contribute to the formation of novel coding sequences, facilitating the generation of novel protein functions. Here, we test this hypothesis by examining the content of low-complexity sequences in proteins of different age. We show that recently emerged proteins contain more low-complexity sequences than older proteins and that these sequences often form functional domains. These data are consistent with the idea that low-complexity sequences may play a key role in the emergence of novel genes.}, keywords = {} } Low-complexity sequences are extremely abundant in eukaryotic proteins for reasons that remain unclear. One hypothesis is that they contribute to the formation of novel coding sequences, facilitating the generation of novel protein functions. Here, we test this hypothesis by examining the content of low-complexity sequences in proteins of different age. We show that recently emerged proteins contain more low-complexity sequences than older proteins and that these sequences often form functional domains. These data are consistent with the idea that low-complexity sequences may play a key role in the emergence of novel genes. |
2011 |
Toll-Riera, M, Laurie, S, Radó-Trilla, N, Albà, MM Partial gene duplication and the formation of novel genes (Incollection) Friedberg, Felix (Ed.): Gene Duplication, Intech, Rijeka, 2011. @incollection{Toll-Riera2011b, title = {Partial gene duplication and the formation of novel genes}, author = {Toll-Riera, M and Laurie, S and Radó-Trilla, N and Albà, MM}, editor = {Felix Friedberg}, url = {http://www.intechopen.com/articles/show/title/partial-gene-duplication-and-the-formation-of-novel-genes}, year = {2011}, date = {2011-01-01}, booktitle = {Gene Duplication}, publisher = {Intech}, address = {Rijeka}, keywords = {} } |
Toll-Riera, Macarena, Laurie, Steve, Albà, M Mar Lineage-specific variation in intensity of natural selection in mammals. (Article) Molecular biology and evolution, 28 (1), pp. 383–98, 2011, ISSN: 1537-1719. (Abstract | Links | BibTeX | Tags: Amino Acid Sequence, Amino Acid Substitution, Animals, Evolution, F-Box Proteins, F-Box Proteins: genetics, G-Protein-Coupled, G-Protein-Coupled: genetics, Genetic, Genetic Variation, Humans, Mammals, Mammals: genetics, Molecular, Molecular Sequence Data, N-Methyl-D-Aspartate, N-Methyl-D-Aspartate: genetics, Odorant, Odorant: genetics, Receptors, Selection, Sequence Alignment) @article{Toll-Riera2011a, title = {Lineage-specific variation in intensity of natural selection in mammals.}, author = {Toll-Riera, Macarena and Laurie, Steve and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/20688808}, issn = {1537-1719}, year = {2011}, date = {2011-01-01}, journal = {Molecular biology and evolution}, volume = {28}, number = {1}, pages = {383--98}, abstract = {The molecular clock hypothesis states that protein-coding genes evolve at an approximately constant rate. However, this is only expected to be true as long as the function and the tertiary structure of the molecule remain unaltered. An important implication of this statement is that significant deviations in the rate of evolution of a gene with respect to the species clock are likely to reflect functional and/or structural alterations. Here, we present a method to identify such deviations and apply it to a data set of 2,929 high-quality coding sequence alignments corresponding to one-to-one orthologous genes from six mammalian species--human, macaque, mouse, rat, cow, and dog. Deviated branches are defined as those that present significant alterations in both the rate of nonsynonymous substitutions (dN) and the selective pressure (dN/dS). 
Strikingly, we find that as many as 24.5% of the genes show branch-specific deviations in dN and dN/dS, though this is a relatively well-conserved set of genes. Around half of these genes show branch-specific acceleration of evolutionary rates. Positive selection (PS) tests based on divergence data only identify 17.7% of the accelerated branches. Failure to identify PS in accelerated branches with an excess of radical amino acid replacements suggests that these tests are conservative. Interestingly, genes with accelerated branches are significantly enriched in neural proteins, indicating that this type of protein might play a more important role than previously thought in species diversification, although they are generally not detected by PS tests. We discuss in detail several examples of genes that show lineage-specific evolutionary rate acceleration and are involved in synaptic transmission, chemosensory perception, and ubiquitination.}, keywords = {Amino Acid Sequence, Amino Acid Substitution, Animals, Evolution, F-Box Proteins, F-Box Proteins: genetics, G-Protein-Coupled, G-Protein-Coupled: genetics, Genetic, Genetic Variation, Humans, Mammals, Mammals: genetics, Molecular, Molecular Sequence Data, N-Methyl-D-Aspartate, N-Methyl-D-Aspartate: genetics, Odorant, Odorant: genetics, Receptors, Selection, Sequence Alignment} } The molecular clock hypothesis states that protein-coding genes evolve at an approximately constant rate. However, this is only expected to be true as long as the function and the tertiary structure of the molecule remain unaltered. An important implication of this statement is that significant deviations in the rate of evolution of a gene with respect to the species clock are likely to reflect functional and/or structural alterations. 
Here, we present a method to identify such deviations and apply it to a data set of 2,929 high-quality coding sequence alignments corresponding to one-to-one orthologous genes from six mammalian species--human, macaque, mouse, rat, cow, and dog. Deviated branches are defined as those that present significant alterations in both the rate of nonsynonymous substitutions (dN) and the selective pressure (dN/dS). Strikingly, we find that as many as 24.5% of the genes show branch-specific deviations in dN and dN/dS, though this is a relatively well-conserved set of genes. Around half of these genes show branch-specific acceleration of evolutionary rates. Positive selection (PS) tests based on divergence data only identify 17.7% of the accelerated branches. Failure to identify PS in accelerated branches with an excess of radical amino acid replacements suggests that these tests are conservative. Interestingly, genes with accelerated branches are significantly enriched in neural proteins, indicating that this type of protein might play a more important role than previously thought in species diversification, although they are generally not detected by PS tests. We discuss in detail several examples of genes that show lineage-specific evolutionary rate acceleration and are involved in synaptic transmission, chemosensory perception, and ubiquitination. |
Engel, Pablo, Pérez-Carmona, Natàlia, Albà, M Mar, Robertson, Kevin, Ghazal, Peter, Angulo, Ana Immunology and cell biology, 89 (7), pp. 753–66, 2011, ISSN: 1440-1711. (Abstract | Links | BibTeX | Tags: ) @article{Engel2011, title = {Human cytomegalovirus UL7, a homologue of the SLAM-family receptor CD229, impairs cytokine production.}, author = {Engel, Pablo and Pérez-Carmona, Natàlia and Albà, M Mar and Robertson, Kevin and Ghazal, Peter and Angulo, Ana}, url = {http://www.ncbi.nlm.nih.gov/pubmed/21670740}, issn = {1440-1711}, year = {2011}, date = {2011-01-01}, journal = {Immunology and cell biology}, volume = {89}, number = {7}, pages = {753--66}, abstract = {Human cytomegalovirus (HCMV), the $\beta$-herpesvirus prototype, has evolved a wide spectrum of mechanisms to counteract host immunity. Among them, HCMV uses cellular captured genes encoding molecules capable of interfering with the original host function or of fulfilling new immunomodulatory tasks. Here, we report on UL7, a novel HCMV heavily glycosylated transmembrane protein, containing an Ig-like domain that exhibits remarkable amino acid similarity to CD229, a cell-surface molecule of the signalling lymphocyte-activation molecule (SLAM) family involved in leukocyte activation. The UL7 Ig-like domain, which is well-preserved in all HCMV strains, structurally resembles the SLAM-family N-terminal Ig-variable domain responsible for the homophilic and heterophilic interactions that trigger signalling. UL7 is transcribed with early-late kinetics during the lytic infectious cycle. Using a mAb generated against the viral protein, we show that it is constitutively shed, through its mucine-like stalk, from the cell-surface. Production of soluble UL7 is enhanced by PMA and reduced by a broad-spectrum metalloproteinase inhibitor. 
Although UL7 does not hold the ability to interact with CD229 or other SLAM-family members, it shares with them the capacity to mediate adhesion to leukocytes, specifically to monocyte-derived DCs. Furthermore, we demonstrate that UL7 expression attenuates the production of proinflammatory cytokines TNF, IL-8 and IL-6 in DCs and myeloid cell lines. Thus, the ability of UL7 to interfere with cellular proinflammatory responses may contribute to viral persistence. These results enhance our understanding of those HCMV-encoded molecules involved in sustaining the balance between HCMV and the host immune system. Immunology and Cell Biology advance online publication, 14 June 2011; doi:10.1038/icb.2011.55.}, keywords = {} } Human cytomegalovirus (HCMV), the β-herpesvirus prototype, has evolved a wide spectrum of mechanisms to counteract host immunity. Among them, HCMV uses cellular captured genes encoding molecules capable of interfering with the original host function or of fulfilling new immunomodulatory tasks. Here, we report on UL7, a novel HCMV heavily glycosylated transmembrane protein, containing an Ig-like domain that exhibits remarkable amino acid similarity to CD229, a cell-surface molecule of the signalling lymphocyte-activation molecule (SLAM) family involved in leukocyte activation. The UL7 Ig-like domain, which is well-preserved in all HCMV strains, structurally resembles the SLAM-family N-terminal Ig-variable domain responsible for the homophilic and heterophilic interactions that trigger signalling. UL7 is transcribed with early-late kinetics during the lytic infectious cycle. Using a mAb generated against the viral protein, we show that it is constitutively shed, through its mucine-like stalk, from the cell-surface. Production of soluble UL7 is enhanced by PMA and reduced by a broad-spectrum metalloproteinase inhibitor. 
Although UL7 does not hold the ability to interact with CD229 or other SLAM-family members, it shares with them the capacity to mediate adhesion to leukocytes, specifically to monocyte-derived DCs. Furthermore, we demonstrate that UL7 expression attenuates the production of proinflammatory cytokines TNF, IL-8 and IL-6 in DCs and myeloid cell lines. Thus, the ability of UL7 to interfere with cellular proinflammatory responses may contribute to viral persistence. These results enhance our understanding of those HCMV-encoded molecules involved in sustaining the balance between HCMV and the host immune system. Immunology and Cell Biology advance online publication, 14 June 2011; doi:10.1038/icb.2011.55. |
2010 |
Morcillo, C, Albà, MM, Navarro, A Genoma y enfermedades complejas (Incollection) Villoslada, Pablo (Ed.): Tratado de Esclerosis Múltiple, Marge Books, Barcelona, 2010. (BibTeX | Tags: ) @incollection{Morcillo2010, title = {Genoma y enfermedades complejas}, author = {Morcillo, C and Albà, MM and Navarro, A}, editor = {Pablo Villoslada}, year = {2010}, date = {2010-01-01}, booktitle = {Tratado de Esclerosis Múltiple}, publisher = {Marge Books}, address = {Barcelona}, keywords = {} } |
Mularoni, Loris, Ledda, Alice, Toll-Riera, Macarena, Albà, M Mar Natural selection drives the accumulation of amino acid tandem repeats in human proteins. (Article) Genome research, 20 (6), pp. 745–54, 2010, ISSN: 1549-5469. (Abstract | Links | BibTeX | Tags: Amino Acid, Amino Acid Sequence, Amino Acids, Amino Acids: chemistry, Amino Acids: genetics, Animals, Genetic, Humans, Molecular Sequence Data, Proteins, Proteins: chemistry, Proteins: genetics, Repetitive Sequences, Selection, Sequence Homology) @article{Mularoni2010, title = {Natural selection drives the accumulation of amino acid tandem repeats in human proteins.}, author = {Mularoni, Loris and Ledda, Alice and Toll-Riera, Macarena and Albà, M Mar}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2877571&tool=pmcentrez&rendertype=abstract}, issn = {1549-5469}, year = {2010}, date = {2010-01-01}, journal = {Genome research}, volume = {20}, number = {6}, pages = {745--54}, abstract = {Amino acid tandem repeats are found in a large number of eukaryotic proteins. They are often encoded by trinucleotide repeats and exhibit high intra- and interspecies size variability due to the high mutation rate associated with replication slippage. The extent to which natural selection is important in shaping amino acid repeat evolution is a matter of debate. On one hand, their high frequency may simply reflect their high probability of expansion by slippage, and they could essentially evolve in a neutral manner. On the other hand, there is experimental evidence that changes in repeat size can influence protein-protein interactions, transcriptional activity, or protein subcellular localization, indicating that repeats could be functionally relevant and thus shaped by selection. 
To gauge the relative contribution of neutral and selective forces in amino acid repeat evolution, we have performed a comparative analysis of amino acid repeat conservation in a large set of orthologous proteins from 12 vertebrate species. As a neutral model of repeat evolution we have used sequences with the same DNA triplet composition as the coding sequences--and thus expected to be subject to the same mutational forces--but located in syntenic noncoding genomic regions. The results strongly indicate that selection has played a more important role than previously suspected in amino acid tandem repeat evolution, by increasing the repeat retention rate and by modulating repeat size. The data obtained in this study have allowed us to identify a set of 92 repeats that are postulated to play important functional roles due to their strong selective signature, including five cases with direct experimental evidence.}, keywords = {Amino Acid, Amino Acid Sequence, Amino Acids, Amino Acids: chemistry, Amino Acids: genetics, Animals, Genetic, Humans, Molecular Sequence Data, Proteins, Proteins: chemistry, Proteins: genetics, Repetitive Sequences, Selection, Sequence Homology} } Amino acid tandem repeats are found in a large number of eukaryotic proteins. They are often encoded by trinucleotide repeats and exhibit high intra- and interspecies size variability due to the high mutation rate associated with replication slippage. The extent to which natural selection is important in shaping amino acid repeat evolution is a matter of debate. On one hand, their high frequency may simply reflect their high probability of expansion by slippage, and they could essentially evolve in a neutral manner. On the other hand, there is experimental evidence that changes in repeat size can influence protein-protein interactions, transcriptional activity, or protein subcellular localization, indicating that repeats could be functionally relevant and thus shaped by selection. 
To gauge the relative contribution of neutral and selective forces in amino acid repeat evolution, we have performed a comparative analysis of amino acid repeat conservation in a large set of orthologous proteins from 12 vertebrate species. As a neutral model of repeat evolution we have used sequences with the same DNA triplet composition as the coding sequences--and thus expected to be subject to the same mutational forces--but located in syntenic noncoding genomic regions. The results strongly indicate that selection has played a more important role than previously suspected in amino acid tandem repeat evolution, by increasing the repeat retention rate and by modulating repeat size. The data obtained in this study have allowed us to identify a set of 92 repeats that are postulated to play important functional roles due to their strong selective signature, including five cases with direct experimental evidence. |
Farré, Domènec, Albà, M Mar Heterogeneous patterns of gene-expression diversification in mammalian gene duplicates. (Article) Molecular biology and evolution, 27 (2), pp. 325–35, 2010, ISSN: 1537-1719. (Abstract | Links | BibTeX | Tags: Animals, Evolution, Gene Duplication, Genetic, Humans, Mammals, Mammals: genetics, Models, Molecular) @article{Farre2010, title = {Heterogeneous patterns of gene-expression diversification in mammalian gene duplicates.}, author = {Farré, Domènec and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/19822635}, issn = {1537-1719}, year = {2010}, date = {2010-01-01}, journal = {Molecular biology and evolution}, volume = {27}, number = {2}, pages = {325--35}, abstract = {Gene duplication is a major mechanism for molecular evolutionary innovation. Young gene duplicates typically exhibit elevated rates of protein evolution and, according to a number of recent studies, increased expression divergence. However, the nature of these changes is still poorly understood. To gain novel insights into the functional consequences of gene duplication, we have undertaken an in-depth analysis of a large data set of gene families containing primate- and/or rodent-specific gene duplicates. We have found a clear tendency toward an increase in protein, promoter, and expression divergence with increasing number of duplication events undergone by each gene since the human-mouse split. In addition, gene duplication is significantly associated with a reduction in expression breadth and intensity. Interestingly, it is possible to identify three main groups regarding the evolution of gene expression following gene duplication. The first group, which comprises around 25% of the families, shows patterns compatible with tissue-expression partitioning. The second and largest group, comprising 33-53% of the families, shows broad expression of one of the gene copies and reduced, overlapping, expression of the other copy or copies. 
This can be attributed, in most cases, to loss of expression in several tissues of one or more gene copies. Finally, a substantial number of families, 19-35%, maintain a very high level of tissue-expression overlap (>0.8) after tens of millions of years of evolution. These families may have been subject to selection for increased gene dosage.}, keywords = {Animals, Evolution, Gene Duplication, Genetic, Humans, Mammals, Mammals: genetics, Models, Molecular} } Gene duplication is a major mechanism for molecular evolutionary innovation. Young gene duplicates typically exhibit elevated rates of protein evolution and, according to a number of recent studies, increased expression divergence. However, the nature of these changes is still poorly understood. To gain novel insights into the functional consequences of gene duplication, we have undertaken an in-depth analysis of a large data set of gene families containing primate- and/or rodent-specific gene duplicates. We have found a clear tendency toward an increase in protein, promoter, and expression divergence with increasing number of duplication events undergone by each gene since the human-mouse split. In addition, gene duplication is significantly associated with a reduction in expression breadth and intensity. Interestingly, it is possible to identify three main groups regarding the evolution of gene expression following gene duplication. The first group, which comprises around 25% of the families, shows patterns compatible with tissue-expression partitioning. The second and largest group, comprising 33-53% of the families, shows broad expression of one of the gene copies and reduced, overlapping, expression of the other copy or copies. This can be attributed, in most cases, to loss of expression in several tissues of one or more gene copies. Finally, a substantial number of families, 19-35%, maintain a very high level of tissue-expression overlap (>0.8) after tens of millions of years of evolution. 
These families may have been subject to selection for increased gene dosage. |
2009 |
Toll-Riera M, Bosch N, Bellora N, Castelo R, Armengol L, Estivill X, Albà MM. Origin of primate orphan genes: a comparative genomics approach (Article) Molecular Biology and Evolution, 26 (3), pp. 603–612, 2009. (BibTeX | Tags: de novo gene, primate) @article{M2009, title = {Origin of primate orphan genes: a comparative genomics approach}, author = {Toll-Riera, M and Bosch, N and Bellora, N and Castelo, R and Armengol, L and Estivill, X and Albà, MM}, year = {2009}, date = {2009-03-01}, journal = {Molecular Biology and Evolution}, volume = {26}, number = {3}, pages = {603--612}, keywords = {de novo gene, primate} } |
Albà MM Temporal aspects of gene evolution (Incollection) Dopazo, H; Navarro, A (Ed.): Evolución y Adaptación: 150 años después del origen de las especies, Sociedad Española de Biología Evolutiva, Barcelona, 2009. (BibTeX | Tags: ) @incollection{AlbaMM2009, title = {Temporal aspects of gene evolution}, author = {Albà, MM}, editor = {Dopazo, H and Navarro, A}, year = {2009}, date = {2009-01-01}, booktitle = {Evolución y Adaptación: 150 años después del origen de las especies}, publisher = {Sociedad Española de Biología Evolutiva}, address = {Barcelona}, keywords = {} } |
Rodilla, Verónica, Villanueva, Alberto, Obrador-Hevia, Antonia, Robert-Moreno, Alex, Fernández-Majada, Vanessa, Grilli, Andrea, López-Bigas, Nuria, Bellora, Nicolás, Albà, M Mar, Torres, Ferran, Duñach, Mireia, Sanjuan, Xavier, Gonzalez, Sara, Gridley, Thomas, Capella, Gabriel, Bigas, Anna, Espinosa, Lluís Jagged1 is the pathological link between Wnt and Notch pathways in colorectal cancer. (Article) Proceedings of the National Academy of Sciences of the United States of America, 106 (15), pp. 6315–20, 2009, ISSN: 1091-6490. (Abstract | Links | BibTeX | Tags: Alleles, Animals, beta Catenin, beta Catenin: metabolism, Calcium-Binding Proteins, Calcium-Binding Proteins: genetics, Calcium-Binding Proteins: metabolism, Cell Line, Cell Nucleus, Cell Nucleus: metabolism, Colorectal Neoplasms, Colorectal Neoplasms: blood supply, Colorectal Neoplasms: genetics, Colorectal Neoplasms: metabolism, Colorectal Neoplasms: pathology, Gene Expression Profiling, Gene Expression Regulation, Genetic, Genetic: genetics, Humans, Intercellular Signaling Peptides and Proteins, Intercellular Signaling Peptides and Proteins: gen, Intercellular Signaling Peptides and Proteins: met, Membrane Proteins, Membrane Proteins: genetics, Membrane Proteins: metabolism, Mice, Neoplastic, Notch, Notch: metabolism, Receptors, Signal Transduction, TCF Transcription Factors, TCF Transcription Factors: metabolism, Transcription, Transgenic, Wnt Proteins, Wnt Proteins: metabolism) @article{Rodilla2009, title = {Jagged1 is the pathological link between Wnt and Notch pathways in colorectal cancer.}, author = {Rodilla, Verónica and Villanueva, Alberto and Obrador-Hevia, Antonia and Robert-Moreno, Alex and Fernández-Majada, Vanessa and Grilli, Andrea and López-Bigas, Nuria and Bellora, Nicolás and Albà, M Mar and Torres, Ferran and Duñach, Mireia and Sanjuan, Xavier and Gonzalez, Sara and Gridley, Thomas and Capella, Gabriel and Bigas, Anna and Espinosa, Lluís}, url = 
{http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2669348&tool=pmcentrez&rendertype=abstract}, issn = {1091-6490}, year = {2009}, date = {2009-01-01}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {106}, number = {15}, pages = {6315--20}, abstract = {Notch has been linked to beta-catenin-dependent tumorigenesis; however, the mechanisms leading to Notch activation and the contribution of the Notch pathway to colorectal cancer is not yet understood. By microarray analysis, we have identified a group of genes downstream of Wnt/beta-catenin (down-regulated when blocking Wnt/beta-catenin) that are directly regulated by Notch (repressed by gamma-secretase inhibitors and up-regulated by active Notch1 in the absence of beta-catenin signaling). We demonstrate that Notch is downstream of Wnt in colorectal cancer cells through beta-catenin-mediated transcriptional activation of the Notch-ligand Jagged1. Consistently, expression of activated Notch1 partially reverts the effects of blocking Wnt/beta-catenin pathway in tumors implanted s.c. in nude mice. Crossing APC(Min/+) with Jagged1(+/Delta) mice is sufficient to significantly reduce the size of the polyps arising in the APC mutant background indicating that Notch is an essential modulator of tumorigenesis induced by nuclear beta-catenin. We show that this mechanism is operating in human tumors from Familial Adenomatous Polyposis patients. We conclude that Notch activation, accomplished by beta-catenin-mediated up-regulation of Jagged1, is required for tumorigenesis in the intestine. 
The Notch-specific genetic signature is sufficient to block differentiation and promote vasculogenesis in tumors whereas proliferation depends on both pathways.}, keywords = {Alleles, Animals, beta Catenin, beta Catenin: metabolism, Calcium-Binding Proteins, Calcium-Binding Proteins: genetics, Calcium-Binding Proteins: metabolism, Cell Line, Cell Nucleus, Cell Nucleus: metabolism, Colorectal Neoplasms, Colorectal Neoplasms: blood supply, Colorectal Neoplasms: genetics, Colorectal Neoplasms: metabolism, Colorectal Neoplasms: pathology, Gene Expression Profiling, Gene Expression Regulation, Genetic, Genetic: genetics, Humans, Intercellular Signaling Peptides and Proteins, Intercellular Signaling Peptides and Proteins: gen, Intercellular Signaling Peptides and Proteins: met, Membrane Proteins, Membrane Proteins: genetics, Membrane Proteins: metabolism, Mice, Neoplastic, Notch, Notch: metabolism, Receptors, Signal Transduction, TCF Transcription Factors, TCF Transcription Factors: metabolism, Transcription, Transgenic, Wnt Proteins, Wnt Proteins: metabolism} } Notch has been linked to beta-catenin-dependent tumorigenesis; however, the mechanisms leading to Notch activation and the contribution of the Notch pathway to colorectal cancer is not yet understood. By microarray analysis, we have identified a group of genes downstream of Wnt/beta-catenin (down-regulated when blocking Wnt/beta-catenin) that are directly regulated by Notch (repressed by gamma-secretase inhibitors and up-regulated by active Notch1 in the absence of beta-catenin signaling). We demonstrate that Notch is downstream of Wnt in colorectal cancer cells through beta-catenin-mediated transcriptional activation of the Notch-ligand Jagged1. Consistently, expression of activated Notch1 partially reverts the effects of blocking Wnt/beta-catenin pathway in tumors implanted s.c. in nude mice. 
Crossing APC(Min/+) with Jagged1(+/Delta) mice is sufficient to significantly reduce the size of the polyps arising in the APC mutant background indicating that Notch is an essential modulator of tumorigenesis induced by nuclear beta-catenin. We show that this mechanism is operating in human tumors from Familial Adenomatous Polyposis patients. We conclude that Notch activation, accomplished by beta-catenin-mediated up-regulation of Jagged1, is required for tumorigenesis in the intestine. The Notch-specific genetic signature is sufficient to block differentiation and promote vasculogenesis in tumors whereas proliferation depends on both pathways. |
Salichs, Eulàlia, Ledda, Alice, Mularoni, Loris, Albà, M Mar, de la Luna, Susana PLoS genetics, 5 (3), pp. e1000397, 2009, ISSN: 1553-7404. (Abstract | Links | BibTeX | Tags: Amino Acids, Cell Line, Cell Nucleus, Cell Nucleus: chemistry, Cell Nucleus: genetics, Cell Nucleus: metabolism, Genome, Histidine, Histidine: chemistry, Histidine: genetics, Histidine: metabolism, human, Humans, Molecular Sequence Data, Nuclear Localization Signals, Nuclear Proteins, Nuclear Proteins: chemistry, Nuclear Proteins: genetics, Nuclear Proteins: metabolism, Protein Transport, Proteins, Proteins: chemistry, Proteins: genetics, Proteins: metabolism, Sequence Alignment, Tandem Repeat Sequences) @article{Salichs2009, title = {Genome-wide analysis of histidine repeats reveals their role in the localization of human proteins to the nuclear speckles compartment.}, author = {Salichs, Eulàlia and Ledda, Alice and Mularoni, Loris and Albà, M Mar and de la Luna, Susana}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2644819&tool=pmcentrez&rendertype=abstract}, issn = {1553-7404}, year = {2009}, date = {2009-01-01}, journal = {PLoS genetics}, volume = {5}, number = {3}, pages = {e1000397}, abstract = {Single amino acid repeats are prevalent in eukaryote organisms, although the role of many such sequences is still poorly understood. We have performed a comprehensive analysis of the proteins containing homopolymeric histidine tracts in the human genome and identified 86 human proteins that contain stretches of five or more histidines. Most of them are endowed with DNA- and RNA-related functions, and, in addition, there is an overrepresentation of proteins expressed in the brain and/or nervous system development. An analysis of their subcellular localization shows that 15 of the 22 nuclear proteins identified accumulate in the nuclear subcompartment known as nuclear speckles. 
This localization is lost when the histidine repeat is deleted, and significantly, closely related paralogous proteins without histidine repeats also fail to localize to nuclear speckles. Hence, the histidine tract appears to be directly involved in targeting proteins to this compartment. The removal of DNA-binding domains or treatment with RNA polymerase II inhibitors induces the re-localization of several polyhistidine-containing proteins from the nucleoplasm to nuclear speckles. These findings highlight the dynamic relationship between sites of transcription and nuclear speckles. Therefore, we define the histidine repeats as a novel targeting signal for nuclear speckles, and we suggest that these repeats are a way of generating evolutionary diversification in gene duplicates. These data contribute to our better understanding of the physiological role of single amino acid repeats in proteins.}, keywords = {Amino Acids, Cell Line, Cell Nucleus, Cell Nucleus: chemistry, Cell Nucleus: genetics, Cell Nucleus: metabolism, Genome, Histidine, Histidine: chemistry, Histidine: genetics, Histidine: metabolism, human, Humans, Molecular Sequence Data, Nuclear Localization Signals, Nuclear Proteins, Nuclear Proteins: chemistry, Nuclear Proteins: genetics, Nuclear Proteins: metabolism, Protein Transport, Proteins, Proteins: chemistry, Proteins: genetics, Proteins: metabolism, Sequence Alignment, Tandem Repeat Sequences} } Single amino acid repeats are prevalent in eukaryote organisms, although the role of many such sequences is still poorly understood. We have performed a comprehensive analysis of the proteins containing homopolymeric histidine tracts in the human genome and identified 86 human proteins that contain stretches of five or more histidines. Most of them are endowed with DNA- and RNA-related functions, and, in addition, there is an overrepresentation of proteins expressed in the brain and/or nervous system development. 
An analysis of their subcellular localization shows that 15 of the 22 nuclear proteins identified accumulate in the nuclear subcompartment known as nuclear speckles. This localization is lost when the histidine repeat is deleted, and significantly, closely related paralogous proteins without histidine repeats also fail to localize to nuclear speckles. Hence, the histidine tract appears to be directly involved in targeting proteins to this compartment. The removal of DNA-binding domains or treatment with RNA polymerase II inhibitors induces the re-localization of several polyhistidine-containing proteins from the nucleoplasm to nuclear speckles. These findings highlight the dynamic relationship between sites of transcription and nuclear speckles. Therefore, we define the histidine repeats as a novel targeting signal for nuclear speckles, and we suggest that these repeats are a way of generating evolutionary diversification in gene duplicates. These data contribute to our better understanding of the physiological role of single amino acid repeats in proteins. |
Toll-Riera, Macarena, Castelo, Robert, Bellora, Nicolás, Albà, M Mar Evolution of primate orphan proteins. (Article) Biochemical Society transactions, 37 (Pt 4), pp. 778–82, 2009, ISSN: 1470-8752. (Abstract | Links | BibTeX | Tags: Animals, Evolution, Gene Duplication, Genome, Genome: genetics, Molecular, Primates, Primates: genetics, Proteins, Proteins: genetics) @article{Toll-Riera2009, title = {Evolution of primate orphan proteins.}, author = {Toll-Riera, Macarena and Castelo, Robert and Bellora, Nicolás and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/19614593}, issn = {1470-8752}, year = {2009}, date = {2009-01-01}, journal = {Biochemical Society transactions}, volume = {37}, number = {Pt 4}, pages = {778--82}, abstract = {Genomes contain a large number of genes that do not have recognizable homologues in other species. These genes, found in only one or a few closely related species, are known as orphan genes. Their limited distribution implies that many of them are probably involved in lineage-specific adaptive processes. One important question that has remained elusive to date is how orphan genes originate. It has been proposed that they might have arisen by gene duplication followed by a period of very rapid sequence divergence, which would have erased any traces of similarity to other evolutionarily related genes. However, this explanation does not seem plausible for genes lacking homologues in very closely related species. In the present article, we review recent efforts to identify the mechanisms of formation of primate orphan genes. These studies reveal an unexpected important role of transposable elements in the formation of novel protein-coding genes in the genomes of primates.}, keywords = {Animals, Evolution, Gene Duplication, Genome, Genome: genetics, Molecular, Primates, Primates: genetics, Proteins, Proteins: genetics} } Genomes contain a large number of genes that do not have recognizable homologues in other species. 
These genes, found in only one or a few closely related species, are known as orphan genes. Their limited distribution implies that many of them are probably involved in lineage-specific adaptive processes. One important question that has remained elusive to date is how orphan genes originate. It has been proposed that they might have arisen by gene duplication followed by a period of very rapid sequence divergence, which would have erased any traces of similarity to other evolutionarily related genes. However, this explanation does not seem plausible for genes lacking homologues in very closely related species. In the present article, we review recent efforts to identify the mechanisms of formation of primate orphan genes. These studies reveal an unexpected important role of transposable elements in the formation of novel protein-coding genes in the genomes of primates. |
2008 |
Mularoni, L, Toll-Riera, M, Albà, MM Trinucleotide repeats in the human and ape genomes (Incollection) Kehrer-Sawatzki, H; Cooper, D (Ed.): Handbook of human molecular evolution 2, John Wiley and Sons, Chichester, 2008. (BibTeX | Tags: ) @incollection{Mularoni2008, title = {Trinucleotide repeats in the human and ape genomes}, author = {Mularoni, L and Toll-Riera, M and Albà, MM}, editor = {Kehrer-Sawatzki, H and Cooper, D}, year = {2008}, date = {2008-01-01}, booktitle = {Handbook of human molecular evolution 2}, publisher = {John Wiley and Sons}, address = {Chichester}, keywords = {} } |
Mularoni, L, Toll-Riera, M, Albà, MM Trinucleotide repeats in human and ape genomes (Incollection) Encyclopedia of Life Sciences, John Wiley & Sons, Ltd, Chichester, UK, 2008, ISBN: 0470016175. @incollection{Mularoni2008a, title = {Trinucleotide repeats in human and ape genomes}, author = {Mularoni, L and Toll-Riera, M and Albà, MM}, url = {http://onlinelibrary.wiley.com/doi/10.1002/9780470015902.a0020844/full}, isbn = {0470016175}, year = {2008}, date = {2008-01-01}, booktitle = {Encyclopedia of Life Sciences}, publisher = {John Wiley \& Sons, Ltd}, address = {Chichester, UK}, keywords = {} } |
Racz, Ildiko, Nadal, Xavier, Alferink, Judith, Baños, Josep E, Rehnelt, Jennifer, Martín, Miquel, Pintado, Belén, Gutierrez-Adan, Alfonso, Sanguino, Elena, Bellora, Nicolas, Manzanares, Jorge, Zimmer, Andreas, Maldonado, Rafael The Journal of neuroscience : the official journal of the Society for Neuroscience, 28 (46), pp. 12136–45, 2008, ISSN: 1529-2401. (Abstract | Links | BibTeX | Tags: Animals, Astrocytes, Astrocytes: immunology, Cannabinoid, CB2, CB2: genetics, CB2: immunology, CB2: metabolism, CCR2, CCR2: immunology, CCR2: metabolism, Cells, Cultured, Gene Knockout Techniques, Gene Knockout Techniques: methods, Hyperalgesia, Hyperalgesia: immunology, Hyperalgesia: physiopathology, Interferon-gamma, Interferon-gamma: genetics, Interferon-gamma: immunology, Interferon-gamma: metabolism, Knockout, Male, Mice, Microglia, Microglia: drug effects, Microglia: immunology, Microglia: metabolism, Neuralgia, Neuralgia: genetics, Neuralgia: immunology, Neuralgia: metabolism, Neurons, Neurons: immunology, Nitric Oxide Synthase Type II, Nitric Oxide Synthase Type II: immunology, Nitric Oxide Synthase Type II: metabolism, Peripheral Nerves, Peripheral Nerves: immunology, Peripheral Nerves: injuries, Peripheral Nerves: physiopathology, Peripheral Nervous System Diseases, Peripheral Nervous System Diseases: genetics, Peripheral Nervous System Diseases: immunology, Peripheral Nervous System Diseases: metabolism, Receptor, Receptors, Signal Transduction, Signal Transduction: genetics, Signal Transduction: immunology, Spinal Cord, Spinal Cord: immunology, Spinal Cord: metabolism, Spinal Cord: physiopathology, Up-Regulation, Up-Regulation: immunology) @article{Racz2008, title = {Interferon-gamma is a critical modulator of CB(2) cannabinoid receptor signaling during neuropathic pain.}, author = {Racz, Ildiko and Nadal, Xavier and Alferink, Judith and Baños, Josep E and Rehnelt, Jennifer and Martín, Miquel and Pintado, Belén and Gutierrez-Adan, Alfonso and Sanguino, Elena and 
Bellora, Nicolas and Manzanares, Jorge and Zimmer, Andreas and Maldonado, Rafael}, url = {http://www.ncbi.nlm.nih.gov/pubmed/19005078}, issn = {1529-2401}, year = {2008}, date = {2008-01-01}, journal = {The Journal of neuroscience : the official journal of the Society for Neuroscience}, volume = {28}, number = {46}, pages = {12136--45}, abstract = {Nerve injuries often lead to neuropathic pain syndrome. The mechanisms contributing to this syndrome involve local inflammatory responses, activation of glia cells, and changes in the plasticity of neuronal nociceptive pathways. Cannabinoid CB(2) receptors contribute to the local containment of neuropathic pain by modulating glial activation in response to nerve injury. Thus, neuropathic pain spreads in mice lacking CB(2) receptors beyond the site of nerve injury. To further investigate the mechanisms leading to the enhanced manifestation of neuropathic pain, we have established expression profiles of spinal cord tissues from wild-type and CB(2)-deficient mice after nerve injury. An enhanced interferon-gamma (IFN-gamma) response was revealed in the absence of CB(2) signaling. Immunofluorescence stainings demonstrated an IFN-gamma production by astrocytes and neurons ipsilateral to the nerve injury in wild-type animals. In contrast, CB(2)-deficient mice showed neuronal and astrocytic IFN-gamma immunoreactivity also in the contralateral region, thus matching the pattern of nociceptive hypersensitivity in these animals. Experiments in BV-2 microglia cells revealed that transcriptional changes induced by IFN-gamma in two key elements for neuropathic pain development, iNOS (inducible nitric oxide synthase) and CCR2, are modulated by CB(2) receptor signaling. The most direct support for a functional involvement of IFN-gamma as a mediator of CB(2) signaling was obtained with a double knock-out mouse strain deficient in CB(2) receptors and IFN-gamma. 
These animals no longer show the enhanced manifestations of neuropathic pain observed in CB(2) knock-outs. These data clearly demonstrate that the CB(2) receptor-mediated control of neuropathic pain is IFN-gamma dependent.}, keywords = {Animals, Astrocytes, Astrocytes: immunology, Cannabinoid, CB2, CB2: genetics, CB2: immunology, CB2: metabolism, CCR2, CCR2: immunology, CCR2: metabolism, Cells, Cultured, Gene Knockout Techniques, Gene Knockout Techniques: methods, Hyperalgesia, Hyperalgesia: immunology, Hyperalgesia: physiopathology, Interferon-gamma, Interferon-gamma: genetics, Interferon-gamma: immunology, Interferon-gamma: metabolism, Knockout, Male, Mice, Microglia, Microglia: drug effects, Microglia: immunology, Microglia: metabolism, Neuralgia, Neuralgia: genetics, Neuralgia: immunology, Neuralgia: metabolism, Neurons, Neurons: immunology, Nitric Oxide Synthase Type II, Nitric Oxide Synthase Type II: immunology, Nitric Oxide Synthase Type II: metabolism, Peripheral Nerves, Peripheral Nerves: immunology, Peripheral Nerves: injuries, Peripheral Nerves: physiopathology, Peripheral Nervous System Diseases, Peripheral Nervous System Diseases: genetics, Peripheral Nervous System Diseases: immunology, Peripheral Nervous System Diseases: metabolism, Receptor, Receptors, Signal Transduction, Signal Transduction: genetics, Signal Transduction: immunology, Spinal Cord, Spinal Cord: immunology, Spinal Cord: metabolism, Spinal Cord: physiopathology, Up-Regulation, Up-Regulation: immunology} } Nerve injuries often lead to neuropathic pain syndrome. The mechanisms contributing to this syndrome involve local inflammatory responses, activation of glia cells, and changes in the plasticity of neuronal nociceptive pathways. Cannabinoid CB(2) receptors contribute to the local containment of neuropathic pain by modulating glial activation in response to nerve injury. Thus, neuropathic pain spreads in mice lacking CB(2) receptors beyond the site of nerve injury. 
To further investigate the mechanisms leading to the enhanced manifestation of neuropathic pain, we have established expression profiles of spinal cord tissues from wild-type and CB(2)-deficient mice after nerve injury. An enhanced interferon-gamma (IFN-gamma) response was revealed in the absence of CB(2) signaling. Immunofluorescence stainings demonstrated an IFN-gamma production by astrocytes and neurons ipsilateral to the nerve injury in wild-type animals. In contrast, CB(2)-deficient mice showed neuronal and astrocytic IFN-gamma immunoreactivity also in the contralateral region, thus matching the pattern of nociceptive hypersensitivity in these animals. Experiments in BV-2 microglia cells revealed that transcriptional changes induced by IFN-gamma in two key elements for neuropathic pain development, iNOS (inducible nitric oxide synthase) and CCR2, are modulated by CB(2) receptor signaling. The most direct support for a functional involvement of IFN-gamma as a mediator of CB(2) signaling was obtained with a double knock-out mouse strain deficient in CB(2) receptors and IFN-gamma. These animals no longer show the enhanced manifestations of neuropathic pain observed in CB(2) knock-outs. These data clearly demonstrate that the CB(2) receptor-mediated control of neuropathic pain is IFN-gamma dependent. |
Toll-Riera, Macarena, Castresana, Jose, Albà, M. Mar Evolutionary Biology from Concept to Application (Book) Springer Berlin Heidelberg, Berlin, Heidelberg, 2008, ISBN: 978-3-540-78992-5. (Abstract | Links | BibTeX | Tags: Biomedical and Life Sciences) @book{Toll-Riera2008, title = {Evolutionary Biology from Concept to Application}, author = {Toll-Riera, Macarena and Castresana, Jose and Albà, M. Mar}, editor = {Pontarotti, Pierre}, url = {http://www.springerlink.com/content/m85w5421t3x0xm22/}, isbn = {978-3-540-78992-5}, year = {2008}, date = {2008-01-01}, pages = {45--59}, publisher = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, abstract = {The gene content of any genome is a rich mosaic of genes that have originated at different times during evolution. Among the most interesting properties related to gene age is the fact that younger genes tend to show accelerated evolutionary rates with respect to older genes. Here, we use a large number of closely related mammalian genomes to gain further insights into the relationship between gene age and evolutionary rate. We define a group of primate-specific genes that are absent from 11 non-primate mammalian genomes as well as from other eukaryotic genomes. These genes, of very recent origin, show the highest evolutionary rate and the shortest protein length. We discuss how these results may shed light on understanding the proposed mechanisms for the origin of lineage-specific, novel genes.}, keywords = {Biomedical and Life Sciences} } The gene content of any genome is a rich mosaic of genes that have originated at different times during evolution. Among the most interesting properties related to gene age is the fact that younger genes tend to show accelerated evolutionary rates with respect to older genes. Here, we use a large number of closely related mammalian genomes to gain further insights into the relationship between gene age and evolutionary rate. 
We define a group of primate-specific genes that are absent from 11 non-primate mammalian genomes as well as from other eukaryotic genomes. These genes, of very recent origin, show the highest evolutionary rate and the shortest protein length. We discuss how these results may shed light on understanding the proposed mechanisms for the origin of lineage-specific, novel genes. |
2007 |
Bellora, Nicolás, Farré, Domènec, Albà, M Mar Positional bias of general and tissue-specific regulatory motifs in mouse gene promoters. (Article) BMC genomics, 8 pp. 459, 2007, ISSN: 1471-2164. (Abstract | Links | BibTeX | Tags: Animals, Databases, Gene Expression Regulation, Gene Expression Regulation: genetics, Genetic, Genetic: genetics, Mice, Nucleic Acid, Organ Specificity, Organ Specificity: genetics, Promoter Regions, Software, Transcription Factors, Transcription Factors: metabolism) @article{Bellora2007, title = {Positional bias of general and tissue-specific regulatory motifs in mouse gene promoters.}, author = {Bellora, Nicolás and Farré, Domènec and Albà, M Mar}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2249607&tool=pmcentrez&rendertype=abstract}, issn = {1471-2164}, year = {2007}, date = {2007-01-01}, journal = {BMC genomics}, volume = {8}, pages = {459}, abstract = {The arrangement of regulatory motifs in gene promoters, or promoter architecture, is the result of mutation and selection processes that have operated over many millions of years. In mammals, tissue-specific transcriptional regulation is related to the presence of specific protein-interacting DNA motifs in gene promoters. However, little is known about the relative location and spacing of these motifs. To fill this gap, we have performed a systematic search for motifs that show significant bias at specific promoter locations in a large collection of housekeeping and tissue-specific genes.}, keywords = {Animals, Databases, Gene Expression Regulation, Gene Expression Regulation: genetics, Genetic, Genetic: genetics, Mice, Nucleic Acid, Organ Specificity, Organ Specificity: genetics, Promoter Regions, Software, Transcription Factors, Transcription Factors: metabolism} } The arrangement of regulatory motifs in gene promoters, or promoter architecture, is the result of mutation and selection processes that have operated over many millions of years. 
In mammals, tissue-specific transcriptional regulation is related to the presence of specific protein-interacting DNA motifs in gene promoters. However, little is known about the relative location and spacing of these motifs. To fill this gap, we have performed a systematic search for motifs that show significant bias at specific promoter locations in a large collection of housekeeping and tissue-specific genes. |
Bellora, Nicolás, Farré, Domènec, Albà, M Mar PEAKS: identification of regulatory motifs by their position in DNA sequences. (Article) Bioinformatics (Oxford, England), 23 (2), pp. 243–4, 2007, ISSN: 1367-4811. (Abstract | Links | BibTeX | Tags: Algorithms, Automated, Automated: methods, Base Sequence, Chromosome Mapping, Chromosome Mapping: methods, DNA, DNA: genetics, DNA: methods, Molecular Sequence Data, Nucleic Acid, Nucleic Acid: genetics, Pattern Recognition, Regulatory Sequences, Sequence Alignment, Sequence Alignment: methods, Sequence Analysis, Software, Transcriptional Activation, Transcriptional Activation: genetics) @article{Bellora2007a, title = {PEAKS: identification of regulatory motifs by their position in DNA sequences.}, author = {Bellora, Nicolás and Farré, Domènec and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/17098773}, issn = {1367-4811}, year = {2007}, date = {2007-01-01}, journal = {Bioinformatics (Oxford, England)}, volume = {23}, number = {2}, pages = {243--4}, abstract = {Many DNA functional motifs tend to accumulate or cluster at specific gene locations. These locations can be detected, in a group of gene sequences, as high frequency 'peaks' with respect to a reference position, such as the transcription start site (TSS). We have developed a web tool for the identification of regions containing significant motif peaks. We show, by using different yeast gene datasets, that peak regions are strongly enriched in experimentally-validated motifs and contain potentially important novel motifs. 
AVAILABILITY: http://genomics.imim.es/peaks}, keywords = {Algorithms, Automated, Automated: methods, Base Sequence, Chromosome Mapping, Chromosome Mapping: methods, DNA, DNA: genetics, DNA: methods, Molecular Sequence Data, Nucleic Acid, Nucleic Acid: genetics, Pattern Recognition, Regulatory Sequences, Sequence Alignment, Sequence Alignment: methods, Sequence Analysis, Software, Transcriptional Activation, Transcriptional Activation: genetics} } Many DNA functional motifs tend to accumulate or cluster at specific gene locations. These locations can be detected, in a group of gene sequences, as high frequency 'peaks' with respect to a reference position, such as the transcription start site (TSS). We have developed a web tool for the identification of regions containing significant motif peaks. We show, by using different yeast gene datasets, that peak regions are strongly enriched in experimentally-validated motifs and contain potentially important novel motifs. AVAILABILITY: http://genomics.imim.es/peaks |
Albà, M M, Tompa, P, Veitia, R A Amino acid repeats and the structure and evolution of proteins. (Article) Genome dynamics, 3 pp. 119–30, 2007, ISSN: 1660-9263. (Abstract | Links | BibTeX | Tags: Amino Acid, Animals, Base Composition, Evolution, Humans, Molecular, Open Reading Frames, Open Reading Frames: genetics, Peptides, Peptides: chemistry, Proteins, Proteins: chemistry, Proteins: genetics, Repetitive Sequences) @article{Alba2007a, title = {Amino acid repeats and the structure and evolution of proteins.}, author = {Albà, M M and Tompa, P and Veitia, R A}, url = {http://www.ncbi.nlm.nih.gov/pubmed/18753788}, issn = {1660-9263}, year = {2007}, date = {2007-01-01}, journal = {Genome dynamics}, volume = {3}, pages = {119--30}, abstract = {Many proteins have repeats or runs of single amino acids. The pathogenicity of some repeat expansions has fueled proteomic, genomic and structural explorations of homopolymeric runs not only in human but in a wide variety of other organisms. Other types of amino acid repetitive structures exhibit more complex patterns than homopeptides. Irrespective of their precise organization, repetitive sequences are defined as low complexity or simple sequences, as one or a few residues are particularly abundant. Prokaryotes show a relatively low frequency of simple sequences compared to eukaryotes. In the latter the percentage of proteins containing homopolymeric runs varies greatly from one group to another. For instance, within vertebrates, amino acid repeat frequency is much higher in mammals than in amphibians, birds or fishes. For some repeats, this is correlated with the GC-richness of the regions containing the corresponding genes. Homopeptides tend to occur in disordered regions of transcription factors or developmental proteins. They can trigger the formation of protein aggregates, particularly in 'disease' proteins. Simple sequences seem to evolve more rapidly than the rest of the protein/gene and may have a functional impact. 
Therefore, they are good candidates to promote rapid evolutionary changes. All these diverse facets of homopolymeric runs are explored in this review.}, keywords = {Amino Acid, Animals, Base Composition, Evolution, Humans, Molecular, Open Reading Frames, Open Reading Frames: genetics, Peptides, Peptides: chemistry, Proteins, Proteins: chemistry, Proteins: genetics, Repetitive Sequences} } Many proteins have repeats or runs of single amino acids. The pathogenicity of some repeat expansions has fueled proteomic, genomic and structural explorations of homopolymeric runs not only in human but in a wide variety of other organisms. Other types of amino acid repetitive structures exhibit more complex patterns than homopeptides. Irrespective of their precise organization, repetitive sequences are defined as low complexity or simple sequences, as one or a few residues are particularly abundant. Prokaryotes show a relatively low frequency of simple sequences compared to eukaryotes. In the latter the percentage of proteins containing homopolymeric runs varies greatly from one group to another. For instance, within vertebrates, amino acid repeat frequency is much higher in mammals than in amphibians, birds or fishes. For some repeats, this is correlated with the GC-richness of the regions containing the corresponding genes. Homopeptides tend to occur in disordered regions of transcription factors or developmental proteins. They can trigger the formation of protein aggregates, particularly in 'disease' proteins. Simple sequences seem to evolve more rapidly than the rest of the protein/gene and may have a functional impact. Therefore, they are good candidates to promote rapid evolutionary changes. All these diverse facets of homopolymeric runs are explored in this review. |
Mularoni, Loris, Veitia, Reiner A, Albà, M Mar Highly constrained proteins contain an unexpectedly large number of amino acid tandem repeats. (Article) Genomics, 89 (3), pp. 316–25, 2007, ISSN: 0888-7543. (Abstract | Links | BibTeX | Tags: Amino Acid, Amino Acid Sequence, Animals, Complementary, Conserved Sequence, DNA, Evolution, Genetic, Humans, Mice, Molecular, Point Mutation, Proteins, Proteins: chemistry, Proteins: genetics, Repetitive Sequences, Selection, Trinucleotide Repeats) @article{Mularoni2007, title = {Highly constrained proteins contain an unexpectedly large number of amino acid tandem repeats.}, author = {Mularoni, Loris and Veitia, Reiner A and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/17196365}, issn = {0888-7543}, year = {2007}, date = {2007-01-01}, journal = {Genomics}, volume = {89}, number = {3}, pages = {316--25}, abstract = {Single-amino-acid tandem repeats are very common in mammalian proteins but their function and evolution are still poorly understood. Here we investigate how the variability and prevalence of amino acid repeats are related to the evolutionary constraints operating on the proteins. We find a significant positive correlation between repeat size difference and protein nonsynonymous substitution rate in human and mouse orthologous genes. This association is observed for all the common amino acid repeat types and indicates that rapid diversification of repeat structures, involving both trinucleotide slippage and nucleotide substitutions, preferentially occurs in proteins subject to low selective constraints. However, strikingly, we also observe a significant negative correlation between the number of repeats in a protein and the gene nonsynonymous substitution rate, particularly for glutamine, glycine, and alanine repeats. This implies that proteins subject to strong selective constraints tend to contain an unexpectedly high number of repeats, which tend to be well conserved between the two species. 
This is consistent with a role for selection in the maintenance of a significant number of repeats. Analysis of the codon structure of the sequences encoding the repeats shows that codon purity is associated with high repeat size interspecific variability. Interestingly, polyalanine and polyglutamine repeats associated with disease show very distinctive features regarding the degree of repeat conservation and the protein sequence selective constraints.}, keywords = {Amino Acid, Amino Acid Sequence, Animals, Complementary, Conserved Sequence, DNA, Evolution, Genetic, Humans, Mice, Molecular, Point Mutation, Proteins, Proteins: chemistry, Proteins: genetics, Repetitive Sequences, Selection, Trinucleotide Repeats} } Single-amino-acid tandem repeats are very common in mammalian proteins but their function and evolution are still poorly understood. Here we investigate how the variability and prevalence of amino acid repeats are related to the evolutionary constraints operating on the proteins. We find a significant positive correlation between repeat size difference and protein nonsynonymous substitution rate in human and mouse orthologous genes. This association is observed for all the common amino acid repeat types and indicates that rapid diversification of repeat structures, involving both trinucleotide slippage and nucleotide substitutions, preferentially occurs in proteins subject to low selective constraints. However, strikingly, we also observe a significant negative correlation between the number of repeats in a protein and the gene nonsynonymous substitution rate, particularly for glutamine, glycine, and alanine repeats. This implies that proteins subject to strong selective constraints tend to contain an unexpectedly high number of repeats, which tend to be well conserved between the two species. This is consistent with a role for selection in the maintenance of a significant number of repeats. 
Analysis of the codon structure of the sequences encoding the repeats shows that codon purity is associated with high repeat size interspecific variability. Interestingly, polyalanine and polyglutamine repeats associated with disease show very distinctive features regarding the degree of repeat conservation and the protein sequence selective constraints. |
Farré, Domènec, Bellora, Nicolás, Mularoni, Loris, Messeguer, Xavier, Albà, M Mar Housekeeping genes tend to show reduced upstream sequence conservation. (Article) Genome biology, 8 (7), pp. R140, 2007, ISSN: 1465-6914. (Abstract | Links | BibTeX | Tags: Animals, Base Sequence, Conserved Sequence, CpG Islands, Evolution, Gene Expression, Genetic, Genetic Variation, Humans, Mice, Molecular, Molecular Sequence Data, Promoter Regions) @article{Farre2007, title = {Housekeeping genes tend to show reduced upstream sequence conservation.}, author = {Farré, Domènec and Bellora, Nicolás and Mularoni, Loris and Messeguer, Xavier and Albà, M Mar}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2323216&tool=pmcentrez&rendertype=abstract}, issn = {1465-6914}, year = {2007}, date = {2007-01-01}, journal = {Genome biology}, volume = {8}, number = {7}, pages = {R140}, abstract = {Understanding the constraints that operate in mammalian gene promoter sequences is of key importance to understand the evolution of gene regulatory networks. The level of promoter conservation varies greatly across orthologous genes, denoting differences in the strength of the evolutionary constraints. Here we test the hypothesis that the number of tissues in which a gene is expressed is related in a significant manner to the extent of promoter sequence conservation.}, keywords = {Animals, Base Sequence, Conserved Sequence, CpG Islands, Evolution, Gene Expression, Genetic, Genetic Variation, Humans, Mice, Molecular, Molecular Sequence Data, Promoter Regions} } Understanding the constraints that operate in mammalian gene promoter sequences is of key importance to understand the evolution of gene regulatory networks. The level of promoter conservation varies greatly across orthologous genes, denoting differences in the strength of the evolutionary constraints. 
Here we test the hypothesis that the number of tissues in which a gene is expressed is related in a significant manner to the extent of promoter sequence conservation. |
Albà, M Mar, Castresana, Jose On homology searches by protein Blast and the characterization of the age of genes. (Article) BMC evolutionary biology, 7, pp. 53, 2007, ISSN: 1471-2148. (Abstract | Links | BibTeX | Tags: Amino Acid, Animals, Computational Biology, Databases, Evolution, Genes, Humans, Molecular, Phylogeny, Protein, Sequence Analysis, Sequence Homology) @article{Alba2007, title = {On homology searches by protein Blast and the characterization of the age of genes.}, author = {Albà, M Mar and Castresana, Jose}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1855329&tool=pmcentrez&rendertype=abstract}, issn = {1471-2148}, year = {2007}, date = {2007-01-01}, journal = {BMC evolutionary biology}, volume = {7}, pages = {53}, abstract = {It has been shown in a variety of organisms, including mammals, that genes that appeared recently in evolution, for example orphan genes, evolve faster than older genes. Low functional constraints at the time of origin of novel genes may explain these results. However, this observation has been recently attributed to an artifact caused by the inability of Blast to detect the fastest genes in different eukaryotic genomes. Distinguishing between these two possible explanations would be of great importance for any studies dealing with the taxon distribution of proteins and the origin of novel genes.}, keywords = {Amino Acid, Animals, Computational Biology, Databases, Evolution, Genes, Humans, Molecular, Phylogeny, Protein, Sequence Analysis, Sequence Homology} } It has been shown in a variety of organisms, including mammals, that genes that appeared recently in evolution, for example orphan genes, evolve faster than older genes. Low functional constraints at the time of origin of novel genes may explain these results. However, this observation has been recently attributed to an artifact caused by the inability of Blast to detect the fastest genes in different eukaryotic genomes. 
Distinguishing between these two possible explanations would be of great importance for any studies dealing with the taxon distribution of proteins and the origin of novel genes. |
2006 |
Blanco, Enrique, Farré, Domènec, Albà, M Mar, Messeguer, Xavier, Guigó, Roderic ABS: a database of Annotated regulatory Binding Sites from orthologous promoters. (Article) Nucleic acids research, 34 (Database issue), pp. D63–7, 2006, ISSN: 1362-4962. (Abstract | Links | BibTeX | Tags: Animals, Binding Sites, Chickens, Chickens: genetics, Databases, Genetic, Genomics, Humans, Internet, Mice, Nucleic Acid, Promoter Regions, Rats, Transcription Factors, Transcription Factors: metabolism, User-Computer Interface) @article{Blanco2006, title = {ABS: a database of Annotated regulatory Binding Sites from orthologous promoters.}, author = {Blanco, Enrique and Farré, Domènec and Albà, M Mar and Messeguer, Xavier and Guigó, Roderic}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1347478&tool=pmcentrez&rendertype=abstract}, issn = {1362-4962}, year = {2006}, date = {2006-01-01}, journal = {Nucleic acids research}, volume = {34}, number = {Database issue}, pages = {D63--7}, abstract = {Information about the genomic coordinates and the sequence of experimentally identified transcription factor binding sites is found scattered under a variety of diverse formats. The availability of standard collections of such high-quality data is important to design, evaluate and improve novel computational approaches to identify binding motifs on promoter sequences from related genes. ABS (http://genome.imim.es/datasets/abs2005/index.html) is a public database of known binding sites identified in promoters of orthologous vertebrate genes that have been manually curated from bibliography. We have annotated 650 experimental binding sites from 68 transcription factors and 100 orthologous target genes in human, mouse, rat or chicken genome sequences. Computational predictions and promoter alignment information are also provided for each entry. A simple and easy-to-use web interface facilitates data retrieval allowing different views of the information. 
In addition, the release 1.0 of ABS includes a customizable generator of artificial datasets based on the known sites contained in the collection and an evaluation tool to aid during the training and the assessment of motif-finding programs.}, keywords = {Animals, Binding Sites, Chickens, Chickens: genetics, Databases, Genetic, Genomics, Humans, Internet, Mice, Nucleic Acid, Promoter Regions, Rats, Transcription Factors, Transcription Factors: metabolism, User-Computer Interface} } Information about the genomic coordinates and the sequence of experimentally identified transcription factor binding sites is found scattered under a variety of diverse formats. The availability of standard collections of such high-quality data is important to design, evaluate and improve novel computational approaches to identify binding motifs on promoter sequences from related genes. ABS (http://genome.imim.es/datasets/abs2005/index.html) is a public database of known binding sites identified in promoters of orthologous vertebrate genes that have been manually curated from bibliography. We have annotated 650 experimental binding sites from 68 transcription factors and 100 orthologous target genes in human, mouse, rat or chicken genome sequences. Computational predictions and promoter alignment information are also provided for each entry. A simple and easy-to-use web interface facilitates data retrieval allowing different views of the information. In addition, the release 1.0 of ABS includes a customizable generator of artificial datasets based on the known sites contained in the collection and an evaluation tool to aid during the training and the assessment of motif-finding programs. |
Mularoni, Loris, Guigó, Roderic, Albà, M Mar Mutation patterns of amino acid tandem repeats in the human proteome. (Article) Genome biology, 7 (4), pp. R33, 2006, ISSN: 1465-6914. (Abstract | Links | BibTeX | Tags: Amino Acid, Amino Acid Substitution, Amino Acid: genetics, Codon, Expressed Sequence Tags, Genetic, Humans, Mutation, Polymorphism, Protein, Proteome, Proteome: genetics, Repetitive Sequences, Sequence Analysis) @article{Mularoni2006, title = {Mutation patterns of amino acid tandem repeats in the human proteome.}, author = {Mularoni, Loris and Guigó, Roderic and Albà, M Mar}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1557989&tool=pmcentrez&rendertype=abstract}, issn = {1465-6914}, year = {2006}, date = {2006-01-01}, journal = {Genome biology}, volume = {7}, number = {4}, pages = {R33}, abstract = {Amino acid tandem repeats are found in nearly one-fifth of human proteins. Abnormal expansion of these regions is associated with several human disorders. To gain further insight into the mutational mechanisms that operate in this type of sequence, we have analyzed a large number of mutation variants derived from human expressed sequence tags (ESTs).}, keywords = {Amino Acid, Amino Acid Substitution, Amino Acid: genetics, Codon, Expressed Sequence Tags, Genetic, Humans, Mutation, Polymorphism, Protein, Proteome, Proteome: genetics, Repetitive Sequences, Sequence Analysis} } Amino acid tandem repeats are found in nearly one-fifth of human proteins. Abnormal expansion of these regions is associated with several human disorders. To gain further insight into the mutational mechanisms that operate in this type of sequence, we have analyzed a large number of mutation variants derived from human expressed sequence tags (ESTs). |
Furney, Simon J, Albà, M Mar, López-Bigas, Núria Differences in the evolutionary history of disease genes affected by dominant or recessive mutations. (Article) BMC genomics, 7, pp. 165, 2006, ISSN: 1471-2164. (Abstract | Links | BibTeX | Tags: Amino Acid, Animals, Caenorhabditis elegans, Caenorhabditis elegans: genetics, Computational Biology, Conserved Sequence, Dominant, Essential, Evolution, Genes, Genetic, Genetic Diseases, Genetic Structures, Humans, Inborn, Inborn: classification, Inborn: genetics, Mice, Molecular, Mutation, Pan troglodytes, Pan troglodytes: genetics, Recessive, Selection, Sequence Homology) @article{Furney2006, title = {Differences in the evolutionary history of disease genes affected by dominant or recessive mutations.}, author = {Furney, Simon J and Albà, M Mar and López-Bigas, Núria}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1534034&tool=pmcentrez&rendertype=abstract}, issn = {1471-2164}, year = {2006}, date = {2006-01-01}, journal = {BMC genomics}, volume = {7}, pages = {165}, abstract = {Global analyses of human disease genes by computational methods have yielded important advances in the understanding of human diseases. Generally these studies have treated the group of disease genes uniformly, thus ignoring the type of disease-causing mutations (dominant or recessive). In this report we present a comprehensive study of the evolutionary history of autosomal disease genes separated by mode of inheritance.}, keywords = {Amino Acid, Animals, Caenorhabditis elegans, Caenorhabditis elegans: genetics, Computational Biology, Conserved Sequence, Dominant, Essential, Evolution, Genes, Genetic, Genetic Diseases, Genetic Structures, Humans, Inborn, Inborn: classification, Inborn: genetics, Mice, Molecular, Mutation, Pan troglodytes, Pan troglodytes: genetics, Recessive, Selection, Sequence Homology} } Global analyses of human disease genes by computational methods have yielded important advances in the understanding of human diseases. 
Generally these studies have treated the group of disease genes uniformly, thus ignoring the type of disease-causing mutations (dominant or recessive). In this report we present a comprehensive study of the evolutionary history of autosomal disease genes separated by mode of inheritance. |
2005 |
Albà, M Mar, Castresana, Jose Inverse relationship between evolutionary rate and age of mammalian genes. (Article) Molecular biology and evolution, 22 (3), pp. 598–606, 2005, ISSN: 0737-4038. (Abstract | Links | BibTeX | Tags: Animals, DNA, Evolution, Genome, human, Humans, Mice, Molecular, Sequence Analysis) @article{Alba2005, title = {Inverse relationship between evolutionary rate and age of mammalian genes.}, author = {Albà, M Mar and Castresana, Jose}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15537804}, issn = {0737-4038}, year = {2005}, date = {2005-01-01}, journal = {Molecular biology and evolution}, volume = {22}, number = {3}, pages = {598--606}, abstract = {A large number of genes is shared by all living organisms, whereas many others are unique to some specific lineages, indicating their different times of origin. The availability of a growing number of eukaryotic genomes allows us to estimate which mammalian genes are novel genes and, approximately, when they arose. In this article, we classify human genes into four different age groups and estimate evolutionary rates in human and mouse orthologs. We show that older genes tend to evolve more slowly than newer ones; that is, proteins that arose earlier in evolution currently have a larger proportion of sites subjected to negative selection. Interestingly, this property is maintained when a fraction of the fastest-evolving genes is excluded or when only genes belonging to a given functional class are considered. One way to explain this relationship is by assuming that genes maintain their functional constraints along all their evolutionary history, but the nature of more recent evolutionary innovations is such that the functional constraints operating on them are increasingly weaker. Alternatively, our results would also be consistent with a scenario in which the functional constraints acting on a gene would not need to be constant through evolution. 
Instead, starting from weak functional constraints near the time of origin of a gene---as supported by mechanisms proposed for the origin of orphan genes---there would be a gradual increase in selective pressures with time, resulting in fewer accepted mutations in older versus more novel genes.}, keywords = {Animals, DNA, Evolution, Genome, human, Humans, Mice, Molecular, Sequence Analysis} } A large number of genes is shared by all living organisms, whereas many others are unique to some specific lineages, indicating their different times of origin. The availability of a growing number of eukaryotic genomes allows us to estimate which mammalian genes are novel genes and, approximately, when they arose. In this article, we classify human genes into four different age groups and estimate evolutionary rates in human and mouse orthologs. We show that older genes tend to evolve more slowly than newer ones; that is, proteins that arose earlier in evolution currently have a larger proportion of sites subjected to negative selection. Interestingly, this property is maintained when a fraction of the fastest-evolving genes is excluded or when only genes belonging to a given functional class are considered. One way to explain this relationship is by assuming that genes maintain their functional constraints along all their evolutionary history, but the nature of more recent evolutionary innovations is such that the functional constraints operating on them are increasingly weaker. Alternatively, our results would also be consistent with a scenario in which the functional constraints acting on a gene would not need to be constant through evolution. Instead, starting from weak functional constraints near the time of origin of a gene—as supported by mechanisms proposed for the origin of orphan genes—there would be a gradual increase in selective pressures with time, resulting in fewer accepted mutations in older versus more novel genes. |
2004 |
Albà, M Mar, Guigó, Roderic Comparative analysis of amino acid repeats in rodents and humans. (Article) Genome research, 14 (4), pp. 549–54, 2004, ISSN: 1088-9051. (Abstract | Links | BibTeX | Tags: Amino Acid, Amino Acid: genetics, Amino Acid: physiology, Animals, Chromosome Mapping, Chromosome Mapping: methods, Chromosome Mapping: statistics & numerical data, Computational Biology, Computational Biology: methods, Computational Biology: statistics & numerical data, GC Rich Sequence, GC Rich Sequence: genetics, Humans, Mice, Proteins, Proteins: chemistry, Proteins: genetics, Proteins: physiology, Rats, Repetitive Sequences, Trinucleotide Repeats, Trinucleotide Repeats: genetics) @article{Alba2004, title = {Comparative analysis of amino acid repeats in rodents and humans.}, author = {Albà, M Mar and Guigó, Roderic}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=383298&tool=pmcentrez&rendertype=abstract}, issn = {1088-9051}, year = {2004}, date = {2004-01-01}, journal = {Genome research}, volume = {14}, number = {4}, pages = {549--54}, abstract = {Amino acid tandem repeats, also called homopolymeric tracts, are extremely abundant in eukaryotic proteins. To gain insight into the genome-wide evolution of these regions in mammals, we analyzed the repeat content in a large data set of rat-mouse-human orthologs. Our results show that human proteins contain more amino acid repeats than rodent proteins and that trinucleotide repeats are also more abundant in human coding sequences. Using the human species as an outgroup, we were able to address differences in repeat loss and repeat gain in the rat and mouse lineages. In this data set, mouse proteins contain substantially more repeats than rat proteins, which can be at least partly attributed to a higher repeat loss in the rat lineage. The data are consistent with a role for trinucleotide slippage in the generation of novel amino acid repeats. 
We confirm the previously observed functional bias of proteins with repeats, with overrepresentation of transcription factors and DNA-binding proteins. We show that genes encoding amino acid repeats tend to have an unusually high GC content, and that differences in coding GC content among orthologs are directly related to the presence/absence of repeats. We propose that the different GC content isochore structure in rodents and humans may result in an increased amino acid repeat prevalence in the human lineage.}, keywords = {Amino Acid, Amino Acid: genetics, Amino Acid: physiology, Animals, Chromosome Mapping, Chromosome Mapping: methods, Chromosome Mapping: statistics & numerical data, Computational Biology, Computational Biology: methods, Computational Biology: statistics & numerical data, GC Rich Sequence, GC Rich Sequence: genetics, Humans, Mice, Proteins, Proteins: chemistry, Proteins: genetics, Proteins: physiology, Rats, Repetitive Sequences, Trinucleotide Repeats, Trinucleotide Repeats: genetics} } Amino acid tandem repeats, also called homopolymeric tracts, are extremely abundant in eukaryotic proteins. To gain insight into the genome-wide evolution of these regions in mammals, we analyzed the repeat content in a large data set of rat-mouse-human orthologs. Our results show that human proteins contain more amino acid repeats than rodent proteins and that trinucleotide repeats are also more abundant in human coding sequences. Using the human species as an outgroup, we were able to address differences in repeat loss and repeat gain in the rat and mouse lineages. In this data set, mouse proteins contain substantially more repeats than rat proteins, which can be at least partly attributed to a higher repeat loss in the rat lineage. The data are consistent with a role for trinucleotide slippage in the generation of novel amino acid repeats. 
We confirm the previously observed functional bias of proteins with repeats, with overrepresentation of transcription factors and DNA-binding proteins. We show that genes encoding amino acid repeats tend to have an unusually high GC content, and that differences in coding GC content among orthologs are directly related to the presence/absence of repeats. We propose that the different GC content isochore structure in rodents and humans may result in an increased amino acid repeat prevalence in the human lineage. |
Huang, Hui, Winter, Eitan E, Wang, Huajun, Weinstock, Keith G, Xing, Heming, Goodstadt, Leo, Stenson, Peter D, Cooper, David N, Smith, Douglas, Albà, M Mar, Ponting, Chris P, Fechtel, Kim Evolutionary conservation and selection of human disease gene orthologs in the rat and mouse genomes. (Article) Genome biology, 5 (7), pp. R47, 2004, ISSN: 1465-6914. (Abstract | Links | BibTeX | Tags: Amino Acid, Amino Acid: genetics, Animal, Animals, Chromosome Mapping, Chromosome Mapping: methods, Conserved Sequence, Conserved Sequence: genetics, Disease Models, Evolution, Fishes, Fishes: genetics, Fungal, Fungal: genetics, Genes, Genes: genetics, Genes: physiology, Genetic, Genetic Diseases, Genome, Helminth, Helminth: genetics, human, Humans, Inborn, Inborn: genetics, Inborn: physiopathology, Insect, Insect: genetics, Mice, Molecular, Mutagenesis, Mutagenesis: genetics, Nucleic Acid, Nucleotides, Nucleotides: genetics, Point Mutation, Point Mutation: genetics, Rats, Repetitive Sequences, Selection, Sequence Homology, Trinucleotide Repeat Expansion, Trinucleotide Repeat Expansion: genetics) @article{Huang2004, title = {Evolutionary conservation and selection of human disease gene orthologs in the rat and mouse genomes.}, author = {Huang, Hui and Winter, Eitan E and Wang, Huajun and Weinstock, Keith G and Xing, Heming and Goodstadt, Leo and Stenson, Peter D and Cooper, David N and Smith, Douglas and Albà, M Mar and Ponting, Chris P and Fechtel, Kim}, url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=463309&tool=pmcentrez&rendertype=abstract}, issn = {1465-6914}, year = {2004}, date = {2004-01-01}, journal = {Genome biology}, volume = {5}, number = {7}, pages = {R47}, abstract = {Model organisms have contributed substantially to our understanding of the etiology of human disease as well as having assisted with the development of new treatment modalities. The availability of the human, mouse and, most recently, the rat genome sequences now permit the comprehensive investigation of the rodent orthologs of genes associated with human disease. 
Here, we investigate whether human disease genes differ significantly from their rodent orthologs with respect to their overall levels of conservation and their rates of evolutionary change.}, keywords = {Amino Acid, Amino Acid: genetics, Animal, Animals, Chromosome Mapping, Chromosome Mapping: methods, Conserved Sequence, Conserved Sequence: genetics, Disease Models, Evolution, Fishes, Fishes: genetics, Fungal, Fungal: genetics, Genes, Genes: genetics, Genes: physiology, Genetic, Genetic Diseases, Genome, Helminth, Helminth: genetics, human, Humans, Inborn, Inborn: genetics, Inborn: physiopathology, Insect, Insect: genetics, Mice, Molecular, Mutagenesis, Mutagenesis: genetics, Nucleic Acid, Nucleotides, Nucleotides: genetics, Point Mutation, Point Mutation: genetics, Rats, Repetitive Sequences, Selection, Sequence Homology, Trinucleotide Repeat Expansion, Trinucleotide Repeat Expansion: genetics} } Model organisms have contributed substantially to our understanding of the etiology of human disease as well as having assisted with the development of new treatment modalities. The availability of the human, mouse and, most recently, the rat genome sequences now permit the comprehensive investigation of the rodent orthologs of genes associated with human disease. Here, we investigate whether human disease genes differ significantly from their rodent orthologs with respect to their overall levels of conservation and their rates of evolutionary change. |
Castresana, Jose, Guigó, Roderic, Albà, M Mar Clustering of genes coding for DNA binding proteins in a region of atypical evolution of the human genome. (Article) Journal of molecular evolution, 59 (1), pp. 72–9, 2004, ISSN: 0022-2844. (Abstract | Links | BibTeX | Tags: Base Composition, Base Composition: genetics, Chromatin, Chromatin: metabolism, Chromosomes, Computational Biology, Databases, DNA-Binding Proteins, DNA-Binding Proteins: genetics, DNA-Binding Proteins: metabolism, Evolution, Genetic, Genome, human, Humans, Introns, Introns: genetics, Models, Molecular, Multigene Family, Multigene Family: genetics, Pair 19, Pair 19: genetics, Phylogeny, Zinc Fingers, Zinc Fingers: genetics) @article{Castresana2004, title = {Clustering of genes coding for DNA binding proteins in a region of atypical evolution of the human genome.}, author = {Castresana, Jose and Guigó, Roderic and Albà, M Mar}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15383909}, issn = {0022-2844}, year = {2004}, date = {2004-01-01}, journal = {Journal of molecular evolution}, volume = {59}, number = {1}, pages = {72--9}, abstract = {Comparison of the human and mouse genomes has revealed that significant variations in evolutionary rates exist among genomic regions and that a large part of this variation is interchromosomal. We confirm in this work, using a large collection of introns, that human chromosome 19 is the one that shows the highest divergence with respect to mouse. To search for other differences among chromosomes, we examine the distribution of gene functions in human and mouse chromosomes using the Gene Ontology definitions. We found by correspondence analysis that among the strongest clusterings of gene functions in human chromosomes is a group of genes coding for DNA binding proteins in chromosome 19. Interestingly, chromosome 19 also has a very high GC content, a feature that has been proposed to promote an opening of the chromatin, thereby facilitating binding of proteins to the DNA helix. 
In the mouse genome, however, a similar aggregation of genes coding for DNA binding proteins and high GC content cannot be found. This suggests that the distribution of genes coding for DNA binding proteins and the variations of the chromatin accessibility to these proteins are different in the human and mouse genomes. It is likely that the overall high synonymous and intron rates in chromosome 19 are a by-product of the high GC content of this chromosome.}, keywords = {Base Composition, Base Composition: genetics, Chromatin, Chromatin: metabolism, Chromosomes, Computational Biology, Databases, DNA-Binding Proteins, DNA-Binding Proteins: genetics, DNA-Binding Proteins: metabolism, Evolution, Genetic, Genome, human, Humans, Introns, Introns: genetics, Models, Molecular, Multigene Family, Multigene Family: genetics, Pair 19, Pair 19: genetics, Phylogeny, Zinc Fingers, Zinc Fingers: genetics} } Comparison of the human and mouse genomes has revealed that significant variations in evolutionary rates exist among genomic regions and that a large part of this variation is interchromosomal. We confirm in this work, using a large collection of introns, that human chromosome 19 is the one that shows the highest divergence with respect to mouse. To search for other differences among chromosomes, we examine the distribution of gene functions in human and mouse chromosomes using the Gene Ontology definitions. We found by correspondence analysis that among the strongest clusterings of gene functions in human chromosomes is a group of genes coding for DNA binding proteins in chromosome 19. Interestingly, chromosome 19 also has a very high GC content, a feature that has been proposed to promote an opening of the chromatin, thereby facilitating binding of proteins to the DNA helix. In the mouse genome, however, a similar aggregation of genes coding for DNA binding proteins and high GC content cannot be found. 
This suggests that the distribution of genes coding for DNA binding proteins and the variations of the chromatin accessibility to these proteins are different in the human and mouse genomes. It is likely that the overall high synonymous and intron rates in chromosome 19 are a by-product of the high GC content of this chromosome. |
Gibbs, Richard A, et al. Genome sequence of the Brown Norway rat yields insights into mammalian evolution. (Article) Nature, 428 (6982), pp. 493–521, 2004, ISSN: 1476-4687. (Abstract | Links | BibTeX | Tags: Animals, Base Composition, Centromere, Centromere: genetics, Chromosomes, CpG Islands, CpG Islands: genetics, DNA, DNA Transposable Elements, DNA Transposable Elements: genetics, Evolution, Gene Duplication, Genome, Genomics, Humans, Inbred BN, Inbred BN: genetics, Introns, Introns: genetics, Male, Mammalian, Mammalian: genetics, Mice, Mitochondrial, Mitochondrial: genetics, Models, Molecular, Mutagenesis, Nucleic Acid, Nucleic Acid: genetics, Polymorphism, Rats, Regulatory Sequences, Retroelements, Retroelements: genetics, RNA, RNA Splice Sites, RNA Splice Sites: genetics, Sequence Analysis, Single Nucleotide, Single Nucleotide: genetics, Telomere, Telomere: genetics, Untranslated, Untranslated: genetics) @article{Gibbs2004, title = {Genome sequence of the Brown Norway rat yields insights into mammalian evolution.}, author = {Gibbs, Richard A and others}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15057822}, issn = {1476-4687}, year = {2004}, date = {2004-01-01}, journal = {Nature}, volume = {428}, number = {6982}, pages = {493--521}, abstract = {The laboratory rat (Rattus norvegicus) is an indispensable tool in experimental medicine and drug development, having made inestimable contributions to human health. We report here the genome sequence of the Brown Norway (BN) rat strain. The sequence represents a high-quality 'draft' covering over 90% of the genome. The BN rat sequence is the third complete mammalian genome to be deciphered, and three-way comparisons with the human and mouse genomes resolve details of mammalian evolution. 
This first comprehensive analysis includes genes and proteins and their relation to human disease, repeated sequences, comparative genome-wide studies of mammalian orthologous chromosomal regions and rearrangement breakpoints, reconstruction of ancestral karyotypes and the events leading to existing species, rates of variation, and lineage-specific and lineage-independent evolutionary events such as expansion of gene families, orthology relations and protein evolution.}, keywords = {Animals, Base Composition, Centromere, Centromere: genetics, Chromosomes, CpG Islands, CpG Islands: genetics, DNA, DNA Transposable Elements, DNA Transposable Elements: genetics, Evolution, Gene Duplication, Genome, Genomics, Humans, Inbred BN, Inbred BN: genetics, Introns, Introns: genetics, Male, Mammalian, Mammalian: genetics, Mice, Mitochondrial, Mitochondrial: genetics, Models, Molecular, Mutagenesis, Nucleic Acid, Nucleic Acid: genetics, Polymorphism, Rats, Regulatory Sequences, Retroelements, Retroelements: genetics, RNA, RNA Splice Sites, RNA Splice Sites: genetics, Sequence Analysis, Single Nucleotide, Single Nucleotide: genetics, Telomere, Telomere: genetics, Untranslated, Untranslated: genetics} } The laboratory rat (Rattus norvegicus) is an indispensable tool in experimental medicine and drug development, having made inestimable contributions to human health. We report here the genome sequence of the Brown Norway (BN) rat strain. The sequence represents a high-quality 'draft' covering over 90% of the genome. The BN rat sequence is the third complete mammalian genome to be deciphered, and three-way comparisons with the human and mouse genomes resolve details of mammalian evolution. 
This first comprehensive analysis includes genes and proteins and their relation to human disease, repeated sequences, comparative genome-wide studies of mammalian orthologous chromosomal regions and rearrangement breakpoints, reconstruction of ancestral karyotypes and the events leading to existing species, rates of variation, and lineage-specific and lineage-independent evolutionary events such as expansion of gene families, orthology relations and protein evolution. |
2002 |
Albà, M Mar, Laskowski, Roman A, Hancock, John M Detecting cryptically simple protein sequences using the SIMPLE algorithm. (Article) Bioinformatics (Oxford, England), 18 (5), pp. 672–8, 2002, ISSN: 1367-4803. (Abstract | Links | BibTeX | Tags: Algorithms, Amino Acid, Amino Acid Sequence, Amino Acid: genetics, Databases, Genetic, Genetic Variation, Internet, Minisatellite Repeats, Minisatellite Repeats: genetics, Models, Molecular Sequence Data, Protein, Protein: methods, Proteins, Proteins: chemistry, Repetitive Sequences, Saccharomyces cerevisiae, Saccharomyces cerevisiae: genetics, Sensitivity and Specificity, Sequence Analysis, Sequence Homology, Software, Statistical)
@comment{Alba2002: journal article. The acronym SIMPLE in the title is brace-protected so that sentence-casing styles keep it upper-case; the title carries no final period because the bibliography style supplies punctuation; the page range is written in full.}
@article{Alba2002,
  author   = {Albà, M Mar and Laskowski, Roman A and Hancock, John M},
  title    = {Detecting cryptically simple protein sequences using the {SIMPLE} algorithm},
  journal  = {Bioinformatics (Oxford, England)},
  volume   = {18},
  number   = {5},
  pages    = {672--678},
  year     = {2002},
  date     = {2002-01-01},
  issn     = {1367-4803},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/12050063},
  abstract = {Low-complexity or cryptically simple sequences are widespread in protein sequences but their evolution and function are poorly understood. To date methods for the detection of low complexity in proteins have been directed towards the filtering of such regions prior to sequence homology searches but not to the analysis of the regions per se. However, many of these regions are encoded by non-repetitive DNA sequences and may therefore result from selection acting on protein structure and/or function.},
  keywords = {Algorithms, Amino Acid, Amino Acid Sequence, Amino Acid: genetics, Databases, Genetic, Genetic Variation, Internet, Minisatellite Repeats, Minisatellite Repeats: genetics, Models, Molecular Sequence Data, Protein, Protein: methods, Proteins, Proteins: chemistry, Repetitive Sequences, Saccharomyces cerevisiae, Saccharomyces cerevisiae: genetics, Sensitivity and Specificity, Sequence Analysis, Sequence Homology, Software, Statistical},
}
Low-complexity or cryptically simple sequences are widespread in protein sequences but their evolution and function are poorly understood. To date methods for the detection of low complexity in proteins have been directed towards the filtering of such regions prior to sequence homology searches but not to the analysis of the regions per se. However, many of these regions are encoded by non-repetitive DNA sequences and may therefore result from selection acting on protein structure and/or function. |
Messeguer, Xavier, Escudero, Ruth, Farré, Domènec, Núñez, Oscar, Martínez, Javier, Albà, M Mar PROMO: detection of known transcription regulatory elements using species-tailored searches. (Article) Bioinformatics (Oxford, England), 18 (2), pp. 333–4, 2002, ISSN: 1367-4803. (Abstract | Links | BibTeX | Tags: Animals, Binding Sites, Binding Sites: genetics, Computational Biology, DNA, DNA: genetics, DNA: metabolism, Humans, Software, Species Specificity, Transcription Factors, Transcription Factors: metabolism)
@comment{Messeguer2002: journal article. The acronym PROMO in the title is brace-protected so that sentence-casing styles keep it upper-case; the title carries no final period because the bibliography style supplies punctuation; the page range is written in full.}
@article{Messeguer2002,
  author   = {Messeguer, Xavier and Escudero, Ruth and Farré, Domènec and Núñez, Oscar and Martínez, Javier and Albà, M Mar},
  title    = {{PROMO}: detection of known transcription regulatory elements using species-tailored searches},
  journal  = {Bioinformatics (Oxford, England)},
  volume   = {18},
  number   = {2},
  pages    = {333--334},
  year     = {2002},
  date     = {2002-01-01},
  issn     = {1367-4803},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/11847087},
  abstract = {We have developed a set of tools to construct positional weight matrices from known transcription factor binding sites in a species or taxon-specific manner, and to search for matches in DNA sequences.},
  keywords = {Animals, Binding Sites, Binding Sites: genetics, Computational Biology, DNA, DNA: genetics, DNA: metabolism, Humans, Software, Species Specificity, Transcription Factors, Transcription Factors: metabolism},
}
We have developed a set of tools to construct positional weight matrices from known transcription factor binding sites in a species or taxon-specific manner, and to search for matches in DNA sequences. |
Publication List
2020 |
Proceedings of the National Academy of Sciences USA, Online ahead of print. 2020. |
Evolution of New Proteins From Translated sORFs in Long Non-Coding RNAs (Article) Experimental Cell Research, 391 (1), pp. 111940, 2020. |
Nature Communications, 11 (1), pp. 1768, 2020, DOI: 10.1038/s41467-020-15634-w. |
Evolution of New Proteins From Translated sORFs in Long Non-Coding RNAs (Article) Experimental Cell Research, 391 (1), pp. 111940, 2020. |
Reference-free reconstruction and quantification of transcriptomes from long-read sequencing (Article) bioRxiv, 2020. |
2019 |
Scientific Reports, 9 pp. 11005, 2019. |
Nucleic Acids Research Genomics and Bioinformatics, 1 pp. e2, 2019. |
BMC Research Notes, 12 pp. 250, 2019. |
bioRxiv, March 19, 2019. |
Frequent birth of de novo genes in the compact yeast genome (Article) bioRxiv, March 13, 2019. |
Translation of Small Open Reading Frames: Roles in Regulation and Evolutionary Innovation (Article) Trends in Genetics, 35 pp. 186-198, 2019. |
2018 |
bioRxiv, Dec 19, 2018. |
Frequent translation of small open reading frames in evolutionary conserved lncRNA regions (Article) bioRxiv, June 16, 2018. |
Translation of neutrally evolving peptides provides a basis for de novo gene evolution (Article) Nature Ecology and Evolution, 2 pp. 890–896, 2018. |
Transcriptomics in the wild: Hibernation physiology in free-ranging dwarf lemurs. (Article) Molecular Ecology, 27 (3), pp. 709-722, 2018. |
2017 |
Genes, 8 (12), pp. E351, 2017. |
Zinc-finger domains in metazoans: evolution gone wild (Article) Genome Biology, 18 pp. 168, 2017. |
New genes and functional innovation in mammals (Article) Genome Biology and Evolution, 9 pp. 1886–1900, 2017. |
De novo gene evolution: How do we transition from non-coding to coding? (Conference) PeerJ preprints 5 (e3031v2), 2017, (The SMBE 2017 Collection). |
Comparative transcriptomics and ribo-seq: Looking at de novo gene emergence in Saccharomycotina (Conference) PeerJ preprints 5 (e3030v1), 2017, (The SMBE 2017 Collection). |
Molecular Biology and Evolution, doi: 10.1093/molbev/msw284, 2017. |
2016 |
Genome Biology, 17 pp. 251, 2016. |
New genes and functional innovation in mammals (Article) bioRxiv, 2016. |
Functional and non-functional classes of peptides produced by long non-coding RNAs (Article) bioRxiv, 2016, DOI: 10.1101/064915. |
Gene expression profiling in the hibernating primate, Cheirogaleus medius (Article) Genome Biology and Evolution, advance access (doi: 10.1093/gbe/evw163), 2016. |
A novel method of microsatellite genotyping-by-sequencing using individual combinatorial barcoding (Article) R Soc Open Sci, 3 (1), pp. 150565, 2016, DOI: 10.1098/rsos.150565. |
2015 |
Origins of de novo genes in human and chimpanzee (Article) PLoS Genetics, 11 (12), pp. e1005721, 2015. |
Nature Communications, 6 pp. 7972, 2015, DOI: 10.1038/ncomms8972. |
Gut Microbiota Dynamics during Dietary Shift in Eastern African Cichlid Fishes. (Article) PLoS one, 10 (5), pp. e0127462, 2015, ISSN: 1932-6203. |
Molecular biology and evolution, 2015, ISSN: 1537-1719. |
High evolutionary turnover of satellite families in Caenorhabditis (Article) BMC Evolutionary Biology, 15 (1), pp. 218, 2015, ISSN: 1471-2148. |
2014 |
Comparative Genomics of Mammalian Hibernators Using Gene Networks. (Article) Integrative and comparative biology, 2014, ISSN: 1557-7023. |
Long non-coding RNAs as a source of new peptides (Article) eLife, 3, 2014, ISSN: 2050-084X. |
Genome biology and evolution, 6 (4), pp. 846–60, 2014, ISSN: 1759-6653. |
Uncovering adaptive evolution in the human lineage (Article) BMC Genomics, 15 (1), pp. 599, 2014, ISSN: 1471-2164. |
2013 |
Dynamics and adaptive benefits of modular protein evolution (Article) Current Opinion in Structural Biology, 2013, ISSN: 0959440X. |
Improving genome-wide scans of positive selection by using protein isoforms of similar length. (Article) Genome biology and evolution, 5 (2), pp. 457–67, 2013, ISSN: 1759-6653. |
Cancer cell, 24 (2), pp. 151–66, 2013, ISSN: 1878-3686. |
Accelerated evolution after gene duplication: a time-dependent process affecting just one copy. (Article) Molecular biology and evolution, 2013, ISSN: 1537-1719. |
Emergence of novel domains in proteins. (Article) BMC evolutionary biology, 13 (1), pp. 47, 2013, ISSN: 1471-2148. |
Nucleic acids research, 2013, ISSN: 1362-4962. |
2012 |
Sequence shortening in the rodent ancestor. (Article) Genome research, 22 (3), pp. 478–85, 2012, ISSN: 1549-5469. |
Dissecting the role of low-complexity regions in the evolution of vertebrate proteins. (Article) BMC evolutionary biology, 12 (1), pp. 155, 2012, ISSN: 1471-2148. |
The HCMV-specific UL1 gene encodes a late phase glycoprotein incorporated in the virion envelope. (Article) Journal of virology, 2012, ISSN: 1098-5514. |
Structure and age jointly influence rates of protein evolution. (Article) PLoS computational biology, 8 (5), pp. e1002542, 2012, ISSN: 1553-7358. |
Role of Low-Complexity Sequences in the Formation of Novel Protein Coding Sequences. (Article) Molecular biology and evolution, 29 (3), pp. 883–6, 2012, ISSN: 1537-1719. |
2011 |
Partial gene duplication and the formation of novel genes (Incollection) Friedberg, Felix (Ed.): Gene Duplication, Intech, Rijeka, 2011. |
Lineage-specific variation in intensity of natural selection in mammals. (Article) Molecular biology and evolution, 28 (1), pp. 383–98, 2011, ISSN: 1537-1719. |
Immunology and cell biology, 89 (7), pp. 753–66, 2011, ISSN: 1440-1711. |
2010 |
Genoma y enfermedades complejas (Incollection) Villoslada, Pablo (Ed.): Tratado de Esclerosis Múltiple, Marge Books, Barcelona, 2010. |
Natural selection drives the accumulation of amino acid tandem repeats in human proteins. (Article) Genome research, 20 (6), pp. 745–54, 2010, ISSN: 1549-5469. |
Heterogeneous patterns of gene-expression diversification in mammalian gene duplicates. (Article) Molecular biology and evolution, 27 (2), pp. 325–35, 2010, ISSN: 1537-1719. |
2009 |
Origin of primate orphan genes: a comparative genomics approach (Article) Molecular Biology and Evolution, 26 (3), pp. 603-612, 2009. |
Temporal aspects of gene evolution (Incollection) Dopazo; Navarro (Eds.): Evolución y Adaptación: 150 años después del origen de las especies, Sociedad Española de Biología Evolutiva, Barcelona, 2009. |
Jagged1 is the pathological link between Wnt and Notch pathways in colorectal cancer. (Article) Proceedings of the National Academy of Sciences of the United States of America, 106 (15), pp. 6315–20, 2009, ISSN: 1091-6490. |
PLoS genetics, 5 (3), pp. e1000397, 2009, ISSN: 1553-7404. |
Evolution of primate orphan proteins. (Article) Biochemical Society transactions, 37 (Pt 4), pp. 778–82, 2009, ISSN: 1470-8752. |
2008 |
Trinucleotide repeats in the human and ape genomes (Incollection) Kehrer-Sawatzki; Cooper (Eds.): Handbook of human molecular evolution 2, John Wiley and Sons, Chichester, 2008. |
Trinucleotide repeats in human and ape genomes (Incollection) Encyclopedia of Life Sciences, John Wiley & Sons, Ltd, Chichester, UK, 2008, ISBN: 0470016175. |
The Journal of neuroscience : the official journal of the Society for Neuroscience, 28 (46), pp. 12136–45, 2008, ISSN: 1529-2401. |
Evolutionary Biology from Concept to Application (Book) Springer Berlin Heidelberg, Berlin, Heidelberg, 2008, ISBN: 978-3-540-78992-5. |
2007 |
Positional bias of general and tissue-specific regulatory motifs in mouse gene promoters. (Article) BMC genomics, 8 pp. 459, 2007, ISSN: 1471-2164. |
PEAKS: identification of regulatory motifs by their position in DNA sequences. (Article) Bioinformatics (Oxford, England), 23 (2), pp. 243–4, 2007, ISSN: 1367-4811. |
Amino acid repeats and the structure and evolution of proteins. (Article) Genome dynamics, 3 pp. 119–30, 2007, ISSN: 1660-9263. |
Highly constrained proteins contain an unexpectedly large number of amino acid tandem repeats. (Article) Genomics, 89 (3), pp. 316–25, 2007, ISSN: 0888-7543. |
Housekeeping genes tend to show reduced upstream sequence conservation. (Article) Genome biology, 8 (7), pp. R140, 2007, ISSN: 1465-6914. |
On homology searches by protein Blast and the characterization of the age of genes. (Article) BMC evolutionary biology, 7 pp. 53, 2007, ISSN: 1471-2148. |
2006 |
ABS: a database of Annotated regulatory Binding Sites from orthologous promoters. (Article) Nucleic acids research, 34 (Database issue), pp. D63–7, 2006, ISSN: 1362-4962. |
Mutation patterns of amino acid tandem repeats in the human proteome. (Article) Genome biology, 7 (4), pp. R33, 2006, ISSN: 1465-6914. |
BMC genomics, 7 pp. 165, 2006, ISSN: 1471-2164. |
2005 |
Inverse relationship between evolutionary rate and age of mammalian genes. (Article) Molecular biology and evolution, 22 (3), pp. 598–606, 2005, ISSN: 0737-4038. |
2004 |
Comparative analysis of amino acid repeats in rodents and humans. (Article) Genome research, 14 (4), pp. 549–54, 2004, ISSN: 1088-9051. |
Genome biology, 5 (7), pp. R47, 2004, ISSN: 1465-6914. |
Journal of molecular evolution, 59 (1), pp. 72–9, 2004, ISSN: 0022-2844. |
Genome sequence of the Brown Norway rat yields insights into mammalian evolution. (Article) Nature, 428 (6982), pp. 493–521, 2004, ISSN: 1476-4687. |
2002 |
Detecting cryptically simple protein sequences using the SIMPLE algorithm. (Article) Bioinformatics (Oxford, England), 18 (5), pp. 672–8, 2002, ISSN: 1367-4803. |
PROMO: detection of known transcription regulatory elements using species-tailored searches. (Article) Bioinformatics (Oxford, England), 18 (2), pp. 333–4, 2002, ISSN: 1367-4803. |