@article{Anders2010Differential,
  author = {Anders, Simon and Huber, Wolfgang},
  doi = {10.1186/gb-2010-11-10-r106},
  journal = {Genome Biology},
  number = 10,
  pages = {R106+},
  pmcid = {PMC3218662},
  pmid = 20979621,
  title = {{Differential expression analysis for sequence count data}},
  volume = 11,
  year = 2010
}

@article{Anders2015HTSeqa,
  author = {Anders, Simon and Pyl, Paul T. and Huber, Wolfgang},
  doi = {10.1093/bioinformatics/btu638},
  journal = {Bioinformatics},
  number = 2,
  pages = {166--169},
  pmid = 25260700,
  title = {{HTSeq -- a Python framework to work with high-throughput sequencing data}},
  volume = 31,
  year = 2015
}

@article{Benjamini1995Controlling,
  author = {Benjamini, Yoav and Hochberg, Yosef},
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  number = 1,
  pages = {289--300},
  title = {{Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing}},
  url = {http://www.jstor.org/stable/2346101},
  volume = 57,
  year = 1995
}

% NOTE(review): Bourgon2010Independent given names are spelled out so styles can
% abbreviate them; "Richard" is taken from the published record -- confirm.
@article{Bourgon2010Independent,
  author = {Bourgon, Richard and Gentleman, Robert and Huber, Wolfgang},
  doi = {10.1073/pnas.0914005107},
  journal = {Proceedings of the National Academy of Sciences},
  number = 21,
  pages = {9546--9551},
  pmcid = {PMC2906865},
  pmid = 20460310,
  title = {{Independent filtering increases detection power for high-throughput experiments}},
  volume = 107,
  year = 2010
}

% NOTE(review): Bray2016Near page range uses the ASCII double hyphen, the DOI is
% lifted out of the resolver URL, and the acronym in the title is brace-protected.
% The issue number (5) comes from the published record -- confirm.
@article{Bray2016Near,
  author = {Bray, Nicolas and Pimentel, Harold and Melsted, Pall and Pachter, Lior},
  doi = {10.1038/nbt.3519},
  journal = {Nature Biotechnology},
  number = 5,
  pages = {525--527},
  title = {Near-optimal probabilistic {RNA-seq} quantification},
  volume = 34,
  url = {http://dx.doi.org/10.1038/nbt.3519},
  year = 2016
}

@article{Dobin2013STAR,
  author = {Dobin, Alexander and Davis, Carrie A.
and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R.},
  doi = {10.1093/bioinformatics/bts635},
  journal = {Bioinformatics},
  number = 1,
  pages = {15--21},
  pmcid = {PMC3530905},
  pmid = 23104886,
  title = {{STAR: ultrafast universal RNA-seq aligner}},
  url = {http://dx.doi.org/10.1093/bioinformatics/bts635},
  volume = 29,
  year = 2013
}

% NOTE(review): Dudoit2002Statistical first author's given name was mangled
% as "Rine"; restored to "Sandrine". Volume 12, number 1 come from the
% published record -- confirm.
@article{Dudoit2002Statistical,
  author = {Dudoit, Sandrine and Yang, Yee H. and Callow, Matthew J. and Speed, Terence P.},
  journal = {Statistica Sinica},
  number = 1,
  pages = {111--139},
  title = {{Statistical methods for identifying differentially expressed genes in replicated cDNA microarray experiments}},
  volume = 12,
  year = 2002
}

@article{Durinck2009Mapping,
  author = {Durinck, Steffen and Spellman, Paul T. and Birney, Ewan and Huber, Wolfgang},
  doi = {10.1038/nprot.2009.97},
  journal = {Nature Protocols},
  number = 8,
  pages = {1184--1191},
  pmcid = {PMC3159387},
  pmid = 19617889,
  publisher = {Nature Publishing Group},
  title = {{Mapping identifiers for the integration of genomic datasets with the R/Bioconductor package biomaRt.}},
  url = {http://dx.doi.org/10.1038/nprot.2009.97},
  volume = 4,
  year = 2009
}

@article{Flicek2014Ensembl,
  author = {Flicek, Paul and Amode, M. Ridwan and Barrell, Daniel and Beal, Kathryn and Billis, Konstantinos and Brent, Simon and Carvalho-Silva, Denise and Clapham, Peter and Coates, Guy and Fitzgerald, Stephen and Gil, Laurent and Gir{\'o}n, Carlos G. and Gordon, Leo and Hourlier, Thibaut and Hunt, Sarah and Johnson, Nathan and Juettemann, Thomas and K{\"a}h{\"a}ri, Andreas K. and Keenan, Stephen and Kulesha, Eugene and Martin, Fergal J. and Maurel, Thomas and McLaren, William M. and Murphy, Daniel N. and Nag, Rishi and Overduin, Bert and Pignatelli, Miguel and Pritchard, Bethan and Pritchard, Emily and Riat, Harpreet S. and Ruffier, Magali and Sheppard, Daniel and Taylor, Kieron and Thormann, Anja and Trevanion, Stephen J.
and Vullo, Alessandro and Wilder, Steven P. and Wilson, Mark and Zadissa, Amonida and Aken, Bronwen L. and Birney, Ewan and Cunningham, Fiona and Harrow, Jennifer and Herrero, Javier and Hubbard, Tim J. P. and Kinsella, Rhoda and Muffato, Matthieu and Parker, Anne and Spudich, Giulietta and Yates, Andy and Zerbino, Daniel R. and Searle, Stephen M. J.},
  doi = {10.1093/nar/gkt1196},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  number = {D1},
  pages = {D749--D755},
  pmid = 24316576,
  title = {{Ensembl 2014}},
  url = {http://dx.doi.org/10.1093/nar/gkt1196},
  volume = 42,
  year = 2014
}

@article{Hardcastle2010BaySeq,
  abstract = {{BACKGROUND:High throughput sequencing has become an important technology for studying expression levels in many types of genomic, and particularly transcriptomic, data. One key way of analysing such data is to look for elements of the data which display particular patterns of differential expression in order to take these forward for further analysis and validation.RESULTS:We propose a framework for defining patterns of differential expression and develop a novel algorithm, baySeq, which uses an empirical Bayes approach to detect these patterns of differential expression within a set of sequencing samples. The method assumes a negative binomial distribution for the data and derives an empirically determined prior distribution from the entire dataset. We examine the performance of the method on real and simulated data.CONCLUSIONS:Our method performs at least as well, and often better, than existing methods for analyses of pairwise differential expression in both real and simulated data. When we compare methods for the analysis of data from experimental designs involving multiple sample groups, our method again shows substantial gains in performance. We believe that this approach thus represents an important step forward for the analysis of count data from sequencing experiments.}},
  author = {Hardcastle, Thomas and Kelly, Krystyna},
  citeulike-article-id = 7610091,
  citeulike-linkout-0 = {http://dx.doi.org/10.1186/1471-2105-11-422},
  citeulike-linkout-1 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2928208/},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/20698981},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=20698981},
  doi = {10.1186/1471-2105-11-422},
  issn = {1471-2105},
  journal = {BMC Bioinformatics},
  keywords = {bayes, deseq2, rnaseq, workflow},
  number = 1,
  pages = {422+},
  pmcid = {PMC2928208},
  pmid = 20698981,
  posted-at = {2011-04-05 09:08:06},
  priority = 2,
  title = {{baySeq: Empirical Bayesian methods for identifying differential expression in sequence count data}},
  url = {http://dx.doi.org/10.1186/1471-2105-11-422},
  volume = 11,
  year = 2010
}

@article{Himes2014RNASeq,
  abstract = {{Asthma is a chronic inflammatory respiratory disease that affects over 300 million people worldwide. Glucocorticoids are a mainstay therapy for asthma because they exert anti-inflammatory effects in multiple lung tissues, including the airway smooth muscle (ASM). However, the mechanism by which glucocorticoids suppress inflammation in ASM remains poorly understood. Using RNA-Seq, a high-throughput sequencing method, we characterized transcriptomic changes in four primary human ASM cell lines that were treated with dexamethasone--a potent synthetic glucocorticoid (1 µM for 18 hours). Based on a Benjamini-Hochberg corrected p-value <0.05, we identified 316 differentially expressed genes, including both well known (DUSP1, KLF15, PER1, TSC22D3) and less investigated (C7, CCDC69, CRISPLD2) glucocorticoid-responsive genes.
CRISPLD2, which encodes a secreted protein previously implicated in lung development and endotoxin regulation, was found to have SNPs that were moderately associated with inhaled corticosteroid resistance and bronchodilator response among asthma patients in two previously conducted genome-wide association studies. Quantitative RT-PCR and Western blotting showed that dexamethasone treatment significantly increased CRISPLD2 mRNA and protein expression in ASM cells. CRISPLD2 expression was also induced by the inflammatory cytokine IL1β, and small interfering RNA-mediated knockdown of CRISPLD2 further increased IL1β-induced expression of IL6 and IL8. Our findings offer a comprehensive view of the effect of a glucocorticoid on the ASM transcriptome and identify CRISPLD2 as an asthma pharmacogenetics candidate gene that regulates anti-inflammatory effects of glucocorticoids in the ASM.}},
  author = {Himes, Blanca E. and Jiang, Xiaofeng and Wagner, Peter and Hu, Ruoxi and Wang, Qiyu and Klanderman, Barbara and Whitaker, Reid M. and Duan, Qingling and Lasky-Su, Jessica and Nikolos, Christina and Jester, William and Johnson, Martin and Panettieri, Reynold A. and Tantisira, Kelan G. and Weiss, Scott T. and Lu, Quan},
  citeulike-article-id = 13705379,
  citeulike-linkout-0 = {http://dx.doi.org/10.1371/journal.pone.0099625},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/24926665},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=24926665},
  doi = {10.1371/journal.pone.0099625},
  issn = {1932-6203},
  journal = {PLoS ONE},
  keywords = {rnaseq, workflow},
  number = 6,
  pages = {e99625},
  pmid = 24926665,
  posted-at = {2015-08-18 15:02:37},
  priority = 2,
  title = {{RNA-Seq transcriptome profiling identifies CRISPLD2 as a glucocorticoid responsive gene that modulates cytokine function in airway smooth muscle cells.}},
  url = {http://dx.doi.org/10.1371/journal.pone.0099625},
  volume = 9,
  year = 2014
}
% NOTE(review): Himes2014RNASeq article number e99625 is derived from the DOI
% suffix; journal name normalised to "PLoS ONE" -- confirm.

% NOTE(review): Huber2015Orchestrating author "Bravo, Hector Corrada C." was a
% mis-split compound surname; rewritten as "Corrada Bravo" with the accented
% given name from the published record -- confirm. Accents now use the
% brace-wrapped special-character form so sorting and labels work.
@article{Huber2015Orchestrating,
  abstract = {{Bioconductor is an open-source, open-development software project for the analysis and comprehension of high-throughput data in genomics and molecular biology. The project aims to enable interdisciplinary research, collaboration and rapid development of scientific software. Based on the statistical programming language R, Bioconductor comprises 934 interoperable packages contributed by a large, diverse community of scientists. Packages cover a range of bioinformatic and statistical applications. They undergo formal initial review and continuous automated testing. We present an overview for prospective users and contributors.}},
  author = {Huber, Wolfgang and Carey, Vincent J. and Gentleman, Robert and Anders, Simon and Carlson, Marc and Carvalho, Benilton S. and Corrada Bravo, H{\'e}ctor and Davis, Sean and Gatto, Laurent and Girke, Thomas and Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D. and Irizarry, Rafael A. and Lawrence, Michael and Love, Michael I. and MacDonald, James and Obenchain, Valerie and Ole{\'s}, Andrzej K. and Pag{\`e}s, Herv{\'e} and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon K. and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin},
  citeulike-article-id = 13504287,
  citeulike-linkout-0 = {http://dx.doi.org/10.1038/nmeth.3252},
  citeulike-linkout-1 = {http://dx.doi.org/10.1038/nmeth.3252},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/25633503},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=25633503},
  day = 29,
  doi = {10.1038/nmeth.3252},
  issn = {1548-7105},
  journal = {Nature Methods},
  keywords = {mine, workflow},
  month = feb,
  number = 2,
  pages = {115--121},
  pmid = 25633503,
  posted-at = {2015-05-29 16:53:20},
  priority = 2,
  publisher = {Nature Publishing Group},
  title = {{Orchestrating high-throughput genomic analysis with Bioconductor.}},
  url = {http://dx.doi.org/10.1038/nmeth.3252},
  volume = 12,
  year = 2015
}

@article{Huntley2013ReportingTools,
  abstract = {{Summary: It is common for computational analyses to generate large amounts of complex data that are difficult to process and share with collaborators. Standard methods are needed to transform such data into a more useful and intuitive format. We present ReportingTools, a Bioconductor package, that automatically recognizes and transforms the output of many common Bioconductor packages into rich, interactive, HTML-based reports. Reports are not generic, but have been individually designed to reflect content specific to the result type detected. Tabular output included in reports is sortable, filterable and searchable and contains context-relevant hyperlinks to external databases. Additionally, in-line graphics have been developed for specific analysis types and are embedded by default within table rows, providing a useful visual summary of underlying raw data. ReportingTools is highly flexible and reports can be easily customized for specific applications using the well-defined API.}},
  author = {Huntley, Melanie A. and Larson, Jessica L. and Chaivorapol, Christina and Becker, Gabriel and Lawrence, Michael and Hackney, Jason A.
and Kaminker, Joshua S.},
  citeulike-article-id = 12728071,
  citeulike-linkout-0 = {http://dx.doi.org/10.1093/bioinformatics/btt551},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/29/24/3220.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/29/24/3220.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/24/3220},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/24078713},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=24078713},
  day = 15,
  doi = {10.1093/bioinformatics/btt551},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {workflow},
  month = dec,
  number = 24,
  pages = {3220--3221},
  pmid = 24078713,
  posted-at = {2015-08-18 15:13:59},
  priority = 2,
  publisher = {Oxford University Press},
  title = {{ReportingTools: an automated result processing and presentation toolkit for high-throughput genomic analyses}},
  url = {http://dx.doi.org/10.1093/bioinformatics/btt551},
  volume = 29,
  year = 2013
}

% Kent2002Human: journal name is title-cased to match the other journal
% fields in this file.
@article{Kent2002Human,
  abstract = {{As vertebrate genome sequences near completion and research refocuses to their analysis, the issue of effective genome annotation display becomes critical. A mature web tool for rapid and reliable display of any requested portion of the genome at any scale, together with several dozen aligned annotation tracks, is provided at http://genome.ucsc.edu. This browser displays assembly contigs and gaps, mRNA and expressed sequence tag alignments, multiple gene predictions, cross-species homologies, single nucleotide polymorphisms, sequence-tagged sites, radiation hybrid data, transposon repeats, and more as a stack of coregistered tracks. Text and sequence-based searches provide quick and precise access to any region of specific interest. Secondary links from individual features lead to sequence details and supplementary off-site databases. One-half of the annotation tracks are computed at the University of California, Santa Cruz from publicly available sequence data; collaborators worldwide provide the rest. Users can stably add their own custom tracks to the browser for educational or research purposes. The conceptual and technical framework of the browser, its underlying MYSQL database, and overall use are described. The web site currently serves over 50,000 pages per day to over 3000 different users.}},
  author = {Kent, W. James and Sugnet, Charles W. and Furey, Terrence S. and Roskin, Krishna M. and Pringle, Tom H. and Zahler, Alan M. and Haussler, David},
  citeulike-article-id = 2009259,
  citeulike-linkout-0 = {http://dx.doi.org/10.1101/gr.229102},
  citeulike-linkout-1 = {http://dx.doi.org/10.1101/gr.229102.\%20article\%20published\%20online\%20before\%20print\%20in\%20may\%202002},
  citeulike-linkout-2 = {http://genome.cshlp.org/content/12/6/996.full.abstract},
  citeulike-linkout-3 = {http://genome.cshlp.org/content/12/6/996.full.full.pdf},
  citeulike-linkout-4 = {http://www.genome.org/cgi/content/abstract/12/6/996},
  citeulike-linkout-5 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC186604/},
  citeulike-linkout-6 = {http://view.ncbi.nlm.nih.gov/pubmed/12045153},
  citeulike-linkout-7 = {http://www.hubmed.org/display.cgi?uids=12045153},
  day = 1,
  doi = {10.1101/gr.229102},
  issn = {1088-9051},
  journal = {Genome Research},
  keywords = {ctsca, workflow},
  month = jun,
  number = 6,
  pages = {996--1006},
  pmcid = {PMC186604},
  pmid = 12045153,
  posted-at = {2012-07-26 16:04:05},
  priority = 2,
  publisher = {Cold Spring Harbor Laboratory Press},
  title = {{The human genome browser at UCSC.}},
  url = {http://dx.doi.org/10.1101/gr.229102},
  volume = 12,
  year = 2002
}

@article{Law2014Voom,
  abstract = {{Normal linear modeling methods are developed for analyzing read counts from RNA-seq experiments.
The voom method estimates the mean-variance relationship of the log-counts, generates a precision weight for each observation, and then enters these into a limma empirical Bayes analysis pipeline. This opens access for RNA-seq analysts to a large body of methodology developed for microarrays. Simulation studies show that voom performs as well or better than count-based RNA-seq methods even when the data are generated according to the assumptions of the earlier methods. Two case studies illustrate the use of linear modeling and gene set testing methods.}},
  author = {Law, Charity W. and Chen, Yunshun and Shi, Wei and Smyth, Gordon K.},
  citeulike-article-id = 12965503,
  citeulike-linkout-0 = {http://dx.doi.org/10.1186/gb-2014-15-2-r29},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/24485249},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=24485249},
  day = 03,
  doi = {10.1186/gb-2014-15-2-r29},
  issn = {1465-6906},
  journal = {Genome Biology},
  keywords = {deseq2, rnaguide, workflow},
  month = feb,
  number = 2,
  pages = {R29+},
  pmid = 24485249,
  posted-at = {2014-02-13 20:56:00},
  priority = 2,
  publisher = {BioMed Central Ltd},
  title = {{Voom: precision weights unlock linear model analysis tools for RNA-seq read counts}},
  url = {http://dx.doi.org/10.1186/gb-2014-15-2-r29},
  volume = 15,
  year = 2014
}

% Lawrence2013Software: accented letters use the brace-wrapped special-character
% form so classic BibTeX sorts and labels the name correctly.
@article{Lawrence2013Software,
  abstract = {{We describe Bioconductor infrastructure for representing and computing on annotated genomic ranges and integrating genomic data with the statistical computing features of R and its extensions. At the core of the infrastructure are three packages: IRanges, GenomicRanges, and GenomicFeatures. These packages provide scalable data structures for representing annotated ranges on the genome, with special support for transcript structures, read alignments and coverage vectors. Computational facilities include efficient algorithms for overlap and nearest neighbor detection, coverage calculation and other range operations. This infrastructure directly supports more than 80 other Bioconductor packages, including those for sequence analysis, differential expression analysis and visualization.}},
  author = {Lawrence, Michael and Huber, Wolfgang and Pag{\`e}s, Herv{\'e} and Aboyoun, Patrick and Carlson, Marc and Gentleman, Robert and Morgan, Martin T. and Carey, Vincent J.},
  citeulike-article-id = 12548311,
  citeulike-linkout-0 = {http://dx.doi.org/10.1371/journal.pcbi.1003118},
  citeulike-linkout-1 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3738458/},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/23950696},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=23950696},
  day = 8,
  doi = {10.1371/journal.pcbi.1003118},
  editor = {Prlic, Andreas},
  issn = {1553-7358},
  journal = {PLoS Computational Biology},
  keywords = {deseq2, workflow},
  month = aug,
  number = 8,
  pages = {e1003118+},
  pmcid = {PMC3738458},
  pmid = 23950696,
  posted-at = {2014-02-14 00:17:30},
  priority = 2,
  publisher = {Public Library of Science},
  title = {{Software for Computing and Annotating Genomic Ranges}},
  url = {http://dx.doi.org/10.1371/journal.pcbi.1003118},
  volume = 9,
  year = 2013
}

@article{Leek2014Svaseq,
  abstract = {{It is now known that unwanted noise and unmodeled artifacts such as batch effects can dramatically reduce the accuracy of statistical inference in genomic experiments. These sources of noise must be modeled and removed to accurately measure biological variability and to obtain correct statistical inference when performing high-throughput genomic analysis. We introduced surrogate variable analysis (sva) for estimating these artifacts by (i) identifying the part of the genomic data only affected by artifacts and (ii) estimating the artifacts with principal components or singular vectors of the subset of the data matrix. The resulting estimates of artifacts can be used in subsequent analyses as adjustment factors to correct analyses.
Here I describe a version of the sva approach specifically created for count data or FPKMs from sequencing experiments based on appropriate data transformation. I also describe the addition of supervised sva (ssva) for using control probes to identify the part of the genomic data only affected by artifacts. I present a comparison between these versions of sva and other methods for batch effect estimation on simulated data, real count-based data and FPKM-based data. These updates are available through the sva Bioconductor package and I have made fully reproducible analysis using these methods available from: https://github.com/jtleek/svaseq. {\copyright} The Author(s) 2014. Published by Oxford University Press on behalf of Nucleic Acids Research.}},
  author = {Leek, Jeffrey T.},
  citeulike-article-id = 13385083,
  citeulike-linkout-0 = {http://dx.doi.org/10.1093/nar/gku864},
  citeulike-linkout-1 = {http://nar.oxfordjournals.org/content/early/2014/10/07/nar.gku864.abstract},
  citeulike-linkout-2 = {http://nar.oxfordjournals.org/content/early/2014/10/07/nar.gku864.full.pdf},
  citeulike-linkout-3 = {http://view.ncbi.nlm.nih.gov/pubmed/25294822},
  citeulike-linkout-4 = {http://www.hubmed.org/display.cgi?uids=25294822},
  day = 1,
  doi = {10.1093/nar/gku864},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  keywords = {workflow},
  month = dec,
  number = 21,
  pages = {e161},
  pmid = 25294822,
  posted-at = {2015-08-18 15:16:02},
  priority = 2,
  publisher = {Oxford University Press},
  title = {{svaseq: removing batch effects and other unwanted noise from sequencing data.}},
  url = {http://dx.doi.org/10.1093/nar/gku864},
  volume = 42,
  year = 2014
}
% NOTE(review): Leek2014Svaseq had the placeholder "pages = 000" left from the
% advance-access export; article number e161 is from the published record --
% confirm. Journal name now matches the spelling used by Flicek2014Ensembl.

@article{Leng2013EBSeq,
  abstract = {{Motivation: Messenger RNA expression is important in normal development and differentiation, as well as in manifestation of disease. RNA-seq experiments allow for the identification of differentially expressed (DE) genes and their corresponding isoforms on a genome-wide scale. However, statistical methods are required to ensure that accurate identifications are made. A number of methods exist for identifying DE genes, but far fewer are available for identifying DE isoforms. When isoform DE is of interest, investigators often apply gene-level (count-based) methods directly to estimates of isoform counts. Doing so is not recommended. In short, estimating isoform expression is relatively straightforward for some groups of isoforms, but more challenging for others. This results in estimation uncertainty that varies across isoform groups. Count-based methods were not designed to accommodate this varying uncertainty, and consequently, application of them for isoform inference results in reduced power for some classes of isoforms and increased false discoveries for others.}},
  author = {Leng, N. and Dawson, J. A. and Thomson, J. A. and Ruotti, V. and Rissman, A. I. and Smits, B. M. G. and Haag, J. D. and Gould, M. N. and Stewart, R. M. and Kendziorski, C.},
  citeulike-article-id = 12074857,
  citeulike-linkout-0 = {http://dx.doi.org/10.1093/bioinformatics/btt087},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/8/1035},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/23428641},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=23428641},
  day = 15,
  doi = {10.1093/bioinformatics/btt087},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {deseq2, workflow},
  month = feb,
  number = 8,
  pages = {1035--1043},
  pmid = 23428641,
  posted-at = {2014-05-13 22:33:51},
  priority = 2,
  publisher = {Oxford University Press},
  title = {{EBSeq: an empirical Bayes hierarchical model for inference in RNA-seq experiments}},
  url = {http://dx.doi.org/10.1093/bioinformatics/btt087},
  volume = 29,
  year = 2013
}

@article{Leong2014Global,
  abstract = {{Non-coding RNAs (ncRNAs) are frequent and prevalent across the taxa. Although individual non-coding loci have been assigned a function, most are uncharacterized. Their global biological significance is unproven and remains controversial. Here we investigate the role played by ncRNAs in the stress response of Schizosaccharomyces pombe. We integrate global proteomics and RNA sequencing data to identify a systematic programme in which elevated antisense RNA arising both from ncRNAs and from 3'-overlapping convergent gene pairs is directly associated with substantial reductions in protein levels throughout the genome. We describe an extensive array of ncRNAs with trans associations that have the potential to influence multiple pathways. Deletion of one such locus reduces levels of atf1, a transcription factor downstream of the stress-activated mitogen-activated protein kinase (MAPK) pathway, and alters sensitivity to oxidative stress. These non-coding transcripts therefore regulate specific stress responses, adding unanticipated information-processing capacity to the MAPK signalling system.}},
  author = {Leong, Hui S. and Dawson, Keren and Wirth, Chris and Li, Yaoyong and Connolly, Yvonne and Smith, Duncan L. and Wilkinson, Caroline R.
and Miller, Crispin J.},
  citeulike-article-id = 13705386,
  citeulike-linkout-0 = {http://dx.doi.org/10.1038/ncomms4947},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/24853205},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=24853205},
  doi = {10.1038/ncomms4947},
  issn = {2041-1723},
  journal = {Nature Communications},
  keywords = {workflow},
  pages = {3947},
  pmid = 24853205,
  posted-at = {2015-08-18 15:16:55},
  priority = 2,
  title = {{A global non-coding RNA system modulates fission yeast protein levels in response to stress.}},
  url = {http://dx.doi.org/10.1038/ncomms4947},
  volume = 5,
  year = 2014
}
% NOTE(review): Leong2014Global article number 3947 is derived from the DOI
% suffix -- confirm.

% Li2009Sequence: journal given as plain "Bioinformatics" so the same journal
% is not spelled two ways in this file.
@article{Li2009Sequence,
  abstract = {{The Sequence Alignment/Map (SAM) format is a generic alignment format for storing read alignments against reference sequences, supporting short and long reads (up to 128 Mbp) produced by different sequencing platforms. It is flexible in style, compact in size, efficient in random access and is the format in which alignments from the 1000 Genomes Project are released. SAMtools implements various utilities for post-processing alignments in the SAM format, such as indexing, variant caller and alignment viewer, and thus provides universal tools for processing read alignments. http://samtools.sourceforge.net.}},
  author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard and {1000 Genome Project Data Processing Subgroup}},
  citeulike-article-id = 4778506,
  citeulike-linkout-0 = {http://dx.doi.org/10.1093/bioinformatics/btp352},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/25/16/2078.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/25/16/2078.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/25/16/2078},
  citeulike-linkout-4 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2723002/},
  citeulike-linkout-5 = {http://view.ncbi.nlm.nih.gov/pubmed/19505943},
  citeulike-linkout-6 = {http://www.hubmed.org/display.cgi?uids=19505943},
  day = 15,
  doi = {10.1093/bioinformatics/btp352},
  issn = {1367-4811},
  journal = {Bioinformatics},
  keywords = {workflow},
  month = aug,
  number = 16,
  pages = {2078--2079},
  pmcid = {PMC2723002},
  pmid = 19505943,
  posted-at = {2015-08-18 15:05:40},
  priority = 2,
  publisher = {Oxford University Press},
  title = {{The Sequence Alignment/Map format and SAMtools.}},
  url = {http://dx.doi.org/10.1093/bioinformatics/btp352},
  volume = 25,
  year = 2009
}

% Li2011RSEM: the DOI carried a stray trailing digit (…-12-3231); it now matches
% the DOI embedded in this entry's own url field.
@article{Li2011RSEM,
  author = {Li, Bo and Dewey, Colin N.},
  doi = {10.1186/1471-2105-12-323},
  journal = {BMC Bioinformatics},
  pages = {323+},
  title = {{RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome.}},
  url = {http://dx.doi.org/10.1186/1471-2105-12-323},
  volume = 12,
  year = 2011
}

@article{Liao2014FeatureCounts,
  abstract = {{ Next-generation sequencing technologies generate millions of short sequence reads, which are usually aligned to a reference genome. In many applications, the key information required for downstream analysis is the number of reads mapping to each genomic feature, for example to each exon or each gene.
The process of counting reads is called read summarization. Read summarization is required for a great variety of genomic analyses but has so far received relatively little attention in the literature.  We present featureCounts, a read summarization program suitable for counting reads generated from either RNA or genomic DNA sequencing experiments. featureCounts implements highly efficient chromosome hashing and feature blocking techniques. It is considerably faster than existing methods (by an order of magnitude for gene-level summarization) and requires far less computer memory. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications.Availability and implementation: featureCounts is available under GNU General Public License as part of the Subread (http://subread.sourceforge.net) or Rsubread (http://www.bioconductor.org) software packages.  shi@wehi.edu.au.}},
  author = {Liao, Y. and Smyth, G. K. and Shi, W.},
  citeulike-article-id = 12796380,
  citeulike-linkout-0 = {http://dx.doi.org/10.1093/bioinformatics/btt656},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/30/7/923},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/24227677},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=24227677},
  day = 13,
  doi = {10.1093/bioinformatics/btt656},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {deseq2, workflow},
  month = apr,
  number = 7,
  pages = {923--930},
  pmid = 24227677,
  posted-at = {2014-02-18 20:28:26},
  priority = 2,
  publisher = {Oxford University Press},
  title = {{featureCounts: an efficient general purpose program for assigning sequence reads to genomic features}},
  url = {http://dx.doi.org/10.1093/bioinformatics/btt656},
  volume = 30,
  year = 2014
}

@article{Love2014Moderated,
  abstract = {{In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}},
  author = {Love, Michael I. and Huber, Wolfgang and Anders, Simon},
  citeulike-article-id = 13505832,
  citeulike-linkout-0 = {http://dx.doi.org/10.1186/s13059-014-0550-8},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/25516281},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=25516281},
  day = 05,
  doi = {10.1186/s13059-014-0550-8},
  issn = {1465-6906},
  journal = {Genome Biology},
  keywords = {mine, workflow},
  month = dec,
  number = 12,
  pages = {550+},
  pmid = 25516281,
  posted-at = {2015-08-18 15:29:41},
  priority = 2,
  publisher = {BioMed Central Ltd},
  title = {{Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}},
  url = {http://dx.doi.org/10.1186/s13059-014-0550-8},
  volume = 15,
  year = 2014
}

@article{Patro2014Sailfish,
  author = {Patro, Rob and Mount, Stephen M.
and Kingsford, Carl}, journal = {Nature Biotechnology}, pages = {462--464}, title = {{Sailfish enables alignment-free isoform quantification from RNA-seq reads using lightweight algorithms}}, doi = {10.1038/nbt.2862}, url = {http://dx.doi.org/10.1038/nbt.2862}, volume = 32, year = 2014 } @article{Patro2016Salmon, author = {Patro, Rob and Duggal, Geet and Love, Michael I. and Irizarry, Rafael A. and Kingsford, Carl}, journal = {bioRxiv}, title = {Salmon provides accurate, fast, and bias-aware transcript expression estimates using dual-phase inference}, url = {http://biorxiv.org/content/early/2016/08/30/021592}, year = 2016 } @article{Risso2014Normalization, author = {Risso, Davide and Ngai, John and Speed, Terence P. and Dudoit, Sandrine}, citeulike-article-id =13336814, citeulike-linkout-0 ={http://dx.doi.org/10.1038/nbt.2931}, citeulike-linkout-1 ={http://dx.doi.org/10.1038/nbt.2931}, day = 24, doi = {10.1038/nbt.2931}, issn = {1087-0156}, journal = {Nature Biotechnology}, keywords = {rnaguide, workflow}, month = aug, number = 9, pages = {896--902}, posted-at = {2014-09-11 20:51:49}, priority = 2, publisher = {Nature Publishing Group}, title = {{Normalization of RNA-seq data using factor analysis of control genes or samples}}, url = {http://dx.doi.org/10.1038/nbt.2931}, volume = 32, year = 2014 } @article{Robert2015Errors, author = {Robert, Christelle and Watson, Mick}, doi = {10.1186/s13059-015-0734-x}, journal = {Genome Biology}, title = {{Errors in RNA-Seq quantification affect genes of relevance to human disease}}, url = {http://dx.doi.org/10.1186/s13059-015-0734-x}, year = 2015 } @article{Robinson2009EdgeR, abstract = {{It is expected that emerging digital gene expression (DGE) technologies will overtake microarray technologies in the near future for many functional genomics applications. 
One of the fundamental data analysis tasks, especially for gene expression studies, involves determining whether there is evidence that counts for a transcript or exon are significantly different across experimental conditions. edgeR is a Bioconductor software package for examining differential expression of replicated count data. An overdispersed Poisson model is used to account for both biological and technical variability. Empirical Bayes methods are used to moderate the degree of overdispersion across transcripts, improving the reliability of inference. The methodology can be used even with the most minimal levels of replication, provided at least one phenotype or experimental condition is replicated. The software may have other applications beyond sequencing data, such as proteome peptide count data. The package is freely available under the LGPL licence from the Bioconductor web site (http://bioconductor.org).}}, author = {Robinson, M. D. and McCarthy, D. J. and Smyth, G. K.}, citeulike-article-id =6109634, citeulike-linkout-0 ={http://dx.doi.org/10.1093/bioinformatics/btp616}, citeulike-linkout-1 ={http://bioinformatics.oxfordjournals.org/content/btp616v1/.abstract}, citeulike-linkout-2 ={http://bioinformatics.oxfordjournals.org/content/btp616v1/.full.pdf}, citeulike-linkout-3 ={http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/1/139}, citeulike-linkout-4 ={http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/}, citeulike-linkout-5 ={http://view.ncbi.nlm.nih.gov/pubmed/19910308}, citeulike-linkout-6 ={http://www.hubmed.org/display.cgi?uids=19910308}, day = 11, doi = {10.1093/bioinformatics/btp616}, issn = {1460-2059}, journal = {Bioinformatics}, keywords = {cnv, deseq2, overdispersion, rnaseq, workflow}, month = nov, number = 1, pages = {139--140}, pmcid = {PMC2796818}, pmid = 19910308, posted-at = {2011-06-25 18:43:51}, priority = 2, publisher = {Oxford University Press}, title = {{edgeR: a Bioconductor package for differential expression 
analysis of digital gene expression data}}, url = {http://dx.doi.org/10.1093/bioinformatics/btp616}, volume = 26, year = 2009 } @article{Schurch2016How, author = {Schurch, Nicholas J. and Schofield, Pieta and Gierlinski, Marek and Cole, Christian and Sherstnev, Alexander and Singh, Vijender and Wrobel, Nicola and Gharbi, Karim and Simpson, Gordon G. and Owen-Hughes, Tom and Blaxter, Mark and Barton, Geoffrey J.}, title = {How many biological replicates are needed in an RNA-seq experiment and which differential expression tool should you use?}, volume = 22, number = 6, pages = {839-851}, year = 2016, doi = {10.1261/rna.053959.115}, url = {http://dx.doi.org/10.1261/rna.053959.115} } @article{Soneson2015Differential, url = {http://dx.doi.org/10.12688/f1000research.7563.1}, doi = {10.12688/f1000research.7563.1}, author = {Soneson, Charlotte and Love, Michael I. and Robinson, Mark}, title = {{Differential analyses for RNA-seq: transcript-level estimates improve gene-level inferences}}, journal = {F1000Research}, year = 2015, Volume = 4, Issue = 1521 } @article{Tonner2016, author = {Tonner, Peter D and Darnell, Cynthia L and Engelhardt, Barbara E and Schmid, Amy K}, doi = {10.1101/gr.210286.116}, pages = {320--333}, title = {{Detecting differential growth of microbial populations with Gaussian process regression}}, year = 2017, volume = 27, journal = {Genome Research} } @article{Trapnell2013Differential, author = {Trapnell, Cole and Hendrickson, David G and Sauvageau, Martin and Goff, Loyal and Rinn, John L and Pachter, Lior}, doi = {10.1038/nbt.2450}, journal = {Nature Biotechnology}, title = {{Differential analysis of gene regulation at transcript resolution with RNA-seq}}, url = {http://dx.doi.org/10.1038/nbt.2450}, year = 2013 } @book{Wickham2009Ggplot2, address = {New York, NY}, author = {Wickham, Hadley}, booktitle = {ggplot2}, citeulike-article-id =10715717, citeulike-linkout-0 ={http://dx.doi.org/10.1007/978-0-387-98141-3}, citeulike-linkout-1 
={http://www.springerlink.com/content/978-0-387-98140-6}, doi = {10.1007/978-0-387-98141-3}, isbn = {978-0-387-98140-6}, keywords = {workflow}, posted-at = {2015-08-18 15:12:19}, priority = 2, publisher = {Springer New York}, title = {{ggplot2}}, url = {http://dx.doi.org/10.1007/978-0-387-98141-3}, year = 2009 } @article{Witten2011Classification, abstract = {{In recent years, advances in high throughput sequencing technology have led to a need for specialized methods for the analysis of digital gene expression data. While gene expression data measured on a microarray take on continuous values and can be modeled using the normal distribution, RNA sequencing data involve nonnegative counts and are more appropriately modeled using a discrete count distribution, such as the Poisson or the negative binomial. Consequently, analytic tools that assume a Gaussian distribution (such as classification methods based on linear discriminant analysis and clustering methods that use Euclidean distance) may not perform as well for sequencing data as methods that are based upon a more appropriate distribution. Here, we propose new approaches for performing classification and clustering of observations on the basis of sequencing data. Using a Poisson log linear model, we develop an analog of diagonal linear discriminant analysis that is appropriate for sequencing data. We also propose an approach for clustering sequencing data using a new dissimilarity measure that is based upon the Poisson model. 
We demonstrate the performances of these approaches in a simulation study, on three publicly available RNA sequencing data sets, and on a publicly available chromatin immunoprecipitation sequencing data set.}}, author = {Witten, Daniela M.}, citeulike-article-id =13172798, citeulike-linkout-0 ={http://dx.doi.org/10.1214/11-AOAS493}, day = 28, doi = {10.1214/11-AOAS493}, issn = {1932-6157}, journal = {The Annals of Applied Statistics}, keywords = {chipseq, ctsca, deseq2, rnaseq, workflow}, month = dec, number = 4, pages = {2493--2518}, posted-at = {2014-05-16 17:18:08}, priority = 2, title = {{Classification and clustering of sequencing data using a Poisson model}}, url = {http://dx.doi.org/10.1214/11-AOAS493}, volume = 5, year = 2011 } @article{Wu2013New, abstract = {{Recent developments in RNA-sequencing (RNA-seq) technology have led to a rapid increase in gene expression data in the form of counts. RNA-seq can be used for a variety of applications, however, identifying differential expression (DE) remains a key task in functional genomics. There have been a number of statistical methods for DE detection for RNA-seq data. One common feature of several leading methods is the use of the negative binomial (Gamma–Poisson mixture) model. That is, the unobserved gene expression is modeled by a gamma random variable and, given the expression, the sequencing read counts are modeled as Poisson. The distinct feature in various methods is how the variance, or dispersion, in the Gamma distribution is modeled and estimated. We evaluate several large public RNA-seq datasets and find that the estimated dispersion in existing methods does not adequately capture the heterogeneity of biological variance among samples. 
We present a new empirical Bayes shrinkage estimate of the dispersion parameters and demonstrate improved DE detection.}}, author = {Wu, Hao and Wang, Chi and Wu, Zhijin}, citeulike-article-id =11345725, citeulike-linkout-0 ={http://dx.doi.org/10.1093/biostatistics/kxs033}, citeulike-linkout-1 ={http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.abstract}, citeulike-linkout-2 ={http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.full.pdf}, citeulike-linkout-3 ={http://view.ncbi.nlm.nih.gov/pubmed/23001152}, citeulike-linkout-4 ={http://www.hubmed.org/display.cgi?uids=23001152}, day = 01, doi = {10.1093/biostatistics/kxs033}, issn = {1468-4357}, journal = {Biostatistics}, keywords = {deseq2, rnaseq, workflow}, month = apr, number = 2, pages = {232--243}, pmid = 23001152, posted-at = {2013-02-26 17:09:19}, priority = 2, publisher = {Oxford University Press}, title = {{A new shrinkage estimator for dispersion improves differential expression detection in RNA-seq data}}, url = {http://dx.doi.org/10.1093/biostatistics/kxs033}, volume = 14, year = 2013 }