12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985 |
@article{Anders2010Differential,
  author  = {Anders, Simon and Huber, Wolfgang},
  title   = {Differential expression analysis for sequence count data},
  journal = {Genome Biology},
  volume  = {11},
  number  = {10},
  pages   = {R106},
  year    = {2010},
  doi     = {10.1186/gb-2010-11-10-r106},
  pmid    = {20979621},
  pmcid   = {PMC3218662},
}
@article{Anders2015HTSeqa,
  author  = {Anders, Simon and Pyl, Paul T. and Huber, Wolfgang},
  title   = {{HTSeq} -- a {Python} framework to work with high-throughput
             sequencing data},
  journal = {Bioinformatics},
  volume  = {31},
  number  = {2},
  pages   = {166--169},
  year    = {2015},
  doi     = {10.1093/bioinformatics/btu638},
  pmid    = {25260700},
}
@article{Benjamini1995Controlling,
  author  = {Benjamini, Yoav and Hochberg, Yosef},
  title   = {Controlling the False Discovery Rate: A Practical and Powerful
             Approach to Multiple Testing},
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume  = {57},
  number  = {1},
  pages   = {289--300},
  year    = {1995},
  url     = {http://www.jstor.org/stable/2346101},
}
@article{Bourgon2010Independent,
  author  = {Bourgon, Richard and Gentleman, Robert and Huber, Wolfgang},
  title   = {Independent filtering increases detection power for
             high-throughput experiments},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {107},
  number  = {21},
  pages   = {9546--9551},
  year    = {2010},
  doi     = {10.1073/pnas.0914005107},
  pmid    = {20460310},
  pmcid   = {PMC2906865},
}
@article{Bray2016Near,
  author  = {Bray, Nicolas and Pimentel, Harold and Melsted, P{\'a}ll and
             Pachter, Lior},
  title   = {Near-optimal probabilistic {RNA-seq} quantification},
  journal = {Nature Biotechnology},
  volume  = {34},
  number  = {5},
  pages   = {525--527},
  year    = {2016},
  doi     = {10.1038/nbt.3519},
  url     = {http://dx.doi.org/10.1038/nbt.3519},
}
@article{Dobin2013STAR,
  author  = {Dobin, Alexander and Davis, Carrie A. and Schlesinger, Felix and
             Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut,
             Philippe and Chaisson, Mark and Gingeras, Thomas R.},
  title   = {{STAR}: ultrafast universal {RNA-seq} aligner},
  journal = {Bioinformatics},
  volume  = {29},
  number  = {1},
  pages   = {15--21},
  year    = {2013},
  doi     = {10.1093/bioinformatics/bts635},
  url     = {http://dx.doi.org/10.1093/bioinformatics/bts635},
  pmid    = {23104886},
  pmcid   = {PMC3530905},
}
@article{Dudoit2002Statistical,
  author  = {Dudoit, Sandrine and Yang, Yee H. and Callow, Matthew J. and
             Speed, Terence P.},
  title   = {Statistical methods for identifying differentially expressed
             genes in replicated {cDNA} microarray experiments},
  journal = {Statistica Sinica},
  volume  = {12},
  number  = {1},
  pages   = {111--139},
  year    = {2002},
}
@article{Durinck2009Mapping,
  author    = {Durinck, Steffen and Spellman, Paul T. and Birney, Ewan and
               Huber, Wolfgang},
  title     = {Mapping identifiers for the integration of genomic datasets
               with the {R/Bioconductor} package {biomaRt}},
  journal   = {Nature Protocols},
  volume    = {4},
  number    = {8},
  pages     = {1184--1191},
  year      = {2009},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nprot.2009.97},
  url       = {http://dx.doi.org/10.1038/nprot.2009.97},
  pmid      = {19617889},
  pmcid     = {PMC3159387},
}
@article{Flicek2014Ensembl,
  author  = {Flicek, Paul and Amode, M. Ridwan and Barrell, Daniel and Beal,
             Kathryn and Billis, Konstantinos and Brent, Simon and
             Carvalho-Silva, Denise and Clapham, Peter and Coates, Guy and
             Fitzgerald, Stephen and Gil, Laurent and Gir{\'o}n, Carlos G. and
             Gordon, Leo and Hourlier, Thibaut and Hunt, Sarah and Johnson,
             Nathan and Juettemann, Thomas and K{\"a}h{\"a}ri, Andreas K. and
             Keenan, Stephen and Kulesha, Eugene and Martin, Fergal J. and
             Maurel, Thomas and McLaren, William M. and Murphy, Daniel N. and
             Nag, Rishi and Overduin, Bert and Pignatelli, Miguel and
             Pritchard, Bethan and Pritchard, Emily and Riat, Harpreet S. and
             Ruffier, Magali and Sheppard, Daniel and Taylor, Kieron and
             Thormann, Anja and Trevanion, Stephen J. and Vullo, Alessandro and
             Wilder, Steven P. and Wilson, Mark and Zadissa, Amonida and Aken,
             Bronwen L. and Birney, Ewan and Cunningham, Fiona and Harrow,
             Jennifer and Herrero, Javier and Hubbard, Tim J. P. and Kinsella,
             Rhoda and Muffato, Matthieu and Parker, Anne and Spudich,
             Giulietta and Yates, Andy and Zerbino, Daniel R. and Searle,
             Stephen M. J.},
  title   = {{Ensembl} 2014},
  journal = {Nucleic Acids Research},
  volume  = {42},
  number  = {D1},
  pages   = {D749--D755},
  year    = {2014},
  issn    = {1362-4962},
  doi     = {10.1093/nar/gkt1196},
  url     = {http://dx.doi.org/10.1093/nar/gkt1196},
  pmid    = {24316576},
}
@article{Hardcastle2010BaySeq,
  author   = {Hardcastle, Thomas and Kelly, Krystyna},
  title    = {{baySeq}: Empirical {Bayesian} methods for identifying
              differential expression in sequence count data},
  journal  = {BMC Bioinformatics},
  volume   = {11},
  number   = {1},
  pages    = {422},
  year     = {2010},
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-11-422},
  url      = {http://dx.doi.org/10.1186/1471-2105-11-422},
  pmid     = {20698981},
  pmcid    = {PMC2928208},
  keywords = {bayes, deseq2, rnaseq, workflow},
  abstract = {BACKGROUND: High throughput sequencing has become an important
              technology for studying expression levels in many types of
              genomic, and particularly transcriptomic, data. One key way of
              analysing such data is to look for elements of the data which
              display particular patterns of differential expression in order
              to take these forward for further analysis and validation.
              RESULTS: We propose a framework for defining patterns of
              differential expression and develop a novel algorithm, baySeq,
              which uses an empirical Bayes approach to detect these patterns
              of differential expression within a set of sequencing samples.
              The method assumes a negative binomial distribution for the data
              and derives an empirically determined prior distribution from the
              entire dataset. We examine the performance of the method on real
              and simulated data. CONCLUSIONS: Our method performs at least as
              well, and often better, than existing methods for analyses of
              pairwise differential expression in both real and simulated data.
              When we compare methods for the analysis of data from
              experimental designs involving multiple sample groups, our method
              again shows substantial gains in performance. We believe that
              this approach thus represents an important step forward for the
              analysis of count data from sequencing experiments.},
}
@article{Himes2014RNASeq,
  author   = {Himes, Blanca E. and Jiang, Xiaofeng and Wagner, Peter and Hu,
              Ruoxi and Wang, Qiyu and Klanderman, Barbara and Whitaker, Reid
              M. and Duan, Qingling and Lasky-Su, Jessica and Nikolos,
              Christina and Jester, William and Johnson, Martin and Panettieri,
              Reynold A. and Tantisira, Kelan G. and Weiss, Scott T. and Lu,
              Quan},
  title    = {{RNA-Seq} transcriptome profiling identifies {CRISPLD2} as a
              glucocorticoid responsive gene that modulates cytokine function
              in airway smooth muscle cells},
  journal  = {PLoS ONE},
  volume   = {9},
  number   = {6},
  pages    = {e99625},
  year     = {2014},
  issn     = {1932-6203},
  doi      = {10.1371/journal.pone.0099625},
  url      = {http://dx.doi.org/10.1371/journal.pone.0099625},
  pmid     = {24926665},
  keywords = {rnaseq, workflow},
  abstract = {Asthma is a chronic inflammatory respiratory disease that affects
              over 300 million people worldwide. Glucocorticoids are a mainstay
              therapy for asthma because they exert anti-inflammatory effects
              in multiple lung tissues, including the airway smooth muscle
              (ASM). However, the mechanism by which glucocorticoids suppress
              inflammation in ASM remains poorly understood. Using RNA-Seq, a
              high-throughput sequencing method, we characterized
              transcriptomic changes in four primary human ASM cell lines that
              were treated with dexamethasone--a potent synthetic
              glucocorticoid (1 $\mu$M for 18 hours). Based on a
              Benjamini-Hochberg corrected p-value $<$0.05, we identified 316
              differentially expressed genes, including both well known (DUSP1,
              KLF15, PER1, TSC22D3) and less investigated (C7, CCDC69,
              CRISPLD2) glucocorticoid-responsive genes. CRISPLD2, which
              encodes a secreted protein previously implicated in lung
              development and endotoxin regulation, was found to have SNPs that
              were moderately associated with inhaled corticosteroid resistance
              and bronchodilator response among asthma patients in two
              previously conducted genome-wide association studies.
              Quantitative RT-PCR and Western blotting showed that
              dexamethasone treatment significantly increased CRISPLD2 mRNA and
              protein expression in ASM cells. CRISPLD2 expression was also
              induced by the inflammatory cytokine IL1$\beta$, and small
              interfering RNA-mediated knockdown of CRISPLD2 further increased
              IL1$\beta$-induced expression of IL6 and IL8. Our findings offer
              a comprehensive view of the effect of a glucocorticoid on the ASM
              transcriptome and identify CRISPLD2 as an asthma pharmacogenetics
              candidate gene that regulates anti-inflammatory effects of
              glucocorticoids in the ASM.},
}
@article{Huber2015Orchestrating,
  author    = {Huber, Wolfgang and Carey, Vincent J. and Gentleman, Robert and
               Anders, Simon and Carlson, Marc and Carvalho, Benilton S. and
               Corrada Bravo, H{\'e}ctor and Davis, Sean and Gatto, Laurent and
               Girke, Thomas and Gottardo, Raphael and Hahne, Florian and
               Hansen, Kasper D. and Irizarry, Rafael A. and Lawrence, Michael
               and Love, Michael I. and MacDonald, James and Obenchain, Valerie
               and Ole{\'s}, Andrzej K. and Pag{\`e}s, Herv{\'e} and Reyes,
               Alejandro and Shannon, Paul and Smyth, Gordon K. and Tenenbaum,
               Dan and Waldron, Levi and Morgan, Martin},
  title     = {Orchestrating high-throughput genomic analysis with
               {Bioconductor}},
  journal   = {Nature Methods},
  volume    = {12},
  number    = {2},
  pages     = {115--121},
  year      = {2015},
  month     = feb,
  publisher = {Nature Publishing Group},
  issn      = {1548-7105},
  doi       = {10.1038/nmeth.3252},
  url       = {http://dx.doi.org/10.1038/nmeth.3252},
  pmid      = {25633503},
  keywords  = {mine, workflow},
  abstract  = {Bioconductor is an open-source, open-development software
               project for the analysis and comprehension of high-throughput
               data in genomics and molecular biology. The project aims to
               enable interdisciplinary research, collaboration and rapid
               development of scientific software. Based on the statistical
               programming language R, Bioconductor comprises 934 interoperable
               packages contributed by a large, diverse community of
               scientists. Packages cover a range of bioinformatic and
               statistical applications. They undergo formal initial review and
               continuous automated testing. We present an overview for
               prospective users and contributors.},
}
@article{Huntley2013ReportingTools,
  author    = {Huntley, Melanie A. and Larson, Jessica L. and Chaivorapol,
               Christina and Becker, Gabriel and Lawrence, Michael and Hackney,
               Jason A. and Kaminker, Joshua S.},
  title     = {{ReportingTools}: an automated result processing and
               presentation toolkit for high-throughput genomic analyses},
  journal   = {Bioinformatics},
  volume    = {29},
  number    = {24},
  pages     = {3220--3221},
  year      = {2013},
  month     = dec,
  day       = {15},
  publisher = {Oxford University Press},
  issn      = {1460-2059},
  doi       = {10.1093/bioinformatics/btt551},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btt551},
  pmid      = {24078713},
  keywords  = {workflow},
  abstract  = {Summary: It is common for computational analyses to generate
               large amounts of complex data that are difficult to process and
               share with collaborators. Standard methods are needed to
               transform such data into a more useful and intuitive format. We
               present ReportingTools, a Bioconductor package, that
               automatically recognizes and transforms the output of many
               common Bioconductor packages into rich, interactive, HTML-based
               reports. Reports are not generic, but have been individually
               designed to reflect content specific to the result type
               detected. Tabular output included in reports is sortable,
               filterable and searchable and contains context-relevant
               hyperlinks to external databases. Additionally, in-line graphics
               have been developed for specific analysis types and are embedded
               by default within table rows, providing a useful visual summary
               of underlying raw data. ReportingTools is highly flexible and
               reports can be easily customized for specific applications using
               the well-defined API.},
}
@article{Kent2002Human,
  author    = {Kent, W. James and Sugnet, Charles W. and Furey, Terrence S. and
               Roskin, Krishna M. and Pringle, Tom H. and Zahler, Alan M. and
               Haussler, David},
  title     = {The human genome browser at {UCSC}},
  journal   = {Genome Research},
  volume    = {12},
  number    = {6},
  pages     = {996--1006},
  year      = {2002},
  month     = jun,
  day       = {1},
  publisher = {Cold Spring Harbor Laboratory Press},
  issn      = {1088-9051},
  doi       = {10.1101/gr.229102},
  url       = {http://dx.doi.org/10.1101/gr.229102},
  pmid      = {12045153},
  pmcid     = {PMC186604},
  keywords  = {ctsca, workflow},
  abstract  = {As vertebrate genome sequences near completion and research
               refocuses to their analysis, the issue of effective genome
               annotation display becomes critical. A mature web tool for rapid
               and reliable display of any requested portion of the genome at
               any scale, together with several dozen aligned annotation
               tracks, is provided at http://genome.ucsc.edu. This browser
               displays assembly contigs and gaps, mRNA and expressed sequence
               tag alignments, multiple gene predictions, cross-species
               homologies, single nucleotide polymorphisms, sequence-tagged
               sites, radiation hybrid data, transposon repeats, and more as a
               stack of coregistered tracks. Text and sequence-based searches
               provide quick and precise access to any region of specific
               interest. Secondary links from individual features lead to
               sequence details and supplementary off-site databases. One-half
               of the annotation tracks are computed at the University of
               California, Santa Cruz from publicly available sequence data;
               collaborators worldwide provide the rest. Users can stably add
               their own custom tracks to the browser for educational or
               research purposes. The conceptual and technical framework of the
               browser, its underlying MYSQL database, and overall use are
               described. The web site currently serves over 50,000 pages per
               day to over 3000 different users.},
}
@article{Law2014Voom,
  author    = {Law, Charity W. and Chen, Yunshun and Shi, Wei and Smyth, Gordon
               K.},
  title     = {Voom: precision weights unlock linear model analysis tools for
               {RNA-seq} read counts},
  journal   = {Genome Biology},
  volume    = {15},
  number    = {2},
  pages     = {R29},
  year      = {2014},
  month     = feb,
  day       = {3},
  publisher = {BioMed Central Ltd},
  issn      = {1465-6906},
  doi       = {10.1186/gb-2014-15-2-r29},
  url       = {http://dx.doi.org/10.1186/gb-2014-15-2-r29},
  pmid      = {24485249},
  keywords  = {deseq2, rnaguide, workflow},
  abstract  = {Normal linear modeling methods are developed for analyzing read
               counts from RNA-seq experiments. The voom method estimates the
               mean-variance relationship of the log-counts, generates a
               precision weight for each observation, and then enters these
               into a limma empirical Bayes analysis pipeline. This opens
               access for RNA-seq analysts to a large body of methodology
               developed for microarrays. Simulation studies show that voom
               performs as well or better than count-based RNA-seq methods even
               when the data are generated according to the assumptions of the
               earlier methods. Two case studies illustrate the use of linear
               modeling and gene set testing methods.},
}
@article{Lawrence2013Software,
  author    = {Lawrence, Michael and Huber, Wolfgang and Pag{\`e}s, Herv{\'e}
               and Aboyoun, Patrick and Carlson, Marc and Gentleman, Robert and
               Morgan, Martin T. and Carey, Vincent J.},
  editor    = {Prli{\'c}, Andreas},
  title     = {Software for Computing and Annotating Genomic Ranges},
  journal   = {PLoS Computational Biology},
  volume    = {9},
  number    = {8},
  pages     = {e1003118},
  year      = {2013},
  month     = aug,
  day       = {8},
  publisher = {Public Library of Science},
  issn      = {1553-7358},
  doi       = {10.1371/journal.pcbi.1003118},
  url       = {http://dx.doi.org/10.1371/journal.pcbi.1003118},
  pmid      = {23950696},
  pmcid     = {PMC3738458},
  keywords  = {deseq2, workflow},
  abstract  = {We describe Bioconductor infrastructure for representing and
               computing on annotated genomic ranges and integrating genomic
               data with the statistical computing features of R and its
               extensions. At the core of the infrastructure are three
               packages: IRanges, GenomicRanges, and GenomicFeatures. These
               packages provide scalable data structures for representing
               annotated ranges on the genome, with special support for
               transcript structures, read alignments and coverage vectors.
               Computational facilities include efficient algorithms for
               overlap and nearest neighbor detection, coverage calculation and
               other range operations. This infrastructure directly supports
               more than 80 other Bioconductor packages, including those for
               sequence analysis, differential expression analysis and
               visualization.},
}
@article{Leek2014Svaseq,
  author    = {Leek, Jeffrey T.},
  title     = {{svaseq}: removing batch effects and other unwanted noise from
               sequencing data},
  journal   = {Nucleic Acids Research},
  volume    = {42},
  number    = {21},
  pages     = {e161},
  year      = {2014},
  month     = dec,
  day       = {1},
  publisher = {Oxford University Press},
  issn      = {1362-4962},
  doi       = {10.1093/nar/gku864},
  url       = {http://dx.doi.org/10.1093/nar/gku864},
  pmid      = {25294822},
  keywords  = {workflow},
  abstract  = {It is now known that unwanted noise and unmodeled artifacts such
               as batch effects can dramatically reduce the accuracy of
               statistical inference in genomic experiments. These sources of
               noise must be modeled and removed to accurately measure
               biological variability and to obtain correct statistical
               inference when performing high-throughput genomic analysis. We
               introduced surrogate variable analysis (sva) for estimating
               these artifacts by (i) identifying the part of the genomic data
               only affected by artifacts and (ii) estimating the artifacts
               with principal components or singular vectors of the subset of
               the data matrix. The resulting estimates of artifacts can be
               used in subsequent analyses as adjustment factors to correct
               analyses. Here I describe a version of the sva approach
               specifically created for count data or FPKMs from sequencing
               experiments based on appropriate data transformation. I also
               describe the addition of supervised sva (ssva) for using control
               probes to identify the part of the genomic data only affected by
               artifacts. I present a comparison between these versions of sva
               and other methods for batch effect estimation on simulated data,
               real count-based data and FPKM-based data. These updates are
               available through the sva Bioconductor package and I have made
               fully reproducible analysis using these methods available from:
               https://github.com/jtleek/svaseq. {\copyright} The Author(s)
               2014. Published by Oxford University Press on behalf of Nucleic
               Acids Research.},
}
@article{Leng2013EBSeq,
  author    = {Leng, N. and Dawson, J. A. and Thomson, J. A. and Ruotti, V. and
               Rissman, A. I. and Smits, B. M. G. and Haag, J. D. and Gould,
               M. N. and Stewart, R. M. and Kendziorski, C.},
  title     = {{EBSeq}: an empirical {Bayes} hierarchical model for inference
               in {RNA-seq} experiments},
  journal   = {Bioinformatics},
  volume    = {29},
  number    = {8},
  pages     = {1035--1043},
  year      = {2013},
  month     = feb,
  day       = {15},
  publisher = {Oxford University Press},
  issn      = {1460-2059},
  doi       = {10.1093/bioinformatics/btt087},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btt087},
  pmid      = {23428641},
  keywords  = {deseq2, workflow},
  abstract  = {Motivation: Messenger RNA expression is important in normal
               development and differentiation, as well as in manifestation of
               disease. RNA-seq experiments allow for the identification of
               differentially expressed (DE) genes and their corresponding
               isoforms on a genome-wide scale. However, statistical methods
               are required to ensure that accurate identifications are made. A
               number of methods exist for identifying DE genes, but far fewer
               are available for identifying DE isoforms. When isoform DE is of
               interest, investigators often apply gene-level (count-based)
               methods directly to estimates of isoform counts. Doing so is not
               recommended. In short, estimating isoform expression is
               relatively straightforward for some groups of isoforms, but more
               challenging for others. This results in estimation uncertainty
               that varies across isoform groups. Count-based methods were not
               designed to accommodate this varying uncertainty, and
               consequently, application of them for isoform inference results
               in reduced power for some classes of isoforms and increased
               false discoveries for others.},
}
@article{Leong2014Global,
  author   = {Leong, Hui S. and Dawson, Keren and Wirth, Chris and Li, Yaoyong
              and Connolly, Yvonne and Smith, Duncan L. and Wilkinson, Caroline
              R. and Miller, Crispin J.},
  title    = {A global non-coding {RNA} system modulates fission yeast protein
              levels in response to stress},
  journal  = {Nature Communications},
  volume   = {5},
  pages    = {3947},
  year     = {2014},
  issn     = {2041-1723},
  doi      = {10.1038/ncomms4947},
  url      = {http://dx.doi.org/10.1038/ncomms4947},
  pmid     = {24853205},
  keywords = {workflow},
  abstract = {Non-coding RNAs (ncRNAs) are frequent and prevalent across the
              taxa. Although individual non-coding loci have been assigned a
              function, most are uncharacterized. Their global biological
              significance is unproven and remains controversial. Here we
              investigate the role played by ncRNAs in the stress response of
              Schizosaccharomyces pombe. We integrate global proteomics and RNA
              sequencing data to identify a systematic programme in which
              elevated antisense RNA arising both from ncRNAs and from
              3'-overlapping convergent gene pairs is directly associated with
              substantial reductions in protein levels throughout the genome.
              We describe an extensive array of ncRNAs with trans associations
              that have the potential to influence multiple pathways. Deletion
              of one such locus reduces levels of atf1, a transcription factor
              downstream of the stress-activated mitogen-activated protein
              kinase (MAPK) pathway, and alters sensitivity to oxidative
              stress. These non-coding transcripts therefore regulate specific
              stress responses, adding unanticipated information-processing
              capacity to the MAPK signalling system.},
}
@article{Li2009Sequence,
  author    = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim
               and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis,
               Goncalo and Durbin, Richard and
               {1000 Genome Project Data Processing Subgroup}},
  title     = {The {Sequence Alignment/Map} format and {SAMtools}},
  journal   = {Bioinformatics},
  volume    = {25},
  number    = {16},
  pages     = {2078--2079},
  year      = {2009},
  month     = aug,
  day       = {15},
  publisher = {Oxford University Press},
  issn      = {1367-4811},
  doi       = {10.1093/bioinformatics/btp352},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btp352},
  pmid      = {19505943},
  pmcid     = {PMC2723002},
  keywords  = {workflow},
  abstract  = {The Sequence Alignment/Map (SAM) format is a generic alignment
               format for storing read alignments against reference sequences,
               supporting short and long reads (up to 128 Mbp) produced by
               different sequencing platforms. It is flexible in style, compact
               in size, efficient in random access and is the format in which
               alignments from the 1000 Genomes Project are released. SAMtools
               implements various utilities for post-processing alignments in
               the SAM format, such as indexing, variant caller and alignment
               viewer, and thus provides universal tools for processing read
               alignments. http://samtools.sourceforge.net.},
}
@article{Li2011RSEM,
  author  = {Li, Bo and Dewey, Colin N.},
  title   = {{RSEM}: accurate transcript quantification from {RNA-Seq} data
             with or without a reference genome},
  journal = {BMC Bioinformatics},
  volume  = {12},
  pages   = {323},
  year    = {2011},
  doi     = {10.1186/1471-2105-12-323},
  url     = {http://dx.doi.org/10.1186/1471-2105-12-323},
}
- @article{Liao2014FeatureCounts,
- abstract = {{Next-generation sequencing technologies generate millions of short
- sequence reads, which are usually aligned to a reference genome. In many
- applications, the key information required for downstream analysis is
- the number of reads mapping to each genomic feature, for example to each
- exon or each gene. The process of counting reads is called read
- summarization. Read summarization is required for a great variety of
- genomic analyses but has so far received relatively little attention in
- the literature. We present featureCounts, a read summarization program
- suitable for counting reads generated from either RNA or genomic DNA
- sequencing experiments. featureCounts implements highly efficient
- chromosome hashing and feature blocking techniques. It is considerably
- faster than existing methods (by an order of magnitude for gene-level
- summarization) and requires far less computer memory. It works with
- either single or paired-end reads and provides a wide range of options
- appropriate for different sequencing applications. Availability and
- implementation: featureCounts is available under GNU General Public
- License as part of the Subread (http://subread.sourceforge.net) or
- Rsubread (http://www.bioconductor.org) software packages.
- Contact: shi@wehi.edu.au.}},
- author = {Liao, Yang and Smyth, Gordon K. and Shi, Wei},
- citeulike-article-id =12796380,
- citeulike-linkout-0 ={http://dx.doi.org/10.1093/bioinformatics/btt656},
- citeulike-linkout-1
- ={http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.abstract},
- citeulike-linkout-2
- ={http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.full.pdf},
- citeulike-linkout-3
- ={http://bioinformatics.oxfordjournals.org/cgi/content/abstract/30/7/923},
- citeulike-linkout-4 ={http://view.ncbi.nlm.nih.gov/pubmed/24227677},
- citeulike-linkout-5 ={http://www.hubmed.org/display.cgi?uids=24227677},
- day = 13,
- doi = {10.1093/bioinformatics/btt656},
- issn = {1460-2059},
- journal = {Bioinformatics},
- keywords = {deseq2, workflow},
- month = apr,
- number = 7,
- pages = {923--930},
- pmid = 24227677,
- posted-at = {2014-02-18 20:28:26},
- priority = 2,
- publisher = {Oxford University Press},
- title = {{featureCounts: an efficient general purpose program for assigning
- sequence reads to genomic features}},
- url = {http://dx.doi.org/10.1093/bioinformatics/btt656},
- volume = 30,
- year = 2014
- }
- @article{Love2014Moderated,
- author = {Love, Michael I. and Huber, Wolfgang and Anders, Simon},
- title = {{Moderated estimation of fold change and dispersion for RNA-seq data
- with DESeq2}},
- journal = {Genome Biology},
- publisher = {BioMed Central Ltd},
- volume = {15},
- number = {12},
- pages = {550+},
- year = {2014},
- month = dec,
- day = {05},
- doi = {10.1186/s13059-014-0550-8},
- url = {http://dx.doi.org/10.1186/s13059-014-0550-8},
- issn = {1465-6906},
- pmid = {25516281},
- keywords = {mine, workflow},
- priority = {2},
- posted-at = {2015-08-18 15:29:41},
- citeulike-article-id = {13505832},
- citeulike-linkout-0 = {http://dx.doi.org/10.1186/s13059-014-0550-8},
- citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/25516281},
- citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=25516281},
- abstract = {{In comparative high-throughput sequencing assays, a fundamental task is
- the analysis of count data, such as read counts per gene in RNA-seq, for
- evidence of systematic changes across experimental conditions. Small
- replicate numbers, discreteness, large dynamic range and the presence of
- outliers require a suitable statistical approach. We present DESeq2, a
- method for differential analysis of count data, using shrinkage
- estimation for dispersions and fold changes to improve stability and
- interpretability of estimates. This enables a more quantitative analysis
- focused on the strength rather than the mere presence of differential
- expression. The DESeq2 package is available at
- http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}}
- }
- @article{Patro2014Sailfish,
- author = {Patro, Rob and Mount, Stephen M. and Kingsford, Carl},
- title = {{Sailfish enables alignment-free isoform quantification from RNA-seq
- reads using lightweight algorithms}},
- journal = {Nature Biotechnology},
- volume = {32},
- pages = {462--464},
- year = {2014},
- doi = {10.1038/nbt.2862},
- url = {http://dx.doi.org/10.1038/nbt.2862}
- }
- @article{Patro2016Salmon,
- author = {Patro, Rob and Duggal, Geet and Love, Michael I. and Irizarry, Rafael
- A. and Kingsford, Carl},
- doi = {10.1101/021592},
- journal = {bioRxiv},
- title = {Salmon provides accurate, fast, and bias-aware transcript expression
- estimates using dual-phase inference},
- url = {http://biorxiv.org/content/early/2016/08/30/021592},
- year = 2016
- }
- @article{Risso2014Normalization,
- author = {Risso, Davide and Ngai, John and Speed, Terence P. and Dudoit, Sandrine},
- title = {{Normalization of RNA-seq data using factor analysis of control genes or
- samples}},
- journal = {Nature Biotechnology},
- publisher = {Nature Publishing Group},
- volume = {32},
- number = {9},
- pages = {896--902},
- year = {2014},
- month = aug,
- day = {24},
- doi = {10.1038/nbt.2931},
- url = {http://dx.doi.org/10.1038/nbt.2931},
- issn = {1087-0156},
- keywords = {rnaguide, workflow},
- priority = {2},
- posted-at = {2014-09-11 20:51:49},
- citeulike-article-id = {13336814},
- citeulike-linkout-0 = {http://dx.doi.org/10.1038/nbt.2931},
- citeulike-linkout-1 = {http://dx.doi.org/10.1038/nbt.2931}
- }
- @article{Robert2015Errors,
- author = {Robert, Christelle and Watson, Mick},
- doi = {10.1186/s13059-015-0734-x},
- journal = {Genome Biology},
- pages = {177},
- title = {{Errors in RNA-Seq quantification affect genes of relevance to human
- disease}},
- url = {http://dx.doi.org/10.1186/s13059-015-0734-x},
- volume = 16,
- year = 2015
- }
- @article{Robinson2009EdgeR,
- abstract = {{It is expected that emerging digital gene expression (DGE) technologies
- will overtake microarray technologies in the near future for many
- functional genomics applications. One of the fundamental data analysis
- tasks, especially for gene expression studies, involves determining
- whether there is evidence that counts for a transcript or exon are
- significantly different across experimental conditions. edgeR is a
- Bioconductor software package for examining differential expression of
- replicated count data. An overdispersed Poisson model is used to account
- for both biological and technical variability. Empirical Bayes methods
- are used to moderate the degree of overdispersion across transcripts,
- improving the reliability of inference. The methodology can be used even
- with the most minimal levels of replication, provided at least one
- phenotype or experimental condition is replicated. The software may have
- other applications beyond sequencing data, such as proteome peptide
- count data. The package is freely available under the LGPL licence from
- the Bioconductor web site (http://bioconductor.org).}},
- author = {Robinson, Mark D. and McCarthy, Davis J. and Smyth, Gordon K.},
- citeulike-article-id =6109634,
- citeulike-linkout-0 ={http://dx.doi.org/10.1093/bioinformatics/btp616},
- citeulike-linkout-1
- ={http://bioinformatics.oxfordjournals.org/content/btp616v1/.abstract},
- citeulike-linkout-2
- ={http://bioinformatics.oxfordjournals.org/content/btp616v1/.full.pdf},
- citeulike-linkout-3
- ={http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/1/139},
- citeulike-linkout-4 ={http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/},
- citeulike-linkout-5 ={http://view.ncbi.nlm.nih.gov/pubmed/19910308},
- citeulike-linkout-6 ={http://www.hubmed.org/display.cgi?uids=19910308},
- day = 11,
- doi = {10.1093/bioinformatics/btp616},
- issn = {1460-2059},
- journal = {Bioinformatics},
- keywords = {cnv, deseq2, overdispersion, rnaseq, workflow},
- month = nov,
- number = 1,
- pages = {139--140},
- pmcid = {PMC2796818},
- pmid = 19910308,
- posted-at = {2011-06-25 18:43:51},
- priority = 2,
- publisher = {Oxford University Press},
- title = {{edgeR: a Bioconductor package for differential expression analysis of
- digital gene expression data}},
- url = {http://dx.doi.org/10.1093/bioinformatics/btp616},
- volume = 26,
- year = 2009
- }
- @article{Schurch2016How,
- author = {Schurch, Nicholas J. and Schofield, Pieta and Gierlinski, Marek and
- Cole, Christian and Sherstnev, Alexander and Singh, Vijender and Wrobel,
- Nicola and Gharbi, Karim and Simpson, Gordon G. and Owen-Hughes, Tom and
- Blaxter, Mark and Barton, Geoffrey J.},
- title = {How many biological replicates are needed in an {RNA-seq} experiment and
- which differential expression tool should you use?},
- journal = {RNA},
- volume = 22,
- number = 6,
- pages = {839--851},
- year = 2016,
- doi = {10.1261/rna.053959.115},
- url = {http://dx.doi.org/10.1261/rna.053959.115}
- }
- @article{Soneson2015Differential,
- url = {http://dx.doi.org/10.12688/f1000research.7563.1},
- doi = {10.12688/f1000research.7563.1},
- author = {Soneson, Charlotte and Love, Michael I. and Robinson, Mark D.},
- title = {{Differential analyses for RNA-seq: transcript-level estimates improve
- gene-level inferences}},
- journal = {F1000Research},
- year = 2015,
- volume = 4,
- pages = {1521}
- }
- @article{Tonner2016,
- author = {Tonner, Peter D and Darnell, Cynthia L and Engelhardt, Barbara E and
- Schmid, Amy K},
- title = {{Detecting differential growth of microbial populations with Gaussian
- process regression}},
- journal = {Genome Research},
- volume = {27},
- pages = {320--333},
- year = {2017},
- doi = {10.1101/gr.210286.116}
- }
- @article{Trapnell2013Differential,
- author = {Trapnell, Cole and Hendrickson, David G and Sauvageau, Martin and Goff,
- Loyal and Rinn, John L and Pachter, Lior},
- doi = {10.1038/nbt.2450},
- journal = {Nature Biotechnology},
- number = 1,
- pages = {46--53},
- title = {{Differential analysis of gene regulation at transcript resolution with
- RNA-seq}},
- url = {http://dx.doi.org/10.1038/nbt.2450},
- volume = 31,
- year = 2013
- }
- @book{Wickham2009Ggplot2,
- author = {Wickham, Hadley},
- title = {{ggplot2}},
- booktitle = {ggplot2},
- publisher = {Springer New York},
- address = {New York, NY},
- year = {2009},
- isbn = {978-0-387-98140-6},
- doi = {10.1007/978-0-387-98141-3},
- url = {http://dx.doi.org/10.1007/978-0-387-98141-3},
- keywords = {workflow},
- priority = {2},
- posted-at = {2015-08-18 15:12:19},
- citeulike-article-id = {10715717},
- citeulike-linkout-0 = {http://dx.doi.org/10.1007/978-0-387-98141-3},
- citeulike-linkout-1 = {http://www.springerlink.com/content/978-0-387-98140-6}
- }
- @article{Witten2011Classification,
- author = {Witten, Daniela M.},
- title = {{Classification and clustering of sequencing data using a Poisson
- model}},
- journal = {The Annals of Applied Statistics},
- volume = {5},
- number = {4},
- pages = {2493--2518},
- year = {2011},
- month = dec,
- day = {28},
- doi = {10.1214/11-AOAS493},
- url = {http://dx.doi.org/10.1214/11-AOAS493},
- issn = {1932-6157},
- keywords = {chipseq, ctsca, deseq2, rnaseq, workflow},
- priority = {2},
- posted-at = {2014-05-16 17:18:08},
- citeulike-article-id = {13172798},
- citeulike-linkout-0 = {http://dx.doi.org/10.1214/11-AOAS493},
- abstract = {{In recent years, advances in high throughput sequencing technology have
- led to a need for specialized methods for the analysis of digital gene
- expression data. While gene expression data measured on a microarray
- take on continuous values and can be modeled using the normal
- distribution, RNA sequencing data involve nonnegative counts and are
- more appropriately modeled using a discrete count distribution, such as
- the Poisson or the negative binomial. Consequently, analytic tools that
- assume a Gaussian distribution (such as classification methods based on
- linear discriminant analysis and clustering methods that use Euclidean
- distance) may not perform as well for sequencing data as methods that
- are based upon a more appropriate distribution. Here, we propose new
- approaches for performing classification and clustering of observations
- on the basis of sequencing data. Using a Poisson log linear model, we
- develop an analog of diagonal linear discriminant analysis that is
- appropriate for sequencing data. We also propose an approach for
- clustering sequencing data using a new dissimilarity measure that is
- based upon the Poisson model. We demonstrate the performances of these
- approaches in a simulation study, on three publicly available RNA
- sequencing data sets, and on a publicly available chromatin
- immunoprecipitation sequencing data set.}}
- }
- @article{Wu2013New,
- abstract = {{Recent developments in RNA-sequencing (RNA-seq) technology have led to
- a rapid increase in gene expression data in the form of counts. RNA-seq
- can be used for a variety of applications, however, identifying
- differential expression (DE) remains a key task in functional
- genomics. There have been a number of statistical methods for DE
- detection for RNA-seq data. One common feature of several leading
- methods is the use of the negative binomial (Gamma--Poisson mixture)
- model. That is, the unobserved gene expression is modeled by a gamma
- random variable and, given the expression, the sequencing read counts
- are modeled as Poisson. The distinct feature in various methods is how
- the variance, or dispersion, in the Gamma distribution is modeled and
- estimated. We evaluate several large public RNA-seq datasets and find
- that the estimated dispersion in existing methods does not adequately
- capture the heterogeneity of biological variance among samples. We
- present a new empirical Bayes shrinkage estimate of the dispersion
- parameters and demonstrate improved DE detection.}},
- author = {Wu, Hao and Wang, Chi and Wu, Zhijin},
- citeulike-article-id =11345725,
- citeulike-linkout-0 ={http://dx.doi.org/10.1093/biostatistics/kxs033},
- citeulike-linkout-1
- ={http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.abstract},
- citeulike-linkout-2
- ={http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.full.pdf},
- citeulike-linkout-3 ={http://view.ncbi.nlm.nih.gov/pubmed/23001152},
- citeulike-linkout-4 ={http://www.hubmed.org/display.cgi?uids=23001152},
- day = 01,
- doi = {10.1093/biostatistics/kxs033},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {deseq2, rnaseq, workflow},
- month = apr,
- number = 2,
- pages = {232--243},
- pmid = 23001152,
- posted-at = {2013-02-26 17:09:19},
- priority = 2,
- publisher = {Oxford University Press},
- title = {{A new shrinkage estimator for dispersion improves differential
- expression detection in RNA-seq data}},
- url = {http://dx.doi.org/10.1093/biostatistics/kxs033},
- volume = 14,
- year = 2013
- }
|