bioc-rnaseq.bib 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985
  1. @article{Anders2010Differential,
  2. author = {Anders, Simon and Huber, Wolfgang},
  3. doi = {10.1186/gb-2010-11-10-r106},
  4. journal = {Genome Biology},
  5. number = 10,
  6. pages = {R106},
  7. pmcid = {PMC3218662},
  8. pmid = 20979621,
  9. title = {Differential expression analysis for sequence count data},
  10. volume = 11,
  11. year = 2010
  12. }
  13. @article{Anders2015HTSeqa,
  14. author = {Anders, Simon and Pyl, Paul T. and Huber, Wolfgang},
  15. doi = {10.1093/bioinformatics/btu638},
  16. journal = {Bioinformatics},
  17. number = 2,
  18. pages = {166--169},
  19. pmid = 25260700,
  20. title = {{HTSeq} -- a {Python} framework to work with high-throughput sequencing
  21. data},
  22. volume = 31,
  23. year = 2015
  24. }
  25. @article{Benjamini1995Controlling,
  26. author = {Benjamini, Yoav and Hochberg, Yosef},
  27. journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  28. number = 1,
  29. pages = {289--300},
  30. title = {Controlling the False Discovery Rate: A Practical and Powerful Approach
  31. to Multiple Testing},
  32. url = {http://www.jstor.org/stable/2346101},
  33. volume = 57,
  34. year = 1995
  35. }
  36. @article{Bourgon2010Independent,
  37. author = {Bourgon, Richard and Gentleman, Robert and Huber, Wolfgang},
  38. doi = {10.1073/pnas.0914005107},
  39. journal = {Proceedings of the National Academy of Sciences},
  40. number = 21,
  41. pages = {9546--9551},
  42. pmcid = {PMC2906865},
  43. pmid = 20460310,
  44. title = {Independent filtering increases detection power for high-throughput
  45. experiments},
  46. volume = 107,
  47. year = 2010
  48. }
  49. @article{Bray2016Near,
  50. author = {Bray, Nicolas and Pimentel, Harold and Melsted, P{\'a}ll and Pachter, Lior},
  51. journal = {Nature Biotechnology},
  52. pages = {525--527},
  53. title = {Near-optimal probabilistic {RNA-seq} quantification},
  54. volume = 34,
  55. url = {http://dx.doi.org/10.1038/nbt.3519},
  56. year = 2016
  57. }
  58. @article{Dobin2013STAR,
  59. author = {Dobin, Alexander and Davis, Carrie A. and Schlesinger, Felix and
  60. Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and
  61. Chaisson, Mark and Gingeras, Thomas R.},
  62. doi = {10.1093/bioinformatics/bts635},
  63. journal = {Bioinformatics},
  64. number = 1,
  65. pages = {15--21},
  66. pmcid = {PMC3530905},
  67. pmid = 23104886,
  68. title = {{STAR}: ultrafast universal {RNA-seq} aligner},
  69. url = {http://dx.doi.org/10.1093/bioinformatics/bts635},
  70. volume = 29,
  71. year = 2013
  72. }
  73. @article{Dudoit2002Statistical,
  74. author = {Dudoit, Sandrine and Yang, Yee H. and Callow, Matthew J. and Speed, Terence
  75. P.},
  76. journal = {Statistica Sinica},
  77. pages = {111--139},
  78. title = {Statistical methods for identifying differentially expressed genes in
  79. replicated {cDNA} microarray experiments},
  80. year = 2002
  81. }
  82. @article{Durinck2009Mapping,
  83. author = {Durinck, Steffen and Spellman, Paul T. and Birney, Ewan and Huber,
  84. Wolfgang},
  85. doi = {10.1038/nprot.2009.97},
  86. journal = {Nature Protocols},
  87. number = 8,
  88. pages = {1184--1191},
  89. pmcid = {PMC3159387},
  90. pmid = 19617889,
  91. publisher = {Nature Publishing Group},
  92. title = {Mapping identifiers for the integration of genomic datasets with the
  93. {R/Bioconductor} package {biomaRt}},
  94. url = {http://dx.doi.org/10.1038/nprot.2009.97},
  95. volume = 4,
  96. year = 2009
  97. }
  98. @article{Flicek2014Ensembl,
  99. author = {Flicek, Paul and Amode, M. Ridwan and Barrell, Daniel and Beal, Kathryn
  100. and Billis, Konstantinos and Brent, Simon and Carvalho-Silva, Denise and
  101. Clapham, Peter and Coates, Guy and Fitzgerald, Stephen and Gil, Laurent
  102. and Gir{\'o}n, Carlos G. and Gordon, Leo and Hourlier, Thibaut and Hunt,
  103. Sarah and Johnson, Nathan and Juettemann, Thomas and K{\"a}h{\"a}ri,
  104. Andreas K. and Keenan, Stephen and Kulesha, Eugene and Martin, Fergal
  105. J. and Maurel, Thomas and McLaren, William M. and Murphy, Daniel N. and
  106. Nag, Rishi and Overduin, Bert and Pignatelli, Miguel and Pritchard,
  107. Bethan and Pritchard, Emily and Riat, Harpreet S. and Ruffier, Magali
  108. and Sheppard, Daniel and Taylor, Kieron and Thormann, Anja and
  109. Trevanion, Stephen J. and Vullo, Alessandro and Wilder, Steven P. and
  110. Wilson, Mark and Zadissa, Amonida and Aken, Bronwen L. and Birney, Ewan
  111. and Cunningham, Fiona and Harrow, Jennifer and Herrero, Javier and
  112. Hubbard, Tim J. P. and Kinsella, Rhoda and Muffato, Matthieu and Parker,
  113. Anne and Spudich, Giulietta and Yates, Andy and Zerbino, Daniel R. and
  114. Searle, Stephen M. J.},
  115. doi = {10.1093/nar/gkt1196},
  116. issn = {1362-4962},
  117. journal = {Nucleic Acids Research},
  118. number = {D1},
  119. pages = {D749--D755},
  120. pmid = 24316576,
  121. title = {{Ensembl} 2014},
  122. url = {http://dx.doi.org/10.1093/nar/gkt1196},
  123. volume = 42,
  124. year = 2014
  125. }
  126. @article{Hardcastle2010BaySeq,
  127. abstract = {{BACKGROUND: High throughput sequencing has become an important
  128. technology for studying expression levels in many types of genomic, and
  129. particularly transcriptomic, data. One key way of analysing such data is
  130. to look for elements of the data which display particular patterns of
  131. differential expression in order to take these forward for further
  132. analysis and validation. RESULTS: We propose a framework for defining
  133. patterns of differential expression and develop a novel algorithm,
  134. baySeq, which uses an empirical Bayes approach to detect these patterns
  135. of differential expression within a set of sequencing samples. The
  136. method assumes a negative binomial distribution for the data and derives
  137. an empirically determined prior distribution from the entire dataset. We
  138. examine the performance of the method on real and simulated
  139. data. CONCLUSIONS: Our method performs at least as well, and often better,
  140. than existing methods for analyses of pairwise differential expression
  141. in both real and simulated data. When we compare methods for the
  142. analysis of data from experimental designs involving multiple sample
  143. groups, our method again shows substantial gains in performance. We
  144. believe that this approach thus represents an important step forward for
  145. the analysis of count data from sequencing experiments.}},
  146. author = {Hardcastle, Thomas and Kelly, Krystyna},
  147. citeulike-article-id =7610091,
  148. citeulike-linkout-0 ={http://dx.doi.org/10.1186/1471-2105-11-422},
  149. citeulike-linkout-1 ={http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2928208/},
  150. citeulike-linkout-2 ={http://view.ncbi.nlm.nih.gov/pubmed/20698981},
  151. citeulike-linkout-3 ={http://www.hubmed.org/display.cgi?uids=20698981},
  152. doi = {10.1186/1471-2105-11-422},
  153. issn = {1471-2105},
  154. journal = {BMC Bioinformatics},
  155. keywords = {bayes, deseq2, rnaseq, workflow},
  156. number = 1,
  157. pages = {422},
  158. pmcid = {PMC2928208},
  159. pmid = 20698981,
  160. posted-at = {2011-04-05 09:08:06},
  161. priority = 2,
  162. title = {{baySeq}: Empirical {Bayesian} methods for identifying differential
  163. expression in sequence count data},
  164. url = {http://dx.doi.org/10.1186/1471-2105-11-422},
  165. volume = 11,
  166. year = 2010
  167. }
  168. @article{Himes2014RNASeq,
  169. abstract = {{Asthma is a chronic inflammatory respiratory disease that affects over
  170. 300 million people worldwide. Glucocorticoids are a mainstay therapy for
  171. asthma because they exert anti-inflammatory effects in multiple lung
  172. tissues, including the airway smooth muscle (ASM). However, the
  173. mechanism by which glucocorticoids suppress inflammation in ASM remains
  174. poorly understood. Using RNA-Seq, a high-throughput sequencing method,
  175. we characterized transcriptomic changes in four primary human ASM cell
  176. lines that were treated with dexamethasone--a potent synthetic
  177. glucocorticoid (1 µM for 18 hours). Based on a Benjamini-Hochberg
  178. corrected p-value <0.05, we identified 316 differentially expressed
  179. genes, including both well known (DUSP1, KLF15, PER1, TSC22D3) and less
  180. investigated (C7, CCDC69, CRISPLD2) glucocorticoid-responsive
  181. genes. CRISPLD2, which encodes a secreted protein previously implicated
  182. in lung development and endotoxin regulation, was found to have SNPs
  183. that were moderately associated with inhaled corticosteroid resistance
  184. and bronchodilator response among asthma patients in two previously
  185. conducted genome-wide association studies. Quantitative RT-PCR and
  186. Western blotting showed that dexamethasone treatment significantly
  187. increased CRISPLD2 mRNA and protein expression in ASM cells. CRISPLD2
  188. expression was also induced by the inflammatory cytokine IL1β, and small
  189. interfering RNA-mediated knockdown of CRISPLD2 further increased
  190. IL1β-induced expression of IL6 and IL8. Our findings offer a
  191. comprehensive view of the effect of a glucocorticoid on the ASM
  192. transcriptome and identify CRISPLD2 as an asthma pharmacogenetics
  193. candidate gene that regulates anti-inflammatory effects of
  194. glucocorticoids in the ASM.}},
  195. author = {Himes, Blanca E. and Jiang, Xiaofeng and Wagner, Peter and Hu, Ruoxi and
  196. Wang, Qiyu and Klanderman, Barbara and Whitaker, Reid M. and Duan,
  197. Qingling and Lasky-Su, Jessica and Nikolos, Christina and Jester,
  198. William and Johnson, Martin and Panettieri, Reynold A. and Tantisira,
  199. Kelan G. and Weiss, Scott T. and Lu, Quan},
  200. citeulike-article-id =13705379,
  201. citeulike-linkout-0 ={http://dx.doi.org/10.1371/journal.pone.0099625},
  202. citeulike-linkout-1 ={http://view.ncbi.nlm.nih.gov/pubmed/24926665},
  203. citeulike-linkout-2 ={http://www.hubmed.org/display.cgi?uids=24926665},
  204. doi = {10.1371/journal.pone.0099625},
  205. issn = {1932-6203},
  206. journal = {PLoS ONE},
  207. keywords = {rnaseq, workflow},
  208. number = 6,
  209. pmid = 24926665,
  210. posted-at = {2015-08-18 15:02:37},
  211. priority = 2,
  212. title = {{RNA-Seq} transcriptome profiling identifies {CRISPLD2} as a glucocorticoid
  213. responsive gene that modulates cytokine function in airway smooth muscle
  214. cells},
  215. url = {http://dx.doi.org/10.1371/journal.pone.0099625},
  216. volume = 9,
  217. year = 2014
  218. }
  219. @article{Huber2015Orchestrating,
  220. abstract = {{Bioconductor is an open-source, open-development software project for
  221. the analysis and comprehension of high-throughput data in genomics and
  222. molecular biology. The project aims to enable interdisciplinary
  223. research, collaboration and rapid development of scientific
  224. software. Based on the statistical programming language R, Bioconductor
  225. comprises 934 interoperable packages contributed by a large, diverse
  226. community of scientists. Packages cover a range of bioinformatic and
  227. statistical applications. They undergo formal initial review and
  228. continuous automated testing. We present an overview for prospective
  229. users and contributors.}},
  230. author = {Huber, Wolfgang and Carey, Vincent J. and Gentleman, Robert and Anders,
  231. Simon and Carlson, Marc and Carvalho, Benilton S. and Corrada Bravo,
  232. Hector and Davis, Sean and Gatto, Laurent and Girke, Thomas and
  233. Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D. and Irizarry,
  234. Rafael A. and Lawrence, Michael and Love, Michael I. and MacDonald,
  235. James and Obenchain, Valerie and Ole{\'s}, Andrzej K. and Pag{\`e}s,
  236. Herv{\'e} and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon
  237. K. and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin},
  238. citeulike-article-id =13504287,
  239. citeulike-linkout-0 ={http://dx.doi.org/10.1038/nmeth.3252},
  240. citeulike-linkout-1 ={http://dx.doi.org/10.1038/nmeth.3252},
  241. citeulike-linkout-2 ={http://view.ncbi.nlm.nih.gov/pubmed/25633503},
  242. citeulike-linkout-3 ={http://www.hubmed.org/display.cgi?uids=25633503},
  243. day = 29,
  244. doi = {10.1038/nmeth.3252},
  245. issn = {1548-7105},
  246. journal = {Nature Methods},
  247. keywords = {mine, workflow},
  248. month = feb,
  249. number = 2,
  250. pages = {115--121},
  251. pmid = 25633503,
  252. posted-at = {2015-05-29 16:53:20},
  253. priority = 2,
  254. publisher = {Nature Publishing Group},
  255. title = {Orchestrating high-throughput genomic analysis with {Bioconductor}},
  256. url = {http://dx.doi.org/10.1038/nmeth.3252},
  257. volume = 12,
  258. year = 2015
  259. }
  260. @article{Huntley2013ReportingTools,
  261. abstract = {{Summary: It is common for computational analyses to generate large
  262. amounts of complex data that are difficult to process and share with
  263. collaborators. Standard methods are needed to transform such data into a
  264. more useful and intuitive format. We present ReportingTools, a
  265. Bioconductor package, that automatically recognizes and transforms the
  266. output of many common Bioconductor packages into rich, interactive,
  267. HTML-based reports. Reports are not generic, but have been individually
  268. designed to reflect content specific to the result type
  269. detected. Tabular output included in reports is sortable, filterable and
  270. searchable and contains context-relevant hyperlinks to external
  271. databases. Additionally, in-line graphics have been developed for
  272. specific analysis types and are embedded by default within table rows,
  273. providing a useful visual summary of underlying raw data. ReportingTools
  274. is highly flexible and reports can be easily customized for specific
  275. applications using the well-defined API.}},
  276. author = {Huntley, Melanie A. and Larson, Jessica L. and Chaivorapol, Christina
  277. and Becker, Gabriel and Lawrence, Michael and Hackney, Jason A. and
  278. Kaminker, Joshua S.},
  279. citeulike-article-id =12728071,
  280. citeulike-linkout-0 ={http://dx.doi.org/10.1093/bioinformatics/btt551},
  281. citeulike-linkout-1
  282. ={http://bioinformatics.oxfordjournals.org/content/29/24/3220.abstract},
  283. citeulike-linkout-2
  284. ={http://bioinformatics.oxfordjournals.org/content/29/24/3220.full.pdf},
  285. citeulike-linkout-3
  286. ={http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/24/3220},
  287. citeulike-linkout-4 ={http://view.ncbi.nlm.nih.gov/pubmed/24078713},
  288. citeulike-linkout-5 ={http://www.hubmed.org/display.cgi?uids=24078713},
  289. day = 15,
  290. doi = {10.1093/bioinformatics/btt551},
  291. issn = {1460-2059},
  292. journal = {Bioinformatics},
  293. keywords = {workflow},
  294. month = dec,
  295. number = 24,
  296. pages = {3220--3221},
  297. pmid = 24078713,
  298. posted-at = {2015-08-18 15:13:59},
  299. priority = 2,
  300. publisher = {Oxford University Press},
  301. title = {{ReportingTools}: an automated result processing and presentation toolkit
  302. for high-throughput genomic analyses},
  303. url = {http://dx.doi.org/10.1093/bioinformatics/btt551},
  304. volume = 29,
  305. year = 2013
  306. }
  307. @article{Kent2002Human,
  308. abstract = {{As vertebrate genome sequences near completion and research refocuses
  309. to their analysis, the issue of effective genome annotation display
  310. becomes critical. A mature web tool for rapid and reliable display of
  311. any requested portion of the genome at any scale, together with several
  312. dozen aligned annotation tracks, is provided at
  313. http://genome.ucsc.edu. This browser displays assembly contigs and gaps,
  314. mRNA and expressed sequence tag alignments, multiple gene predictions,
  315. cross-species homologies, single nucleotide polymorphisms,
  316. sequence-tagged sites, radiation hybrid data, transposon repeats, and
  317. more as a stack of coregistered tracks. Text and sequence-based searches
  318. provide quick and precise access to any region of specific
  319. interest. Secondary links from individual features lead to sequence
  320. details and supplementary off-site databases. One-half of the annotation
  321. tracks are computed at the University of California, Santa Cruz from
  322. publicly available sequence data; collaborators worldwide provide the
  323. rest. Users can stably add their own custom tracks to the browser for
  324. educational or research purposes. The conceptual and technical framework
  325. of the browser, its underlying MYSQL database, and overall use are
  326. described. The web site currently serves over 50,000 pages per day to
  327. over 3000 different users.}},
  328. author = {Kent, W. James and Sugnet, Charles W. and Furey, Terrence S. and Roskin,
  329. Krishna M. and Pringle, Tom H. and Zahler, Alan M. and Haussler, David},
  330. citeulike-article-id =2009259,
  331. citeulike-linkout-0 ={http://dx.doi.org/10.1101/gr.229102},
  332. citeulike-linkout-1
  333. ={http://dx.doi.org/10.1101/gr.229102.\%20article\%20published\%20online\%20before\%20print\%20in\%20may\%202002},
  334. citeulike-linkout-2 ={http://genome.cshlp.org/content/12/6/996.full.abstract},
  335. citeulike-linkout-3 ={http://genome.cshlp.org/content/12/6/996.full.full.pdf},
  336. citeulike-linkout-4 ={http://www.genome.org/cgi/content/abstract/12/6/996},
  337. citeulike-linkout-5 ={http://www.ncbi.nlm.nih.gov/pmc/articles/PMC186604/},
  338. citeulike-linkout-6 ={http://view.ncbi.nlm.nih.gov/pubmed/12045153},
  339. citeulike-linkout-7 ={http://www.hubmed.org/display.cgi?uids=12045153},
  340. day = 1,
  341. doi = {10.1101/gr.229102},
  342. issn = {1088-9051},
  343. journal = {Genome Research},
  344. keywords = {ctsca, workflow},
  345. month = jun,
  346. number = 6,
  347. pages = {996--1006},
  348. pmcid = {PMC186604},
  349. pmid = 12045153,
  350. posted-at = {2012-07-26 16:04:05},
  351. priority = 2,
  352. publisher = {Cold Spring Harbor Laboratory Press},
  353. title = {The human genome browser at {UCSC}},
  354. url = {http://dx.doi.org/10.1101/gr.229102},
  355. volume = 12,
  356. year = 2002
  357. }
  358. @article{Law2014Voom,
  359. abstract = {{Normal linear modeling methods are developed for analyzing read counts
  360. from RNA-seq experiments. The voom method estimates the mean-variance
  361. relationship of the log-counts, generates a precision weight for each
  362. observation, and then enters these into a limma empirical Bayes analysis
  363. pipeline. This opens access for RNA-seq analysts to a large body of
  364. methodology developed for microarrays. Simulation studies show that voom
  365. performs as well or better than count-based RNA-seq methods even when
  366. the data are generated according to the assumptions of the earlier
  367. methods. Two case studies illustrate the use of linear modeling and gene
  368. set testing methods.}},
  369. author = {Law, Charity W. and Chen, Yunshun and Shi, Wei and Smyth, Gordon K.},
  370. citeulike-article-id =12965503,
  371. citeulike-linkout-0 ={http://dx.doi.org/10.1186/gb-2014-15-2-r29},
  372. citeulike-linkout-1 ={http://view.ncbi.nlm.nih.gov/pubmed/24485249},
  373. citeulike-linkout-2 ={http://www.hubmed.org/display.cgi?uids=24485249},
  374. day = 03,
  375. doi = {10.1186/gb-2014-15-2-r29},
  376. issn = {1465-6906},
  377. journal = {Genome Biology},
  378. keywords = {deseq2, rnaguide, workflow},
  379. month = feb,
  380. number = 2,
  381. pages = {R29},
  382. pmid = 24485249,
  383. posted-at = {2014-02-13 20:56:00},
  384. priority = 2,
  385. publisher = {BioMed Central Ltd},
  386. title = {Voom: precision weights unlock linear model analysis tools for {RNA-seq}
  387. read counts},
  388. url = {http://dx.doi.org/10.1186/gb-2014-15-2-r29},
  389. volume = 15,
  390. year = 2014
  391. }
  392. @article{Lawrence2013Software,
  393. abstract = {{We describe Bioconductor infrastructure for representing and computing
  394. on annotated genomic ranges and integrating genomic data with the
  395. statistical computing features of R and its extensions. At the core of
  396. the infrastructure are three packages: IRanges, GenomicRanges, and
  397. GenomicFeatures. These packages provide scalable data structures for
  398. representing annotated ranges on the genome, with special support for
  399. transcript structures, read alignments and coverage
  400. vectors. Computational facilities include efficient algorithms for
  401. overlap and nearest neighbor detection, coverage calculation and other
  402. range operations. This infrastructure directly supports more than 80
  403. other Bioconductor packages, including those for sequence analysis,
  404. differential expression analysis and visualization.}},
  405. author = {Lawrence, Michael and Huber, Wolfgang and Pag{\`e}s, Herv{\'e} and
  406. Aboyoun, Patrick and Carlson, Marc and Gentleman, Robert and Morgan,
  407. Martin T. and Carey, Vincent J.},
  408. citeulike-article-id =12548311,
  409. citeulike-linkout-0 ={http://dx.doi.org/10.1371/journal.pcbi.1003118},
  410. citeulike-linkout-1 ={http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3738458/},
  411. citeulike-linkout-2 ={http://view.ncbi.nlm.nih.gov/pubmed/23950696},
  412. citeulike-linkout-3 ={http://www.hubmed.org/display.cgi?uids=23950696},
  413. day = 8,
  414. doi = {10.1371/journal.pcbi.1003118},
  415. editor = {Prlic, Andreas},
  416. issn = {1553-7358},
  417. journal = {PLoS Computational Biology},
  418. keywords = {deseq2, workflow},
  419. month = aug,
  420. number = 8,
  421. pages = {e1003118},
  422. pmcid = {PMC3738458},
  423. pmid = 23950696,
  424. posted-at = {2014-02-14 00:17:30},
  425. priority = 2,
  426. publisher = {Public Library of Science},
  427. title = {Software for Computing and Annotating Genomic Ranges},
  428. url = {http://dx.doi.org/10.1371/journal.pcbi.1003118},
  429. volume = 9,
  430. year = 2013
  431. }
  432. @article{Leek2014Svaseq,
  433. abstract = {{It is now known that unwanted noise and unmodeled artifacts such as
  434. batch effects can dramatically reduce the accuracy of statistical
  435. inference in genomic experiments. These sources of noise must be modeled
  436. and removed to accurately measure biological variability and to obtain
  437. correct statistical inference when performing high-throughput genomic
  438. analysis. We introduced surrogate variable analysis (sva) for estimating
  439. these artifacts by (i) identifying the part of the genomic data only
  440. affected by artifacts and (ii) estimating the artifacts with principal
  441. components or singular vectors of the subset of the data matrix. The
  442. resulting estimates of artifacts can be used in subsequent analyses as
  443. adjustment factors to correct analyses. Here I describe a version of the
  444. sva approach specifically created for count data or FPKMs from
  445. sequencing experiments based on appropriate data transformation. I also
  446. describe the addition of supervised sva (ssva) for using control probes
  447. to identify the part of the genomic data only affected by artifacts. I
  448. present a comparison between these versions of sva and other methods for
  449. batch effect estimation on simulated data, real count-based data and
  450. FPKM-based data. These updates are available through the sva
  451. Bioconductor package and I have made fully reproducible analysis using
  452. these methods available from:
  453. https://github.com/jtleek/svaseq. {\copyright} The Author(s)
  454. 2014. Published by Oxford University Press on behalf of Nucleic Acids
  455. Research.}},
  456. author = {Leek, Jeffrey T.},
  457. citeulike-article-id =13385083,
  458. citeulike-linkout-0 ={http://dx.doi.org/10.1093/nar/gku864},
  459. citeulike-linkout-1
  460. ={http://nar.oxfordjournals.org/content/early/2014/10/07/nar.gku864.abstract},
  461. citeulike-linkout-2
  462. ={http://nar.oxfordjournals.org/content/early/2014/10/07/nar.gku864.full.pdf},
  463. citeulike-linkout-3 ={http://view.ncbi.nlm.nih.gov/pubmed/25294822},
  464. citeulike-linkout-4 ={http://www.hubmed.org/display.cgi?uids=25294822},
  465. day = 1,
  466. doi = {10.1093/nar/gku864},
  467. issn = {1362-4962},
  468. journal = {Nucleic Acids Research},
  469. keywords = {workflow},
  470. month = dec,
  471. number = 21,
  472. pages = {e161},
  473. pmid = 25294822,
  474. posted-at = {2015-08-18 15:16:02},
  475. priority = 2,
  476. publisher = {Oxford University Press},
  477. title = {{svaseq}: removing batch effects and other unwanted noise from sequencing
  478. data},
  479. url = {http://dx.doi.org/10.1093/nar/gku864},
  480. volume = 42,
  481. year = 2014
  482. }
  483. @article{Leng2013EBSeq,
  484. abstract = {{Motivation: Messenger RNA expression is important in normal development
  485. and differentiation, as well as in manifestation of disease. RNA-seq
  486. experiments allow for the identification of differentially expressed
  487. (DE) genes and their corresponding isoforms on a genome-wide
  488. scale. However, statistical methods are required to ensure that accurate
  489. identifications are made. A number of methods exist for identifying DE
  490. genes, but far fewer are available for identifying DE isoforms. When
  491. isoform DE is of interest, investigators often apply gene-level
  492. (count-based) methods directly to estimates of isoform counts. Doing so
  493. is not recommended. In short, estimating isoform expression is
  494. relatively straightforward for some groups of isoforms, but more
  495. challenging for others. This results in estimation uncertainty that
  496. varies across isoform groups. Count-based methods were not designed to
  497. accommodate this varying uncertainty, and consequently, application of
  498. them for isoform inference results in reduced power for some classes of
  499. isoforms and increased false discoveries for others.}},
  500. author = {Leng, N. and Dawson, J. A. and Thomson, J. A. and Ruotti, V. and
  501. Rissman, A. I. and Smits, B. M. G. and Haag, J. D. and Gould, M. N. and
  502. Stewart, R. M. and Kendziorski, C.},
  503. citeulike-article-id =12074857,
  504. citeulike-linkout-0 ={http://dx.doi.org/10.1093/bioinformatics/btt087},
  505. citeulike-linkout-1
  506. ={http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.abstract},
  507. citeulike-linkout-2
  508. ={http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.full.pdf},
  509. citeulike-linkout-3
  510. ={http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/8/1035},
  511. citeulike-linkout-4 ={http://view.ncbi.nlm.nih.gov/pubmed/23428641},
  512. citeulike-linkout-5 ={http://www.hubmed.org/display.cgi?uids=23428641},
  513. day = 15,
  514. doi = {10.1093/bioinformatics/btt087},
  515. issn = {1460-2059},
  516. journal = {Bioinformatics},
  517. keywords = {deseq2, workflow},
  518. month = feb,
  519. number = 8,
  520. pages = {1035--1043},
  521. pmid = 23428641,
  522. posted-at = {2014-05-13 22:33:51},
  523. priority = 2,
  524. publisher = {Oxford University Press},
  525. title = {{EBSeq}: an empirical {Bayes} hierarchical model for inference in {RNA-seq}
  526. experiments},
  527. url = {http://dx.doi.org/10.1093/bioinformatics/btt087},
  528. volume = 29,
  529. year = 2013
  530. }
  531. @article{Leong2014Global,
  532. abstract = {{Non-coding RNAs (ncRNAs) are frequent and prevalent across the
  533. taxa. Although individual non-coding loci have been assigned a function,
  534. most are uncharacterized. Their global biological significance is
  535. unproven and remains controversial. Here we investigate the role played
  536. by ncRNAs in the stress response of Schizosaccharomyces pombe. We
  537. integrate global proteomics and RNA sequencing data to identify a
  538. systematic programme in which elevated antisense RNA arising both from
  539. ncRNAs and from 3'-overlapping convergent gene pairs is directly
  540. associated with substantial reductions in protein levels throughout the
  541. genome. We describe an extensive array of ncRNAs with trans associations
  542. that have the potential to influence multiple pathways. Deletion of one
  543. such locus reduces levels of atf1, a transcription factor downstream of
  544. the stress-activated mitogen-activated protein kinase (MAPK) pathway,
  545. and alters sensitivity to oxidative stress. These non-coding transcripts
  546. therefore regulate specific stress responses, adding unanticipated
  547. information-processing capacity to the MAPK signalling system.}},
  548. author = {Leong, Hui S. and Dawson, Keren and Wirth, Chris and Li, Yaoyong and
  549. Connolly, Yvonne and Smith, Duncan L. and Wilkinson, Caroline R. and
  550. Miller, Crispin J.},
  551. citeulike-article-id =13705386,
  552. citeulike-linkout-0 ={http://dx.doi.org/10.1038/ncomms4947},
  553. citeulike-linkout-1 ={http://view.ncbi.nlm.nih.gov/pubmed/24853205},
  554. citeulike-linkout-2 ={http://www.hubmed.org/display.cgi?uids=24853205},
  555. doi = {10.1038/ncomms4947},
  556. issn = {2041-1723},
  557. journal = {Nature Communications},
  558. keywords = {workflow},
  559. pmid = 24853205,
  560. posted-at = {2015-08-18 15:16:55},
  561. priority = 2,
  562. title = {A global non-coding {RNA} system modulates fission yeast protein levels
  563. in response to stress},
  564. url = {http://dx.doi.org/10.1038/ncomms4947},
  565. volume = 5,
  566. year = 2014
  567. }
  568. @article{Li2009Sequence,
  569. abstract = {{The Sequence Alignment/Map (SAM) format is a generic alignment format
  570. for storing read alignments against reference sequences, supporting
  571. short and long reads (up to 128 Mbp) produced by different sequencing
  572. platforms. It is flexible in style, compact in size, efficient in random
  573. access and is the format in which alignments from the 1000 Genomes
  574. Project are released. SAMtools implements various utilities for
  575. post-processing alignments in the SAM format, such as indexing, variant
  576. caller and alignment viewer, and thus provides universal tools for
  577. processing read alignments. http://samtools.sourceforge.net.}},
  578. author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan,
  579. Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin,
  580. Richard and {1000 Genome Project Data Processing Subgroup}},
  581. citeulike-article-id =4778506,
  582. citeulike-linkout-0 ={http://dx.doi.org/10.1093/bioinformatics/btp352},
  583. citeulike-linkout-1
  584. ={http://bioinformatics.oxfordjournals.org/content/25/16/2078.abstract},
  585. citeulike-linkout-2
  586. ={http://bioinformatics.oxfordjournals.org/content/25/16/2078.full.pdf},
  587. citeulike-linkout-3
  588. ={http://bioinformatics.oxfordjournals.org/cgi/content/abstract/25/16/2078},
  589. citeulike-linkout-4 ={http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2723002/},
  590. citeulike-linkout-5 ={http://view.ncbi.nlm.nih.gov/pubmed/19505943},
  591. citeulike-linkout-6 ={http://www.hubmed.org/display.cgi?uids=19505943},
  592. day = 15,
  593. doi = {10.1093/bioinformatics/btp352},
  594. issn = {1367-4811},
  595. journal = {Bioinformatics},
  596. keywords = {workflow},
  597. month = aug,
  598. number = 16,
  599. pages = {2078--2079},
  600. pmcid = {PMC2723002},
  601. pmid = 19505943,
  602. posted-at = {2015-08-18 15:05:40},
  603. priority = 2,
  604. publisher = {Oxford University Press},
  605. title = {{The Sequence Alignment/Map format and SAMtools.}},
  606. url = {http://dx.doi.org/10.1093/bioinformatics/btp352},
  607. volume = 25,
  608. year = 2009
  609. }
  610. @article{Li2011RSEM,
  611. author = {Li, Bo and Dewey, Colin N.},
  612. doi = {10.1186/1471-2105-12-323},
  613. journal = {BMC Bioinformatics},
  614. pages = {323+},
  615. title = {{RSEM: accurate transcript quantification from RNA-Seq data with or
  616. without a reference genome.}},
  617. url = {http://dx.doi.org/10.1186/1471-2105-12-323},
  618. volume = 12,
  619. year = 2011
  620. }
  621. @article{Liao2014FeatureCounts,
  622. author = {Liao, Y. and Smyth, G. K. and Shi, W.},
  623. title = {{featureCounts: an efficient general purpose program for assigning
  624. sequence reads to genomic features}},
  625. journal = {Bioinformatics},
  626. volume = 30,
  627. number = 7,
  628. pages = {923--930},
  629. year = 2014,
  630. month = apr,
  631. day = 13,
  632. publisher = {Oxford University Press},
  633. doi = {10.1093/bioinformatics/btt656},
  634. url = {http://dx.doi.org/10.1093/bioinformatics/btt656},
  635. issn = {1460-2059},
  636. pmid = 24227677,
  637. keywords = {deseq2, workflow},
  638. priority = 2,
  639. posted-at = {2014-02-18 20:28:26},
  640. citeulike-article-id = 12796380,
  641. citeulike-linkout-0 = {http://dx.doi.org/10.1093/bioinformatics/btt656},
  642. citeulike-linkout-1 =
  643. {http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.abstract},
  644. citeulike-linkout-2 =
  645. {http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.full.pdf},
  646. citeulike-linkout-3 =
  647. {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/30/7/923},
  648. citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/24227677},
  649. citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=24227677},
  650. abstract = {{ Next-generation sequencing technologies generate millions of short
  651. sequence reads, which are usually aligned to a reference genome. In many
  652. applications, the key information required for downstream analysis is
  653. the number of reads mapping to each genomic feature, for example to each
  654. exon or each gene. The process of counting reads is called read
  655. summarization. Read summarization is required for a great variety of
  656. genomic analyses but has so far received relatively little attention in
  657. the literature.  We present featureCounts, a read summarization program
  658. suitable for counting reads generated from either RNA or genomic DNA
  659. sequencing experiments. featureCounts implements highly efficient
  660. chromosome hashing and feature blocking techniques. It is considerably
  661. faster than existing methods (by an order of magnitude for gene-level
  662. summarization) and requires far less computer memory. It works with
  663. either single or paired-end reads and provides a wide range of options
  664. appropriate for different sequencing applications.Availability and
  665. implementation: featureCounts is available under GNU General Public
  666. License as part of the Subread (http://subread.sourceforge.net) or
  667. Rsubread (http://www.bioconductor.org) software packages.
  668. [email protected].}}
  669. }
  670. @article{Love2014Moderated,
  671. author = {Love, Michael I. and Huber, Wolfgang and Anders, Simon},
  672. title = {{Moderated estimation of fold change and dispersion for RNA-seq data
  673. with DESeq2}},
  674. journal = {Genome Biology},
  675. volume = 15,
  676. number = 12,
  677. pages = {550+},
  678. year = 2014,
  679. month = dec,
  680. day = 05,
  681. publisher = {BioMed Central Ltd},
  682. doi = {10.1186/s13059-014-0550-8},
  683. url = {http://dx.doi.org/10.1186/s13059-014-0550-8},
  684. issn = {1465-6906},
  685. pmid = 25516281,
  686. keywords = {mine, workflow},
  687. priority = 2,
  688. posted-at = {2015-08-18 15:29:41},
  689. citeulike-article-id = 13505832,
  690. citeulike-linkout-0 = {http://dx.doi.org/10.1186/s13059-014-0550-8},
  691. citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/25516281},
  692. citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=25516281},
  693. abstract = {{In comparative high-throughput sequencing assays, a fundamental task is
  694. the analysis of count data, such as read counts per gene in RNA-seq, for
  695. evidence of systematic changes across experimental conditions. Small
  696. replicate numbers, discreteness, large dynamic range and the presence of
  697. outliers require a suitable statistical approach. We present DESeq2, a
  698. method for differential analysis of count data, using shrinkage
  699. estimation for dispersions and fold changes to improve stability and
  700. interpretability of estimates. This enables a more quantitative analysis
  701. focused on the strength rather than the mere presence of differential
  702. expression. The DESeq2 package is available at
  703. http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}}
  704. }
  705. @article{Patro2014Sailfish,
  706. author = {Patro, Rob and Mount, Stephen M. and Kingsford, Carl},
  707. title = {{Sailfish enables alignment-free isoform quantification from RNA-seq
  708. reads using lightweight algorithms}},
  709. journal = {Nature Biotechnology},
  710. volume = 32,
  711. pages = {462--464},
  712. year = 2014,
  713. doi = {10.1038/nbt.2862},
  714. url = {http://dx.doi.org/10.1038/nbt.2862}
  715. }
  716. @article{Patro2016Salmon,
  717. author = {Patro, Rob and Duggal, Geet and Love, Michael I. and Irizarry, Rafael
  718. A. and Kingsford, Carl},
  719. title = {Salmon provides accurate, fast, and bias-aware transcript expression
  720. estimates using dual-phase inference},
  721. journal = {bioRxiv},
  722. year = 2016,
  723. url = {http://biorxiv.org/content/early/2016/08/30/021592}
  724. }
  725. @article{Risso2014Normalization,
  726. author = {Risso, Davide and Ngai, John and Speed, Terence P. and Dudoit, Sandrine},
  727. title = {{Normalization of RNA-seq data using factor analysis of control genes or
  728. samples}},
  729. journal = {Nature Biotechnology},
  730. volume = 32,
  731. number = 9,
  732. pages = {896--902},
  733. year = 2014,
  734. month = aug,
  735. day = 24,
  736. publisher = {Nature Publishing Group},
  737. doi = {10.1038/nbt.2931},
  738. url = {http://dx.doi.org/10.1038/nbt.2931},
  739. issn = {1087-0156},
  740. keywords = {rnaguide, workflow},
  741. priority = 2,
  742. posted-at = {2014-09-11 20:51:49},
  743. citeulike-article-id = 13336814,
  744. citeulike-linkout-0 = {http://dx.doi.org/10.1038/nbt.2931},
  745. citeulike-linkout-1 = {http://dx.doi.org/10.1038/nbt.2931}
  746. }
  747. @article{Robert2015Errors,
  748. author = {Robert, Christelle and Watson, Mick},
  749. title = {{Errors in RNA-Seq quantification affect genes of relevance to human
  750. disease}},
  751. journal = {Genome Biology},
  752. year = 2015,
  753. doi = {10.1186/s13059-015-0734-x},
  754. url = {http://dx.doi.org/10.1186/s13059-015-0734-x}
  755. }
  756. @article{Robinson2009EdgeR,
  757. author = {Robinson, M. D. and McCarthy, D. J. and Smyth, G. K.},
  758. title = {{edgeR: a Bioconductor package for differential expression analysis of
  759. digital gene expression data}},
  760. journal = {Bioinformatics},
  761. volume = 26,
  762. number = 1,
  763. pages = {139--140},
  764. year = 2009,
  765. month = nov,
  766. day = 11,
  767. publisher = {Oxford University Press},
  768. doi = {10.1093/bioinformatics/btp616},
  769. url = {http://dx.doi.org/10.1093/bioinformatics/btp616},
  770. issn = {1460-2059},
  771. pmid = 19910308,
  772. pmcid = {PMC2796818},
  773. keywords = {cnv, deseq2, overdispersion, rnaseq, workflow},
  774. priority = 2,
  775. posted-at = {2011-06-25 18:43:51},
  776. citeulike-article-id = 6109634,
  777. citeulike-linkout-0 = {http://dx.doi.org/10.1093/bioinformatics/btp616},
  778. citeulike-linkout-1 =
  779. {http://bioinformatics.oxfordjournals.org/content/btp616v1/.abstract},
  780. citeulike-linkout-2 =
  781. {http://bioinformatics.oxfordjournals.org/content/btp616v1/.full.pdf},
  782. citeulike-linkout-3 =
  783. {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/1/139},
  784. citeulike-linkout-4 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/},
  785. citeulike-linkout-5 = {http://view.ncbi.nlm.nih.gov/pubmed/19910308},
  786. citeulike-linkout-6 = {http://www.hubmed.org/display.cgi?uids=19910308},
  787. abstract = {{It is expected that emerging digital gene expression (DGE) technologies
  788. will overtake microarray technologies in the near future for many
  789. functional genomics applications. One of the fundamental data analysis
  790. tasks, especially for gene expression studies, involves determining
  791. whether there is evidence that counts for a transcript or exon are
  792. significantly different across experimental conditions. edgeR is a
  793. Bioconductor software package for examining differential expression of
  794. replicated count data. An overdispersed Poisson model is used to account
  795. for both biological and technical variability. Empirical Bayes methods
  796. are used to moderate the degree of overdispersion across transcripts,
  797. improving the reliability of inference. The methodology can be used even
  798. with the most minimal levels of replication, provided at least one
  799. phenotype or experimental condition is replicated. The software may have
  800. other applications beyond sequencing data, such as proteome peptide
  801. count data. The package is freely available under the LGPL licence from
  802. the Bioconductor web site (http://bioconductor.org).}}
  803. }
  804. @article{Schurch2016How,
  805. author = {Schurch, Nicholas J. and Schofield, Pieta and Gierlinski, Marek and Cole, Christian and
  806. Sherstnev, Alexander and Singh, Vijender and Wrobel, Nicola and Gharbi, Karim and
  807. Simpson, Gordon G. and Owen-Hughes, Tom and Blaxter, Mark and Barton, Geoffrey J.},
  808. title = {How many biological replicates are needed in an {RNA-seq} experiment and
  809. which differential expression tool should you use?},
  810. journal = {RNA},
  811. volume = 22,
  812. number = 6,
  813. pages = {839--851},
  814. year = 2016,
  815. doi = {10.1261/rna.053959.115},
  816. url = {http://dx.doi.org/10.1261/rna.053959.115}
  817. }
  818. @article{Soneson2015Differential,
  819. url = {http://dx.doi.org/10.12688/f1000research.7563.1},
  820. doi = {10.12688/f1000research.7563.1},
  821. author = {Soneson, Charlotte and Love, Michael I. and Robinson, Mark},
  822. title = {{Differential analyses for RNA-seq: transcript-level estimates improve
  823. gene-level inferences}},
  824. journal = {F1000Research},
  825. year = 2015,
  826. volume = 4,
  827. pages = {1521}
  828. }
  829. @article{Tonner2016,
  830. author = {Tonner, Peter D and Darnell, Cynthia L and Engelhardt, Barbara E and
  831. Schmid, Amy K},
  832. title = {{Detecting differential growth of microbial populations with Gaussian
  833. process regression}},
  834. journal = {Genome Research},
  835. volume = 27,
  836. pages = {320--333},
  837. year = 2017,
  838. doi = {10.1101/gr.210286.116}
  839. }
  840. @article{Trapnell2013Differential,
  841. author = {Trapnell, Cole and Hendrickson, David G and Sauvageau, Martin and Goff,
  842. Loyal and Rinn, John L and Pachter, Lior},
  843. title = {{Differential analysis of gene regulation at transcript resolution with
  844. RNA-seq}},
  845. journal = {Nature Biotechnology},
  846. year = 2013,
  847. doi = {10.1038/nbt.2450},
  848. url = {http://dx.doi.org/10.1038/nbt.2450}
  849. }
  850. @book{Wickham2009Ggplot2,
  851. author = {Wickham, Hadley},
  852. title = {{ggplot2}},
  853. booktitle = {ggplot2},
  854. publisher = {Springer New York},
  855. address = {New York, NY},
  856. year = 2009,
  857. doi = {10.1007/978-0-387-98141-3},
  858. url = {http://dx.doi.org/10.1007/978-0-387-98141-3},
  859. isbn = {978-0-387-98140-6},
  860. keywords = {workflow},
  861. priority = 2,
  862. posted-at = {2015-08-18 15:12:19},
  863. citeulike-article-id = 10715717,
  864. citeulike-linkout-0 = {http://dx.doi.org/10.1007/978-0-387-98141-3},
  865. citeulike-linkout-1 = {http://www.springerlink.com/content/978-0-387-98140-6}
  866. }
  867. @article{Witten2011Classification,
  868. author = {Witten, Daniela M.},
  869. title = {{Classification and clustering of sequencing data using a Poisson
  870. model}},
  871. journal = {The Annals of Applied Statistics},
  872. volume = 5,
  873. number = 4,
  874. pages = {2493--2518},
  875. year = 2011,
  876. month = dec,
  877. day = 28,
  878. doi = {10.1214/11-AOAS493},
  879. url = {http://dx.doi.org/10.1214/11-AOAS493},
  880. issn = {1932-6157},
  881. keywords = {chipseq, ctsca, deseq2, rnaseq, workflow},
  882. priority = 2,
  883. posted-at = {2014-05-16 17:18:08},
  884. citeulike-article-id = 13172798,
  885. citeulike-linkout-0 = {http://dx.doi.org/10.1214/11-AOAS493},
  886. abstract = {{In recent years, advances in high throughput sequencing technology have
  887. led to a need for specialized methods for the analysis of digital gene
  888. expression data. While gene expression data measured on a microarray
  889. take on continuous values and can be modeled using the normal
  890. distribution, RNA sequencing data involve nonnegative counts and are
  891. more appropriately modeled using a discrete count distribution, such as
  892. the Poisson or the negative binomial. Consequently, analytic tools that
  893. assume a Gaussian distribution (such as classification methods based on
  894. linear discriminant analysis and clustering methods that use Euclidean
  895. distance) may not perform as well for sequencing data as methods that
  896. are based upon a more appropriate distribution. Here, we propose new
  897. approaches for performing classification and clustering of observations
  898. on the basis of sequencing data. Using a Poisson log linear model, we
  899. develop an analog of diagonal linear discriminant analysis that is
  900. appropriate for sequencing data. We also propose an approach for
  901. clustering sequencing data using a new dissimilarity measure that is
  902. based upon the Poisson model. We demonstrate the performances of these
  903. approaches in a simulation study, on three publicly available RNA
  904. sequencing data sets, and on a publicly available chromatin
  905. immunoprecipitation sequencing data set.}}
  906. }
  907. @article{Wu2013New,
  908. abstract = {{Recent developments in RNA-sequencing (RNA-seq) technology have led to
  909. a rapid increase in gene expression data in the form of counts. RNA-seq
  910. can be used for a variety of applications, however, identifying
  911. differential expression (DE) remains a key task in functional
  912. genomics. There have been a number of statistical methods for DE
  913. detection for RNA-seq data. One common feature of several leading
  914. methods is the use of the negative binomial (Gamma--Poisson mixture)
  915. model. That is, the unobserved gene expression is modeled by a gamma
  916. random variable and, given the expression, the sequencing read counts
  917. are modeled as Poisson. The distinct feature in various methods is how
  918. the variance, or dispersion, in the Gamma distribution is modeled and
  919. estimated. We evaluate several large public RNA-seq datasets and find
  920. that the estimated dispersion in existing methods does not adequately
  921. capture the heterogeneity of biological variance among samples. We
  922. present a new empirical Bayes shrinkage estimate of the dispersion
  923. parameters and demonstrate improved DE detection.}},
  924. author = {Wu, Hao and Wang, Chi and Wu, Zhijin},
  925. citeulike-article-id =11345725,
  926. citeulike-linkout-0 ={http://dx.doi.org/10.1093/biostatistics/kxs033},
  927. citeulike-linkout-1
  928. ={http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.abstract},
  929. citeulike-linkout-2
  930. ={http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.full.pdf},
  931. citeulike-linkout-3 ={http://view.ncbi.nlm.nih.gov/pubmed/23001152},
  932. citeulike-linkout-4 ={http://www.hubmed.org/display.cgi?uids=23001152},
  933. day = 01,
  934. doi = {10.1093/biostatistics/kxs033},
  935. issn = {1468-4357},
  936. journal = {Biostatistics},
  937. keywords = {deseq2, rnaseq, workflow},
  938. month = apr,
  939. number = 2,
  940. pages = {232--243},
  941. pmid = 23001152,
  942. posted-at = {2013-02-26 17:09:19},
  943. priority = 2,
  944. publisher = {Oxford University Press},
  945. title = {{A new shrinkage estimator for dispersion improves differential
  946. expression detection in RNA-seq data}},
  947. url = {http://dx.doi.org/10.1093/biostatistics/kxs033},
  948. volume = 14,
  949. year = 2013
  950. }