/*
 * jidctint.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2015 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
 * on each row (or vice versa, but it's more convenient to emit a row at
 * a time). Direct algorithms are also available, but they are much more
 * complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for directly resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
 *
 * For N<8 we simply take the corresponding low-frequency coefficients of
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
 * to yield the downscaled outputs.
 * This can be seen as direct low-pass downsampling from the DCT domain
 * point of view rather than the usual spatial domain point of view,
 * yielding significant computational savings and results at least
 * as good as common bilinear (averaging) spatial downsampling.
 *
 * For N>8 we apply a partial NxN IDCT that treats the 8 input coefficients
 * as the lower frequencies and assumes the higher frequencies to be zero.
 * It turns out that the computational effort is similar to the 8x8 IDCT
 * regarding the output size.
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
 *
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
 * since there would be too many additional constants to pre-calculate.
 */
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jdct.h"    /* Private declarations for DCT subsystem */

#ifdef DCT_ISLOW_SUPPORTED


/*
 * This module is specialized to the case DCTSIZE = 8.
 */

#if DCTSIZE != 8
  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
#endif


/*
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs. The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm. The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic. We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants). After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output. This division can be done
 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision. These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling. (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
 * shows that the values given below are the most effective.
 */
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1    /* lose a little precision to avoid overflow */
#endif
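
/* Worked check of the overflow bound stated above:
 * 8-bit samples:  BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS = 8 + 13 + 2 = 23 <= 26
 * 12-bit samples: 12 + 13 + 1 = 26 <= 26
 * so both settings keep the 32-bit intermediate results of pass 2 from overflowing.
 */
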
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)    /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)    /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)    /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)    /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)    /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)    /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32) 12299)    /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32) 15137)    /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32) 16069)    /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32) 16819)    /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32) 20995)    /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32) 25172)    /* FIX(3.072711026) */
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
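
/* For example, with CONST_BITS = 13 the FIX() macro rounds the constant
 * times 2^13 to an integer: 0.541196100 * 8192 = 4433.08 -> 4433, which is
 * the FIX_0_541196100 entry above; likewise 0.765366865 * 8192 rounds to 6270.
 */
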
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const)  ((var) * (const))
#endif
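
/* Illustration of the multiply-and-descale pattern used throughout this file:
 * a product such as MULTIPLY(z2, FIX_0_541196100) is scaled by 2^CONST_BITS.
 * For z2 = 100 the product is 100 * 4433 = 443300; adding a rounding term of
 * ONE << (CONST_BITS-1) and right-shifting by CONST_BITS gives 54, i.e.
 * approximately 100 * 0.541196. In the routines below the rounding term and
 * the shift are folded into the pass-1 and pass-2 descale steps.
 */
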
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result. In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 */

#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))


/*
 * Perform dequantization and inverse DCT on one block of coefficients.
 *
 * cK represents sqrt(2) * cos(K*pi/16).
 */
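
/* The 8-point constants tabulated above are exactly these cK values and
 * their sums/differences, e.g.
 *   c6      = sqrt(2)*cos(6*pi/16) = 0.541196100  -> FIX_0_541196100
 *   c2 - c6 = 1.306562965 - 0.541196100 = 0.765366865 -> FIX_0_765366865
 *   c2 + c6 = 1.847759065                             -> FIX_1_847759065
 *   c3      = sqrt(2)*cos(3*pi/16) = 1.175875602      -> FIX_1_175875602
 */
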
GLOBAL(void)
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                 JCOEFPTR coef_block,
                 JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3;
  INT32 tmp10, tmp11, tmp12, tmp13;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[DCTSIZE2];    /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
   * furthermore, we scale the results by 2**PASS1_BITS.
   */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = DCTSIZE; ctr > 0; ctr--) {
    /* Due to quantization, we will usually find that many of the input
     * coefficients are zero, especially the AC terms. We can exploit this
     * by short-circuiting the IDCT calculation for any column in which all
     * the AC terms are zero. In that case each output is equal to the
     * DC coefficient (with scale factor as needed).
     * With typical images and quantization tables, half or more of the
     * column DCT calculations can be simplified this way.
     */

    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
        inptr[DCTSIZE*7] == 0) {
      /* AC terms all zero */
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;

      wsptr[DCTSIZE*0] = dcval;
      wsptr[DCTSIZE*1] = dcval;
      wsptr[DCTSIZE*2] = dcval;
      wsptr[DCTSIZE*3] = dcval;
      wsptr[DCTSIZE*4] = dcval;
      wsptr[DCTSIZE*5] = dcval;
      wsptr[DCTSIZE*6] = dcval;
      wsptr[DCTSIZE*7] = dcval;

      inptr++;      /* advance pointers to next column */
      quantptr++;
      wsptr++;
      continue;
    }

    /* Even part: reverse the even part of the forward DCT.
     * The rotator is c(-6).
     */

    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z2 <<= CONST_BITS;
    z3 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);

    tmp0 = z2 + z3;
    tmp1 = z2 - z3;

    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);        /* c6 */
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);      /* c2-c6 */
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);      /* c2+c6 */

    tmp10 = tmp0 + tmp2;
    tmp13 = tmp0 - tmp2;
    tmp11 = tmp1 + tmp3;
    tmp12 = tmp1 - tmp3;

    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
     */

    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);

    z2 = tmp0 + tmp2;
    z3 = tmp1 + tmp3;

    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);        /*  c3 */
    z2 = MULTIPLY(z2, - FIX_1_961570560);           /* -c3-c5 */
    z3 = MULTIPLY(z3, - FIX_0_390180644);           /* -c3+c5 */
    z2 += z1;
    z3 += z1;

    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);  /* -c3+c7 */
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);         /* -c1+c3+c5-c7 */
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);         /*  c1+c3-c5-c7 */
    tmp0 += z1 + z2;
    tmp3 += z1 + z3;

    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);  /* -c1-c3 */
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);         /*  c1+c3-c5+c7 */
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);         /*  c1+c3+c5-c7 */
    tmp1 += z1 + z3;
    tmp2 += z1 + z2;

    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);

    inptr++;      /* advance pointers to next column */
    quantptr++;
    wsptr++;
  }

  /* Pass 2: process rows from work array, store into output array.
   * Note that we must descale the results by a factor of 8 == 2**3,
   * and also undo the PASS1_BITS scaling.
   */

  wsptr = workspace;
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Add range center and fudge factor for final descale and range-limit. */
    z2 = (INT32) wsptr[0] +
           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
            (ONE << (PASS1_BITS+2)));

    /* Rows of zeroes can be exploited in the same way as we did with columns.
     * However, the column calculation has created many nonzero AC terms, so
     * the simplification applies less often (typically 5% to 10% of the time).
     * On machines with very fast multiplication, it's possible that the
     * test takes more time than it's worth. In that case this section
     * may be commented out.
     */

#ifndef NO_ZERO_ROW_TEST
    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
      /* AC terms all zero */
      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
                                  & RANGE_MASK];

      outptr[0] = dcval;
      outptr[1] = dcval;
      outptr[2] = dcval;
      outptr[3] = dcval;
      outptr[4] = dcval;
      outptr[5] = dcval;
      outptr[6] = dcval;
      outptr[7] = dcval;

      wsptr += DCTSIZE;    /* advance pointer to next row */
      continue;
    }
#endif

    /* Even part: reverse the even part of the forward DCT.
     * The rotator is c(-6).
     */

    z3 = (INT32) wsptr[4];

    tmp0 = (z2 + z3) << CONST_BITS;
    tmp1 = (z2 - z3) << CONST_BITS;

    z2 = (INT32) wsptr[2];
    z3 = (INT32) wsptr[6];

    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);        /* c6 */
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);      /* c2-c6 */
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);      /* c2+c6 */

    tmp10 = tmp0 + tmp2;
    tmp13 = tmp0 - tmp2;
    tmp11 = tmp1 + tmp3;
    tmp12 = tmp1 - tmp3;

    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
     */

    tmp0 = (INT32) wsptr[7];
    tmp1 = (INT32) wsptr[5];
    tmp2 = (INT32) wsptr[3];
    tmp3 = (INT32) wsptr[1];

    z2 = tmp0 + tmp2;
    z3 = tmp1 + tmp3;

    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);        /*  c3 */
    z2 = MULTIPLY(z2, - FIX_1_961570560);           /* -c3-c5 */
    z3 = MULTIPLY(z3, - FIX_0_390180644);           /* -c3+c5 */
    z2 += z1;
    z3 += z1;

    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);  /* -c3+c7 */
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);         /* -c1+c3+c5-c7 */
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);         /*  c1+c3-c5-c7 */
    tmp0 += z1 + z2;
    tmp3 += z1 + z3;

    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);  /* -c1-c3 */
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);         /*  c1+c3-c5+c7 */
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);         /*  c1+c3+c5-c7 */
    tmp1 += z1 + z3;
    tmp2 += z1 + z2;

    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];

    wsptr += DCTSIZE;    /* advance pointer to next row */
  }
}
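
/* Net scaling trace for jpeg_idct_islow: pass 1 leaves the data scaled by
 * sqrt(8) * 2^PASS1_BITS, and pass 2 contributes another factor of sqrt(8),
 * so the final RIGHT_SHIFT by CONST_BITS+PASS1_BITS+3 removes the CONST_BITS
 * fixed-point scale, the 2^PASS1_BITS pass-1 scale, and the residual factor
 * of 8 = 2^3 in one step.
 */
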
#ifdef IDCT_SCALING_SUPPORTED


/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 7x7 output block.
 *
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/14).
 */
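
/* The 7-point constants can be checked directly:
 *   c2 = sqrt(2)*cos(2*pi/14) = 1.274162392
 *   c4 = sqrt(2)*cos(4*pi/14) = 0.881747734
 *   c6 = sqrt(2)*cos(6*pi/14) = 0.314692123
 * and composites such as c2+c4-c6 = 1.841218003 are exactly the FIX()
 * arguments used in the even part below.
 */
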
GLOBAL(void)
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[7*7];    /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp13 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);

    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
    tmp0 = z1 + z3;
    z2 -= tmp0;
    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */

    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);

    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
    tmp0 = tmp1 - tmp2;
    tmp1 += tmp2;
    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
    tmp1 += tmp2;
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
    tmp0 += z2;
    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */

    /* Final output stage */
    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 7 rows from work array, store into output array. */
  wsptr = workspace;
  for (ctr = 0; ctr < 7; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp13 = (INT32) wsptr[0] +
              ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
               (ONE << (PASS1_BITS+2)));
    tmp13 <<= CONST_BITS;

    z1 = (INT32) wsptr[2];
    z2 = (INT32) wsptr[4];
    z3 = (INT32) wsptr[6];

    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
    tmp0 = z1 + z3;
    z2 -= tmp0;
    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */

    /* Odd part */
    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];

    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
    tmp0 = tmp1 - tmp2;
    tmp1 += tmp2;
    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
    tmp1 += tmp2;
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
    tmp0 += z2;
    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */

    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];

    wsptr += 7;    /* advance pointer to next row */
  }
}


/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 6x6 output block.
 *
 * Optimized algorithm with 3 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/12).
 */
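
/* Here cK = sqrt(2)*cos(K*pi/12), e.g. c2 = 1.224744871, c4 = 0.707106781
 * and c5 = 0.366025404, which are the FIX() arguments used below.
 */
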
GLOBAL(void)
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[6*6];    /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp0 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
    tmp1 = tmp0 + tmp10;
    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
    tmp10 = tmp1 + tmp0;
    tmp12 = tmp1 - tmp0;

    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
    tmp1 = (z1 - z2 - z3) << PASS1_BITS;

    /* Final output stage */
    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[6*1] = (int) (tmp11 + tmp1);
    wsptr[6*4] = (int) (tmp11 - tmp1);
    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 6 rows from work array, store into output array. */
  wsptr = workspace;
  for (ctr = 0; ctr < 6; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp0 = (INT32) wsptr[0] +
             ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
              (ONE << (PASS1_BITS+2)));
    tmp0 <<= CONST_BITS;
    tmp2 = (INT32) wsptr[4];
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
    tmp1 = tmp0 + tmp10;
    tmp11 = tmp0 - tmp10 - tmp10;
    tmp10 = (INT32) wsptr[2];
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
    tmp10 = tmp1 + tmp0;
    tmp12 = tmp1 - tmp0;

    /* Odd part */
    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
    tmp1 = (z1 - z2 - z3) << CONST_BITS;

    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];

    wsptr += 6;    /* advance pointer to next row */
  }
}


/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 5x5 output block.
 *
 * Optimized algorithm with 5 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/10).
 */
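
/* Here cK = sqrt(2)*cos(K*pi/10); the combined factors used below are
 * (c2+c4)/2 = 0.790569415, (c2-c4)/2 = 0.353553391, c3 = 0.831253876,
 * c1-c3 = 0.513743148 and c1+c3 = 2.176250899.
 */
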
GLOBAL(void)
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[5*5];    /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp12 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
    z3 = tmp12 + z2;
    tmp10 = z3 + z1;
    tmp11 = z3 - z1;
    tmp12 -= z2 << 2;

    /* Odd part */
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */

    /* Final output stage */
    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 5 rows from work array, store into output array. */
  wsptr = workspace;
  for (ctr = 0; ctr < 5; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp12 = (INT32) wsptr[0] +
              ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
               (ONE << (PASS1_BITS+2)));
    tmp12 <<= CONST_BITS;
    tmp0 = (INT32) wsptr[2];
    tmp1 = (INT32) wsptr[4];
    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
    z3 = tmp12 + z2;
    tmp10 = z3 + z1;
    tmp11 = z3 - z1;
    tmp12 -= z2 << 2;

    /* Odd part */
    z2 = (INT32) wsptr[1];
    z3 = (INT32) wsptr[3];
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */

    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];

    wsptr += 5;    /* advance pointer to next row */
  }
}


/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 4x4 output block.
 *
 * Optimized algorithm with 3 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
 */
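
/* Because the constants refer to the 8-point IDCT, the odd part below simply
 * reuses c6 = 0.541196100, c2-c6 = 0.765366865 and c2+c6 = 1.847759065 from
 * the table at the top of this file; no additional FIX() values are needed.
 */
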
GLOBAL(void)
jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp2, tmp10, tmp12;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[4*4];    /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);

    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
    tmp12 = (tmp0 - tmp2) << PASS1_BITS;

    /* Odd part */
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);

    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
    /* Add fudge factor here for final descale. */
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
                       CONST_BITS-PASS1_BITS);
    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
                       CONST_BITS-PASS1_BITS);

    /* Final output stage */
    wsptr[4*0] = (int) (tmp10 + tmp0);
    wsptr[4*3] = (int) (tmp10 - tmp0);
    wsptr[4*1] = (int) (tmp12 + tmp2);
    wsptr[4*2] = (int) (tmp12 - tmp2);
  }

  /* Pass 2: process 4 rows from work array, store into output array. */
  wsptr = workspace;
  for (ctr = 0; ctr < 4; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp0 = (INT32) wsptr[0] +
             ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
              (ONE << (PASS1_BITS+2)));
    tmp2 = (INT32) wsptr[2];

    tmp10 = (tmp0 + tmp2) << CONST_BITS;
    tmp12 = (tmp0 - tmp2) << CONST_BITS;

    /* Odd part */
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
    z2 = (INT32) wsptr[1];
    z3 = (INT32) wsptr[3];

    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);     /* c6 */
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865);   /* c2-c6 */
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065);   /* c2+c6 */

    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
                                              CONST_BITS+PASS1_BITS+3)
                            & RANGE_MASK];

    wsptr += 4;    /* advance pointer to next row */
  }
}


/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 3x3 output block.
 *
 * Optimized algorithm with 2 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/6).
 */
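
/* Here cK = sqrt(2)*cos(K*pi/6): c1 = 1.224744871 and c2 = 0.707106781,
 * the two FIX() arguments used below.
 */
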
GLOBAL(void)
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp2, tmp10, tmp12;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[3*3];   /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp0 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
    tmp10 = tmp0 + tmp12;
    tmp2 = tmp0 - tmp12 - tmp12;

    /* Odd part */

    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */

    /* Final output stage */

    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 3 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 3; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    tmp0 = (INT32) wsptr[0] +
           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
            (ONE << (PASS1_BITS+2)));
    tmp0 <<= CONST_BITS;
    tmp2 = (INT32) wsptr[2];
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
    tmp10 = tmp0 + tmp12;
    tmp2 = tmp0 - tmp12 - tmp12;

    /* Odd part */

    tmp12 = (INT32) wsptr[1];
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 3;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 2x2 output block.
 *
 * Multiplication-less algorithm.
 */
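
/*
 * Reader's note on why no multiplications are needed: in the
 * cK = sqrt(2)*cos(K*pi/4) convention, the only nontrivial 2-point factor is
 * sqrt(2)*cos(pi/4) = 1, so each 1-D transform collapses to a sum and a
 * difference; the remaining power-of-two scaling is absorbed into the final
 * shift by 3 below.
 */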
GLOBAL(void)
jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
  ISLOW_MULT_TYPE * quantptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  ISHIFT_TEMPS

  /* Pass 1: process columns from input. */

  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;

  /* Column 0 */
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
  /* Add range center and fudge factor for final descale and range-limit. */
  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);

  tmp0 = tmp4 + tmp5;
  tmp2 = tmp4 - tmp5;

  /* Column 1 */
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);

  tmp1 = tmp4 + tmp5;
  tmp3 = tmp4 - tmp5;

  /* Pass 2: process 2 rows, store into output array. */

  /* Row 0 */
  outptr = output_buf[0] + output_col;
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];

  /* Row 1 */
  outptr = output_buf[1] + output_col;
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 1x1 output block.
 *
 * We hardly need an inverse DCT routine for this: just take the
 * average pixel value, which is one-eighth of the DC coefficient.
 */
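
/*
 * Quick sanity check of the arithmetic below, with a hypothetical input:
 * if the dequantized DC coefficient is 160, the routine computes
 * (160 + (RANGE_CENTER << 3) + 4) >> 3 = RANGE_CENTER + 20, i.e. a single
 * sample 20 levels above the range center, which then passes through
 * range_limit[] like every other output sample.
 */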
GLOBAL(void)
jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  DCTELEM dcval;
  ISLOW_MULT_TYPE * quantptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  ISHIFT_TEMPS

  /* 1x1 is trivial: just take the DC coefficient divided by 8. */

  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;

  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
  /* Add range center and fudge factor for descale and range-limit. */
  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);

  output_buf[0][output_col] =
    range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 9x9 output block.
 *
 * Optimized algorithm with 10 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/18).
 */
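
/*
 * For reference, the even-part constants below follow directly from the cK
 * definition above (reader's derivation): c2 = sqrt(2)*cos(2*pi/18) =
 * 1.328926049, c4 = 1.083350441, c6 = 0.707106781, c8 = 0.245575608.
 */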
GLOBAL(void)
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*9];   /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp0 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);

    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
    tmp1 = tmp0 + tmp3;
    tmp2 = tmp0 - tmp3 - tmp3;

    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
    tmp11 = tmp2 + tmp0;
    tmp14 = tmp2 - tmp0 - tmp0;

    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
    tmp10 = tmp1 + tmp0 - tmp3;
    tmp12 = tmp1 - tmp0 + tmp2;
    tmp13 = tmp1 - tmp2 + tmp3;

    /* Odd part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);

    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */

    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
    tmp0 = tmp2 + tmp3 - z2;
    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
    tmp2 += z2 - tmp1;
    tmp3 += z2 + tmp1;
    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */

    /* Final output stage */

    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 9 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 9; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    tmp0 = (INT32) wsptr[0] +
           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
            (ONE << (PASS1_BITS+2)));
    tmp0 <<= CONST_BITS;

    z1 = (INT32) wsptr[2];
    z2 = (INT32) wsptr[4];
    z3 = (INT32) wsptr[6];

    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
    tmp1 = tmp0 + tmp3;
    tmp2 = tmp0 - tmp3 - tmp3;

    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
    tmp11 = tmp2 + tmp0;
    tmp14 = tmp2 - tmp0 - tmp0;

    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
    tmp10 = tmp1 + tmp0 - tmp3;
    tmp12 = tmp1 - tmp0 + tmp2;
    tmp13 = tmp1 - tmp2 + tmp3;

    /* Odd part */

    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    z4 = (INT32) wsptr[7];

    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */

    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
    tmp0 = tmp2 + tmp3 - z2;
    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
    tmp2 += z2 - tmp1;
    tmp3 += z2 + tmp1;
    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 8;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 10x10 output block.
 *
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/20).
 */
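
/*
 * Reader's note on the even part below: c4 = sqrt(2)*cos(4*pi/20) =
 * 1.144122806 and c8 = sqrt(2)*cos(8*pi/20) = 0.437016024, and the identity
 * (c4 - c8) * 2 = sqrt(2) is what lets tmp22 be formed with a shift instead
 * of a third multiplication (the "c0 = (c4-c8)*2" comment in the code).
 */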
GLOBAL(void)
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                 JCOEFPTR coef_block,
                 JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
  INT32 z1, z2, z3, z4, z5;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*10];  /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z3 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
    z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
    tmp10 = z3 + z1;
    tmp11 = z3 - z2;

    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
                        CONST_BITS-PASS1_BITS);

    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */

    tmp20 = tmp10 + tmp12;
    tmp24 = tmp10 - tmp12;
    tmp21 = tmp11 + tmp13;
    tmp23 = tmp11 - tmp13;

    /* Odd part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);

    tmp11 = z2 + z4;
    tmp13 = z2 - z4;

    tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
    z5 = z3 << CONST_BITS;

    z2 = MULTIPLY(tmp11, FIX(0.951056516));    /* (c3+c7)/2 */
    z4 = z5 + tmp12;

    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */

    z2 = MULTIPLY(tmp11, FIX(0.587785252));    /* (c1-c9)/2 */
    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));

    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;

    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */

    /* Final output stage */

    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) (tmp22 + tmp12);
    wsptr[8*7] = (int) (tmp22 - tmp12);
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 10 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 10; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    z3 = (INT32) wsptr[0] +
         ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
          (ONE << (PASS1_BITS+2)));
    z3 <<= CONST_BITS;
    z4 = (INT32) wsptr[4];
    z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
    z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
    tmp10 = z3 + z1;
    tmp11 = z3 - z2;

    tmp22 = z3 - ((z1 - z2) << 1);       /* c0 = (c4-c8)*2 */

    z2 = (INT32) wsptr[2];
    z3 = (INT32) wsptr[6];

    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */

    tmp20 = tmp10 + tmp12;
    tmp24 = tmp10 - tmp12;
    tmp21 = tmp11 + tmp13;
    tmp23 = tmp11 - tmp13;

    /* Odd part */

    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    z3 <<= CONST_BITS;
    z4 = (INT32) wsptr[7];

    tmp11 = z2 + z4;
    tmp13 = z2 - z4;

    tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */

    z2 = MULTIPLY(tmp11, FIX(0.951056516));    /* (c3+c7)/2 */
    z4 = z3 + tmp12;

    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */

    z2 = MULTIPLY(tmp11, FIX(0.587785252));    /* (c1-c9)/2 */
    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));

    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;

    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 8;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing an 11x11 output block.
 *
 * Optimized algorithm with 24 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/22).
 */
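
/*
 * Reader's derivation for the constants below: with cK = sqrt(2)*cos(K*pi/22),
 * c2 = 1.356927976 and c0 = sqrt(2) = 1.414213562, which are the two factors
 * applied to z4 in the even part; the remaining constants are the sums and
 * differences named in the trailing comments (c2+c4, c2-c6, c4+c6, ...).
 */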
GLOBAL(void)
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                 JCOEFPTR coef_block,
                 JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*11];  /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp10 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);

    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));  /* c2+c4 */
    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));  /* c2-c6 */
    z4 = z1 + z3;
    tmp24 = MULTIPLY(z4, - FIX(1.155664402));     /* -(c2-c10) */
    z4 -= z2;
    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
    tmp21 = tmp20 + tmp23 + tmp25 -
            MULTIPLY(z2, FIX(1.821790775));       /* c2+c4+c10-c6 */
    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
    tmp24 += tmp25;
    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -     /* c2+c8 */
             MULTIPLY(z1, FIX(1.390975730));      /* c4+c10 */
    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */

    /* Odd part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);

    tmp11 = z1 + z2;
    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
    z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579));    /* c7+c9 */
    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
    z1 = MULTIPLY(z2 + z4, - FIX(1.798248910));          /* -(c1+c9) */
    tmp11 += z1;
    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */

    /* Final output stage */

    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 11 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 11; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    tmp10 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    tmp10 <<= CONST_BITS;

    z1 = (INT32) wsptr[2];
    z2 = (INT32) wsptr[4];
    z3 = (INT32) wsptr[6];

    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));  /* c2+c4 */
    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));  /* c2-c6 */
    z4 = z1 + z3;
    tmp24 = MULTIPLY(z4, - FIX(1.155664402));     /* -(c2-c10) */
    z4 -= z2;
    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
    tmp21 = tmp20 + tmp23 + tmp25 -
            MULTIPLY(z2, FIX(1.821790775));       /* c2+c4+c10-c6 */
    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
    tmp24 += tmp25;
    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -     /* c2+c8 */
             MULTIPLY(z1, FIX(1.390975730));      /* c4+c10 */
    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */

    /* Odd part */

    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    z4 = (INT32) wsptr[7];

    tmp11 = z1 + z2;
    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
    z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579));    /* c7+c9 */
    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
    z1 = MULTIPLY(z2 + z4, - FIX(1.798248910));          /* -(c1+c9) */
    tmp11 += z1;
    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 8;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 12x12 output block.
 *
 * Optimized algorithm with 15 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/24).
 */
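
/*
 * Reader's note: for this kernel c6 = sqrt(2)*cos(6*pi/24) = sqrt(2)*cos(pi/4)
 * = 1 exactly, so the terms that would be scaled by c6 (the plain
 * "<< CONST_BITS" values in the even part) need no multiplication; the
 * explicit multiplications start with c4 = sqrt(2)*cos(4*pi/24) = 1.224744871
 * and c2 = 1.366025404.
 */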
GLOBAL(void)
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                 JCOEFPTR coef_block,
                 JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*12];  /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z3 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);

    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */

    tmp10 = z3 + z4;
    tmp11 = z3 - z4;

    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
    z1 <<= CONST_BITS;
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    z2 <<= CONST_BITS;

    tmp12 = z1 - z2;

    tmp21 = z3 + tmp12;
    tmp24 = z3 - tmp12;

    tmp12 = z4 + z2;

    tmp20 = tmp10 + tmp12;
    tmp25 = tmp10 - tmp12;

    tmp12 = z4 - z1 - z2;

    tmp22 = tmp11 + tmp12;
    tmp23 = tmp11 - tmp12;

    /* Odd part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);

    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */

    tmp10 = z1 + z3;
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */

    z1 -= z4;
    z2 -= z3;
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */

    /* Final output stage */

    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 12 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 12; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    z3 = (INT32) wsptr[0] +
         ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
          (ONE << (PASS1_BITS+2)));
    z3 <<= CONST_BITS;

    z4 = (INT32) wsptr[4];
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */

    tmp10 = z3 + z4;
    tmp11 = z3 - z4;

    z1 = (INT32) wsptr[2];
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
    z1 <<= CONST_BITS;
    z2 = (INT32) wsptr[6];
    z2 <<= CONST_BITS;

    tmp12 = z1 - z2;

    tmp21 = z3 + tmp12;
    tmp24 = z3 - tmp12;

    tmp12 = z4 + z2;

    tmp20 = tmp10 + tmp12;
    tmp25 = tmp10 - tmp12;

    tmp12 = z4 - z1 - z2;

    tmp22 = tmp11 + tmp12;
    tmp23 = tmp11 - tmp12;

    /* Odd part */

    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    z4 = (INT32) wsptr[7];

    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */

    tmp10 = z1 + z3;
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */

    z1 -= z4;
    z2 -= z3;
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 8;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 13x13 output block.
 *
 * Optimized algorithm with 29 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/26).
 */
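
/*
 * Reader's derivation: with cK = sqrt(2)*cos(K*pi/26), c2 = 1.373119086 and
 * c0 = sqrt(2) = 1.414213562; the half-sum/half-difference constants in the
 * even part, e.g. (c4+c6)/2 = 1.155388986, arise because the row-4 and row-6
 * inputs are first paired as tmp10 = z3 + z4 and tmp11 = z3 - z4.
 */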
GLOBAL(void)
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                 JCOEFPTR coef_block,
                 JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*13];  /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z1 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);

    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    tmp10 = z3 + z4;
    tmp11 = z3 - z4;

    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));               /* (c4+c6)/2 */
    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;          /* (c4-c6)/2 */

    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;  /* c2 */
    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;  /* c10 */

    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));               /* (c8-c12)/2 */
    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;          /* (c8+c12)/2 */

    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;  /* c6 */
    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */

    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));               /* (c2-c10)/2 */
    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;          /* (c2+c10)/2 */

    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */

    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;     /* c0 */

    /* Odd part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);

    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
    tmp15 = z1 + z4;
    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
    tmp11 += tmp14;
    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
    tmp12 += tmp14;
    tmp13 += tmp14;
    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    z1 = MULTIPLY(z3 - z2, FIX(0.937797057));        /* c7 */
    tmp14 += z1;
    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */

    /* Final output stage */

    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 13 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 13; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    z1 = (INT32) wsptr[0] +
         ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
          (ONE << (PASS1_BITS+2)));
    z1 <<= CONST_BITS;

    z2 = (INT32) wsptr[2];
    z3 = (INT32) wsptr[4];
    z4 = (INT32) wsptr[6];

    tmp10 = z3 + z4;
    tmp11 = z3 - z4;

    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));               /* (c4+c6)/2 */
    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;          /* (c4-c6)/2 */

    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;  /* c2 */
    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;  /* c10 */

    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));               /* (c8-c12)/2 */
    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;          /* (c8+c12)/2 */

    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;  /* c6 */
    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */

    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));               /* (c2-c10)/2 */
    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;          /* (c2+c10)/2 */

    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */

    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;     /* c0 */

    /* Odd part */

    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    z4 = (INT32) wsptr[7];

    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
    tmp15 = z1 + z4;
    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
    tmp11 += tmp14;
    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
    tmp12 += tmp14;
    tmp13 += tmp14;
    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    z1 = MULTIPLY(z3 - z2, FIX(0.937797057));        /* c7 */
    tmp14 += z1;
    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 8;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 14x14 output block.
 *
 * Optimized algorithm with 20 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/28).
 */
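
/*
 * Reader's note: c4 = sqrt(2)*cos(4*pi/28) = 1.274162392, c8 = 0.881747734
 * and c12 = 0.314692123, and (c4 + c12 - c8) * 2 = sqrt(2); that identity is
 * what the "c0 = (c4+c12-c8)*2" comment below refers to, letting tmp23 be
 * built from a shift instead of another multiplication.
 */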
GLOBAL(void)
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                 JCOEFPTR coef_block,
                 JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*14];  /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array. */

  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z1 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
    z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
    z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */

    tmp10 = z1 + z2;
    tmp11 = z1 + z3;
    tmp12 = z1 - z4;

    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1),   /* c0 = (c4+c12-c8)*2 */
                        CONST_BITS-PASS1_BITS);

    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */

    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
            MULTIPLY(z2, FIX(1.378756276));      /* c2 */

    tmp20 = tmp10 + tmp13;
    tmp26 = tmp10 - tmp13;
    tmp21 = tmp11 + tmp14;
    tmp25 = tmp11 - tmp14;
    tmp22 = tmp12 + tmp15;
    tmp24 = tmp12 - tmp15;

    /* Odd part */

    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp13 = z4 << CONST_BITS;

    tmp14 = z1 + z3;
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));              /* c3 */
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));                /* c5 */
    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));                /* c9 */
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));           /* c9+c11-c13 */
    z1 -= z2;
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;           /* c11 */
    tmp16 += tmp15;
    z1 += z4;
    z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13;       /* -c13 */
    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));             /* c3-c9-c13 */
    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));             /* c3+c5-c13 */
    z4 = MULTIPLY(z3 - z2, FIX(1.405321284));                 /* c1 */
    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334));    /* c1+c9-c11 */
    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));             /* c1+c11-c5 */

    tmp13 = (z1 - z3) << PASS1_BITS;

    /* Final output stage */

    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*3] = (int) (tmp23 + tmp13);
    wsptr[8*10] = (int) (tmp23 - tmp13);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 14 rows from work array, store into output array. */

  wsptr = workspace;
  for (ctr = 0; ctr < 14; ctr++) {
    outptr = output_buf[ctr] + output_col;

    /* Even part */

    /* Add range center and fudge factor for final descale and range-limit. */
    z1 = (INT32) wsptr[0] +
         ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
          (ONE << (PASS1_BITS+2)));
    z1 <<= CONST_BITS;
    z4 = (INT32) wsptr[4];
    z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
    z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
    z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */

    tmp10 = z1 + z2;
    tmp11 = z1 + z3;
    tmp12 = z1 - z4;

    tmp23 = z1 - ((z2 + z3 - z4) << 1);  /* c0 = (c4+c12-c8)*2 */

    z1 = (INT32) wsptr[2];
    z2 = (INT32) wsptr[6];

    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */

    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
            MULTIPLY(z2, FIX(1.378756276));      /* c2 */

    tmp20 = tmp10 + tmp13;
    tmp26 = tmp10 - tmp13;
    tmp21 = tmp11 + tmp14;
    tmp25 = tmp11 - tmp14;
    tmp22 = tmp12 + tmp15;
    tmp24 = tmp12 - tmp15;

    /* Odd part */

    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    z4 = (INT32) wsptr[7];
    z4 <<= CONST_BITS;

    tmp14 = z1 + z3;
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));              /* c3 */
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));                /* c5 */
    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));                /* c9 */
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));           /* c9+c11-c13 */
    z1 -= z2;
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;              /* c11 */
    tmp16 += tmp15;
    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;       /* -c13 */
    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));              /* c1 */
    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334));    /* c1+c9-c11 */
    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */

    tmp13 = ((z1 - z3) << CONST_BITS) + z4;

    /* Final output stage */

    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
                             CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
                            CONST_BITS+PASS1_BITS+3) & RANGE_MASK];

    wsptr += 8;   /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 15x15 output block.
 *
 * Optimized algorithm with 22 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/30).
 */
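
/*
 * Reader's derivation: with cK = sqrt(2)*cos(K*pi/30), c6 = 1.144122806,
 * c12 = 0.437016024 and (c6 - c12) * 2 = sqrt(2), matching the
 * "c0 = (c6-c12)*2" comments in the even part; in the odd part
 * c5 = sqrt(2)*cos(pi/6) = 1.224744871.
 */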
  1945. GLOBAL(void)
  1946. jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  1947. JCOEFPTR coef_block,
  1948. JSAMPARRAY output_buf, JDIMENSION output_col)
  1949. {
  1950. INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
  1951. INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
  1952. INT32 z1, z2, z3, z4;
  1953. JCOEFPTR inptr;
  1954. ISLOW_MULT_TYPE * quantptr;
  1955. int * wsptr;
  1956. JSAMPROW outptr;
  1957. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  1958. int ctr;
  1959. int workspace[8*15]; /* buffers data between passes */
  1960. SHIFT_TEMPS
  1961. /* Pass 1: process columns from input, store into work array. */
  1962. inptr = coef_block;
  1963. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  1964. wsptr = workspace;
  1965. for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
  1966. /* Even part */
  1967. z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  1968. z1 <<= CONST_BITS;
  1969. /* Add fudge factor here for final descale. */
  1970. z1 += ONE << (CONST_BITS-PASS1_BITS-1);
  1971. z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  1972. z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
  1973. z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
  1974. tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
  1975. tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
  1976. tmp12 = z1 - tmp10;
  1977. tmp13 = z1 + tmp11;
  1978. z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
  1979. z4 = z2 - z3;
  1980. z3 += z2;
  1981. tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
  1982. tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
  1983. z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
  1984. tmp20 = tmp13 + tmp10 + tmp11;
  1985. tmp23 = tmp12 - tmp10 + tmp11 + z2;
  1986. tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
  1987. tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
  1988. tmp25 = tmp13 - tmp10 - tmp11;
  1989. tmp26 = tmp12 + tmp10 - tmp11 - z2;
  1990. tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
  1991. tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
  1992. tmp21 = tmp12 + tmp10 + tmp11;
  1993. tmp24 = tmp13 - tmp10 + tmp11;
  1994. tmp11 += tmp11;
  1995. tmp22 = z1 + tmp11; /* c10 = c6-c12 */
  1996. tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
  1997. /* Odd part */
  1998. z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  1999. z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  2000. z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
  2001. z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
  2002. z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
  2003. tmp13 = z2 - z4;
  2004. tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
  2005. tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
  2006. tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
  2007. tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
  2008. tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
  2009. z2 = z1 - z4;
  2010. tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
  2011. tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
  2012. tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
  2013. tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
  2014. z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
  2015. tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
  2016. tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
  2017. /* Final output stage */
  2018. wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
  2019. wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
  2020. wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
  2021. wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
  2022. wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
  2023. wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
  2024. wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
  2025. wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
  2026. wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
  2027. wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
  2028. wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
  2029. wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
  2030. wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
  2031. wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
  2032. wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
  2033. }
  2034. /* Pass 2: process 15 rows from work array, store into output array. */
  2035. wsptr = workspace;
  2036. for (ctr = 0; ctr < 15; ctr++) {
  2037. outptr = output_buf[ctr] + output_col;
  2038. /* Even part */
  2039. /* Add range center and fudge factor for final descale and range-limit. */
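/* The constant added to wsptr[0] folds two things into the DC term before
 * the row pass: RANGE_CENTER pre-scaled by 2^(PASS1_BITS+3), so that the
 * final RIGHT_SHIFT by CONST_BITS+PASS1_BITS+3 restores the sample offset,
 * and ONE << (PASS1_BITS+2), which after the following << CONST_BITS equals
 * half of that final divisor and thus rounds the descaled result to nearest.
 */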
  2040. z1 = (INT32) wsptr[0] +
  2041. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  2042. (ONE << (PASS1_BITS+2)));
  2043. z1 <<= CONST_BITS;
  2044. z2 = (INT32) wsptr[2];
  2045. z3 = (INT32) wsptr[4];
  2046. z4 = (INT32) wsptr[6];
  2047. tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
  2048. tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
  2049. tmp12 = z1 - tmp10;
  2050. tmp13 = z1 + tmp11;
  2051. z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
  2052. z4 = z2 - z3;
  2053. z3 += z2;
  2054. tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
  2055. tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
  2056. z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
  2057. tmp20 = tmp13 + tmp10 + tmp11;
  2058. tmp23 = tmp12 - tmp10 + tmp11 + z2;
  2059. tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
  2060. tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
  2061. tmp25 = tmp13 - tmp10 - tmp11;
  2062. tmp26 = tmp12 + tmp10 - tmp11 - z2;
  2063. tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
  2064. tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
  2065. tmp21 = tmp12 + tmp10 + tmp11;
  2066. tmp24 = tmp13 - tmp10 + tmp11;
  2067. tmp11 += tmp11;
  2068. tmp22 = z1 + tmp11; /* c10 = c6-c12 */
  2069. tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
  2070. /* Odd part */
  2071. z1 = (INT32) wsptr[1];
  2072. z2 = (INT32) wsptr[3];
  2073. z4 = (INT32) wsptr[5];
  2074. z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
  2075. z4 = (INT32) wsptr[7];
  2076. tmp13 = z2 - z4;
  2077. tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
  2078. tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
  2079. tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
  2080. tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
  2081. tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
  2082. z2 = z1 - z4;
  2083. tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
  2084. tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
  2085. tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
  2086. tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
  2087. z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
  2088. tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
  2089. tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
  2090. /* Final output stage */
  2091. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
  2092. CONST_BITS+PASS1_BITS+3)
  2093. & RANGE_MASK];
  2094. outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
  2095. CONST_BITS+PASS1_BITS+3)
  2096. & RANGE_MASK];
  2097. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
  2098. CONST_BITS+PASS1_BITS+3)
  2099. & RANGE_MASK];
  2100. outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
  2101. CONST_BITS+PASS1_BITS+3)
  2102. & RANGE_MASK];
  2103. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
  2104. CONST_BITS+PASS1_BITS+3)
  2105. & RANGE_MASK];
  2106. outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
  2107. CONST_BITS+PASS1_BITS+3)
  2108. & RANGE_MASK];
  2109. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
  2110. CONST_BITS+PASS1_BITS+3)
  2111. & RANGE_MASK];
  2112. outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
  2113. CONST_BITS+PASS1_BITS+3)
  2114. & RANGE_MASK];
  2115. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
  2116. CONST_BITS+PASS1_BITS+3)
  2117. & RANGE_MASK];
  2118. outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
  2119. CONST_BITS+PASS1_BITS+3)
  2120. & RANGE_MASK];
  2121. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
  2122. CONST_BITS+PASS1_BITS+3)
  2123. & RANGE_MASK];
  2124. outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
  2125. CONST_BITS+PASS1_BITS+3)
  2126. & RANGE_MASK];
  2127. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
  2128. CONST_BITS+PASS1_BITS+3)
  2129. & RANGE_MASK];
  2130. outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
  2131. CONST_BITS+PASS1_BITS+3)
  2132. & RANGE_MASK];
  2133. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
  2134. CONST_BITS+PASS1_BITS+3)
  2135. & RANGE_MASK];
  2136. wsptr += 8; /* advance pointer to next row */
  2137. }
  2138. }
  2139. /*
  2140. * Perform dequantization and inverse DCT on one block of coefficients,
  2141. * producing a 16x16 output block.
  2142. *
  2143. * Optimized algorithm with 28 multiplications in the 1-D kernel.
  2144. * cK represents sqrt(2) * cos(K*pi/32).
  2145. */
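/* The multiplication count breaks down as 8 in the even part plus 20 in the
 * odd part of each 1-D kernel.  The constants below are these cosines in
 * CONST_BITS fixed point, e.g. c4 = sqrt(2)*cos(4*pi/32) = 1.306562965
 * appears as FIX(1.306562965).
 */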
  2146. GLOBAL(void)
  2147. jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  2148. JCOEFPTR coef_block,
  2149. JSAMPARRAY output_buf, JDIMENSION output_col)
  2150. {
  2151. INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
  2152. INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
  2153. INT32 z1, z2, z3, z4;
  2154. JCOEFPTR inptr;
  2155. ISLOW_MULT_TYPE * quantptr;
  2156. int * wsptr;
  2157. JSAMPROW outptr;
  2158. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  2159. int ctr;
  2160. int workspace[8*16]; /* buffers data between passes */
  2161. SHIFT_TEMPS
  2162. /* Pass 1: process columns from input, store into work array. */
  2163. inptr = coef_block;
  2164. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  2165. wsptr = workspace;
  2166. for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
  2167. /* Even part */
  2168. tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  2169. tmp0 <<= CONST_BITS;
  2170. /* Add fudge factor here for final descale. */
  2171. tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
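/* This bias is half of 2^(CONST_BITS-PASS1_BITS), the divisor used in the
 * RIGHT_SHIFTs of the final output stage below, so pass-1 results are
 * rounded to nearest rather than truncated.
 */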
  2172. z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
  2173. tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
  2174. tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
  2175. tmp10 = tmp0 + tmp1;
  2176. tmp11 = tmp0 - tmp1;
  2177. tmp12 = tmp0 + tmp2;
  2178. tmp13 = tmp0 - tmp2;
  2179. z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  2180. z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
  2181. z3 = z1 - z2;
  2182. z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
  2183. z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
  2184. tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
  2185. tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
  2186. tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
  2187. tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
  2188. tmp20 = tmp10 + tmp0;
  2189. tmp27 = tmp10 - tmp0;
  2190. tmp21 = tmp12 + tmp1;
  2191. tmp26 = tmp12 - tmp1;
  2192. tmp22 = tmp13 + tmp2;
  2193. tmp25 = tmp13 - tmp2;
  2194. tmp23 = tmp11 + tmp3;
  2195. tmp24 = tmp11 - tmp3;
  2196. /* Odd part */
  2197. z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  2198. z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  2199. z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
  2200. z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
  2201. tmp11 = z1 + z3;
  2202. tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
  2203. tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
  2204. tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
  2205. tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
  2206. tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
  2207. tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
  2208. tmp0 = tmp1 + tmp2 + tmp3 -
  2209. MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
  2210. tmp13 = tmp10 + tmp11 + tmp12 -
  2211. MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
  2212. z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
  2213. tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
  2214. tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
  2215. z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
  2216. tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
  2217. tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
  2218. z2 += z4;
  2219. z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
  2220. tmp1 += z1;
  2221. tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
  2222. z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
  2223. tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
  2224. tmp12 += z2;
  2225. z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
  2226. tmp2 += z2;
  2227. tmp3 += z2;
  2228. z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
  2229. tmp10 += z2;
  2230. tmp11 += z2;
  2231. /* Final output stage */
  2232. wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
  2233. wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
  2234. wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
  2235. wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
  2236. wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
  2237. wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
  2238. wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
  2239. wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
  2240. wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
  2241. wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
  2242. wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
  2243. wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
  2244. wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
  2245. wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
  2246. wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
  2247. wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
  2248. }
  2249. /* Pass 2: process 16 rows from work array, store into output array. */
  2250. wsptr = workspace;
  2251. for (ctr = 0; ctr < 16; ctr++) {
  2252. outptr = output_buf[ctr] + output_col;
  2253. /* Even part */
  2254. /* Add range center and fudge factor for final descale and range-limit. */
  2255. tmp0 = (INT32) wsptr[0] +
  2256. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  2257. (ONE << (PASS1_BITS+2)));
  2258. tmp0 <<= CONST_BITS;
  2259. z1 = (INT32) wsptr[4];
  2260. tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
  2261. tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
  2262. tmp10 = tmp0 + tmp1;
  2263. tmp11 = tmp0 - tmp1;
  2264. tmp12 = tmp0 + tmp2;
  2265. tmp13 = tmp0 - tmp2;
  2266. z1 = (INT32) wsptr[2];
  2267. z2 = (INT32) wsptr[6];
  2268. z3 = z1 - z2;
  2269. z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
  2270. z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
  2271. tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
  2272. tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
  2273. tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
  2274. tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
  2275. tmp20 = tmp10 + tmp0;
  2276. tmp27 = tmp10 - tmp0;
  2277. tmp21 = tmp12 + tmp1;
  2278. tmp26 = tmp12 - tmp1;
  2279. tmp22 = tmp13 + tmp2;
  2280. tmp25 = tmp13 - tmp2;
  2281. tmp23 = tmp11 + tmp3;
  2282. tmp24 = tmp11 - tmp3;
  2283. /* Odd part */
  2284. z1 = (INT32) wsptr[1];
  2285. z2 = (INT32) wsptr[3];
  2286. z3 = (INT32) wsptr[5];
  2287. z4 = (INT32) wsptr[7];
  2288. tmp11 = z1 + z3;
  2289. tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
  2290. tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
  2291. tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
  2292. tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
  2293. tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
  2294. tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
  2295. tmp0 = tmp1 + tmp2 + tmp3 -
  2296. MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
  2297. tmp13 = tmp10 + tmp11 + tmp12 -
  2298. MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
  2299. z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
  2300. tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
  2301. tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
  2302. z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
  2303. tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
  2304. tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
  2305. z2 += z4;
  2306. z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
  2307. tmp1 += z1;
  2308. tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
  2309. z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
  2310. tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
  2311. tmp12 += z2;
  2312. z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
  2313. tmp2 += z2;
  2314. tmp3 += z2;
  2315. z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
  2316. tmp10 += z2;
  2317. tmp11 += z2;
  2318. /* Final output stage */
  2319. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
  2320. CONST_BITS+PASS1_BITS+3)
  2321. & RANGE_MASK];
  2322. outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
  2323. CONST_BITS+PASS1_BITS+3)
  2324. & RANGE_MASK];
  2325. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
  2326. CONST_BITS+PASS1_BITS+3)
  2327. & RANGE_MASK];
  2328. outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
  2329. CONST_BITS+PASS1_BITS+3)
  2330. & RANGE_MASK];
  2331. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
  2332. CONST_BITS+PASS1_BITS+3)
  2333. & RANGE_MASK];
  2334. outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
  2335. CONST_BITS+PASS1_BITS+3)
  2336. & RANGE_MASK];
  2337. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
  2338. CONST_BITS+PASS1_BITS+3)
  2339. & RANGE_MASK];
  2340. outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
  2341. CONST_BITS+PASS1_BITS+3)
  2342. & RANGE_MASK];
  2343. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
  2344. CONST_BITS+PASS1_BITS+3)
  2345. & RANGE_MASK];
  2346. outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
  2347. CONST_BITS+PASS1_BITS+3)
  2348. & RANGE_MASK];
  2349. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
  2350. CONST_BITS+PASS1_BITS+3)
  2351. & RANGE_MASK];
  2352. outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
  2353. CONST_BITS+PASS1_BITS+3)
  2354. & RANGE_MASK];
  2355. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
  2356. CONST_BITS+PASS1_BITS+3)
  2357. & RANGE_MASK];
  2358. outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
  2359. CONST_BITS+PASS1_BITS+3)
  2360. & RANGE_MASK];
  2361. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
  2362. CONST_BITS+PASS1_BITS+3)
  2363. & RANGE_MASK];
  2364. outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
  2365. CONST_BITS+PASS1_BITS+3)
  2366. & RANGE_MASK];
  2367. wsptr += 8; /* advance pointer to next row */
  2368. }
  2369. }
  2370. /*
  2371. * Perform dequantization and inverse DCT on one block of coefficients,
  2372. * producing a 16x8 output block.
  2373. *
  2374. * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
  2375. */
  2376. GLOBAL(void)
  2377. jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  2378. JCOEFPTR coef_block,
  2379. JSAMPARRAY output_buf, JDIMENSION output_col)
  2380. {
  2381. INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
  2382. INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
  2383. INT32 z1, z2, z3, z4;
  2384. JCOEFPTR inptr;
  2385. ISLOW_MULT_TYPE * quantptr;
  2386. int * wsptr;
  2387. JSAMPROW outptr;
  2388. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  2389. int ctr;
  2390. int workspace[8*8]; /* buffers data between passes */
  2391. SHIFT_TEMPS
  2392. /* Pass 1: process columns from input, store into work array.
  2393. * Note results are scaled up by sqrt(8) compared to a true IDCT;
  2394. * furthermore, we scale the results by 2**PASS1_BITS.
  2395. * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
  2396. */
  2397. inptr = coef_block;
  2398. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  2399. wsptr = workspace;
  2400. for (ctr = DCTSIZE; ctr > 0; ctr--) {
  2401. /* Due to quantization, we will usually find that many of the input
  2402. * coefficients are zero, especially the AC terms. We can exploit this
  2403. * by short-circuiting the IDCT calculation for any column in which all
  2404. * the AC terms are zero. In that case each output is equal to the
  2405. * DC coefficient (with scale factor as needed).
  2406. * With typical images and quantization tables, half or more of the
  2407. * column DCT calculations can be simplified this way.
  2408. */
  2409. if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
  2410. inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
  2411. inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
  2412. inptr[DCTSIZE*7] == 0) {
  2413. /* AC terms all zero */
  2414. int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
  2415. wsptr[DCTSIZE*0] = dcval;
  2416. wsptr[DCTSIZE*1] = dcval;
  2417. wsptr[DCTSIZE*2] = dcval;
  2418. wsptr[DCTSIZE*3] = dcval;
  2419. wsptr[DCTSIZE*4] = dcval;
  2420. wsptr[DCTSIZE*5] = dcval;
  2421. wsptr[DCTSIZE*6] = dcval;
  2422. wsptr[DCTSIZE*7] = dcval;
  2423. inptr++; /* advance pointers to next column */
  2424. quantptr++;
  2425. wsptr++;
  2426. continue;
  2427. }
  2428. /* Even part: reverse the even part of the forward DCT.
  2429. * The rotator is c(-6).
  2430. */
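/* With z1 = (z2+z3)*c6, the two lines below expand to
 *   tmp2 = z2*c2 + z3*c6   and   tmp3 = z2*c6 - z3*c2,
 * i.e. the c(-6) rotation computed with three multiplications instead of
 * four (c2-c6 = 0.765366865, c2+c6 = 1.847759065).
 */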
  2431. z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  2432. z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
  2433. z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
  2434. tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
  2435. tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
  2436. z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  2437. z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
  2438. z2 <<= CONST_BITS;
  2439. z3 <<= CONST_BITS;
  2440. /* Add fudge factor here for final descale. */
  2441. z2 += ONE << (CONST_BITS-PASS1_BITS-1);
  2442. tmp0 = z2 + z3;
  2443. tmp1 = z2 - z3;
  2444. tmp10 = tmp0 + tmp2;
  2445. tmp13 = tmp0 - tmp2;
  2446. tmp11 = tmp1 + tmp3;
  2447. tmp12 = tmp1 - tmp3;
  2448. /* Odd part per figure 8; the matrix is unitary and hence its
  2449. * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
  2450. */
  2451. tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
  2452. tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
  2453. tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  2454. tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  2455. z2 = tmp0 + tmp2;
  2456. z3 = tmp1 + tmp3;
  2457. z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
  2458. z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
  2459. z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
  2460. z2 += z1;
  2461. z3 += z1;
  2462. z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
  2463. tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
  2464. tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
  2465. tmp0 += z1 + z2;
  2466. tmp3 += z1 + z3;
  2467. z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
  2468. tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
  2469. tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
  2470. tmp1 += z1 + z3;
  2471. tmp2 += z1 + z2;
  2472. /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  2473. wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
  2474. wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
  2475. wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
  2476. wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
  2477. wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
  2478. wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
  2479. wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
  2480. wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
  2481. inptr++; /* advance pointers to next column */
  2482. quantptr++;
  2483. wsptr++;
  2484. }
  2485. /* Pass 2: process 8 rows from work array, store into output array.
  2486. * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
  2487. */
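/* Output samples k and 15-k are formed as (even-part term) +/- (odd-part
 * term): the even-numbered cosine basis functions are symmetric about the
 * center of the 16-sample row and the odd-numbered ones are antisymmetric,
 * which is why each tmp2x pairs with one tmpx in the final output stage.
 */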
  2488. wsptr = workspace;
  2489. for (ctr = 0; ctr < 8; ctr++) {
  2490. outptr = output_buf[ctr] + output_col;
  2491. /* Even part */
  2492. /* Add range center and fudge factor for final descale and range-limit. */
  2493. tmp0 = (INT32) wsptr[0] +
  2494. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  2495. (ONE << (PASS1_BITS+2)));
  2496. tmp0 <<= CONST_BITS;
  2497. z1 = (INT32) wsptr[4];
  2498. tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
  2499. tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
  2500. tmp10 = tmp0 + tmp1;
  2501. tmp11 = tmp0 - tmp1;
  2502. tmp12 = tmp0 + tmp2;
  2503. tmp13 = tmp0 - tmp2;
  2504. z1 = (INT32) wsptr[2];
  2505. z2 = (INT32) wsptr[6];
  2506. z3 = z1 - z2;
  2507. z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
  2508. z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
  2509. tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
  2510. tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
  2511. tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
  2512. tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
  2513. tmp20 = tmp10 + tmp0;
  2514. tmp27 = tmp10 - tmp0;
  2515. tmp21 = tmp12 + tmp1;
  2516. tmp26 = tmp12 - tmp1;
  2517. tmp22 = tmp13 + tmp2;
  2518. tmp25 = tmp13 - tmp2;
  2519. tmp23 = tmp11 + tmp3;
  2520. tmp24 = tmp11 - tmp3;
  2521. /* Odd part */
  2522. z1 = (INT32) wsptr[1];
  2523. z2 = (INT32) wsptr[3];
  2524. z3 = (INT32) wsptr[5];
  2525. z4 = (INT32) wsptr[7];
  2526. tmp11 = z1 + z3;
  2527. tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
  2528. tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
  2529. tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
  2530. tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
  2531. tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
  2532. tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
  2533. tmp0 = tmp1 + tmp2 + tmp3 -
  2534. MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
  2535. tmp13 = tmp10 + tmp11 + tmp12 -
  2536. MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
  2537. z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
  2538. tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
  2539. tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
  2540. z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
  2541. tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
  2542. tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
  2543. z2 += z4;
  2544. z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
  2545. tmp1 += z1;
  2546. tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
  2547. z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
  2548. tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
  2549. tmp12 += z2;
  2550. z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
  2551. tmp2 += z2;
  2552. tmp3 += z2;
  2553. z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
  2554. tmp10 += z2;
  2555. tmp11 += z2;
  2556. /* Final output stage */
  2557. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
  2558. CONST_BITS+PASS1_BITS+3)
  2559. & RANGE_MASK];
  2560. outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
  2561. CONST_BITS+PASS1_BITS+3)
  2562. & RANGE_MASK];
  2563. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
  2564. CONST_BITS+PASS1_BITS+3)
  2565. & RANGE_MASK];
  2566. outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
  2567. CONST_BITS+PASS1_BITS+3)
  2568. & RANGE_MASK];
  2569. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
  2570. CONST_BITS+PASS1_BITS+3)
  2571. & RANGE_MASK];
  2572. outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
  2573. CONST_BITS+PASS1_BITS+3)
  2574. & RANGE_MASK];
  2575. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
  2576. CONST_BITS+PASS1_BITS+3)
  2577. & RANGE_MASK];
  2578. outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
  2579. CONST_BITS+PASS1_BITS+3)
  2580. & RANGE_MASK];
  2581. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
  2582. CONST_BITS+PASS1_BITS+3)
  2583. & RANGE_MASK];
  2584. outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
  2585. CONST_BITS+PASS1_BITS+3)
  2586. & RANGE_MASK];
  2587. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
  2588. CONST_BITS+PASS1_BITS+3)
  2589. & RANGE_MASK];
  2590. outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
  2591. CONST_BITS+PASS1_BITS+3)
  2592. & RANGE_MASK];
  2593. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
  2594. CONST_BITS+PASS1_BITS+3)
  2595. & RANGE_MASK];
  2596. outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
  2597. CONST_BITS+PASS1_BITS+3)
  2598. & RANGE_MASK];
  2599. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
  2600. CONST_BITS+PASS1_BITS+3)
  2601. & RANGE_MASK];
  2602. outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
  2603. CONST_BITS+PASS1_BITS+3)
  2604. & RANGE_MASK];
  2605. wsptr += 8; /* advance pointer to next row */
  2606. }
  2607. }
  2608. /*
  2609. * Perform dequantization and inverse DCT on one block of coefficients,
  2610. * producing a 14x7 output block.
  2611. *
  2612. * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
  2613. */
  2614. GLOBAL(void)
  2615. jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  2616. JCOEFPTR coef_block,
  2617. JSAMPARRAY output_buf, JDIMENSION output_col)
  2618. {
  2619. INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
  2620. INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
  2621. INT32 z1, z2, z3, z4;
  2622. JCOEFPTR inptr;
  2623. ISLOW_MULT_TYPE * quantptr;
  2624. int * wsptr;
  2625. JSAMPROW outptr;
  2626. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  2627. int ctr;
  2628. int workspace[8*7]; /* buffers data between passes */
  2629. SHIFT_TEMPS
  2630. /* Pass 1: process columns from input, store into work array.
  2631. * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
  2632. */
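/* For example, c2 = sqrt(2)*cos(2*pi/14) = 1.274162392 and
 * c4 = sqrt(2)*cos(4*pi/14) = 0.881747734, which appear below as
 * FIX(1.274162392) and FIX(0.881747734).
 */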
  2633. inptr = coef_block;
  2634. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  2635. wsptr = workspace;
  2636. for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
  2637. /* Even part */
  2638. tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  2639. tmp23 <<= CONST_BITS;
  2640. /* Add fudge factor here for final descale. */
  2641. tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
  2642. z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  2643. z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
  2644. z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
  2645. tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
  2646. tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
  2647. tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
  2648. tmp10 = z1 + z3;
  2649. z2 -= tmp10;
  2650. tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
  2651. tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
  2652. tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
  2653. tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
  2654. /* Odd part */
  2655. z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  2656. z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  2657. z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
  2658. tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
  2659. tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
  2660. tmp10 = tmp11 - tmp12;
  2661. tmp11 += tmp12;
  2662. tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
  2663. tmp11 += tmp12;
  2664. z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
  2665. tmp10 += z2;
  2666. tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
  2667. /* Final output stage */
  2668. wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
  2669. wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
  2670. wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
  2671. wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
  2672. wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
  2673. wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
  2674. wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
  2675. }
  2676. /* Pass 2: process 7 rows from work array, store into output array.
  2677. * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
  2678. */
  2679. wsptr = workspace;
  2680. for (ctr = 0; ctr < 7; ctr++) {
  2681. outptr = output_buf[ctr] + output_col;
  2682. /* Even part */
  2683. /* Add range center and fudge factor for final descale and range-limit. */
  2684. z1 = (INT32) wsptr[0] +
  2685. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  2686. (ONE << (PASS1_BITS+2)));
  2687. z1 <<= CONST_BITS;
  2688. z4 = (INT32) wsptr[4];
  2689. z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
  2690. z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
  2691. z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
  2692. tmp10 = z1 + z2;
  2693. tmp11 = z1 + z3;
  2694. tmp12 = z1 - z4;
  2695. tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
  2696. z1 = (INT32) wsptr[2];
  2697. z2 = (INT32) wsptr[6];
  2698. z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
  2699. tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
  2700. tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
  2701. tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
  2702. MULTIPLY(z2, FIX(1.378756276)); /* c2 */
  2703. tmp20 = tmp10 + tmp13;
  2704. tmp26 = tmp10 - tmp13;
  2705. tmp21 = tmp11 + tmp14;
  2706. tmp25 = tmp11 - tmp14;
  2707. tmp22 = tmp12 + tmp15;
  2708. tmp24 = tmp12 - tmp15;
  2709. /* Odd part */
  2710. z1 = (INT32) wsptr[1];
  2711. z2 = (INT32) wsptr[3];
  2712. z3 = (INT32) wsptr[5];
  2713. z4 = (INT32) wsptr[7];
  2714. z4 <<= CONST_BITS;
  2715. tmp14 = z1 + z3;
  2716. tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
  2717. tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
  2718. tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
  2719. tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
  2720. tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
  2721. z1 -= z2;
  2722. tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
  2723. tmp16 += tmp15;
  2724. tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
  2725. tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
  2726. tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
  2727. tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
  2728. tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
  2729. tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
  2730. tmp13 = ((z1 - z3) << CONST_BITS) + z4;
  2731. /* Final output stage */
  2732. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
  2733. CONST_BITS+PASS1_BITS+3)
  2734. & RANGE_MASK];
  2735. outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
  2736. CONST_BITS+PASS1_BITS+3)
  2737. & RANGE_MASK];
  2738. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
  2739. CONST_BITS+PASS1_BITS+3)
  2740. & RANGE_MASK];
  2741. outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
  2742. CONST_BITS+PASS1_BITS+3)
  2743. & RANGE_MASK];
  2744. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
  2745. CONST_BITS+PASS1_BITS+3)
  2746. & RANGE_MASK];
  2747. outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
  2748. CONST_BITS+PASS1_BITS+3)
  2749. & RANGE_MASK];
  2750. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
  2751. CONST_BITS+PASS1_BITS+3)
  2752. & RANGE_MASK];
  2753. outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
  2754. CONST_BITS+PASS1_BITS+3)
  2755. & RANGE_MASK];
  2756. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
  2757. CONST_BITS+PASS1_BITS+3)
  2758. & RANGE_MASK];
  2759. outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
  2760. CONST_BITS+PASS1_BITS+3)
  2761. & RANGE_MASK];
  2762. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
  2763. CONST_BITS+PASS1_BITS+3)
  2764. & RANGE_MASK];
  2765. outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
  2766. CONST_BITS+PASS1_BITS+3)
  2767. & RANGE_MASK];
  2768. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
  2769. CONST_BITS+PASS1_BITS+3)
  2770. & RANGE_MASK];
  2771. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
  2772. CONST_BITS+PASS1_BITS+3)
  2773. & RANGE_MASK];
  2774. wsptr += 8; /* advance pointer to next row */
  2775. }
  2776. }
  2777. /*
  2778. * Perform dequantization and inverse DCT on one block of coefficients,
  2779. * producing a 12x6 output block.
  2780. *
  2781. * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
  2782. */
  2783. GLOBAL(void)
  2784. jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  2785. JCOEFPTR coef_block,
  2786. JSAMPARRAY output_buf, JDIMENSION output_col)
  2787. {
  2788. INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
  2789. INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
  2790. INT32 z1, z2, z3, z4;
  2791. JCOEFPTR inptr;
  2792. ISLOW_MULT_TYPE * quantptr;
  2793. int * wsptr;
  2794. JSAMPROW outptr;
  2795. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  2796. int ctr;
  2797. int workspace[8*6]; /* buffers data between passes */
  2798. SHIFT_TEMPS
  2799. /* Pass 1: process columns from input, store into work array.
  2800. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
  2801. */
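/* For the 6-point kernel, c4 = sqrt(2)*cos(4*pi/12) = sqrt(2)/2 =
 * 0.707106781 and c2 = sqrt(2)*cos(2*pi/12) = sqrt(6)/2 = 1.224744871,
 * matching the FIX() constants used below.
 */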
  2802. inptr = coef_block;
  2803. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  2804. wsptr = workspace;
  2805. for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
  2806. /* Even part */
  2807. tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  2808. tmp10 <<= CONST_BITS;
  2809. /* Add fudge factor here for final descale. */
  2810. tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
  2811. tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
  2812. tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
  2813. tmp11 = tmp10 + tmp20;
  2814. tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
  2815. tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  2816. tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
  2817. tmp20 = tmp11 + tmp10;
  2818. tmp22 = tmp11 - tmp10;
  2819. /* Odd part */
  2820. z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  2821. z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  2822. z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
  2823. tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
  2824. tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
  2825. tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
  2826. tmp11 = (z1 - z2 - z3) << PASS1_BITS;
  2827. /* Final output stage */
  2828. wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
  2829. wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
  2830. wsptr[8*1] = (int) (tmp21 + tmp11);
  2831. wsptr[8*4] = (int) (tmp21 - tmp11);
  2832. wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
  2833. wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
  2834. }
  2835. /* Pass 2: process 6 rows from work array, store into output array.
  2836. * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
  2837. */
  2838. wsptr = workspace;
  2839. for (ctr = 0; ctr < 6; ctr++) {
  2840. outptr = output_buf[ctr] + output_col;
  2841. /* Even part */
  2842. /* Add range center and fudge factor for final descale and range-limit. */
  2843. z3 = (INT32) wsptr[0] +
  2844. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  2845. (ONE << (PASS1_BITS+2)));
  2846. z3 <<= CONST_BITS;
  2847. z4 = (INT32) wsptr[4];
  2848. z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
  2849. tmp10 = z3 + z4;
  2850. tmp11 = z3 - z4;
  2851. z1 = (INT32) wsptr[2];
  2852. z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
  2853. z1 <<= CONST_BITS;
  2854. z2 = (INT32) wsptr[6];
  2855. z2 <<= CONST_BITS;
  2856. tmp12 = z1 - z2;
  2857. tmp21 = z3 + tmp12;
  2858. tmp24 = z3 - tmp12;
  2859. tmp12 = z4 + z2;
  2860. tmp20 = tmp10 + tmp12;
  2861. tmp25 = tmp10 - tmp12;
  2862. tmp12 = z4 - z1 - z2;
  2863. tmp22 = tmp11 + tmp12;
  2864. tmp23 = tmp11 - tmp12;
  2865. /* Odd part */
  2866. z1 = (INT32) wsptr[1];
  2867. z2 = (INT32) wsptr[3];
  2868. z3 = (INT32) wsptr[5];
  2869. z4 = (INT32) wsptr[7];
  2870. tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
  2871. tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
  2872. tmp10 = z1 + z3;
  2873. tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
  2874. tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
  2875. tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
  2876. tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
  2877. tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
  2878. tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
  2879. tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
  2880. MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
  2881. z1 -= z4;
  2882. z2 -= z3;
  2883. z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
  2884. tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
  2885. tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
  2886. /* Final output stage */
  2887. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
  2888. CONST_BITS+PASS1_BITS+3)
  2889. & RANGE_MASK];
  2890. outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
  2891. CONST_BITS+PASS1_BITS+3)
  2892. & RANGE_MASK];
  2893. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
  2894. CONST_BITS+PASS1_BITS+3)
  2895. & RANGE_MASK];
  2896. outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
  2897. CONST_BITS+PASS1_BITS+3)
  2898. & RANGE_MASK];
  2899. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
  2900. CONST_BITS+PASS1_BITS+3)
  2901. & RANGE_MASK];
  2902. outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
  2903. CONST_BITS+PASS1_BITS+3)
  2904. & RANGE_MASK];
  2905. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
  2906. CONST_BITS+PASS1_BITS+3)
  2907. & RANGE_MASK];
  2908. outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
  2909. CONST_BITS+PASS1_BITS+3)
  2910. & RANGE_MASK];
  2911. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
  2912. CONST_BITS+PASS1_BITS+3)
  2913. & RANGE_MASK];
  2914. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
  2915. CONST_BITS+PASS1_BITS+3)
  2916. & RANGE_MASK];
  2917. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
  2918. CONST_BITS+PASS1_BITS+3)
  2919. & RANGE_MASK];
  2920. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
  2921. CONST_BITS+PASS1_BITS+3)
  2922. & RANGE_MASK];
  2923. wsptr += 8; /* advance pointer to next row */
  2924. }
  2925. }
  2926. /*
  2927. * Perform dequantization and inverse DCT on one block of coefficients,
  2928. * producing a 10x5 output block.
  2929. *
  2930. * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
  2931. */
  2932. GLOBAL(void)
  2933. jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  2934. JCOEFPTR coef_block,
  2935. JSAMPARRAY output_buf, JDIMENSION output_col)
  2936. {
  2937. INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
  2938. INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
  2939. INT32 z1, z2, z3, z4;
  2940. JCOEFPTR inptr;
  2941. ISLOW_MULT_TYPE * quantptr;
  2942. int * wsptr;
  2943. JSAMPROW outptr;
  2944. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  2945. int ctr;
  2946. int workspace[8*5]; /* buffers data between passes */
  2947. SHIFT_TEMPS
  2948. /* Pass 1: process columns from input, store into work array.
  2949. * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
  2950. */
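/* The even part uses the half-sum and half-difference of c2 and c4:
 * c2 = sqrt(2)*cos(2*pi/10) = 1.144122806 and c4 = sqrt(2)*cos(4*pi/10) =
 * 0.437016024, so (c2+c4)/2 = 0.790569415 and (c2-c4)/2 = 0.353553391.
 */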
  2951. inptr = coef_block;
  2952. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  2953. wsptr = workspace;
  2954. for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
  2955. /* Even part */
  2956. tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  2957. tmp12 <<= CONST_BITS;
  2958. /* Add fudge factor here for final descale. */
  2959. tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
  2960. tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  2961. tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
  2962. z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
  2963. z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
  2964. z3 = tmp12 + z2;
  2965. tmp10 = z3 + z1;
  2966. tmp11 = z3 - z1;
  2967. tmp12 -= z2 << 2;
  2968. /* Odd part */
  2969. z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  2970. z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  2971. z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
  2972. tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
  2973. tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
  2974. /* Final output stage */
  2975. wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
  2976. wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
  2977. wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
  2978. wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
  2979. wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
  2980. }
  2981. /* Pass 2: process 5 rows from work array, store into output array.
  2982. * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
  2983. */
  2984. wsptr = workspace;
  2985. for (ctr = 0; ctr < 5; ctr++) {
  2986. outptr = output_buf[ctr] + output_col;
  2987. /* Even part */
  2988. /* Add range center and fudge factor for final descale and range-limit. */
  2989. z3 = (INT32) wsptr[0] +
  2990. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  2991. (ONE << (PASS1_BITS+2)));
  2992. z3 <<= CONST_BITS;
  2993. z4 = (INT32) wsptr[4];
  2994. z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
  2995. z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
  2996. tmp10 = z3 + z1;
  2997. tmp11 = z3 - z2;
  2998. tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
  2999. z2 = (INT32) wsptr[2];
  3000. z3 = (INT32) wsptr[6];
  3001. z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
  3002. tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
  3003. tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
  3004. tmp20 = tmp10 + tmp12;
  3005. tmp24 = tmp10 - tmp12;
  3006. tmp21 = tmp11 + tmp13;
  3007. tmp23 = tmp11 - tmp13;
  3008. /* Odd part */
  3009. z1 = (INT32) wsptr[1];
  3010. z2 = (INT32) wsptr[3];
  3011. z3 = (INT32) wsptr[5];
  3012. z3 <<= CONST_BITS;
  3013. z4 = (INT32) wsptr[7];
  3014. tmp11 = z2 + z4;
  3015. tmp13 = z2 - z4;
  3016. tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
  3017. z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
  3018. z4 = z3 + tmp12;
  3019. tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
  3020. tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
  3021. z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
  3022. z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
  3023. tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
  3024. tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
  3025. tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
  3026. /* Final output stage */
  3027. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
  3028. CONST_BITS+PASS1_BITS+3)
  3029. & RANGE_MASK];
  3030. outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
  3031. CONST_BITS+PASS1_BITS+3)
  3032. & RANGE_MASK];
  3033. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
  3034. CONST_BITS+PASS1_BITS+3)
  3035. & RANGE_MASK];
  3036. outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
  3037. CONST_BITS+PASS1_BITS+3)
  3038. & RANGE_MASK];
  3039. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
  3040. CONST_BITS+PASS1_BITS+3)
  3041. & RANGE_MASK];
  3042. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
  3043. CONST_BITS+PASS1_BITS+3)
  3044. & RANGE_MASK];
  3045. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
  3046. CONST_BITS+PASS1_BITS+3)
  3047. & RANGE_MASK];
  3048. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
  3049. CONST_BITS+PASS1_BITS+3)
  3050. & RANGE_MASK];
  3051. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
  3052. CONST_BITS+PASS1_BITS+3)
  3053. & RANGE_MASK];
  3054. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
  3055. CONST_BITS+PASS1_BITS+3)
  3056. & RANGE_MASK];
  3057. wsptr += 8; /* advance pointer to next row */
  3058. }
  3059. }
  3060. /*
  3061. * Perform dequantization and inverse DCT on one block of coefficients,
 * producing an 8x4 output block.
  3063. *
  3064. * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
  3065. */
  3066. GLOBAL(void)
  3067. jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  3068. JCOEFPTR coef_block,
  3069. JSAMPARRAY output_buf, JDIMENSION output_col)
  3070. {
  3071. INT32 tmp0, tmp1, tmp2, tmp3;
  3072. INT32 tmp10, tmp11, tmp12, tmp13;
  3073. INT32 z1, z2, z3;
  3074. JCOEFPTR inptr;
  3075. ISLOW_MULT_TYPE * quantptr;
  3076. int * wsptr;
  3077. JSAMPROW outptr;
  3078. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  3079. int ctr;
  3080. int workspace[8*4]; /* buffers data between passes */
  3081. SHIFT_TEMPS
  3082. /* Pass 1: process columns from input, store into work array.
  3083. * 4-point IDCT kernel,
  3084. * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
  3085. */
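/* In this kernel the even part is a plain butterfly scaled directly by
 * PASS1_BITS, while the odd part carries CONST_BITS precision and is
 * rounded down to PASS1_BITS scale before the final sums, so the results
 * written to the workspace need no further shift in this pass.
 */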
  3086. inptr = coef_block;
  3087. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  3088. wsptr = workspace;
  3089. for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
  3090. /* Even part */
  3091. tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  3092. tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  3093. tmp10 = (tmp0 + tmp2) << PASS1_BITS;
  3094. tmp12 = (tmp0 - tmp2) << PASS1_BITS;
  3095. /* Odd part */
  3096. /* Same rotation as in the even part of the 8x8 LL&M IDCT */
  3097. z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  3098. z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
  3099. z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
  3100. /* Add fudge factor here for final descale. */
  3101. z1 += ONE << (CONST_BITS-PASS1_BITS-1);
  3102. tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
  3103. CONST_BITS-PASS1_BITS);
  3104. tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
  3105. CONST_BITS-PASS1_BITS);
  3106. /* Final output stage */
  3107. wsptr[8*0] = (int) (tmp10 + tmp0);
  3108. wsptr[8*3] = (int) (tmp10 - tmp0);
  3109. wsptr[8*1] = (int) (tmp12 + tmp2);
  3110. wsptr[8*2] = (int) (tmp12 - tmp2);
  3111. }
  3112. /* Pass 2: process rows from work array, store into output array.
  3113. * Note that we must descale the results by a factor of 8 == 2**3,
  3114. * and also undo the PASS1_BITS scaling.
  3115. * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
  3116. */
  3117. wsptr = workspace;
  3118. for (ctr = 0; ctr < 4; ctr++) {
  3119. outptr = output_buf[ctr] + output_col;
  3120. /* Even part: reverse the even part of the forward DCT.
  3121. * The rotator is c(-6).
  3122. */
  3123. /* Add range center and fudge factor for final descale and range-limit. */
  3124. z2 = (INT32) wsptr[0] +
  3125. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  3126. (ONE << (PASS1_BITS+2)));
  3127. z3 = (INT32) wsptr[4];
  3128. tmp0 = (z2 + z3) << CONST_BITS;
  3129. tmp1 = (z2 - z3) << CONST_BITS;
  3130. z2 = (INT32) wsptr[2];
  3131. z3 = (INT32) wsptr[6];
  3132. z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
  3133. tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
  3134. tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
  3135. tmp10 = tmp0 + tmp2;
  3136. tmp13 = tmp0 - tmp2;
  3137. tmp11 = tmp1 + tmp3;
  3138. tmp12 = tmp1 - tmp3;
  3139. /* Odd part per figure 8; the matrix is unitary and hence its
  3140. * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
  3141. */
  3142. tmp0 = (INT32) wsptr[7];
  3143. tmp1 = (INT32) wsptr[5];
  3144. tmp2 = (INT32) wsptr[3];
  3145. tmp3 = (INT32) wsptr[1];
  3146. z2 = tmp0 + tmp2;
  3147. z3 = tmp1 + tmp3;
  3148. z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
  3149. z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
  3150. z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
  3151. z2 += z1;
  3152. z3 += z1;
  3153. z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
  3154. tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
  3155. tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
  3156. tmp0 += z1 + z2;
  3157. tmp3 += z1 + z3;
  3158. z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
  3159. tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
  3160. tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
  3161. tmp1 += z1 + z3;
  3162. tmp2 += z1 + z2;
  3163. /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  3164. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
  3165. CONST_BITS+PASS1_BITS+3)
  3166. & RANGE_MASK];
  3167. outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
  3168. CONST_BITS+PASS1_BITS+3)
  3169. & RANGE_MASK];
  3170. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
  3171. CONST_BITS+PASS1_BITS+3)
  3172. & RANGE_MASK];
  3173. outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
  3174. CONST_BITS+PASS1_BITS+3)
  3175. & RANGE_MASK];
  3176. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
  3177. CONST_BITS+PASS1_BITS+3)
  3178. & RANGE_MASK];
  3179. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
  3180. CONST_BITS+PASS1_BITS+3)
  3181. & RANGE_MASK];
  3182. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
  3183. CONST_BITS+PASS1_BITS+3)
  3184. & RANGE_MASK];
  3185. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
  3186. CONST_BITS+PASS1_BITS+3)
  3187. & RANGE_MASK];
  3188. wsptr += DCTSIZE; /* advance pointer to next row */
  3189. }
  3190. }
  3191. /*
  3192. * Perform dequantization and inverse DCT on one block of coefficients,
  3193. * producing a reduced-size 6x3 output block.
  3194. *
  3195. * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
  3196. */
  3197. GLOBAL(void)
  3198. jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  3199. JCOEFPTR coef_block,
  3200. JSAMPARRAY output_buf, JDIMENSION output_col)
  3201. {
  3202. INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
  3203. INT32 z1, z2, z3;
  3204. JCOEFPTR inptr;
  3205. ISLOW_MULT_TYPE * quantptr;
  3206. int * wsptr;
  3207. JSAMPROW outptr;
  3208. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  3209. int ctr;
  3210. int workspace[6*3]; /* buffers data between passes */
  3211. SHIFT_TEMPS
  3212. /* Pass 1: process columns from input, store into work array.
  3213. * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
  3214. */
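/* The 3-point column kernel needs only two multiplications per column
 * (by c2 and c1); everything else is adds and shifts.
 */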
  3215. inptr = coef_block;
  3216. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  3217. wsptr = workspace;
  3218. for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
  3219. /* Even part */
  3220. tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  3221. tmp0 <<= CONST_BITS;
  3222. /* Add fudge factor here for final descale. */
  3223. tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
  3224. tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
  3225. tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
  3226. tmp10 = tmp0 + tmp12;
  3227. tmp2 = tmp0 - tmp12 - tmp12;
  3228. /* Odd part */
  3229. tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  3230. tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
  3231. /* Final output stage */
  3232. wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
  3233. wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
  3234. wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
  3235. }
  3236. /* Pass 2: process 3 rows from work array, store into output array.
  3237. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
  3238. */
  3239. wsptr = workspace;
  3240. for (ctr = 0; ctr < 3; ctr++) {
  3241. outptr = output_buf[ctr] + output_col;
  3242. /* Even part */
  3243. /* Add range center and fudge factor for final descale and range-limit. */
  3244. tmp0 = (INT32) wsptr[0] +
  3245. ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
  3246. (ONE << (PASS1_BITS+2)));
  3247. tmp0 <<= CONST_BITS;
  3248. tmp2 = (INT32) wsptr[4];
  3249. tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
  3250. tmp1 = tmp0 + tmp10;
  3251. tmp11 = tmp0 - tmp10 - tmp10;
  3252. tmp10 = (INT32) wsptr[2];
  3253. tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
  3254. tmp10 = tmp1 + tmp0;
  3255. tmp12 = tmp1 - tmp0;
  3256. /* Odd part */
  3257. z1 = (INT32) wsptr[1];
  3258. z2 = (INT32) wsptr[3];
  3259. z3 = (INT32) wsptr[5];
  3260. tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
  3261. tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
  3262. tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
  3263. tmp1 = (z1 - z2 - z3) << CONST_BITS;
  3264. /* Final output stage */
  3265. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
  3266. CONST_BITS+PASS1_BITS+3)
  3267. & RANGE_MASK];
  3268. outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
  3269. CONST_BITS+PASS1_BITS+3)
  3270. & RANGE_MASK];
  3271. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
  3272. CONST_BITS+PASS1_BITS+3)
  3273. & RANGE_MASK];
  3274. outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
  3275. CONST_BITS+PASS1_BITS+3)
  3276. & RANGE_MASK];
  3277. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
  3278. CONST_BITS+PASS1_BITS+3)
  3279. & RANGE_MASK];
  3280. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
  3281. CONST_BITS+PASS1_BITS+3)
  3282. & RANGE_MASK];
  3283. wsptr += 6; /* advance pointer to next row */
  3284. }
  3285. }
  3286. /*
  3287. * Perform dequantization and inverse DCT on one block of coefficients,
  3288. * producing a 4x2 output block.
  3289. *
  3290. * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
  3291. */
  3292. GLOBAL(void)
  3293. jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
  3294. JCOEFPTR coef_block,
  3295. JSAMPARRAY output_buf, JDIMENSION output_col)
  3296. {
  3297. INT32 tmp0, tmp2, tmp10, tmp12;
  3298. INT32 z1, z2, z3;
  3299. JCOEFPTR inptr;
  3300. ISLOW_MULT_TYPE * quantptr;
  3301. INT32 * wsptr;
  3302. JSAMPROW outptr;
  3303. JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  3304. int ctr;
  3305. INT32 workspace[4*2]; /* buffers data between passes */
  3306. SHIFT_TEMPS
  3307. /* Pass 1: process columns from input, store into work array. */
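/* The 2-point column butterfly involves no multiplications, so no PASS1_BITS
 * scaling is applied in this pass; correspondingly, pass 2 descales by
 * CONST_BITS+3 only.
 */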
  3308. inptr = coef_block;
  3309. quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  3310. wsptr = workspace;
  3311. for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
  3312. /* Even part */
  3313. tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
  3314. /* Odd part */
  3315. tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
  3316. /* Final output stage */
  3317. wsptr[4*0] = tmp10 + tmp0;
  3318. wsptr[4*1] = tmp10 - tmp0;
  3319. }
  3320. /* Pass 2: process 2 rows from work array, store into output array.
  3321. * 4-point IDCT kernel,
  3322. * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
  3323. */
  3324. wsptr = workspace;
  3325. for (ctr = 0; ctr < 2; ctr++) {
  3326. outptr = output_buf[ctr] + output_col;
  3327. /* Even part */
  3328. /* Add range center and fudge factor for final descale and range-limit. */
  3329. tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
  3330. tmp2 = wsptr[2];
  3331. tmp10 = (tmp0 + tmp2) << CONST_BITS;
  3332. tmp12 = (tmp0 - tmp2) << CONST_BITS;
  3333. /* Odd part */
  3334. /* Same rotation as in the even part of the 8x8 LL&M IDCT */
  3335. z2 = wsptr[1];
  3336. z3 = wsptr[3];
  3337. z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
  3338. tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
  3339. tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
  3340. /* Final output stage */
  3341. outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
  3342. CONST_BITS+3)
  3343. & RANGE_MASK];
  3344. outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
  3345. CONST_BITS+3)
  3346. & RANGE_MASK];
  3347. outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
  3348. CONST_BITS+3)
  3349. & RANGE_MASK];
  3350. outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
  3351. CONST_BITS+3)
  3352. & RANGE_MASK];
  3353. wsptr += 4; /* advance pointer to next row */
  3354. }
  3355. }
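
/* The kernels above lean on the file's fixed-point scheme: a real constant
 * c is stored as round(c * 2^CONST_BITS) via FIX(), products are formed
 * with MULTIPLY(), and the extra 2^CONST_BITS factor is removed by a final
 * right shift.  A minimal standalone illustration of that idea, kept
 * compiled out and using local names so it does not depend on the exact
 * macro definitions earlier in this file:
 */
#if 0
#include <stdio.h>

#define DEMO_CONST_BITS 13
#define DEMO_FIX(x) ((long) ((x) * (1L << DEMO_CONST_BITS) + 0.5))

int main (void)
{
  long coef = 100;                          /* stand-in for a dequantized coefficient */
  long prod = coef * DEMO_FIX(0.707106781); /* result is scaled by 2^13 */
  long rounded = (prod + (1L << (DEMO_CONST_BITS - 1))) >> DEMO_CONST_BITS;
  printf("%ld\n", rounded);                 /* prints 71, i.e. ~100 * 0.707106781 */
  return 0;
}
#endif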

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 2x1 output block.
 *
 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  DCTELEM tmp0, tmp1;
  ISLOW_MULT_TYPE * quantptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  ISHIFT_TEMPS

  /* Pass 1: empty. */

  /* Pass 2: process 1 row from input, store into output array. */
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  outptr = output_buf[0] + output_col;
  /* Even part */
  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
  /* Add range center and fudge factor for final descale and range-limit. */
  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
  /* Odd part */
  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
  /* Final output stage */
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
}
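
/* The "fudge factor" idiom used just above: adding 2^(n-1) before a right
 * shift by n turns truncation into round-to-nearest, while adding
 * RANGE_CENTER << n at the same point re-centers the value for the
 * range_limit[] lookup.  A tiny self-contained demonstration of the
 * rounding half of the idiom, compiled out here:
 */
#if 0
#include <stdio.h>

int main (void)
{
  int x = 21;                          /* 21/8 = 2.625, should round to 3 */
  int truncated = x >> 3;              /* 2: a plain shift truncates */
  int rounded = (x + (1 << 2)) >> 3;   /* 3: add half of 2^3 first */
  printf("%d %d\n", truncated, rounded);
  return 0;
}
#endif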

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing an 8x16 output block.
 *
 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[8*16]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp0 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
    tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
    tmp10 = tmp0 + tmp1;
    tmp11 = tmp0 - tmp1;
    tmp12 = tmp0 + tmp2;
    tmp13 = tmp0 - tmp2;
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    z3 = z1 - z2;
    z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
    z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
    tmp20 = tmp10 + tmp0;
    tmp27 = tmp10 - tmp0;
    tmp21 = tmp12 + tmp1;
    tmp26 = tmp12 - tmp1;
    tmp22 = tmp13 + tmp2;
    tmp25 = tmp13 - tmp2;
    tmp23 = tmp11 + tmp3;
    tmp24 = tmp11 - tmp3;
    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp11 = z1 + z3;
    tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
    tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
    tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
    tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
    tmp0 = tmp1 + tmp2 + tmp3 -
            MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
    tmp13 = tmp10 + tmp11 + tmp12 -
            MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
    z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
    tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
    tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
    z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
    z2 += z4;
    z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
    tmp1 += z1;
    tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
    z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
    tmp12 += z2;
    z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
    tmp2 += z2;
    tmp3 += z2;
    z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
    tmp10 += z2;
    tmp11 += z2;
    /* Final output stage */
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process rows from work array, store into output array.
   * Note that we must descale the results by a factor of 8 == 2**3,
   * and also undo the PASS1_BITS scaling.
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
   */
  wsptr = workspace;
  for (ctr = 0; ctr < 16; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part: reverse the even part of the forward DCT.
     * The rotator is c(-6).
     */
    /* Add range center and fudge factor for final descale and range-limit. */
    z2 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    z3 = (INT32) wsptr[4];
    tmp0 = (z2 + z3) << CONST_BITS;
    tmp1 = (z2 - z3) << CONST_BITS;
    z2 = (INT32) wsptr[2];
    z3 = (INT32) wsptr[6];
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
    tmp10 = tmp0 + tmp2;
    tmp13 = tmp0 - tmp2;
    tmp11 = tmp1 + tmp3;
    tmp12 = tmp1 - tmp3;
    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
     */
    tmp0 = (INT32) wsptr[7];
    tmp1 = (INT32) wsptr[5];
    tmp2 = (INT32) wsptr[3];
    tmp3 = (INT32) wsptr[1];
    z2 = tmp0 + tmp2;
    z3 = tmp1 + tmp3;
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
    z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
    z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
    z2 += z1;
    z3 += z1;
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
    tmp0 += z1 + z2;
    tmp3 += z1 + z3;
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
    tmp1 += z1 + z3;
    tmp2 += z1 + z2;
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    wsptr += DCTSIZE; /* advance pointer to next row */
  }
}
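
/* Shift accounting for the two-pass kernels above: pass 1 leaves the
 * workspace scaled up by 2^PASS1_BITS, the pass-2 multiplies contribute a
 * further 2^CONST_BITS, and descaling the DCT itself requires a final
 * division by 8 = 2^3, so the output stage collapses all of that into one
 * RIGHT_SHIFT by CONST_BITS+PASS1_BITS+3.  With the usual 8-bit-sample
 * settings (CONST_BITS = 13, PASS1_BITS = 2) that is a single shift by 18.
 */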

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 7x14 output block.
 *
 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[7*14]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z1 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
    z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
    z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
    tmp10 = z1 + z2;
    tmp11 = z1 + z3;
    tmp12 = z1 - z4;
    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
            CONST_BITS-PASS1_BITS);
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
            MULTIPLY(z2, FIX(1.378756276)); /* c2 */
    tmp20 = tmp10 + tmp13;
    tmp26 = tmp10 - tmp13;
    tmp21 = tmp11 + tmp14;
    tmp25 = tmp11 - tmp14;
    tmp22 = tmp12 + tmp15;
    tmp24 = tmp12 - tmp15;
    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp13 = z4 << CONST_BITS;
    tmp14 = z1 + z3;
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
    z1 -= z2;
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
    tmp16 += tmp15;
    z1 += z4;
    z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
    z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.690643133)); /* c1+c9-c11 */
    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
    tmp13 = (z1 - z3) << PASS1_BITS;
    /* Final output stage */
    wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[7*3] = (int) (tmp23 + tmp13);
    wsptr[7*10] = (int) (tmp23 - tmp13);
    wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
    wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
    wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 14 rows from work array, store into output array.
   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
   */
  wsptr = workspace;
  for (ctr = 0; ctr < 14; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp23 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    tmp23 <<= CONST_BITS;
    z1 = (INT32) wsptr[2];
    z2 = (INT32) wsptr[4];
    z3 = (INT32) wsptr[6];
    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
    tmp10 = z1 + z3;
    z2 -= tmp10;
    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
    tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
    /* Odd part */
    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
    tmp10 = tmp11 - tmp12;
    tmp11 += tmp12;
    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
    tmp11 += tmp12;
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
    tmp10 += z2;
    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    wsptr += 7; /* advance pointer to next row */
  }
}
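
/* Cross-check of the composite constant in the 14-point odd part above,
 * using cK = sqrt(2) * cos(K*pi/28); a compiled-out sketch in plain C:
 */
#if 0
#include <math.h>
#include <stdio.h>

int main (void)
{
  double pi  = 3.141592653589793;
  double c1  = sqrt(2.0) * cos( 1.0 * pi / 28.0);  /* 1.405321284 */
  double c9  = sqrt(2.0) * cos( 9.0 * pi / 28.0);  /* 0.752406978 */
  double c11 = sqrt(2.0) * cos(11.0 * pi / 28.0);  /* 0.467085129 */
  printf("c1+c9-c11 = %.9f\n", c1 + c9 - c11);     /* 1.690643133 */
  return 0;
}
#endif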

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 6x12 output block.
 *
 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
  INT32 z1, z2, z3, z4;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[6*12]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z3 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
    tmp10 = z3 + z4;
    tmp11 = z3 - z4;
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
    z1 <<= CONST_BITS;
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    z2 <<= CONST_BITS;
    tmp12 = z1 - z2;
    tmp21 = z3 + tmp12;
    tmp24 = z3 - tmp12;
    tmp12 = z4 + z2;
    tmp20 = tmp10 + tmp12;
    tmp25 = tmp10 - tmp12;
    tmp12 = z4 - z1 - z2;
    tmp22 = tmp11 + tmp12;
    tmp23 = tmp11 - tmp12;
    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
    tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
    tmp10 = z1 + z3;
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
            MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
    z1 -= z4;
    z2 -= z3;
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
    /* Final output stage */
    wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
    wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
    wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
    wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
    wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 12 rows from work array, store into output array.
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
   */
  wsptr = workspace;
  for (ctr = 0; ctr < 12; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp10 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    tmp10 <<= CONST_BITS;
    tmp12 = (INT32) wsptr[4];
    tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
    tmp11 = tmp10 + tmp20;
    tmp21 = tmp10 - tmp20 - tmp20;
    tmp20 = (INT32) wsptr[2];
    tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
    tmp20 = tmp11 + tmp10;
    tmp22 = tmp11 - tmp10;
    /* Odd part */
    z1 = (INT32) wsptr[1];
    z2 = (INT32) wsptr[3];
    z3 = (INT32) wsptr[5];
    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
    tmp11 = (z1 - z2 - z3) << CONST_BITS;
    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    wsptr += 6; /* advance pointer to next row */
  }
}
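
/* Note on the constant reuse in the 12-point odd part above: on the
 * 24-sample scale, c9 = sqrt(2) * cos(9*pi/24) = sqrt(2) * cos(3*pi/8),
 * which is numerically the 8-point kernel's c6 = sqrt(2) * cos(6*pi/16),
 * so FIX_0_541196100 (and with it FIX_0_765366865 = c3-c9 and
 * FIX_1_847759065 = c3+c9) can be shared with the 8x8 code unchanged.
 */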

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 5x10 output block.
 *
 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
  INT32 z1, z2, z3, z4, z5;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[5*10]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z3 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
    z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
    tmp10 = z3 + z1;
    tmp11 = z3 - z2;
    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
            CONST_BITS-PASS1_BITS);
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
    tmp20 = tmp10 + tmp12;
    tmp24 = tmp10 - tmp12;
    tmp21 = tmp11 + tmp13;
    tmp23 = tmp11 - tmp13;
    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp11 = z2 + z4;
    tmp13 = z2 - z4;
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
    z5 = z3 << CONST_BITS;
    z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
    z4 = z5 + tmp12;
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
    z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
    /* Final output stage */
    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
    wsptr[5*2] = (int) (tmp22 + tmp12);
    wsptr[5*7] = (int) (tmp22 - tmp12);
    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 10 rows from work array, store into output array.
   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
   */
  wsptr = workspace;
  for (ctr = 0; ctr < 10; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp12 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    tmp12 <<= CONST_BITS;
    tmp13 = (INT32) wsptr[2];
    tmp14 = (INT32) wsptr[4];
    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
    z3 = tmp12 + z2;
    tmp10 = z3 + z1;
    tmp11 = z3 - z1;
    tmp12 -= z2 << 2;
    /* Odd part */
    z2 = (INT32) wsptr[1];
    z3 = (INT32) wsptr[3];
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    wsptr += 5; /* advance pointer to next row */
  }
}
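
/* The 5-point row kernel above folds pairs of cosines into single
 * multiplies: with cK = sqrt(2) * cos(K*pi/10),
 *   (c2+c4)/2 = (1.144122806 + 0.437016024)/2 = 0.790569415 and
 *   (c2-c4)/2 = (1.144122806 - 0.437016024)/2 = 0.353553391 = 1/(2*sqrt(2)),
 * which is where the two even-part constants come from.
 */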

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 4x8 output block.
 *
 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3;
  INT32 tmp10, tmp11, tmp12, tmp13;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[4*8]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
   * furthermore, we scale the results by 2**PASS1_BITS.
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 4; ctr > 0; ctr--) {
    /* Due to quantization, we will usually find that many of the input
     * coefficients are zero, especially the AC terms. We can exploit this
     * by short-circuiting the IDCT calculation for any column in which all
     * the AC terms are zero. In that case each output is equal to the
     * DC coefficient (with scale factor as needed).
     * With typical images and quantization tables, half or more of the
     * column DCT calculations can be simplified this way.
     */
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
        inptr[DCTSIZE*7] == 0) {
      /* AC terms all zero */
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;

      wsptr[4*0] = dcval;
      wsptr[4*1] = dcval;
      wsptr[4*2] = dcval;
      wsptr[4*3] = dcval;
      wsptr[4*4] = dcval;
      wsptr[4*5] = dcval;
      wsptr[4*6] = dcval;
      wsptr[4*7] = dcval;

      inptr++; /* advance pointers to next column */
      quantptr++;
      wsptr++;
      continue;
    }
    /* Even part: reverse the even part of the forward DCT.
     * The rotator is c(-6).
     */
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    z2 <<= CONST_BITS;
    z3 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
    tmp0 = z2 + z3;
    tmp1 = z2 - z3;
    tmp10 = tmp0 + tmp2;
    tmp13 = tmp0 - tmp2;
    tmp11 = tmp1 + tmp3;
    tmp12 = tmp1 - tmp3;
    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
     */
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = tmp0 + tmp2;
    z3 = tmp1 + tmp3;
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
    z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
    z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
    z2 += z1;
    z3 += z1;
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
    tmp0 += z1 + z2;
    tmp3 += z1 + z3;
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
    tmp1 += z1 + z3;
    tmp2 += z1 + z2;
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);

    inptr++; /* advance pointers to next column */
    quantptr++;
    wsptr++;
  }

  /* Pass 2: process 8 rows from work array, store into output array.
   * 4-point IDCT kernel,
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
   */
  wsptr = workspace;
  for (ctr = 0; ctr < 8; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp0 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    tmp2 = (INT32) wsptr[2];
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
    /* Odd part */
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
    z2 = (INT32) wsptr[1];
    z3 = (INT32) wsptr[3];
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    wsptr += 4; /* advance pointer to next row */
  }
}
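
/* Why the all-AC-zero short-circuit above may store "dcval << PASS1_BITS":
 * in the general pass-1 path the DC term enters as dc << CONST_BITS and the
 * outputs are descaled with RIGHT_SHIFT(..., CONST_BITS-PASS1_BITS), so
 * with every AC coefficient zero each output reduces to
 *   (dc << CONST_BITS) >> (CONST_BITS - PASS1_BITS)  ==  dc << PASS1_BITS,
 * which is exactly the value replicated into all eight workspace rows.
 */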

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a reduced-size 3x6 output block.
 *
 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  int * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  int workspace[3*6]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp0 <<= CONST_BITS;
    /* Add fudge factor here for final descale. */
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
    tmp1 = tmp0 + tmp10;
    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
    tmp10 = tmp1 + tmp0;
    tmp12 = tmp1 - tmp0;
    /* Odd part */
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
    /* Final output stage */
    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
    wsptr[3*1] = (int) (tmp11 + tmp1);
    wsptr[3*4] = (int) (tmp11 - tmp1);
    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process 6 rows from work array, store into output array.
   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
   */
  wsptr = workspace;
  for (ctr = 0; ctr < 6; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp0 = (INT32) wsptr[0] +
            ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
             (ONE << (PASS1_BITS+2)));
    tmp0 <<= CONST_BITS;
    tmp2 = (INT32) wsptr[2];
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
    tmp10 = tmp0 + tmp12;
    tmp2 = tmp0 - tmp12 - tmp12;
    /* Odd part */
    tmp12 = (INT32) wsptr[1];
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
            CONST_BITS+PASS1_BITS+3)
            & RANGE_MASK];
    wsptr += 3; /* advance pointer to next row */
  }
}

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 2x4 output block.
 *
 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  INT32 tmp0, tmp2, tmp10, tmp12;
  INT32 z1, z2, z3;
  JCOEFPTR inptr;
  ISLOW_MULT_TYPE * quantptr;
  INT32 * wsptr;
  JSAMPROW outptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  int ctr;
  INT32 workspace[2*4]; /* buffers data between passes */
  SHIFT_TEMPS

  /* Pass 1: process columns from input, store into work array.
   * 4-point IDCT kernel,
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
   */
  inptr = coef_block;
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  wsptr = workspace;
  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
    /* Even part */
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
    /* Odd part */
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
    /* Final output stage */
    wsptr[2*0] = tmp10 + tmp0;
    wsptr[2*3] = tmp10 - tmp0;
    wsptr[2*1] = tmp12 + tmp2;
    wsptr[2*2] = tmp12 - tmp2;
  }

  /* Pass 2: process 4 rows from work array, store into output array. */
  wsptr = workspace;
  for (ctr = 0; ctr < 4; ctr++) {
    outptr = output_buf[ctr] + output_col;
    /* Even part */
    /* Add range center and fudge factor for final descale and range-limit. */
    tmp10 = wsptr[0] +
            ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
             (ONE << (CONST_BITS+2)));
    /* Odd part */
    tmp0 = wsptr[1];
    /* Final output stage */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
            & RANGE_MASK];
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
            & RANGE_MASK];
    wsptr += 2; /* advance pointer to next row */
  }
}
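
/* Scaling note for the 2x4 case above: pass 1 stores its results in an
 * INT32 workspace without any descale, so the workspace values remain
 * scaled by 2^CONST_BITS.  Pass 2 therefore adds the range center and
 * rounding constant already shifted up by CONST_BITS
 * (RANGE_CENTER << (CONST_BITS+3), ONE << (CONST_BITS+2)) and removes
 * everything with a single RIGHT_SHIFT by CONST_BITS+3, rather than the
 * CONST_BITS+PASS1_BITS+3 used by the larger kernels.
 */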

/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 1x2 output block.
 *
 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
 */

GLOBAL(void)
jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
               JCOEFPTR coef_block,
               JSAMPARRAY output_buf, JDIMENSION output_col)
{
  DCTELEM tmp0, tmp1;
  ISLOW_MULT_TYPE * quantptr;
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
  ISHIFT_TEMPS

  /* Process 1 column from input, store into output array. */
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
  /* Even part */
  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
  /* Add range center and fudge factor for final descale and range-limit. */
  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
  /* Odd part */
  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
  /* Final output stage */
  output_buf[0][output_col] =
          range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
  output_buf[1][output_col] =
          range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
}
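
/* How an application typically ends up in these scaled kernels: requesting
 * fractional output scaling before jpeg_start_decompress() makes the
 * library pick reduced DCT sizes.  A minimal decode sketch, compiled out
 * here; error handling and the input file name are placeholders:
 */
#if 0
#include <stdio.h>
#include "jpeglib.h"

int main (void)
{
  struct jpeg_decompress_struct cinfo;
  struct jpeg_error_mgr jerr;
  FILE * infile = fopen("in.jpg", "rb");

  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_decompress(&cinfo);
  jpeg_stdio_src(&cinfo, infile);
  (void) jpeg_read_header(&cinfo, TRUE);
  cinfo.scale_num = 1;             /* ask for 1/4-size output, */
  cinfo.scale_denom = 4;           /* which selects reduced-size IDCTs */
  (void) jpeg_start_decompress(&cinfo);
  /* ... jpeg_read_scanlines() loop and jpeg_finish_decompress() here ... */
  jpeg_destroy_decompress(&cinfo);
  fclose(infile);
  return 0;
}
#endif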

#endif /* IDCT_SCALING_SUPPORTED */
#endif /* DCT_ISLOW_SUPPORTED */