From bec481acdfb47eb27c42fbce72f2213e5cbde627 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sat, 5 Sep 2009 07:21:10 +0000 Subject: [PATCH] improved disk read performance and fixed bug in storage_interface's backwards compatibility functions --- Jamfile | 3 + docs/disk_access_elevator.png | Bin 0 -> 5768 bytes docs/disk_access_no_elevator.png | Bin 0 -> 5309 bytes docs/features.rst | 28 ++++ include/libtorrent/disk_io_thread.hpp | 16 +++ include/libtorrent/file.hpp | 2 + include/libtorrent/session_settings.hpp | 11 ++ include/libtorrent/storage.hpp | 4 + parse_disk_access.py | 14 +- src/disk_io_thread.cpp | 132 ++++++++++++++++++- src/file.cpp | 52 ++++++++ src/storage.cpp | 76 +++++++++-- test/test_storage.cpp | 167 ++++++++++++++++++++++++ test/test_transfer.cpp | 3 + 14 files changed, 484 insertions(+), 24 deletions(-) create mode 100644 docs/disk_access_elevator.png create mode 100644 docs/disk_access_no_elevator.png diff --git a/Jamfile b/Jamfile index a2d296d25..e02f221ad 100755 --- a/Jamfile +++ b/Jamfile @@ -244,6 +244,9 @@ feature.compose on : TORRENT_DISK_STATS ; feature memdebug : off on : composite propagated ; feature.compose on : TORRENT_MEMDEBUG ; +feature simulate-slow-read : off on : composite propagated ; +feature.compose on : TORRENT_SIMULATE_SLOW_READ ; + feature logging : none default errors verbose : composite propagated link-incompatible ; feature.compose default : TORRENT_LOGGING ; feature.compose errors : TORRENT_ERROR_LOGGING ; diff --git a/docs/disk_access_elevator.png b/docs/disk_access_elevator.png new file mode 100644 index 0000000000000000000000000000000000000000..1e29240538b3bdc62b7c52569bc04da3d67453c7 GIT binary patch literal 5768 zcmb_=c{r49^#5(lSjLiV#va)-leLt6DP+r%8Bd9xyedmlvZQ)5M#|PgjM7UeS!U!R zB#{{=i6WE67K$jzk`fB@n|{~z{p0)p_rA{c-1mL%b3W(uJlB1l=Q=0VYoD_ORtXCL zkZ{?%#|Hou1b_fhM3Ar1o3OqQfFSU4_jTm)czjJ^Vd3A22>^x11UxV|$F$==fVw)9 z$5aOtyFv<&Vh4B>9$>Dr+?vi00t8X z(~e1j3z;zAk6Fk+;CsUAd@ER;KkV$(?fB;E@LxCnuv4d~^Cv!~!Z#n7>gs^Pj|C{q 
z4}5z-a=t ztzfGGFf{?zX2973cw2*m_8@oxrzz-()$p%X=!LQd~xfiUw11m#dbrk%b0DnG% zwQpekJ6K-=8^8H?1zgi37y=->+hvcv@7e!m^V1vYdg95`yH_@#Zx!8Uu_l>vo6jOlG7%jy9fhG zZ`Hi7!QPWtr~00;rxQJf7ys>smk1(CUe~jQ{5R-l^wtJ#-+y~+*ka!j7v^!BiX(&F zTUh597KXE|R8J49E4t9m8^06=&WBzF}K1^_8_#(vs=Te z7$#7J2GmuynXxTA1@=BHP3+vGb~?`;DU&R@K9P0h%y9C>%T(K&eEVMexA8v;Y?dV0 zDULY33*Cehx*~g!GL(I>2>u;C#Ld-6gR6+$aF<~nbfQ0Oii@G<8B!f@htyqplYjSE zMO>#BI^9H?RZO{5atSL<#HPU0<-9{Sd(m#5=y1OU_a`KDxIAl-ol>HeV>rDxB7p}j z-GZm5oqWx$5kw%lN`W=xh&1_7OG=d!lg&=n=gFut>u_^OBV0mwmS*Wf%d5 z=q}?R2`Sd1t0*mRu9o{D5Re;U-VI;7sU%dcAxfqfNaEdbtN$*E5@sEp29N#jxg-G5 z%4}2_RjHnf`tC%{(ljk7p#H8nIj^E))>cK|9XB|bXxl8v*mmwVhG2ckX1oSAH@+@1we59}RkZmCPZx~A>6Vj)ftf60m7e$%?;cBh} zbQni8HV@9jasEV9<}L4zg^R{pkL*)qe0cL#lN(z4@{I>8E1f>W&IcX(R7~fki-(qe z`fNH7pCAH-+MbfEIELImb!&c4x(KVHx%`|9D?;)6ZF?Vl1vfBWr3ZQ-0S z0-@l(rI6p?lK*Xcl8 zNaP?jr(Z^z(5~^+v&b|k0v(>Rtj$bwdrrS8dPDh?3o;e2wh3Q

4KrO9$$ZWL509 zg$Dm@_r1kV+ICT{RD^8uhl6rM?2wBG1sNffa6z#Xy_Or=P*QUjw04bZqKvd=x#!{) zv7%)1Tb66R+d6KlQS0@HPp&z_HkbJ##i}6ViA}v>b1Gd*#IqSNDIAB48|f^k!h>n> zmPqe@5$^W+c;t<-Ad=Pj0G^!}rQKepN5pBvMQ7$z9g?LOh1u}yx9k+0$p{de@0o5kxpsPeqlHZU^d)fj=<9iEn(0rl0zx4OxVJzh*4`gei-p z!U6LeLu)%DF=7wW*r8R~K91zXNp}tW*UR?obEc3HDH!@9onsbq%3P6QJ=#af`Q~wB zWYuX@oZHXVJy+NE&kcQ_XPPrY^ffv99wKXGwP(YH#B%K-{{@UzuO7RtMmHthC$UX1 z;dG%c(^XeQMFE@8R^VOH@h2&V{QQ9|(rs1`Z@u4&2y`h6G|MM_hW^wY5@4L#M;7PK zk0`UK^v&Yzo)5>9KuS{e1+W_d&C%8)^>g#*C1A=Ic#4wLyZ{- zH3=4?J;s{r1RZ5!AD=n-tZ?$j^|6GW^0+oVY0!9fKhhrGp9=MsX)Cl>R%&t&2qeYG z$q+r}kIFX&>B()|TPaFT6rr8453|NH;EoFh&|TU)_Z{=4b12%Fi3**d!Hs?IWrJt* zc(7V^`S;vLxL28#FA1%f?ZmjfKW2<)ob+>ba;Db3gjrn=#K>UpZZ>w4fpT?YK&YaJ zbZg0DVM5AQ9axkeH?M-7VYQxkFTk)}Mt5~Tt@4Hgi2u|l;zp+Iy98NTjmA<$+1LCW zoBZ~AOYuwTneF>A_^8aE2S+umFFq+64tNyjNl3yeNgPO{+a+`)iqkIm%g42!$I5Z< zNOC7Gx9U>f;i`iLC+4wZ!nD4D&5W}1c(~}Waz6LZPZ;T5BqPX29b`ONFr)!POtISv zHOQ-A*Qw80D3UL;RRkbf+y_Z9EiWxui`ODTGrJh)GSFQPSUC01)GHQB&|@>hqe97F z5kq^EE((uC=*jI2$5XWDAp;;5p_O5{lNDAO5nOdlFB|{lye8Qp+@4(_KtRSMxcE-P zJK{9n4?B+$JaL7-io~MHzlpkLO+_D>`RWhV28g;sBb@x5C`dM2(7tOF z)hGoGos)%J)|yhFGUZUV>q}JlS{MJtFq~wj>w3yR5C$sQy=C!<5ELjQWWAM|BPYxF z5!qCXANCm9B;@u`!qiD8ChMO_wAdx-;QI>1+n$?FnqYY5mo ze8)iFW2!KO=DqfcEd7eu=8T-+jM}}pRnMa^<{qZr7)M+?`(tfbf?Hi%@NoRNZNVL5 zxlU*M_PzIfGk?PYhtzu$;@D}`=Z>)TtK~K&JC$qvVWp|vbdld%D3gfkB!h4NXq zQYuFLx`)jgjgBupa1wHa|`hg*Y>!uG%}JOxx(!GKlY&B!4@M3LmC6>{4V%3^Lx0 z=*}y_N4F(;Ofu_G(CM+>^CDzdRx`84D{1+YgL0B~z2qTVLE=u`pvtTWryW5qVqf|z z1^Y#aMzd zaiHdeY=^SiKt^px`T;s6=I2(Z7Msz?jDA{RBG+oZ9!b?4>7QiD)Cb^`Cgl%PaEIId z92q3t`Hg;KgJt;=yE|eC`id0sP%R4j-XCU-5Nq+!xaxQ_GmYn*+Pj)|?*xkct=WSl zO&|7u8h2QV@#&7}Np7*3UeN4KjmUDlLpQF3|6siflp+r3_LS}RcU1D-`QmGA|5s}- zhn`gfhL*?8hb-*G7>SKy{CCgQ>qt_L7!&(yUEd1L$m{$%Dk!?U2nz+GvsLt|xbHGQ ztuGwLl543>NSGECCw%kU{InFl7o{6}OPE0158MLl^-`eJwuXeF@7u;}cIR|Sblxj! 
zedMsqAdMi-3g0EgqT`TU+8yfTV;=#+;`G=N_f#mIGWjWdbFyJIKbeRVH%|9DSVJ#K z+cAX8@n(=tqb5|?w22Y@ei}uH&&3d$>i)HaI0~f#WJy|jj2Q3rWx^NA(gaAvMQ*sO z%-vOAl@f+t>V|PtS=4NHaFPnf*+Q;jr>Aq0o2L+XV= z+Hk)1ouA+7BoQ(xB4U2;3#ZJoUBBuW{T#?Mi0 zcs>2~XYL38%NcLgN3ab>LgBpZEzt4lI)&<}uxn7cgzpaPN1TQO0^~-sY6teBn+SK_ z>b)58G1E)rLxBkSJFn^u+tODXTIc`>NAz`FqP!ym!}@*aB*o!Mzp5zN%l}LJrJ{xX zB>!Q{8{)K^PZTg*ou=Jne}2~9(nrDA{2(GqVZ%d7xu0U;=i;o^&q*bf*RH%dC1asN zWG9;Jniyb5;orw zlp`RJ5Nj*U7|+9xRtG9q*Xt(Mapx{q6`=4ID@w?T#Z_sdpAdI-RFE(|yp2^+5&F^k zT?le&buY3&{!$oT3C}v3GHaw*N@~>$7elBqB9zGpqQkp@C_rxB0$s^%O-bH-K#Fm9 zNP~Nm)p{KLWX~UEykNL%MMu>MX6K1p{zAloLKMUDRrS>Uw6(7~(r#Zq$Dc((oX4SC zkY_qph>gvzW91qxCK$rDJ#oPttCf>*{${R#SV@}r{!Asl&rPd9MM_<6C7(RO^T0M? zTDqE+qzi5}2M2L5`WQl(Cz9up0w?3LuQ-Tvt1{qsVcl~=PgY+D9y`QmVFKq|uDOU@ znslX>Y=5D2TR*DbjYVzmV_M9{xP~W2pWxewkUxFOggq8)l!%A+A;_o%jfrJxK!trH zSmMXsQjGY~*Gw6rtuoOWiLDgi9%XAnvOj&1`h|wg(BMO9qU!vqp>$T`t5V#s7uu~x z=W!&;ZOzCr&2tmW)$MwMUAzlBt3Ib~u~sBoJRO>apL}6UGiraX2=_U)IGo+fuTsKB zMacs@R6^4okyz2vQ)f8OQIlqH!~|<7L7n!)cMBDhri$9T;#h!!a;-I?iI~SJq_fXN zxu&cv`35IS4!Zr2se&Wx^>I9Li_&rmoH+gx*0v5$g;x9$Za>5=SGuFcn5D0wx}0_pr+B0xSd55h(K&|F@`25s8t*aai6TM zd95=4%kM723A}$R;=$aanCCeWTBiKFHOQHXmF*h1pVpTOO>sth7x9Q!MaSVN;*4+R zGH@H7_h&{u$+$=>*HE4tncKu|NM@WIr0{E3?$>N!A-|f-1j@mFmhrhd^7tx8-H|K)*&MJqZ<4QpWipo(SWj3|BtsfeKU8>=A5hO zcrggwS0)Tl$~D>?u9wx?-(4>KtDz6<^>HlV4L(S_@ZVL(#_CHFxAZxytJM#E9DRCx zb)oJjZ`w1ZYP3jNjxSwVi%xd)7zONBEWeD(_`HBluizTLick2(GE@jXvc)kVrPfQ% z(VH)Q_t5i7AMc8puC^zeHU9SIth74SQs4jb#;(Z^8QH4FQfCe=^du`5k&>=XU6kAY ztmb3Y{8IG2WCrI1#;cukdv0)zAN3 zoB#Oh%-}Yj?DkFetL-I!iM?rHJMpH!%sYvbXTFUWTVCeu60@~@8=-dr@2K}+cttfI zP3s!TAivMsScf!}`ibJjlVzV}}HthEoz*4kpNn1UDpVC~^U=Jo&x zLjVX-R*T>m)&2ty0T2PURt{8wK!9V?)6@R|9smph4+y~cIL{b=01}BO;E@2sIGrJ2 z7y|)A0C*mN#{(V=o(BUkfU$s~<-uTh0Asu{a25EN!0_;3;M70r1i%CI7xIjG43Bi4 z2QJ4;#|gNk2MOo$AmPo}m}HEzlRW;2;mw%DAmIRaDgE6S9*G1PxGlioeZl2<48UV} z2yjnuYdnDfALG(=ZUb?`;e`pN~?$~pF`nb=iRH&%_uuO7X*dg#`gzJJ#AB#L!##9NZY-`tg`NtUR*x3)B8 
z9h$m6H*Nic2ZXyBk_nj`m{}Vlv!z0Eq?sKpD}rZApi~)@setnB;JGTOPy;W>;3Wm{c7j)$ph_E5>i}#w;Ol~#y`Xj< zc)cIg9RT%4puq&ZF$0ZM&}0GL9tQ8MK(jSyu?6q#L8~KZa{}$o;KMP{aU6U+0Xk2D zE>F;X8uXk6pZvh*0Py8+&>I5!E`q*r(9Zw^SHa*lFvJGKH^A4M;M;95k_1MR!DuS@ zo&mZrm4c}XFwFx$F)&jHW*fn;W-!+V<~qUrC$P{Deh-7i?_g;X zEX{!B1w2+joGMup0Lcr7%}pFG+#gH1QNmS~IQY%%$8y2G_hHf-R=>7U@~(W*-d)1J zTxIBy-?(=b9trNQ*ZB`O2dFB`*aIMc;sgP7{eILUP&W|^QjTdB130Zg~94XO%f%VFj{Q2&i6-@ z%Ri~rX2`1a7b~0A`9eeW>!0?1tEua}tl(qzK$*5$PkJ@%YZjn-(SqiTXp`D?f5r@Q zFO!IKt1#j1m)JRV1aYgIk(-mk^@lb03ZvJl#)$7~UH%LmT&1`Dsm zgx4|;nozVX11-f|C4c`PXU9zaZ-;HXizK4fiD#M*c6@GZ>Ab8+5WfRZ#+v^ET3<|( z{MPlD3o8q<`0m(eiX>hbR&4+6<8F1avd;U?6$3&;TnBowhr7?Vh!#FL-*nONbMj}^ zpv{(2%sqso8tvbs9HmD)lE(6CTL#A9@ZUXS>g_-EIR<1C%KJ|7kR~6@f%!LfPtr0X ztT%DubpGtdup($&))Q&SDtz(c4%*qA@;L5}PeS)%5QSNsJ-g}+G!2)%*%G-teG;9^ z=MDtlHz>TRz5zbXOk5dy6@v3#N4_D^gl# zVaCn)LK5^~QS;3o6ZGqLd9R5!al0zjh@rJ^@cf)rZ%bN>J~7fi$C#L_q{u0Ttoz-3 z!oO5%Ix1szwT}f_dzH3ZSuxQUMw`1HG>hoYRW@2sSUlb?J zno)TvZb;1)h|pj{Y3p42=oGnBU9c(j6p7Z&VatripGYaWhwu9#{jo&*yw|Gj%_c^B z>CtozZ0d$@US^{2wU>PxS$6Y6B93t}ZS8J8aALnPQbI@@_^1}|oOg&h6MY-zQHmCY_M#zUg)iJ}N+1twSSpUj|LFZ5fi~G$!RjoT3N@yqgb@0;jt^g*cY1h~ z(kr{lIUqNx7@A+z_KS6@YA0OQBp;bfUa{ z*0Y25C{uA@Pk5Tx)}6Hcw+o!A3(kE9@sPGJNV&Sk%^oc4`51(>(_`H{{787J$Vue)t9qr9c+UO@~DR;hKp1kGu zZYw*aXly@graP-}uFYuwzz3}FL;oe0UlIi}4YkV>n^o6kY>Uj-lp$MnO{Vw71{FX* zhc72fu&CDlU)||$h{w8knJrby!x`dx(C*1!5gN4P=&t~SBHj6u`d6Uf;`6uU&sd-a zYu2MKVumi1q#Ng&!uU$zJU-j4=pb^r<>2HcC0bz&$D7d5jJ}Ah$%vk7T8Ad@zQBMw zjqx+3HB@o>LG(cS2WBx}w8f3TVdrUsL&&?NXED@UT3;u(qwtGc(C$_mTsD1V@d#R< z#g9#JzI%2nRs`+$;7qhcX=!MzN_f$ZH#1+QUC$NWcH!11o5=nw!`S zd#WkoA6p-b$9ZuJO!F%rp0rfNMvN^GxaQDRhlE);dJ9b|CR$9R|AzmiyHnU^IvRDZ zWXNt|gG;^K>D%hN*!-wF#D@7Utk3n07xJ@_e_H*(=ko9xpO?=cOdngk0Vk;Z9fBUB zw`)Xqb@v#jT!!Ato=JadR}q0$?QQ2P@&CpGO}=kO4dTtkM_E)~B^$n;LABI`9|@+a zsKfk#`_g2ZT-;P@ZD=y&r4ye$+B`X3;5+VDYDclL<~y;dM*K#m%)?GgN<`h_c&8c% zP7w*NgcggIHCZzgqlWzC=RP?VM=7!*JGzc*a$dQVK7;1@_2tmA_vVeL1#{5sfY0BQ z=(JB<2X^t%K=eMP{d3`GJ^BF}<(WN_V)Lnh-)BkGxWFl3%2;%{Q0z+j-3rgSep2R$ 
z5*k#prhO;UP~AG+^F`3WBU?rJc2Crl{uCRTtG{ptEtyyOnFzIRP(cm0c3*-Lv8gKE zVKF#5Snns432C4%p+h0jC&J69$n;#VJbB+<70y*?r1tg^9wm?+;X}xRy`0aOEH}S4I2A3~7pln)GCY@pF3xLd(Gym?!2H6W zEEKByd}G7gif;eBADhtbmvFNlV(}q1%Q?VnEXs>bt-g6eh6A}*6{G}qDy7*MeH%Uu zvvi13WZhC%N(3>Hb&7YDuigm7OT}pLb|b0K;+zZc0@rR_<)7&ST36n;^nL==Cek-`%x|&6f>!e9!oA)^*p9yDf5xsKBhzbcrPX>j)Lv84z$}4&YF1Ea2Dg}ezgK0>$bh5w$8*f$xAAJ--LcfT-Kl6xJcqIZH|TeMRc=3= zrMFn7B{&J&iF|7h<5WeUEo%?&Se=hN$$`3B4Yld~fZ2fCVf;R?)6tRr+62oN_?fdr z<7zV?uSo|)uE`eBdO|o;ieqzFgCttn?YgDRi*WSV(`T0O#I@3w)wqq?B!9sY- z_zd6ujqQV#4|>#;s~5lTS?lhC1y;kX%noIarK>S{7|ueYnNEL=c>hUHWp*x$O17bU%{Vg$^`y^mJ7rme%IKyvv}J4^+_N65 z4^ggdPi|d{mE*})uq**jG6urh|88veq9LNWN|kuUka({w0g9{WALF|us85Mw3Vr(U zlDG-BU>gu?PmQCsY4=SyM$m9id4j36iwOxrneo; zJ-3Ah@|+2lUuv-t?oS1#W8Ym^pBW@Y;Myx89q;!SyVh7Dp!#8P;@=Lcw22?Bc=w&;YD%os38G_j}ZK#C30`=wk6gLKAEf3ABoo&|BQT=qW|N0R?^H$=9_l}!gWaa zl48Xkyx*^FYWkYvkN|Gi@s-G=P$fz?2SUDpyc1{ z{SS4_=0;yN_0Gbp8`{~O&4E9tu>)dL9j>PzRcc=g4e3xkKD=S;&MN6R1*Ud)+_kF( z@Vn74oyI$3>#|PVRfrF5GHRX-`C&&OTg&usyBPYT7z)USzJ|nV@1gjOd^y~b9eHH^ z%&hySZb^E@=C!Nb`LD|(AIYcrFJ6jeolvZe-(+C3D8-$@E==!-t%@^yzi{LxzQsj8 zul*gUu(0l0Qu@NH{6CKQ`&-7&kKZ|WP`&-@M?UquMDy3^ZB0{hViQz5_Mmp$ix@1h zczx#Pt$ztz%J*KY3Tyrv7Y-l0Cj7tO7=K0`tcG(#7wbj*#R6v<&vzXJ>s%HYPdsuK)l5 literal 0 HcmV?d00001 diff --git a/docs/features.rst b/docs/features.rst index 653c7bb71..98ee821b1 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -174,6 +174,34 @@ The graph to the right shows the same download but with the new optimized disk c algorithm. It clearly shows an increased utilization, which means higher read hit rates or smaller caches with maintained hit rate. +high performance disk subsystem +------------------------------- + +In some circumstances, the disk cache may not suffice to provide maximum performance. +One such example is high performance seeding, to a large number of peers, over a fast +up-link. In such a case, the amount of RAM may simply not be enough to cache disk +reads. 
When there's not enough RAM to cache disk reads, the disk throughput would +typically degrade to perform as poorly as with no cache at all, with the majority +of the time spent waiting for the disk head to seek. + +To solve this problem, libtorrent sorts read requests by their physical offset on the +disk. They are processed by having the disk read head sweep back and forth over the drive. + +This makes libtorrent very suitable for large scale, high-throughput seeding. + +.. image:: disk_access_no_elevator.png + :width: 49% + +.. image:: disk_access_elevator.png + :width: 49% + +These plots illustrate the physical disk offset for reads over time. The left plot +is of a run where disk operation re-ordering is turned off and the right is when it's +turned on. The right one has a relatively smooth sine wave shape whereas the left +one is more random and involves much longer seeks back and forth over the disk. + +True physical disk offset queries are only supported on newer Linux kernels and Mac OS X. + network buffers --------------- diff --git a/include/libtorrent/disk_io_thread.hpp b/include/libtorrent/disk_io_thread.hpp index cacfd484e..1245558bb 100644 --- a/include/libtorrent/disk_io_thread.hpp +++ b/include/libtorrent/disk_io_thread.hpp @@ -72,6 +72,7 @@ namespace libtorrent , buffer_size(0) , piece(0) , offset(0) + , phys_offset(-1) , priority(0) {} @@ -101,6 +102,7 @@ namespace libtorrent boost::intrusive_ptr storage; // arguments used for read and write int piece, offset; + size_type phys_offset; // used for move_storage and rename_file. 
On errors, this is set // to the error message std::string str; @@ -124,6 +126,20 @@ namespace libtorrent boost::function callback; }; + // returns true if the disk job requires ordering + // some jobs may not be processed until all jobs + // ahead of it in the queue have been processed + // jobs that require this are fence operation + bool is_fence_operation(disk_io_job const& j); + + // returns true if the fundamental operation + // of the given disk job is a read operation + bool is_read_operation(disk_io_job const& j); + + // this is true if the buffer field in the disk_io_job + // points to a disk buffer + bool operation_has_buffer(disk_io_job const& j); + struct cache_status { cache_status() diff --git a/include/libtorrent/file.hpp b/include/libtorrent/file.hpp index 00f3001d9..95e7d1032 100644 --- a/include/libtorrent/file.hpp +++ b/include/libtorrent/file.hpp @@ -157,6 +157,8 @@ namespace libtorrent // belongs to a data-region size_type sparse_end(size_type start) const; + size_type phys_offset(size_type offset); + private: #ifdef TORRENT_WINDOWS diff --git a/include/libtorrent/session_settings.hpp b/include/libtorrent/session_settings.hpp index ce7dfd095..5c7fef306 100644 --- a/include/libtorrent/session_settings.hpp +++ b/include/libtorrent/session_settings.hpp @@ -178,6 +178,7 @@ namespace libtorrent , write_cache_line_size(32) , optimistic_disk_retry(10 * 60) , disable_hash_checks(false) + , allow_reordered_disk_operations(true) , allow_i2p_mixed(false) , max_suggest_pieces(10) {} @@ -626,6 +627,16 @@ namespace libtorrent // disabled_storage) bool disable_hash_checks; + // if this is true, disk read operations may + // be re-ordered based on their physical disk + // read offset. This greatly improves throughput + // when uploading to many peers. This assumes + // a traditional hard drive with a read head + // and spinning platters. 
If your storage medium + // is a solid state drive, this optimization + // doesn't give you any benefits + bool allow_reordered_disk_operations; + // if this is true, i2p torrents are allowed // to also get peers from other sources than // the tracker, and connect to regular IPs, diff --git a/include/libtorrent/storage.hpp b/include/libtorrent/storage.hpp index bd78b0ffa..c291763a4 100644 --- a/include/libtorrent/storage.hpp +++ b/include/libtorrent/storage.hpp @@ -131,6 +131,8 @@ namespace libtorrent // negative return value indicates an error virtual int write(const char* buf, int slot, int offset, int size) = 0; + virtual size_type physical_offset(int slot, int offset) = 0; + // returns the end of the sparse region the slot 'start' // resides in i.e. the next slot with content. If start // is not in a sparse region, start itself is returned @@ -342,6 +344,8 @@ namespace libtorrent , int offset , int num_bufs); + size_type physical_offset(int piece_index, int offset); + // returns the number of pieces left in the // file currently being checked int skip_file() const; diff --git a/parse_disk_access.py b/parse_disk_access.py index 9f3a5f2e2..0c9ff573b 100755 --- a/parse_disk_access.py +++ b/parse_disk_access.py @@ -11,9 +11,9 @@ lines = open(sys.argv[1], 'rb').readlines() keys = ['read', 'write', 'head movement', 'seek per read byte', 'seek per written byte'] colors = ['70e070', 'e07070', '3030f0', '10a010', 'a01010'] -style = ['points pointtype 1', 'points pointtype 2', 'lines', 'lines', 'lines'] +style = ['linespoints', 'points pointtype 2', 'lines', 'lines', 'lines'] axis = ['x1y1', 'x1y1', 'x1y2', 'x1y2', 'x1y2'] -plot = [True, True, False, True, True] +plot = [True, False, False, False, False] out = open('disk_access_log.dat', 'w+') @@ -65,16 +65,16 @@ for l in lines: out.close() out = open('disk_access.gnuplot', 'wb') -print >>out, "set term png size 1200,700" +print >>out, "set term png size 600,300" print >>out, 'set output "disk_access.png"' -print >>out, 
'set xrange [0:*]' -print >>out, 'set y2range [0:*]' +print >>out, 'set xrange [*:*]' +#print >>out, 'set y2range [0:*]' print >>out, 'set xlabel "time (ms)"' print >>out, 'set ylabel "file position"' -print >>out, 'set y2label "bytes / %d second(s)"' % (time / 1000) +#print >>out, 'set y2label "bytes / %d second(s)"' % (time / 1000) print >>out, "set key box" print >>out, "set tics nomirror" -print >>out, "set y2tics 100" +#print >>out, "set y2tics 100" print >>out, 'plot', count = 1 for k in keys: diff --git a/src/disk_io_thread.cpp b/src/disk_io_thread.cpp index a81f6008c..3efb7959d 100644 --- a/src/disk_io_thread.cpp +++ b/src/disk_io_thread.cpp @@ -30,6 +30,10 @@ POSSIBILITY OF SUCH DAMAGE. */ +/* + Disk queue elevator patch by Morten Husveit +*/ + #include "libtorrent/storage.hpp" #include "libtorrent/disk_io_thread.hpp" #include "libtorrent/disk_buffer_holder.hpp" @@ -1292,8 +1296,55 @@ namespace libtorrent m_ios.post(bind(handler, ret, j)); } + enum action_flags_t + { + read_operation = 1 + , fence_operation = 2 + , buffer_operation = 4 + }; + + static const uint8_t action_flags[] = + { + read_operation + buffer_operation // read + , buffer_operation // write + , 0 // hash + , fence_operation // move_storage + , fence_operation // release_files + , fence_operation // delete_files + , fence_operation // check_fastresume + , read_operation // check_files + , fence_operation // save_resume_data + , fence_operation // rename_file + , fence_operation // abort_thread + , fence_operation // clear_read_cache + , fence_operation // abort_torrent + , 0 // update_settings + , read_operation // read_and_hash + }; + + bool is_fence_operation(disk_io_job const& j) + { + TORRENT_ASSERT(j.action >= 0 && j.action < sizeof(action_flags)); + return action_flags[j.action] & fence_operation; + } + + bool is_read_operation(disk_io_job const& j) + { + TORRENT_ASSERT(j.action >= 0 && j.action < sizeof(action_flags)); + return action_flags[j.action] & read_operation; + } + + bool 
operation_has_buffer(disk_io_job const& j) + { + TORRENT_ASSERT(j.action >= 0 && j.action < sizeof(action_flags)); + return action_flags[j.action] & buffer_operation; + } + void disk_io_thread::operator()() { + size_type elevator_position = 0; + int elevator_direction = 1; + for (;;) { #ifdef TORRENT_DISK_STATS @@ -1330,19 +1381,88 @@ namespace libtorrent return; } + std::list::iterator selected_job = m_jobs.begin(); + + if (m_settings.allow_reordered_disk_operations + && is_read_operation(*selected_job)) + { + // Before reading the current block, read any + // blocks between the read head and the queued + // block, elevator style + + std::list::iterator best_job, i; + size_type score, best_score = (size_type) -1; + + for (;;) + { + for (i = m_jobs.begin(); i != m_jobs.end(); ++i) + { + // ignore fence_operations + if (is_fence_operation(*i)) + continue; + + // always prioritize all disk-I/O jobs + // that are not read operations + if (!is_read_operation(*i)) + { + best_job = i; + best_score = 0; + break; + } + + // we only need to query for physical offset + // for read operations, since those are + // the only ones we re-order + if (i->phys_offset == -1) + i->phys_offset = i->storage->physical_offset(i->piece, i->offset); + + if (elevator_direction > 0) + { + score = i->phys_offset - elevator_position; + if (i->phys_offset >= elevator_position + && (score < best_score + || best_score == (size_type)-1)) + { + best_score = score; + best_job = i; + } + } + else + { + score = elevator_position - i->phys_offset; + if (i->phys_offset <= elevator_position + && (score < best_score + || best_score == (size_type)-1)) + { + best_score = score; + best_job = i; + } + } + } + + if (best_score != (size_type) -1) + break; + + elevator_direction = -elevator_direction; + } + + selected_job = best_job; + // only update the elevator position for read jobs + if (is_read_operation(*selected_job)) + elevator_position = selected_job->phys_offset; + } + // if there's a buffer in this 
job, it will be freed // when this holder is destructed, unless it has been // released. disk_buffer_holder holder(*this - , m_jobs.front().action != disk_io_job::check_fastresume - && m_jobs.front().action != disk_io_job::update_settings - ? m_jobs.front().buffer : 0); + , operation_has_buffer(*selected_job) ? selected_job->buffer : 0); boost::function handler; - handler.swap(m_jobs.front().callback); + handler.swap(selected_job->callback); - disk_io_job j = m_jobs.front(); - m_jobs.pop_front(); + disk_io_job j = *selected_job; + m_jobs.erase(selected_job); if (j.action == disk_io_job::write) { TORRENT_ASSERT(m_queue_buffer_size >= j.buffer_size); diff --git a/src/file.cpp b/src/file.cpp index 66b796a55..fe12f379e 100644 --- a/src/file.cpp +++ b/src/file.cpp @@ -30,6 +30,10 @@ POSSIBILITY OF SUCH DAMAGE. */ +/* + Physical file offset patch by Morten Husveit +*/ + #include "libtorrent/pch.hpp" #include "libtorrent/config.hpp" #include "libtorrent/alloca.hpp" @@ -51,6 +55,12 @@ POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#ifdef HAVE_FIEMAP +#include +#include +#endif + +#include // for F_LOG2PHYS #include // make sure the _FILE_OFFSET_BITS define worked @@ -760,6 +770,48 @@ namespace libtorrent #endif // TORRENT_WINDOWS } + size_type file::phys_offset(size_type offset) + { +#ifdef HAVE_FIEMAP + // for documentation of this feature + // http://lwn.net/Articles/297696/ + struct + { + struct fiemap fiemap; + struct fiemap_extent extent; + } fm; + + memset(&fm, 0, sizeof(fm)); + fm.fiemap.fm_start = offset; + fm.fiemap.fm_length = size_alignment(); + // this sounds expensive + fm.fiemap.fm_flags = FIEMAP_FLAG_SYNC; + fm.fiemap.fm_extent_count = 1; + + if (ioctl(m_fd, FS_IOC_FIEMAP, &fm) == -1) + return 0; + + if (fm.fiemap.fm_extents[0].fe_flags & FIEMAP_EXTENT_UNKNOWN) + return 0; + + // the returned extent is not guaranteed to start + // at the requested offset, adjust for that in + // case they differ + return fm.fiemap.fm_extents[0].fe_physical + (offset - fm.fiemap.fm_extents[0].fe_logical); + +#elif defined F_LOG2PHYS + // for documentation of this feature + // http://developer.apple.com/mac/library/documentation/Darwin/Reference/ManPages/man2/fcntl.2.html + + log2phys l; + size_type ret = lseek(m_fd, offset, SEEK_SET); + if (ret < 0) return 0; + if (fcntl(m_fd, F_LOG2PHYS, &l) != -1) + return l.l2p_devoffset; +#endif + return 0; + } + bool file::set_size(size_type s, error_code& ec) { TORRENT_ASSERT(is_open()); diff --git a/src/storage.cpp b/src/storage.cpp index 634b6a8bd..371612653 100644 --- a/src/storage.cpp +++ b/src/storage.cpp @@ -294,7 +294,7 @@ namespace libtorrent int ret = 0; for (file::iovec_t const* i = bufs, *end(bufs + num_bufs); i < end; ++i) { - int r = write((char const*)i->iov_base, slot, offset, i->iov_len); + int r = read((char*)i->iov_base, slot, offset, i->iov_len); offset += i->iov_len; if (r == -1) return -1; ret += r; @@ -308,7 +308,7 @@ namespace libtorrent int ret = 0; for (file::iovec_t const* i = bufs, *end(bufs + 
num_bufs); i < end; ++i) { - int r = read((char*)i->iov_base, slot, offset, i->iov_len); + int r = write((char const*)i->iov_base, slot, offset, i->iov_len); offset += i->iov_len; if (r == -1) return -1; ret += r; @@ -414,6 +414,7 @@ namespace libtorrent int sparse_end(int start) const; int readv(file::iovec_t const* bufs, int slot, int offset, int num_bufs); int writev(file::iovec_t const* buf, int slot, int offset, int num_bufs); + size_type physical_offset(int slot, int offset); bool move_slot(int src_slot, int dst_slot); bool swap_slots(int slot1, int slot2); bool swap_slots3(int slot1, int slot2, int slot3); @@ -1181,7 +1182,7 @@ ret: if (pool) { pool->m_disk_access_log << log_time() << " write " - << (size_type(slot) * m_files.piece_length() + offset) << std::endl; + << physical_offset(slot, offset) << std::endl; } #endif fileop op = { &file::writev, &storage::write_unaligned @@ -1191,14 +1192,48 @@ ret: if (pool) { pool->m_disk_access_log << log_time() << " write_end " - << (size_type(slot) * m_files.piece_length() + offset + ret) << std::endl; + << (physical_offset(slot, offset) + ret) << std::endl; } return ret; #else - return readwritev(bufs, slot, offset, num_bufs, op); + return readwritev(bufs, slot, offset, num_bufs, op); #endif } + size_type storage::physical_offset(int slot, int offset) + { + TORRENT_ASSERT(slot >= 0); + TORRENT_ASSERT(slot < m_files.num_pieces()); + TORRENT_ASSERT(offset >= 0); + + // find the file and file offset + size_type tor_off = size_type(slot) + * files().piece_length() + offset; + file_storage::iterator file_iter = files().file_at_offset(tor_off); + + size_type file_offset = tor_off - file_iter->offset; + TORRENT_ASSERT(file_offset >= 0); + + fs::path p(m_save_path / file_iter->path); + error_code ec; + + // open the file read only to avoid re-opening + // it in case it's already opened in read-only mode + boost::shared_ptr f = m_pool.open_file( + this, p, file::read_only, ec); + + size_type ret = 0; + if (f && !ec) ret = 
f->phys_offset(file_offset); + + if (ret == 0) + { + // this means we don't support true physical offset + // just make something up + return size_type(slot) * files().piece_length() + offset; + } + return ret; + } + int storage::readv(file::iovec_t const* bufs, int slot, int offset , int num_bufs) { @@ -1207,17 +1242,21 @@ ret: if (pool) { pool->m_disk_access_log << log_time() << " read " - << (size_type(slot) * m_files.piece_length() + offset) << std::endl; + << physical_offset(slot, offset) << std::endl; } #endif fileop op = { &file::readv, &storage::read_unaligned , m_settings ? settings().disk_io_read_mode : 0, file::read_only }; +#ifdef TORRENT_SIMULATE_SLOW_READ + boost::thread::sleep(boost::get_system_time() + + boost::posix_time::milliseconds(1000)); +#endif #ifdef TORRENT_DISK_STATS int ret = readwritev(bufs, slot, offset, num_bufs, op); if (pool) { pool->m_disk_access_log << log_time() << " read_end " - << (size_type(slot) * m_files.piece_length() + offset + ret) << std::endl; + << (physical_offset(slot, offset) + ret) << std::endl; } return ret; #else @@ -1467,6 +1506,7 @@ ret: bool move_storage(fs::path save_path) { return true; } int read(char* buf, int slot, int offset, int size) { return size; } int write(char const* buf, int slot, int offset, int size) { return size; } + size_type physical_offset(int slot, int offset) { return 0; } int readv(file::iovec_t const* bufs, int slot, int offset, int num_bufs) { #ifdef TORRENT_DISK_STATS @@ -1474,7 +1514,7 @@ ret: if (pool) { pool->m_disk_access_log << log_time() << " read " - << (size_type(slot) * m_piece_size + offset) << std::endl; + << physical_offset(slot, offset) << std::endl; } #endif int ret = 0; @@ -1484,7 +1524,7 @@ ret: if (pool) { pool->m_disk_access_log << log_time() << " read_end " - << (size_type(slot) * m_piece_size + offset + ret) << std::endl; + << (physical_offset(slot, offset) + ret) << std::endl; } #endif return ret; @@ -1496,7 +1536,7 @@ ret: if (pool) { pool->m_disk_access_log << 
log_time() << " write " - << (size_type(slot) * m_piece_size + offset) << std::endl; + << physical_offset(slot, offset) << std::endl; } #endif int ret = 0; @@ -1506,7 +1546,7 @@ ret: if (pool) { pool->m_disk_access_log << log_time() << " write_end " - << (size_type(slot) * m_piece_size + offset + ret) << std::endl; + << (physical_offset(slot, offset) + ret) << std::endl; } #endif return ret; @@ -1921,6 +1961,20 @@ ret: return ret; } + size_type piece_manager::physical_offset( + int piece_index + , int offset) + { + TORRENT_ASSERT(offset >= 0); + TORRENT_ASSERT(piece_index >= 0 && piece_index < m_files.num_pieces()); + + int slot = slot_for(piece_index); + // we may not have a slot for this piece yet. + // assume there is no re-mapping of slots + if (slot < 0) slot = piece_index; + return m_storage->physical_offset(slot, offset); + } + int piece_manager::identify_data( sha1_hash const& large_hash , sha1_hash const& small_hash diff --git a/test/test_storage.cpp b/test/test_storage.cpp index a2a705861..56af1e444 100644 --- a/test/test_storage.cpp +++ b/test/test_storage.cpp @@ -121,6 +121,170 @@ void print_error(int ret, boost::scoped_ptr const& s) << std::endl; } +int bufs_size(file::iovec_t const* bufs, int num_bufs); + +// simulate a very slow first read +struct test_storage : storage_interface +{ + test_storage() {} + + virtual bool initialize(bool allocate_files) { return true; } + virtual bool has_any_file() { return true; } + + int write( + const char* buf + , int slot + , int offset + , int size) + { + return size; + } + + int read( + char* buf + , int slot + , int offset + , int size) + { + if (slot == 0 || slot == 5999) + { + boost::thread::sleep(boost::get_system_time() + + boost::posix_time::seconds(2)); + std::cerr << "--- starting ---\n" << std::endl; + } + return size; + } + + size_type physical_offset(int slot, int offset) + { return slot * 16 * 1024 + offset; } + + virtual int sparse_end(int start) const + { return start; } + + virtual bool 
move_storage(fs::path save_path) + { return false; } + + virtual bool verify_resume_data(lazy_entry const& rd, error_code& error) + { return false; } + + virtual bool write_resume_data(entry& rd) const + { return false; } + + virtual bool move_slot(int src_slot, int dst_slot) + { return false; } + + virtual bool swap_slots(int slot1, int slot2) + { return false; } + + virtual bool swap_slots3(int slot1, int slot2, int slot3) + { return false; } + + virtual bool release_files() { return false; } + + virtual bool rename_file(int index, std::string const& new_filename) + { return false; } + + virtual bool delete_files() { return false; } + + virtual ~test_storage() {} +}; + +storage_interface* create_test_storage(file_storage const& fs + , file_storage const* mapped, fs::path const& path, file_pool& fp) +{ + return new test_storage; +} + +void nop() {} + +int job_counter = 0; + +void callback_up(int ret, disk_io_job const& j) +{ + static int last_job = 0; + TEST_CHECK(last_job <= j.piece); + last_job = j.piece; + std::cerr << "completed job #" << j.piece << std::endl; + --job_counter; +} + +void callback_down(int ret, disk_io_job const& j) +{ + static int last_job = 6000; + TEST_CHECK(last_job >= j.piece); + last_job = j.piece; + std::cerr << "completed job #" << j.piece << std::endl; + --job_counter; +} + +void add_job_up(disk_io_thread& dio, int piece, boost::intrusive_ptr& pm) +{ + disk_io_job j; + j.action = disk_io_job::read; + j.storage = pm; + j.piece = piece; + ++job_counter; + dio.add_job(j, boost::bind(&callback_up, _1, _2)); +} + +void add_job_down(disk_io_thread& dio, int piece, boost::intrusive_ptr& pm) +{ + disk_io_job j; + j.action = disk_io_job::read; + j.storage = pm; + j.piece = piece; + ++job_counter; + dio.add_job(j, boost::bind(&callback_down, _1, _2)); +} + +void run_elevator_test() +{ + io_service ios; + file_pool fp; + boost::intrusive_ptr ti = ::create_torrent(0, 16, 6000); + + { + disk_io_thread dio(ios, &nop); + boost::intrusive_ptr pm(new 
piece_manager(boost::shared_ptr(), ti, "" + , fp, dio, &create_test_storage, storage_mode_sparse)); + + // test the elevator going up + add_job_up(dio, 0, pm); + + uint32_t p = 1234513; + for (int i = 0; i < 100; ++i) + { + p *= 123; + int job = (p % 5999) + 1; + std::cerr << "starting job #" << job << std::endl; + add_job_up(dio, job, pm); + } + + for (int i = 0; i < 101; ++i) + ios.run_one(); + + TEST_CHECK(job_counter == 0); + + // test the elevator going down + add_job_down(dio, 5999, pm); + + for (int i = 0; i < 100; ++i) + { + p *= 123; + int job = (p % 5999) + 1; + std::cerr << "starting job #" << job << std::endl; + add_job_down(dio, job, pm); + } + + for (int i = 0; i < 101; ++i) + ios.run_one(); + + TEST_CHECK(job_counter == 0); + + dio.join(); + } +} + void run_storage_tests(boost::intrusive_ptr info , file_storage& fs , path const& test_path @@ -657,6 +821,9 @@ void test_rename_file_in_fastresume(path const& test_path) int test_main() { + + run_elevator_test(); + // initialize test pieces for (char* p = piece0, *end(piece0 + piece_size); p < end; ++p) *p = rand(); diff --git a/test/test_transfer.cpp b/test/test_transfer.cpp index 2b4eb65dd..a1d054aa9 100644 --- a/test/test_transfer.cpp +++ b/test/test_transfer.cpp @@ -147,6 +147,9 @@ struct test_storage : storage_interface return ret; } + virtual size_type physical_offset(int piece_index, int offset) + { return m_lower_layer->physical_offset(piece_index, offset); } + virtual int read(char* buf, int slot, int offset, int size) { return m_lower_layer->read(buf, slot, offset, size); }