From c287091159009bab7802f089a34b5e900b58dab4 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Sat, 7 Jun 2025 19:50:06 -0400 Subject: [PATCH] feat: use BMI2 instructions --- Cargo.toml | 3 ++ doc/h1.png | Bin 1631 -> 1637 bytes doc/h2.png | Bin 1676 -> 1672 bytes doc/h3.png | Bin 1834 -> 1819 bytes doc/h4.png | Bin 2519 -> 2414 bytes doc/h5.png | Bin 5144 -> 4724 bytes doc/h6.png | Bin 14821 -> 13170 bytes src/lib.rs | 94 +++++++++++++++++++++++++++++++++++++++-------------- 8 files changed, 73 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d80fe5d..3ad7fb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,9 @@ license = "MIT" categories = ["algorithms"] repository = "https://github.com/becheran/fast-hilbert" +[dependencies] +bitintr = "0.3.0" + [dev-dependencies] image = "0.25.6" criterion = "0.5.1" diff --git a/doc/h1.png b/doc/h1.png index d84954b5bbaf6996da4ce22a52b4973169d6fd5e..72776220aa9a515d796a03eede8785a4c36510df 100644 GIT binary patch delta 83 zcmV-Z0IdJt4CM@vBmpduCG{4f@}JB$S6%h*Ac_AhRQ~fx;;O6uC$rH3xdW4c0i_Io pKHFS%)xU`({u3dSaRRj#_(wyo3IXwMDqR2o002ovPDHLkV1krwB+vi= delta 80 zcmV-W0I&b$4BrfpBmpLoCG`^`_)lh=tFHPVir}iN{s*(oe4TuepBmqy6C8QCrlf;XP%Dux7UEyYHdkHs zKbRz5R8;=;ByrVM{})iP-KPN~x$3I_zo`7{+2;Q)N&MesoB!)1@uH&guP2GC{@0Sk zRnJ7S0fYgQ3;`$(Li{V)=Blgyr;^0K0^*Ya0aq4(1lls=v&Qqc1J%#?4 z-+ZH?;`YH-Q&@J7SBKNKxx0p z8=12wZ(#IR6wsfNyYAQZ2}kPHZpVCGuhI!o9RAl|sdM$e%wzQ9yF*?^JT z@aOa!&+KQa{F{99+5Q$VWj`Orn`f`}U&!;%e1*>c!YUu@TUGv92O{g{5?74VxE)h^ z^?%W%ihnEqc&ij%4gGJ9qJ~xflm5oD^+uk5)Hk24=k)xOz6nf0csBJR|5PXauy=&` zh0F6#`D8=ZJqk{H0=DgZ74Izce`Zec)%}xHAWn(-YTv1{?pMF##EYvZ7qAHF$0yDH zpEBu(f7eaRWGkZMx&vN_E^T|j4b8h>2 zK0R9Ky!_;gOr?|0GCC>#7rOoDefsSG+Bu)+`wRX5ne+KR=g0rext=j!7j4w-{QrA$ zBJ;+{3s`u}_5=Vmt9Smdflxl%e(pbH_Fq-yb^l$WSyxE=FVY4h38|As66luteqs&53eLcVQsB3tsr2O(PZow@%e1GUf0 zUH9wygd_DvxBr+g{^SG}`RDENXq9+xE{NVQ40XDPp7<%V|KT^D+0R$`XMFS7e*v%; zKf6DTSL*!#SLNgU^Jf2ls(h?(Qu+7!<}=pGjx2jr7k#>a7Ubr_tKG-IZol7o>OaGz zAO2~xSN}5w>N#Wf|18YMLVvt-KG!$p{`22(_P^-1pYL0COkTifEcU~F>(lCIOn>;# PGXR07tDnm{r-UW|&mYs* diff --git a/doc/h4.png b/doc/h4.png index 58fb10b39307f2bc6b767ed1cce740e518ae3ce5..866c1394c904dd4bdb815b57f0b7815fa4448dfe 100644 GIT binary patch literal 2414 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K58911L)MWvCLk0#;Q%@JikP61P2bdSAL{2;Q z@qhWd-V&hTOd21FYyAojQA{xgLI&OcLNfGNTPY<=SFdR|x{K6iV{IvIN~ zfx@nu`VMWaV9Y7LTCV|e0hopa&$?gll{;7eE2h4Mu1`@y2AV{GQ3Fg@|E0G5ygmVz zY#?ILw6-&_@}SDc`9L)@u~Bh2;{lv*iH|mfa>Y1?+kg0#!SU2S5$q^Lin_C6PXZ+I z%=Ua#Z|wQU-yM=e$gv4$Ad3U@j}$oFq6f~8>l<(-uX)l_#es<$YcK%?-$S!9H68kh zb;DU;DIu@a2}!^}cDx&{!-LpDsCeMOIXruT7*lv{tV&@oO<5df3m^lB(NlhmW)_~2`G3Ur7FN>Gme@A zhw>xb3A0!Ka|YKXU>Y1eg;(q4J!8Jkr-6mQvL7jAQ0ghvJcUv)o$o!xt=9S9XVQ=J zDp;uRIOBn|&_JqoxjzD{T3~Gjs$K1S!1V|sML}{Itoj0#KvqavsTFE_5|Bz`)WE@0 zpFv7Stic2ntjF23z>z?R@RxvoLoTS*!sTfr-+|F&!C~${K~5XCFP+zqrU2Vf44$rj JF6*2Ung9`7np*$> literal 2519 zcmcImT}V?=9KV~$N*&625LO`SO`!-;(;j+QnqODNk5DWuXK0FT5*n42a6v)2=%Q^X z$iRs~ih>^ri(;|DVJgZVM4arw7&t*tm?Ly?-RYcj*SfhT*N4I2jJyBN`Jeyq>oOH( zh0-W_6hRPDswnRcL5Sd25phlom$f)WIzgNtrt)&CIz)S4BSy%3g9~o(VqdekGS*(w zFxBcX`9CfF!-K`s!x@=L``+gtAHRCvdsDSO-M(H8=*myK6TmWl|JgcFR!x201!-4m z9{dFH1Cvv~K;Lq8%`wQ|o|yE3(F3vFrKDtO84fm8Z|z&j21}$nX>OIypv;SEkFga0 zq$+ZHM-IeAPUUbn+cP*yvaaI zDOP27DKxD<{iaKKTL$Ftd4mBon!Fottxw+_bWu))hV*5@RuUP|2s=$?XsLsR7f=kr zPVo(FlHfV;PZKb<;BsI0KU3h&%x^M)Qj@nvXQky1_A2Gv>RYBSotUhDrZ$p^AA%R7 zoakS~>97T#e=5pc`O9w*(4}mkx4USWgMGp^cbLm2JgSw|97|@eF+e4RG-o`W$1ykp zj2?GMgb!vCq8jARPQi90N5W!Vhj!D2Aq$Mv&qcTe9MDcTpT%cvC{#IUQ+pIlDQ_%XA#YmM5Ly6fL9J8zZ5%3n4in<zHgT_^!f-DKNk#6Gx&%6AL1J?uUl!OUKbOg`4^e5;=r&;L> z4&710yovN}%|HpJt4o{~c^E9Jj=&y-{6@~{3pO)=?H<`UXPU@^GNHLr2(9Q8>`6gv z;wJ1jVB~Ho*wZ>I1m}(r5pXf3c*F53mE+Y~r8gGJkRsiHA$_S@D-egtDq(`4Vf1Rb zSj3USdmtJ&69G|uhw?;87)ZzjE+`L2g^E%KNmx(ya=a5?rUmS=p zm4Oy)V>qw1R!a|t+*p9K69*H{RBt%gPsd@x(J}v7{wATKANyKD0d}+4WIXkVa8M}{ qzSpEU*ya!aSzad!NKj1gqryGU4|C_;TQTs3jiB<&@}_d@yM6=Hw&*qh diff --git a/doc/h5.png b/doc/h5.png index 2b206ced3fa4710b6c54d97d3324340931b9d8e8..1d53e0a717aa4ad16ddc2e5193afc59098d9d1cb 100644 GIT binary patch literal 4724 zcmcIoU1(ER9KUsrX{DsgOId}+w$~PX39DOR7?$-rca>cNkekb5BX(<-v=IPlKjsP61sXdR6%roO$OYW64Io34Aq9KB_;o@9d~!-p8o z-Wt2f2;PPTUlp6Q52ZFZ464EYHqK=#s`QB5-nkBWE|o#1|&;srcH`3;8TFJ!W*2P0n!n zLsAo_{RjR<-ST;!h!0nMSu|3IrxU#ja6t<`0|xHIP)f{wiN-iEtb7U31!!b+)8A1K z(X*VMh>Y$wqk3WOw{dC)7#{MF&x)bRzm$1Z>bM2)orcFoHEi<2P%8X68=M@L&XHF| zX2EOQ5e=}?aUp`<7S`N-X7qHge)-7%2%mi%KS*I^s4h#F5?rv14Uewc9fEg)i@Q59 zD9Wq^Jl4wv_b(v97W@L1cKnK{o#0~WD!mvQfUAjDCL(-2Cms><2fK=88t-9uEchC- zB!jY|F;U9O62o`TF;OWiBeho{_Q|)yk}!)rg4eZhW7RRk2FDgbOy;C}TznSoD*$?Q zk-|YuBf50%QYs{qsmR@pY#>jhh>9}9^3vUcC5H%2r`g(;ongkEE1{YA7GJ=Fqyt0< z(KZY~+SG(qQGX%x>wz=w7-OZKT>j8tfWA<6F}vyik(paIpnFE|p%imkRB^V2Kg|;! zz|Y-bjuPENE?Ij9xj1hrn?|%(X_}i!O{tu@X>Qnh7Wk-Ci89j{Bk6s85)g4}gcp#S ziCH*K6I?zsupm@JuLx^4?rXK~w zdj_ny1H2aJn*NM&;dJx5e!=*_m+e{0l><%2g**ibNY)-#K>{kVq0%=Ndx_QJxYJ6q z51dDTL5i0o9tb|?A^4~UOMsSTExU}uqmC5Q3II{DEvToQpwJvM)#>b-nF4V zXZ*ECEBt1HpS96{-mn}0iaJ0qA6|ZQrGK2P2Cp(7E7_eg8HbV#EX8nni*Y$qe b;WvJDsd}@=^7JD7n#6G3uC7qWyQlsGJB{bJ literal 5144 zcmbVQe@K*f96qa&)TFu%WD?o*GH{5@$}N&zHqEZ0AH#@kO$(j1b=M!pme(I$v@N?i zF;6n&hW&yq64a7i+X`$X{g6Z=BIO7{NFk^=kC-=Z_wM<8KJWY9d!9eef#ALOzVE%? z@AG*+&+~j<_l|8PDGSmTm`tV=+oqyVOr|7!Ofn_U!M{g~t?!#mZw=Xs3QKE}Ufh~{ z-tuYloq%%WW&OPezgBOlXm|L1>fcvCwKi|=Xw6%d@iO%7^{X zKb~Jvx-akf=CM*<+WzNrB{zHb7q6AnGi~kvD0RKT&K&n>rPsB>R_Av_VgUS*4OQ^wAMNzXDopNd$8kO_vk*uE#(`Z6=%6qTO4I3+s%9S z;G9jv#TDMv_2%3>|4oyprg3m2)Ywz^AV2rGrL5BDOtJ(nUhP}B+;B^Hpt+}OrlsOH z>#+uVFuZ9vbhWSU4%X;%9y5I*y~klO3PMz#v7Kq75sa0 zy2mxo7QSH{wl-NJYb=3RS#BlSGrqkr9-?^q5hAjmLMD}p(>|3qM8&muwoN0^GDZ{Z z6R51GXxC+y($-r30FZ7Bj=XWYr*12e7fe(6ir)pAAg4-RkP{_xGJ{YU)EpEYKRW~W z1R<#{6_R>^mm;7!L2qV-jyQD@!ba4vSmPfJlsQ zGTMeO;5MOq3g#!m#hVA1UqgLbv!OkxWf@m=05!!*fB4`4VHK=kj&XgdD_=1ymAvfS zJeGs`jXlB6K0>sCdRwI-I43tMX`~P;6Y+E1UIyknM!0xGf-HbY&~m70(x55XJPXQrS3@3v>f|#hV-`IR;;6G! z7L{Omp=Rgq=PMsUn6DAB>2U&}l!b||Gjppibp6uU#l{|2mMvT&&A}X?ofh-m;4F1U zMiUh^wZ#%yWg8y)p?3;FaB=@_aBV+KL;1|NB4G78Y~d%@a1^j_86KR{TMbVigW*LzRNPN!K-HUE>tb%n zy)=dr=^h$_`0A{Kd6UaQ7|8|M$%W!{m41t}F(DV2XsjqHwIP^{GLm{&*$PNEW1W?h zm|Q@Zgo-JLSjuR12Q)UQhQYGV#X~1$T$sLxwVEx7WPi$La|en zKZcNK%Cbtnd1<13tOwVLY?HW)C>hPRQ(SU(rxVMeXuv`yxq+--+FDhLOi6vyT!Q=) zAL$LUBaKDGKp9>0{FITPdK5yXp(EfMbvT3$9piduxVstJ)?f9xS-Fl#uGk@W<+Ta9 zfy5tVG8qDhgFIKL6M;A-+F^nK6K0w0oK=X($d}IqVXT8~UV_cAP zA(J2q$B>U2^u=Wowe&(WjY$tRdEoe)$tjo?y^(?wxfZOqd!agU#g)gH%_-+C;_MZZ z(8QEDYc0@UDY@86bZE(pGoFZqv>&u z1oAM|tNG19bac=c5<@$Q=U4~|Ap>sf+(i0^5ChAU>dF2^r`Kv3vSRP z_)u7cC1Dqd9p@SxmZlua5R&2nRbt1pwO4wHBP1d{=2n~KSNkuGK?L-5s)k8lX=!~{ zU(Lp8)%jdJb;bkTSLO9UL>zRcgIT#Kf~LVfVWdQSyj_q_lq)F~bHe1Y_T=x=y?=Af a+wVX6VePFSbMeN{WGmiQ)V^WwiT?l#GA1Gb diff --git a/doc/h6.png b/doc/h6.png index 85fe122377c72fe2379f342439cd4c2f1213cf61..f0c97bd0baa91a1ac6053c4d9371222afd0df85d 100644 GIT binary patch literal 13170 zcmeHOe@InV9KX+mehjf*Y(fL8*PTLxWY&b(WA;PWyVKbZ64`RuSgo~`sMyjmv^G|I zIZ3aSIEO~TiUJJ+%MrX_kI^cT5mCcnAXFesc$1Nq&%5tA=br2LoO|D|fA)%95Z?FQ z_wHBk`JV6h`}ur7$Ka-l4O#OR%~KR5OAC~2Q4|lD9%W7jyxf^z{Dz{eT&-wzI>iegDjTncn>dB8=L&%%4c8xbnLCYcl!?9 ztu&f84~$+gLg%YLziZTN?frSwC|F+g;Xg*s$;Ugpo@pb6!~VnSipztu0;2g~qqpOa zM;>*5XI<_Zo9XFqP&8#n*X-6zMKTXNec$rZs9(KZl)WT)t9D$G%qQ(OU+6dZ7oL2) zG)R6nc!y4?-HSDUv0k66*Egz>sldo&TX=j~!jIwB_2_Q6)sOFnk7u8}*!$FYDe`0O zctKluSAW9-vMy6)WF59=lE1hhL#zIzt0|}|mj;{Hc$NE)N|(%~CahN}eKK_CwK?R@ z>DP4SI=K@bDCdaU9lSZ-8g4H48jJM$Hf^M4J+cZ4wW+-Ua?NfXxmj5iHA){|xHxT` z`P*N6u+t1(WB)*pWJcBf#luz_tbc-!A@Ij|>{;L&tk|=Ck!|j7D~yZZBfGcw2eLd(tH|RTnYuDOmEQ{C zojj38wjz87;#kZh)3ivNI@cEFFL~Cx{4+SE{Mp0R^mcJ4lcxu#u(YsGV3lDLDcXaf zq9^Ulq*pWVliZ?Ff#2*=Jv42&NgYTDl7!o5T82W>d2BaiHZx(9&I&M-?K;?aBvGs+ z%BFcVlSBzVPMl-kpgp?IpD*N&;U#MV833_>+<~~%_{c`_(K50ikU`9(vo5$BazKsv z+QXrv>h_)b#JyHzvGgL>Y*y9jSYb35nJoRxdZXCMrW{5wqE;AFbK7ALBZ_Fc!r12{ znG$MIuNN7^v?%UdW=0^?u1+7)!^Q{t#Gl7e_=OVX&}g8RpzeZ76us)$U3f>ZN>plY zd0m2PoV|pVC}gIqM4^7+cA6Kh(~PV}-36+f@hR!`#$46YKe>^@0jmQ+F>cDx7?Lqo z=@T251^G+)%)1iS-dTxi$n75}x4|!~vx{&Fl^FuE0G05@joJV=Eok77f^pn(ltwF| z4Y2*ED6N^6ns4hX`X@U`L9(!xh0<8336+>AjddCV*hnd(od(~@@E!ChhSFf6phpdr zJOqeQnip)!(yrZ5BYjJemD8zCze%sztu?X2i0T|SVZBl8BvTF}OAuQaxi1bYpt;6z zyTSz9D2&KVQy2$EL&u@LR7G?2@XE~%x%$=7&?&X|>Ask2iS{lx#l0($4Z4;#WMo@Q z?hDbMIBt1L@ZKa+qcp80-q=7t+>~JT?nO(98G^z{3~W_sA&2Xx7g7kefp7}=4uLW~ zyjT^tsMB`>Bm4!xnfY#^Fbn8DR<{CE5|OBg0E^;JG#~(=ZbD=mh=S%PFrDPxMokv< z7d|*+^8hJbuhWYdN_B4biOY7OkU9FY-;WslB=NJL`Epjtxn z&GAcvyCkgxDIgAD3C)d#u`r+@1c>MepAMJN_qi#;Hz-O--*Es}$gPXBIO5_si%Dk) zE<4~VSZ{=pWmAUBlX6>}iJfp*pnT``j76C>j&Y_ca>#Ut| z@^7O16a`92-gFQsC4uZKlMB#2`W;BRn4{Ftk-ey>fY1bUE+A>3Mp83rb`vSz3gVlo zv&fo?O~yph=Db*8Qoj1Dz#MT6E$UCV_Oo8-nk{95=FZMEm24d5lvpAxG&h{+!Q;F=+^BtNXNJF z_!(~RcA8`dGZZfK?n0*cmUK@w7jo8ucOaO|5CMjX#69nLjXi3Fzi8IGpl~u~=$E1n z`9Tn8b4V{Gs29u#6Al3Ov22sF>3*)@e*x#u8VcZp1zSFw%M6pUrg^o2J5)exkV8 zte0bjkuNeo76Ir4!iZugn{pVDEP}$AqT5}zFrtX2D~wgqD$-sq80$D`iQd!0l}1*q zwG?RtWfWpc&^$nzew<@sXf0E8Yo>$+p9LX5U2Ey4gcJXXmub&sN>~Jikr>uOKr{r+ z1?m<^`;5c@h$aFIEmruX7@Qm1ty|~DeCQ}ztSJCTxFW!!2lF_<0<1-+0JKnv=fv1R zz(JZ!!Y$x7s(XrZGL%Vnl|CEPVsgqd>C<77wvzaO=G&~)xWq*uoi1?^##-?h1{`iA zQlyNGtmYu*Rnl%<;^Gn)=kvufHcpnfu#>}7QRW0iknY_P8J8lZ8CZe|nmZyR$Gm2S sp8uZ_8JzGBm3fDbUUmNe7Gut1-zzQKYHAk1|I#R0X+=rzx*cEt3qfS{bN~PV literal 14821 zcmb`OeQe#;dB?8}SqM>-;a0Lnw-~}rQZZ4tW)16VHM}HCZj-Sv(V_{KghD_Yu_}tT z#T~K(EiV@9k~oAk3UZ*6HUuk#M)JmlW>Bn#rbPZ@LJ+x9q)bVqPD|xj2s|9$eZJ3g z&iS3+^>ujBBoywwzqj){&+~nGo^w2~@z%AAF2C~fN~N->wr=&GRVpofY^hw<%K!fJ zw^x0oQu*4y)K=eg`}UUqdE=7js(=2>&nN1Cd+tYXz5UPMTYt~XJ>$dSzrXzZp=ZAN z%23z!OV3UH&Ax>LRHo#`9gzGS$! zQtMtk-o2wb^hD2+8{5Wy@&A6}{Jk%|{J-_5jy?3VHIIqfHF`mV zBx+5sBtBo$+vHa=;gi=4oV{9a!~C{XC%UT-GNvztN%+t+$n9wk%E=?&D_As~Ym1&@ve$TSx7#`wkF> z%6n&~{xJuSUS#q3^!ja+ov$8$>TN}MX4&x6zW2_oxlg07w~fEPad_%oONsu0o=smI zpMHRwyn1}$)&qQ>`6}Sx`;F6IfA5*m{@)!R{P2cXuN$cU;qbDF?|$dbw)%tH)}J~# zI$WLkFOH84^wg%0a=ibV0M(SMmW4vZkp$QQa14M5Nkv)WZ+V5Lizl{JTXqUSv`u#u8BvV)V7-Dm8NPrvxmO#LSwq24zwD!_5! zmH_1Ew@u!Bq5y4&lG#HhjQ52hde8*<{YGzgH6a@rEx~{DcMkNo>}9<5HUr&~2-k16 zb+!g*ipHu!*VOLOk=1RQ24=-i!S^22@WPHir~){^1Z zHUtQAodH@?637a8^Npbv6RjgqcK523371+5W><&vop+ChT#57X!5#(K;A?~=)}GFI zdaXv&r?*#v?5q|b)ac2F%c_EZ2~0+;jMbs#G+U`nuUDGQEE5j75fdc^xQ4_kLouG; z-O(sqjyVC6KocQ9ld+TY@U>GmZ`=8F1gZ^u2nqtq%#d+?xL5#PCc+$AF!3xv>|X#r z$L?bTcJRy>(k|c&l z!sxkeaCOd$h*4fetoXev1w}~=j4YFO#*rkwbS{e|0Ie|#Z#(lQMrT3 zJ#BNeuh6?d4a#OQhF*5MDt;r3mIy z%W&hmo-f(89$uGYIJ@I^a7J#qKswv-&SSM8qkwGW@hXY7J)t%`ic}^zF5MG>Ye;no z&g8bczcB@4YC9-12Hq!Am7&m5LY%WhX_l;w(L-eEAh8BTG{|RzR4UM@JhH4Zo^>VX zBeW(ou0Guh<0tApYgi5JDjXxTc!ct{Nab~ZMMh@S0gPwoU~<1>RPBPkwq zbz*l9u04n(9{N!L%RGx={oBL6urdLKq}!veJJ|}Ou8$5|^i`3($At4wJYQM^0Z-pq zJN4M$=*XZLb<@3piW#uTdE?-R?}|r)lj`g;C%04a|L_JQ=K3F%qjV|W5%#5A!Mrdll1rtaf#S@wVZ?{D^t%y6t9lGD1~3cgveF} zM(zBuBFTG*p(G%>7rEe>WiVsfIp}`fnMH2TR8trJk0!Zy% z5k}^Q)RHl{lxTk=GFxmykc)}T)+(F`4#9b7u<*ih8XfydX`^{=@C+?B^&SBK_e~E* z(oUh4paxIXpp5482#gt6$18J;{@VnCUd^~UmxUHUTp(xrzs}j*{fZ!(NebO@B?$~f zlzY?2Ib}I@LkniOY1!U-2Ck7Un_Rj}FELXp1j%6ix3fpWh$=!i9%7jux3NSW!VWgp zJ?~>b6M=_Il=zkvqybSsd&xxqV?DLI-aj~oo9%qA#84T8Gr;!l#w;CKS-*04 zs(qtu$}3k7^e?Kt{+8KMT!^NWV6EbKU%G`Mj!f=dO zK`oG4P0UO@Ti8}kbvtR?L5P?E@wj5=Gt4BJ`P^jKa*bCm5vv^UCp#jBFr*3n15LuV^ixJiRkScISftOrFkqpxurnri2Wq4Bl0@q_pc(WYtx-~Q+iIR5M7)XqSrUp zSk8W?Dh?WeVrWq%YBOhyWEC8R25+c7qXI<0t)eS8aglEeG9(wPKB8oGN_M&{At%r) zZs-uiUX9#W*2yh;LoTG|RGdk9UIHB9F~awhavnvwi>8!&1aCh?T>GO$Z|2{=Q3?-r zFq7Q?QH*K?vSL7+juIucju=7gha@Z#9SBw=cd_>H#D?3cNmAbjxrnV={F%Fa5w1jX zX{i@$(gS?VUue3;eO)!$DXB_>BjT12zUPwAwq?Wp{cJnq9+ z&HNsqT=fq?^uQ-+<6co}ucPo*Z+Vn}s?QY*5JbnVgS?$okOZ*6TvRxDInMK&IEMTt zRyPLKRCL2lmR}3wN9URFqI3@w>K4cE#8@qiloRfM7|tvmRlY_M=5wM9WxEw3UHR&IsnSnGErf9eON-s>^-9qot)2gBLe51>i`3dJ_ zdMR%NF!C1CYPq8XKu+lYolG%Gc&HSt>ZyHd4pXazTqMO-`m;x8cdN_ukpY!e;-|J+NFJcK$NaEH563dsvyuWHc<9{NUKa<;B_s@0~1WENcSI z#`Ta~2(7?X@llv0=V@Q*NrDNA$@^J1Ocu^pjW`{S$py|Ig^??LzkVf|7Jn`BxcoHc3_6|Nd29TP}{^DZ33+_h|*o)#_TRG z7*5}@LQTrpjCE%%P2!yb#Fjh0tS6*J%=OzQU&(ZI>W=5m0|PPOBCVqaPNhDC9YbI4 z6t(oQ_ zWv8x~xlZIR8c*a-?cLbY@E%9#Fk1J zj(@YN#YB_j!78K{KG`i(^oSe}iw%s<=Ox-ozD3RB8GP&9EtO$Oo-AlsMlx)v?zfr{ zZ(dS3mu!5fylY%U)2Vc8ExLs`@@7sJ;Z$xp|J2B?SWSKe?bh2M)XhJf&7GF0tp-ZxlZ&II(C>9}6#}*(&R(Rzu7bk&$+HLSr92GY##8r?_ij zOl-$PLZ_x*uEn((-fp~V)(d7dnK5~&A#W|Z=#BSkthq3;)COdsH64}VS^owGA{bgp z#NP0Gmlc~d@Q_nB#9);&gv!kuml0<*d>6g|gJ?O6HX2xCUSLGeGi#2R`3}Wm{mQ}Z zyRC`m8xnY3vN#7Lu(4f6u>ftp)4-QiwJ>ipFt>WDrdC;DmfOQf@-ge~!QI-&>L6KO zOPf$J%(_FiEVd*ph&j%%o%Zr++krVOipm^%%%vWu`AAg8^x3uq&kBbU^I#ny=0W1I zR(?Ej7z_~i8F=G>RY^}d(zGOoZkz}`9IbT$tzUlpnPGQ<)sO!T9{ayYcjw*1Yy-mS zw1CZyV=S4xuM!itG#yXbzPuz7xutakSnt;oFJO$}g}#Y0I_~4!{-!>ABgdu5^JEU8 zlc9@BS=N3e_JFrf+bH@m|YN2}1g2RZwgVCH-@F*;+~2Jpn+^zNozUw^*(+WYQ*n^PauKH*PNQ zojB|p^Yuy*7a?{piz4z}<>E=J9c3RpM?x1Noms!!i^mIR4_~+7&~*bZ&8(+v`Bd#z zcM{%)GUE^nN6{qw-=vtN(swvs7WNI8yh2N{VpUjXUSN@pxe=>HGn(YiAQpV+91D}a zE@0pjL;6{tc|#JpX4QEzC`5?Hdn5QO2QRwYP0RLTKS1AnW>~&O<{L! z+FFYyH(pyA>#H`H33~qJ7N)O?tZdm1QrQ9qw?EZ=&Qjwqa$~ z>IsiZ23;>bh!GppoG!!4LPD9-Vcex5<;w6X*Kbh@R@Gt^ixwP0s!G!~0;`d795&`v zWy=>T#p))>M1moy@R$@^v|Ds%cGGT$xd^52PEaY69n}G|YO{L`Oho9>yk!~hJ`|{l zWqmI!r^wxuIOiv zINfiH(4(4|nR5U!c57!kmS{}-LI}wp-Dhl%W@2SNOGSg-ni4C1RO=;4t1&Ck<<_Mw z1uCTCqKH4xb`r#)Uw_=6qUd*vMCEkimd>3OC!E~Em@nG;+hAUNe6(fS`88y zc5BfPNcaUhpRKo=wlKl80SgR%U*Cs^h~AA0QQTQv#INs{Rz8 zXTmz05pxwI8r;#1z`>$^_o#SjpJ2#|{16B!#>&mNa&pE&k9(c z_N6MdU{iy2WT@#W_i@T{LZ}Yy$9GtLYAElsg%6LSEf(<6W)Ox$vAls3%l)DcN6NdT z#VhmuJhuLx`Uk~IR$9b1us1iUPSSn;HwoI$YGKa(Dn=l3`N#HMp$qT0`kzKv0$aI~ zlo5zpmicL3jhe%4*fX=m{V17_;{DWdXL2}ogD-LTHrem`AC1hh_jPZ+p>?_O>PZTc zr>iqR^&Tq?SGGvL`+RBd-Qw}PmJ3R3agm72i81|9v(RX5FUl|5!JnFo-XgU>IIv^P zSlW9Q+(U4-5Wb9S+;J6KT~y@Uy|9^cZcwGfNEQrR4ryAuf-nXfWctF5SgMy_o%2Sr zN1t}5Yc?HZV|mY5^9Czk0MB}nZEbmT#KkG@$5PuT7L(>zBaOth?zmkmDTCFWcB(h8 zefeB38c&V6+|ZMSoSN*k)c~6P-IrGa{;$t+`UUQHGwc6SZY6s=`mrN}wU diff --git a/src/lib.rs b/src/lib.rs index 427b8ef..cbf4075 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,23 @@ use core::convert::{From, TryInto}; use core::ops::{BitAnd, BitOr, BitOrAssign, Shl, ShlAssign, Shr, ShrAssign}; +use bitintr::Pext ; + +// #[target_feature(enable = "bmi2")] +#[inline] +pub fn h2xy(value: ::Key, _order: u8) -> (T, T) { + let y = value.pext(T::MASK_ODD).as_half(); + let x = value.pext(T::MASK_EVEN).as_half(); + (x,y) + // unsafe { + // core::arch::asm! { + // "pext {0}, {1}, {2}", + // out(reg) x, + // in(reg) value, + // in(reg) mask_odd_bits, + // options(pure, nomem, nostack) + // } +} pub trait UnsignedBase: From @@ -88,41 +105,78 @@ macro_rules! base_impl { }; } -base_impl!(u128); +// base_impl!(u128); base_impl!(u64); base_impl!(u32); base_impl!(u16); base_impl!(u8); +pub trait KeyWithHalf: Copy { + type Value; + fn as_half(self) -> Self::Value; +} + +impl KeyWithHalf for u64 { + type Value = u32; + #[inline] + fn as_half(self) -> Self::Value { + self as Self::Value + } +} + +impl KeyWithHalf for u32 { + type Value = u16; + #[inline] + fn as_half(self) -> Self::Value { + self as Self::Value + } +} + +impl KeyWithHalf for u16 { + type Value = u8; + #[inline] + fn as_half(self) -> Self::Value { + self as Self::Value + } +} + /// Unsigned integer input type which has a double value type as key pub trait Unsigned: UnsignedBase where - Self::Key: UnsignedBase, + Self::Key: UnsignedBase + Pext + KeyWithHalf, { type Key; // Double the self unsigned type const SEVEN: Self; // Pattern needed for computation const SIXTY_THREE: Self::Key; // Pattern needed for computation + const MASK_ODD: Self::Key; + const MASK_EVEN: Self::Key; } -impl Unsigned for u64 { - type Key = u128; - const SEVEN: Self = 7; - const SIXTY_THREE: Self::Key = 63; -} +// impl Unsigned for u64 { +// type Key = u128; +// const SEVEN: Self = 7; +// const SIXTY_THREE: Self::Key = 63; +// } impl Unsigned for u32 { type Key = u64; const SEVEN: Self = 7; const SIXTY_THREE: Self::Key = 63; + const MASK_ODD: Self::Key = 0xAAAAAAAA_AAAAAAAA; // 101010... pattern + const MASK_EVEN: Self::Key = 0x55555555_55555555; // 010101... pattern } impl Unsigned for u16 { type Key = u32; const SEVEN: Self = 7; const SIXTY_THREE: Self::Key = 63; + const MASK_ODD: Self::Key = 0xAAAAAAAA; // 101010... pattern + const MASK_EVEN: Self::Key = 0x55555555; // 010101... pattern } impl Unsigned for u8 { type Key = u16; const SEVEN: Self = 7; const SIXTY_THREE: Self::Key = 63; + const MASK_ODD: Self::Key = 0xAAAA; // 101010... pattern + const MASK_EVEN: Self::Key = 0x5555; // 010101... pattern } /// Convert form 2D to 1D hilbert space. @@ -210,7 +264,7 @@ pub fn xy2h(x: T, y: T, order: u8) -> ::Key { /// assert_eq!(x, 1u64); /// assert_eq!(y, 0u64); ///``` -pub fn h2xy(h: ::Key, order: u8) -> (T, T) { +pub fn h2xy_old(h: ::Key, order: u8) -> (T, T) { // Mapping from hilbert states to 2D coordinates // SHHH => SXXXYYY // 8 bit => 8 bit @@ -375,26 +429,18 @@ mod tests { #[test] fn h2xy_one_bit() { let h2xy = h2xy::; - let (x0, y0) = h2xy(0, 1); - let (x1, y1) = h2xy(1, 1); - let (x2, y2) = h2xy(2, 1); - let (x3, y3) = h2xy(3, 1); - assert_eq!((x0, y0), (0, 0)); - assert_eq!((x1, y1), (0, 1)); - assert_eq!((x2, y2), (1, 1)); - assert_eq!((x3, y3), (1, 0)); + assert_eq!(h2xy(0, 1), (0, 0)); + assert_eq!(h2xy(1, 1), (0, 1)); + assert_eq!(h2xy(2, 1), (1, 1)); + assert_eq!(h2xy(3, 1), (1, 0)); } #[test] fn xy2h_one_bit() { - let d0 = xy2h(0u64, 0, 1); - let d1 = xy2h(0u64, 1, 1); - let d2 = xy2h(1u64, 0, 1); - let d3 = xy2h(1u64, 1, 1); - assert_eq!(d0, 0); - assert_eq!(d1, 1); - assert_eq!(d2, 3); - assert_eq!(d3, 2); + assert_eq!(xy2h(0u32, 0, 1), 0); + assert_eq!(xy2h(0u32, 1, 1), 1); + assert_eq!(xy2h(1u32, 0, 1), 3); + assert_eq!(xy2h(1u32, 1, 1), 2); } #[test]