From 63373b896eb9c5874cad114dd8249d8ffb1dda32 Mon Sep 17 00:00:00 2001 From: parashardhapola Date: Thu, 7 May 2020 19:17:35 +0200 Subject: [PATCH] first commit --- .gitattribute | 3 + .gitignore | 4 + LICENSE | 29 +++ MANIFEST.ini | 6 + README.rst | 11 ++ VERSION | 2 + logo.png | Bin 0 -> 63053 bytes push_pypi.sh | 3 + requirements.txt | 26 +++ scarf/__init__.py | 3 + scarf/ann.py | 148 ++++++++++++++ scarf/assay.py | 240 +++++++++++++++++++++++ scarf/balanced_cut.py | 195 +++++++++++++++++++ scarf/datastore.py | 439 ++++++++++++++++++++++++++++++++++++++++++ scarf/knn_utils.py | 45 +++++ scarf/metadata.py | 147 ++++++++++++++ scarf/plots.py | 239 +++++++++++++++++++++++ scarf/readers.py | 212 ++++++++++++++++++++ scarf/umap.py | 199 +++++++++++++++++++ scarf/utils.py | 49 +++++ scarf/writers.py | 95 +++++++++ setup.py | 35 ++++ 22 files changed, 2130 insertions(+) create mode 100644 .gitattribute create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MANIFEST.ini create mode 100644 README.rst create mode 100644 VERSION create mode 100644 logo.png create mode 100644 push_pypi.sh create mode 100644 requirements.txt create mode 100644 scarf/__init__.py create mode 100644 scarf/ann.py create mode 100644 scarf/assay.py create mode 100644 scarf/balanced_cut.py create mode 100644 scarf/datastore.py create mode 100644 scarf/knn_utils.py create mode 100644 scarf/metadata.py create mode 100644 scarf/plots.py create mode 100644 scarf/readers.py create mode 100644 scarf/umap.py create mode 100644 scarf/utils.py create mode 100644 scarf/writers.py create mode 100644 setup.py diff --git a/.gitattribute b/.gitattribute new file mode 100644 index 0000000..314766e --- /dev/null +++ b/.gitattribute @@ -0,0 +1,3 @@ +* text=auto eol=lf +*.{cmd,[cC][mM][dD]} text eol=crlf +*.{bat,[bB][aA][tT]} text eol=crlf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..75a11da --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea +.vscode +__pycache__ +data diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e1f8022 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) [2019], [Dhapola P, Karlsson G] +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.ini b/MANIFEST.ini new file mode 100644 index 0000000..1b19ccc --- /dev/null +++ b/MANIFEST.ini @@ -0,0 +1,6 @@ +recursive-exclude * __pycache__ +recursive-exclude * *.pyc +recursive-exclude * *.pyo +exclude .gitignore +exclude data +exclude docs diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..ab8e3eb --- /dev/null +++ b/README.rst @@ -0,0 +1,11 @@ +===== +Scarf +===== + +|IMG1| + +.. |IMG1| image:: logo.png + :width: 50% + +Scarf enables analysis single-cell data for millions of cells on a regular laptop. +Currently supports scRNA-Seq, scATAC-Seq and CITE-Seq diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..49b49e4 --- /dev/null +++ b/VERSION @@ -0,0 +1,2 @@ +0.1.0 + diff --git a/logo.png b/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..b05fe146f355b156d64ff787e66e9d7b48532217 GIT binary patch literal 63053 zcmeEu_dlEc7x#stC><2FI_$0X=B8*6YHwP5&!BcX)LktqR_zs0yY?QfmfFOstuZCy}z&LFL-`>@=7aBu6)iu=Y7s~^z9Q36>?HKQUCzRA&(zv0{|iT5+8n*2>j8y zGn@(jxZj0^uV^BRHS_&@1^8T9 z^6IvbjLhkxmmh0b9ckQLDj#RTJXW(@#wVsL$Nf#Z)#dA@{YQ`L$*;#*8xD;pg(*DvyUuSSdJx@0(uiiceXyZt1qfI%^a}ABz6bLj_$>QjQ;<} z|6>i%^(YYA)5yq7YejlH#yaY!tWZS_IiOzFYe;O0vO#4wJy*sll~mEX0jzotMnBl; z#tPH1(iOLHE#*x4PCL{qHpsOo(fAb$KnIx(RCEU(*~IPX=6>Yd*VXB!QcGGxna;0F z;wHHG$RUVCwD7QbqVl9(g`reg@p{0%eLKN*Ly5_Nb9V+>{wUx*XIM1FmJ8+ad1Dm3 z8r9Kitf|}>7=&`Q!bK0o=(!Ga2!lwO`i~$}J=Z@!mNFfu&hgJT9U19Kc0p=L^#6|M z*?K-9+M}w|$Rg4sI+&LytW%7!O}ak{tF^6hcmv5*O)_m4511dfMX8wie?zL6`hQ9q zye?W&&geZ!bIt|aBMg7@v-FB_hT6iBx7S`BEU_)T$+p@6O?jN2hHxrSn_!V~$g5Hv zFLQ`lCri2^B4!t9mrap+N6pSJUkEBWl{44_L)F`=N$Q|^Ac-wnQJu&lzS9=y>eSxd zgTm44tULX+=ac|@Jt}xu&GLgjZ;g>*TVpQFC`-@P)I1HJ`U+M-7Z1$CoN>$O;OHbK zHRVd19oq?d$$M!VS?{F9{K|UsRqafWUh0x2#a_r)nz?G>1j>cEc0Id{zC~$1;Z1(+ zjQXUESI?=y^%d|q99&B*Zq*m>mHu`!UY3SFtwy%9xJ2)MG;RLi@J?l1yLY+tcyui7 z^VWEuo`GNRQbw=n=-8qpdt7fkEHbfo%wo4Eo+=bNM&+YcXB6F3gVC5^ki2$&JW%x1 zlm_kJ{PLyRX_?cNx`otLeqYeHB#j&X3JVjOhCMY#P70MLXypbV*n*8Wm2* zOkhI)%L`F{`54bswm}Dkc~mxA4B%qX=pHyy+^BY#Ca~3#$S> zmZnso+E2WrF}S=S%{{`+l>0)b*tU9S@8x?JTnN9a;s*l!PssBy^a#?u$y`&q;ifOC zsxr|mHoptA#IsF{Gn`Qw&Y}i=>n~8zs64JXL}#3q9ycp1#B*~-NmlHi81}voMpJTo zHIOc`;%dFU=}V8Y9UUQjn%&tPbnLTicikm6!2f6U+bqM-z+6(s)XpU-Jq}ppXW$X2=E-GPv7mz+?YN-FL98exr<@zVyxo*F9X6 zlJbo(-(#By4jhYvwe`l$E;)xnfjf7ma-#==ns*JUzH?Er|?G)qH5zdlmZ z(k=PfqNUfV6hR%lyRnSJW**!mRf$c`u!zA=nfyJvP(_9 z>cY*4$ZL)j;==mdx7*D%^5c>=!~*uaU?Yhog@<)2pS!es#%cztggtdGL`qUJp@y3^ z2A3nN#XS4YmG&!rridj^)VkSOG&k}q%Wcw_p@+X}JH7-4D=6-(tU_WQW#`<#twJm! z>HkC5!sE{;%EC%Pn@*loTv_dXHWRTF?>BtShqh{TjSnv{i11$k+FG_y_!!%pa5sBvjU8B(cO77N?CG&j(D)zRk6#51sNga_PR&c6bzZ_B(ie zqMBLvb!(@!XXg_8rzgt{0cjyvKR$~m$wA^3posapbXs}Bg_aK1Yxg42pH;u#jO)xM zPhOPL*P>c26tc)OD(@vvYRy@PK$hI5odaGGT?YS`@%*hsu^G9i#d?`ukI_PXt^;#X zxQb^9SJHXP?L#EQMR`B=d2=N5f+9T;HF&~j@`UPprd1>vXubtitvFAuh3!wB3t6D4 zZog~gic{9ji&apOwvAzjj@B-jWhpcX!Kfez2rgjG|G2zg;zEn|$SN+N-v0zIk4**;3-< z&s9N*?;gq?Yz6AEdMO=O>iH+%sv`0c$ShFpNxHGR-1+)l=TBYm?|nJBKf2nivyJ!Z zSrb%o$p{mOp@qf+6u6r-v` z!7QUo_5GOa%lqHr1>(IwvL0(uafd%pqUH`43h$+s!Rx4FBlq7U>UTm9SeY%O8=0wy6bognCIm zzHV&Qc@6s4OaG52UeXo`uaj7Hb4|#~)zk$;7KqY>uXJ&4da!QJErSPHdWjl|(u(Ea z0eL!_WQ>b7EU1K;fCL#TN#Y9yM{IV@>F{#&Vp1KWw>1$U`yjDgBcZm>X zUMcTc^6U?=xs_g{qU%%jjl2{u<&S?XkS2-l!Jx@Lb!^ZiYRO)R7bd!XbTzP}CjK0X z1O5BPO~Bg9_Rj6q`;}jgJ>_$>(Zy4m4x_~`bPSV9cmBQSS^E-|;+cfflrxKA*EFC% zrHo;z9SS(`pSbZ?q`i@&1i-FBecHU|k!bR0Vd9egNViuk+gqYAUxz-dT}42j4(RoY z?TX2QLeP)y;VAXQoID-dymBwq<;UJOsOyijfGg+RR5Zr(^)7aAkt8-SB@7*nvM^;2 z4E(b`c3ilR_0xZ)ST7ExWoHGoqQjP+bUxIM39v8%h_v!Km+ceQ&XWTq;t_p-Li zRPOp7kG%|T4LXSpK@1H%gfKG32p?A@bWZ8p8#-X61EY6ATMH2u z8Afxn{mz0HWh_6;QK6m18>T+;y0?RCUdm=93m2OVSBpvsdc)UnXgl{N>HKJT)exfS zpMyY)`C^ewuL|Y^-33pr?_k!tO!NrOG_C1R7rs-E1GtCl}z!c3;<(#kE3FdxXJ{qvZ z=@nDy6~*cn$LdP_(bYxh(x)`|B_>|x8qeAW@P#LsbZTFMd8cgbCFqu(wItzL9I#1` z4zm!h!yd%Lv-92*u$ST$!xJxQw{Szbz3F|ytTs6RLE$q}wGt_u$0f1L8T6$jrt?ce z^7<@7yq6JYkE`5lz0%c(?uQZX48h zY(3NJoCNU138Z1)wrj!tZ`PlGV_vJopfHBdK1M=s)tN%}YP+JQPG_wU^A7pKL0D>6 z2L#8k;aggj#HA@_{$A_rP0oVfx4dxIc*3c`_hfvQC$fu9^H@mb&ySX$aL6NR@oPL& zj&UVVC6Esg92`Ce2?|T+p8^^2#*1&P)Z(8rK+-|Q${%}<&Ztfcrqf)c1aWHKoe1pQ z#IAXQ`E5yJc$Lpq1JkMg@{Nn{4r!~I;!h6FG3H^OLHJ2h3FRjOoX0uqHY249ss2K_ z%KlrFa0g^m`1W;qtCYrolK3!+%WP95e0t@nTS=cfGF>f+L6QvaVdX1|R@-P8^a;BJ zv@IQuMG8qV|rXlJxB23RAv8M41lML8Tx z2?smIXQ;mOw8$1@zKuFDB{`S3rPRLD&5?4joh6kLOjRFY4hB)v=daEm?D-fzytIJO z^gDXH8S1H-etAJ@QPvglaT-&!bc4i&?r)!Q@blmno)D8Zwc{`(i-6~74;!TVX+ zLc=k9Fl1c|$ZH$DHLrAtIhBT8DmqeSP#xei`dxY{&tj|&QuD7GTecjN@ZDD_kKoAB zbB4og{GCPvah3g-;7Obb`p{)`aBfh0P!VDK8cb#?Tktbwv%_P^CBQg4%_rFQ2u>xXwUQhk{RaOW`{H}A!~Ja4D|CNc4(SxR{HuAkN! zp8b8c)FkfJD8)<9nl-$V3Z#tAjec~vXH5i%|MT4uNhUVqV4_HT;Q+IbUqUa0N|a@E z3<1O;q3Zd~*BCF-Ej*hu4-sJtYzRE2mU|Q49o`m67SR)5S zsFcta|EVMT3MZhVcJv83c7LA;PFL^m8r97AS00MoUxZH$68aFkzmzw~fW5C&_s;d7 zktostR2YV^=-6c+5W?#odVBjG`KM>+v^3)}iV@@20XklhAZggeC;%}3@95(zOpIlvc) z*rnf_CIFJULuF)w9sWl2-V6VP>n`ED!D z6oXNmU*GaazZze_bIPQo5-&r^Ii837|6>{p{psl3xkh&#}0NuBI3A?Y9v1EYv z{(>sRZK`2RS>bQ+htm0wY-UpP?Y_;nIemqkAt{vuhF+?%dIeby5}h_$Gf7^*Ajr@5 z81K3KL11&I8Q3xV{9pjw@tNM6HrGCTHAVBc#;BnC)!-KPb7np&JZ8VqT#(@pM!+mB zdE#!z50S9#i-Y<}UupnMj^NDYo@Y>}nVSLS0zUjy8-g>B*xTxvsm@ghyUa?4B4z`k z)ua4xk*h3?CffI5Ou$^S8f%QcNq#ff_%dBfy-4`*xnUULES?wN!I($Z0$Zady0G65 zEJG9O#(Eu?ww_gIm1Tho2~s|xc!BGNV1rqtnWvH$Y=W+5fs&?@$WPFT3>FEVn%vKp z8YvROLNC#VAJhWmvzH7*0pcRn4Q;4!6{5}J;HER^gKa_T_oWQt#K3(m?_~Uoy=(;sa6u9ZT1_b z#$v{zK}|Gn#D!TxV8t*^-@M)I%#%lo$}{`FT*!Mh$AzqgrrbT#?oFq^U)wI7<{n+5 zIR@PYl~mDYkh}|@D^o;kvYx7=uQQLH9ySC&8oSVF%T7cA{-!$!*VhK6?w+B4Y^LGH zv_5`{jO+k=s88T1LFr-Ry&FGW6N9-%2|Q1SOs})tk@4?K-M3$UdO^A3rTQ|^lVKRU zKDqTx>fQJI>$3BEwx~eYm!yo8!xxa9QjbGjS0y-0aLn{`gFJoy=pd*&IM1{N1DK+1LweFZZNAq>9dHHudU2fZ3o)(eyMN7h_1WP*OI zaD8qKv*@89?y%iCpZQ$O7dCX81pYakyka95`^r8*6XO5PGzuBbq@SP8J<{6oDY(h6 zJmNLQrMjLAVc7w@^sN|8nLYLOPB}=vt}6Hh+pI&iC?zeVbCtWhzktOuL-M_D%_o@x zS{~9^oNE6G7lJ7PqgBf!14Ho7`Tr@dcNT!jUkJRJ;vt9kQ^Iem32Yi-6qdyAC3AoJ zknHYmDUcq5*Sce>+IoH&*|ZN7YMK1X-^QGOX)+s*~aGy_uVZOYg#=o1hXi-HplB|Poy z@7Fr_Y96@6I2EWz`Q`Jvjw9ljx)yfl>%8^J8E;+iIMi0d#GWi2bQ*KK?$D;3TqNzZ**Q+9X+vE-Xl8P8sy5YJ=RaT)!Eh#2842$<#11eYl$6V4DqdoY zwma2?v$adIM=QLVvYU#U?K5%jwW#j$;O`tpmVOlg2QL$u&U__-#fpjQX%1f(%QVq3 zbW%^}Ahl@%gF1PuTO1=grK?+mnxjHuGtQ9NlnVwkI^IvH!NbA>aIVtvA&6Mi+e@#Z zlm6;Ku0`ws-V-DW=S@vbU552I;sr_G>EIO#>z?+L1Dd{kjS%&-G79X}vX7nu%GA zRQ{I&Xtga|nZ?-2{E2p%;J`T+8S@qx^QwK%oI464W+T6@POA(~W>T~mY^ao?e~V{x z^nx9!qWN9!a4U#5+ceI=VSlnq5S-p%i-j4b`o7BOAsC*I#jS+|X%}&UbCWa<$bNH* z?m=@37o&2-P}xD$P}8Da7g-O zsv+#7=lt&B43p%A5rLNIip9zV_Gi=5>8{j`)|2L^@5cov?)>l5_w^+5?<)U%C9BKw zbkLpfl`UqRxNA@<%pgf^tjDmf%la3u!CUEl2{TlTE%_F<871d-$zW|58hdy5T%WZ- z?RsCi9M#bBJMIkO%<90U_n<_Ji(i?2Zpqcxmf__*`0e(Kj%m7JDj;p3mr+yT&cE4t z`E?xZiGN!ez+HUK5&tt{nYXMpE(Bq{0`}j%y`26XcW4}+KabkSP8G`pq!0J^^VJUS z%=-C&aQa*Ua9gor(iVaQ98&>-%`7H2y@Vytai(c~yxgvSe|@4Cc8|paxCC|f!E>k! zR2&((-nEI5u(Yi$h>7H%*Bf9Mu@N;@ZK|KH!_Yivqm zPqQwoeHA*3_x<;WH*fYq5>and_8qNZ>92{_i!}6Y z(!)7cVhaPV2zzVmbme`Tm&~olx?EB2EswbcFS}Qu|D;U`@6Y&QaHi=}XzzW<;mMVS z9m+H@&gHaxQ@(sdaK^iwp{zm#P9{GY(&^_3-xA=Be6J{fK~R2&o`&#g+uo6FT8Yp* z3hGz6>au%&QF;Y7;NX;wmm*(W5}ar}&dyfkihC1>a>)4hfyGD_?^2;j(={9b>#6ZF zz|IxZ;qd!g3Y$jCksA2z+j?9|aokFAs?1!s!_LPHoC&%-wDYf90?Y8LoTruA`Z?MR+*b=0N|)jg z`Ntt?UsX3;|E#-ZMVH$z17)oD(xo7t4l7VbJxk_eR;2@;`P~4 zbH==l?XZqIs_K(1&pCUcVfa6tn^Q^3%XoVq9wKbRdS6f)XA=iynf@LX9X+Zc+b=tHX7#Cq~%Kcnrk$v4pa=hhQYf6+XYW-40Z z*>T+J^u)KjKZ=~7zB49J(T(p+Ugafx)9zD25xwUjH2X7fbvS*!P#xP{`x~OK8l0ER zW#_^{8&e==)^{)?(mz6&p;l7af8YHpR~)MG#6WP1D{?RGq-y zGQLoA3)C*zsS26!nCo%gJX^J{CuIL+AB*h1Q#k?qwM1(xH!j~=B!?(DwTejpRP$(# z3agCs;&rwCa^bg3k++U9$)XlrHuLPAE~V-l!2|8uxVM*Mpo|x;8+Fgh+PD0Mb(cE4 zQHedG7~uq4RY^0nHAII6$_FXUN5uY^voPn5sYC0k@@7{~og#!EID{eXSY}+y))5=- zV^*$bJ41qU>4<=pQtr{BPogWQE+Q@_=WX5A2>UMG08mxvS(`_kyZ?p)o^2wtgwi4P zkZiY1XCAfT>+xXaS3{R7Z^*x}^OokE%+X;nwf1DbBk*ZLvW8s^zneRiJ zxHU?aRFQq_M)B*^#2U z$EA{rOD%7mR@C6&=v(N>woguj+t2SsZE@__ZS8G3NU4~ePVB=$R)m=)b`-^DHX`HI zZoG%fO%9Su)^7^hL#=}I$1eWZ$9_L0eay-;; z5$UQn;j@9($)gFeJ`pU5dKFdmAqB_)mS(lLAprw2cLVP^qXovRfZg|tBVPH$dj^ZQ zo-(|8;V0JAmc1L{*^F9S@MHUKWf5k)P1$k{vLSHbtJ|(T72;TC(i5~Nk{s_s!C0L5 z@#95qc3iI~!{vN-gCW_&VLA1ujz&pK+l4GftLBz|PdztX!l6#}$9EHUdA)G)HtTXF zwO(V`emwe27czM-Gipf!4UNk2Z-h@fIa}_N?-nL4%Od>-koI(t_oQ)|0Zm-;Y{T&BgnitD1GeiN?k63#^`;2GoKlE<41e?r?m|U#>H<|J)F8eUZV|;8N_2 zKn~c;t=ato*B=HxHZ!*pacSIU93{szIa;+vs2oucpH>+c5 zYB8h`8AKM5Z3+jbn&osP1)CUESa}wgSaH;6;8xefO9US;`;~ezjI0!aQKY0CZ{Q+T z7oz(gQY3y=z*V?o_W~48CCb|eYWud3S|XL_BMSxdwR-IVbQ|Y9$mF-(g$>^2 zn&gEOvEpdNf}U-?S->%JC_*c2{=4zpFX8TqoeHc+An7FA-U;#Q*lgwZKOFBFujS^~ zW(aw-J@X`}Nx((uaCC5}$%uoZgv=u(Y*IwTzp^jE%FngQz%6 zx~wmDABx*X6UU40(5{ozySUCcK%6xhu^tz= z?YTY8@f8WNRCIKa`LFQe*b{Puxz;P2bl8}MCZF_94w!!J(QK#RCdY^6*hQuSYT7vg z(C-DzixOu-p2aR7nE6+)JU7Y8{sUn+bZhoF-9C1zoLx5)Q`g|xaY?9itXw4FO5W9U zNC|Y@GP^rYE#Bqf(1q`AUJ^2foVk`tz+P<1x6*Xn%qZFRt2Um?D9~$}wg?-a#&62t z*Pwtl(Vgp4;R{euZT>*rX%W~}CpW3nRk^wzUrcY|uw9*aIKk4m$dVB7#P+U9#jiRx z*P*DsB*E?A7B-gi(zLqyDj;yzUj8!Ty>MfPNjE!A}*=;EY8;0 zykt+aWsVQ`>N|Zxhz#sQwnM89Tm}LAXPHV|B2JYP_L{l8f|c&fR!Cq8MdDC(^e`q! z7yX)!zqxfvPLEE~TTg207#ramviS^h*CJ?OMbk%p{EWJz*c;P1XZ~ZafUVirhKQ8%M7>I2zXb0K#@$s+U~{Hl0Xuvm#+f z6#k}qOpuF<@60+Rw)Tv7wIqlImeJ}^kN>&az>ckwpSu6)uw%@D7z!nmR%1jC)?H@3 zBVV=vNJN6hRAs(y1kM?==yWjhd>vKVJam$eEGb@JPc9ge&6{hsvt&68n5=|l>PEYy}QCzZq-CSz@U>DhR?^86(1{in%s-C{^)py?a-iou-9 zqHtS8j)XD$c3^hEutX_ZwxhVXbz9DRY=d|D`IMQrVLfJQWWpCBsDCAGS%xaz=P*5? zS9&i!dCNVPW@jptTkzHQ#A6n32CPfXmGhbB`r>{uJ(~f~4h>t-6Ky_NnqsX3<~Mm4 zoln#Wh=7fZ`wE2yDQm3NAs_@ zZ>JAso_Hn5CL|g+IyZoJ*lpYE+OVkb7@LreSnSxfVE_Ehf$i9`eNJklzS2%l$5hf? zxOKsZ-+(ft^t>E6b}WeY4<2CiZI}cxQA5rP>8mCYJUG`RFCBg8zTKt*TY%dAlHwQeG~^cfT*V#t8F{P?}VD&7DIB!)B7vBa>V$jYt~u~ zW;}voW_ZdTQqP>e2%vE>+^_qwMve%6-q1X=K$DA|F&!Mn^_ciKxqC1^7*S7JMqiA} z;mU^K`zv6jo$;*>iGzVFcKlg|?}MA8>C@u#_&!j9Yd@5`ox&ri7udLjPzq1^iQ8~j zD)@Q^W&@9dw1sBQ(`2H2#khJ`7Bc1i^LLm>ta;V*xw1;oSPN$5zv|phpU#yhbOhBx z+GSi_QKKqMGu5boVH&sOiP87CFCV%?SYhWVJ0s1tdF9Osm_=~66R~l^a; zyaAswrIfW4$=YLtTV;;=UQE*mC=f@Fe+H=7f$C5ehiwvh?0p-`_+ zh^`*>y_y6TkehxgR}5yg2-`bphh{hJeADYC&ZZ}5erU>kz_8p{s)Bt-U%Lbmf^M-i z27%bNKvBg3oNne9Nb_UB+cxgoh{dKWH7sUEgyZgGyI!)64d^2+x_Y)}nNf|a#3nIb z+=_hSRBhp{80?~nvtzE_Q1e$>mR6ZV_kg(I?5*;8wF378&A~ol$L1tK=-UTDUQfS3 z?Lyz*JfW{M9+&u3K5V-&_*@T~RA*sz2+3kU6h?U+en*U{WJbk8W(42&+fuJtLze(J1X2~5q+WMWFr z-O1PDWBOw{!h0Ya<6&K34#b6Nk6lN`ORl-@WPw5HFl6iGZ(x3ZG+oTG*&hV>gd2bA zpBz`~;n5~0t^8|~Qp4i%n?!!JM$XS?y@5Z)3E~ixy3zc(kGXlV$T!5@j`4ujfZC-F zJ9BHyi6yn0X4Of1EQ+Y^_}H!a`wk6hloAq zdfn?K-L0`~KF4ki9Zj$`q6<v=WQaWQ;?u`lpABnHiz~eR$Xi+uG2r= z@})Ubo5HnP<^@$u2Y_q<5udB7-CT`_7vrV+E#Ire7rTB_E}+?m3KvYGj;36>uDnC? zht^Ql6p_l`A$%(Jc<>R{VVPD%(3Kq+^*Ly4M=#5<^n6-XM!E5i*R0)Lt!o$e*<&0{ zICoIl+T!7kAZbmXo#s0{k+Q=5gE)G=YGWJeA^C&+P`<`SCa3fJrV+yZ!_B67Q=FBy zAVW<#b_Z-h?q-?-f(LD+}W$`*4e!G8j-7*HS>w!UJP~X9ln%?JuYNMNEdsmMe?&q0_x>$8C{8l^STQHZ+Q<)!kGY+Dig72M=j&nrjS49qYq8%h>6AhRQ=7bjELYI>O_zO z_>GZXGe0f1y#N+)Ad^E>HB*lnJ6@^%l7wwKsGne8tyL*^KO}Yo(`{h(zEd*3K*HSi zjL!f*;cU%Ez!bzOrtL^ZvE%^ejmCXO6z;1b*n2g8|AH|{-Yuy#?8_;%9<|F_zQ~+^ z44Xd}^@rQLXwKB5-=E6IhJXy%KRXY;O!1>8WVwP}UNp`y^R{>WGhEVioWaM!X7g3C z%sX*1iAoXUg)ZO(t>&cKtfGar#YGP9`LUkZJXwrSU{B7QAVCC+m&m*@XG2ZSbsk=e z@~ygqy47+KQ=j}qoSC=gVy;uY0k6#DKlce_3VLam&Am@;ri4N`s!M8|e8?|u`jU(E z4WTYee@cWkyZsG@buUBoKCQ`P5H>5gW}{$U)k)9`^! zA`~G!++)slU{HVDHKZ&T3+9EEKb2Lv8@^NP?;OitAUSzGtF=Xh8>uw0!$mNy-mkrPRpz#_;4(ku+hm{XJ2^G)&pv= zvKz1k8EU&|T8XYk8>J&P)sAXvo2QUuLmC!Qp!x+|i!kQisgq5}{bStv?DE*4qlfz} zztP~}0Z~HykkvZ%lVuL2=8=CR#+L}toWO+A;0%F)%ZwVFOml77ZBqu9W|hC%W9@M= z4QAgZYoAnP-PF=~Y}?ZiOcfN1`K1rN8D&qx8u{;_kt1Ej=4Fziqqw}ix}tA7%~>~( zD>U)!8Sv43=!(U39mgVYOL^3%x&wwcg63{Vq3wU!iu5e;eV zSm3X@XaWABMwsn)ic8`gPLv8NEv1KFPiKENQ1OD6J7lZ-ZGK)tHx2r1afuM3CZ(N+fH)u5b)rC1xqV3CTo@9;k>fk3Q6 zTzqZ{fl~o@j=&AJY&~B)xg~9bWd?GqBc-B*Zo5GPNVrp@Ye9J7Squ_nN*R^#x)(m~ zaHIpzL0x~&0%C^7$?j$<$#ybUY>c8^OR33G3)-_3Ve|l_0`6vkkBG9WfMM;9gwWoB z$I%m3A{IliXFa3N&s-g*u2;FdyDY!x-7vV}>NLS-J(+>^WDI`+$%f7|+d0d?rc|D( zz;*Mem;x3T@K+$f^FCFZEbXVWi_8cpHRhn>C3#e%yg0-P4yZx)RMmm1n4#nb!1-4I zfaUHv`Gk}JccE#$QfVX!B4>Wr%U1AK&eK7T73|JkX~iZ~|B*vsNW3t%*fjk2xF(R~ zX9pVFMb>bG5!|l@m({Hx+U4(L6d0jIqsf4noKW-$PNoSaCX<7EEc%j?s`U1 z*+DV}p4URwgGusBMOu)gGG8e`_6-F>%j+z*$BhfI1h;uD0N1JfvEOacE;)zS@f;cv zRv`7#92^hlUCFLr&to*$e*ff|{(O?m7Q}I}`jRo?x@zWEl`P9BslvgWdgk0i0}Vnh zI#mh8tmhvvfd_K6E;O%yH^~H?kKp_;hl9N43H)5$R9vNmSP!_I#Eym1CrmA)mo6jG zkrR^LY(P1!()f>Z{CTolRhkN{8F8u-^uif69@wAae;|+_S4~2KBYm@R;{)KN?JG{` zyLCEg>YU-c;PByj12kE^H5QeGsk&H+f*XT9i*F9HI<)s@|Naq=2~N%2RvXb>T7iNDGTsqQiHdr{ASJ$3G5?X)LY ztc2HtNIjh6U7LK|ctXmNb?MgU&N9?xGgUs#hXGd*U+bkq8h<<>BH9whYL(Afogr;GFS=b-PnzUkC<6}_|o04KAN z2w~&lTdOJY{uQnkn}}D~?-<8Q@)Y9c=2YZO0{NhCQZSWUz3($ncQ5fnoq>t>Ck5LZ z1tW<34_D}l^oNYA>&QH8Iz*SG529YjTRz5@u$$gV^J_|vI;f(ZK2Yfp+(}sqx{H7I zNh<7y!qxk)Z`8k%e+RSMB;H;#zKhSa<>+E1Fq8Q17zfOe!PSBN!BmBs_J`R7{Rf0C z>M%?+u(;Y=noGMfQ73fOm*s=IWPM=?@bi<@yUWuEqGfG5t^m21-e1IzrWx>vfOf5r zO-C0U0gl9i#`k;xwPx#M;O3OX>wx#m01M85lTq9vY|LV2$KYH1Bk!;qSn>Lt-!7K2 z!~(w&njm`$SEHAHWD(X0{UM66JJ^jYGTEd7`kq%d&D`85;gBZ{$DPr22v)ayFAUZ3pTRiz{KkiK#Ee#j*E~5J~A}w7SJo zVHN@mnj0;4i^&b9GwlqPrqNyG5q!WcH9P@(hW3M8r=Y%$-w%LR*b=R@Y~E(X0hf2~ z4eTAJu|LefDs1Uea7%*+xWElzzzVjsNv@yP1gZYIOD`b1>v=uB&c??}PmNDZxDNPq zb}mS+p$f5_W0z4s>{Hr|hBE$b!hNByVK#%JH#rgN|) z-_v}6-r$B60{qjjT{3~oyenl*F{@CJQ9-K|?6t3EwlLtRTGbDS9M8MP07~^PoT@0fzJo2|upfu25H%&I zVlmM=;A&`T9rIP+!~wkJ87Y9#=42|QF(aFK9acX{Ov|7t>Q?sxK3wN@?nDY#OS5;8 z^b~~dpRf$UU)NlS?*XwDy3Pax_&`%o$vU;=Nfy%M7qjHPXg%;l8@NUnmP6pKX%oKs z2Ds{2f7I}UzDR;TimxF~{%gm}CZeYWSe^^-RB%@5JSxOqJ5!wY1@Jjs3QZv`Xw$VE zGQ2M1B(Y$N(KA~b;67nX#<`2;-X}{D!duIcdSgdRlQSnGH^``1W96!|Q1Db^x%blQ z-+a`?m3)bs?`Lq^C3_pq;4i;TSx->BQdqU$lNKZYbs+8^QutSckCj2ZAERp3eXsOA zG>d-A>ZS&Le0w?a0g>DD0n{oWUG!i006^QLshcNn-g@Z+JPvt3ZO3|ffRcH~zLcl_ zL%-Kj60h^D^xtXS#m~5gM?%~FFcVmA=4{D3zA^Py+(L$Sf`td&Ll+gmAVQ^s3|(9{~SjWz!L3mq+h|uM@SCquf`J6M7RyLdi5i zg;MWI30oWn#b4gL>)Wk!?hI~MAJ0dO*dF7KzLHNW-7idHo?@ssWcJ%13wuU$_L267bLayNNwqZhICTVV59gK8}(id+Lt& zxm+g_M}*KU89mlKBkOoS=xx_O`JMhCs?YH!W*B?V<}ovdjg#YP#~YVH#c9I z-c@3HZI^hH+o8Bl&-X*>o#Qu&yqo~h&DM}gk!H2;C7ci1Sf(Uk&+c`9ljDwzIGoF0 zwmi&q`Sf@q-U(>P`D@%Cn(DOKQm^hl=lo>B`c-3+B*r)lvS*43RDXq!23w5OjbtEEJT`?>`l01e@`T z{}%vXqd)bFFD;GtwL3@7lEwj5SPsisE@Q5`f49{BXXs~79)Wh&mgNFm4W^$o<{{L_lmYN4)X_F*>gqlpU; zoO|>qpM5}#0)J~`urw)bEbCp@i0-MG@4bO$(cxgV*UU4-^GU=PJ`k;=2qWHPo?usj z2k10qHc+;#1Fjabwb;!MRv|ZAt`TJcJk|U6Gv0k6zgEe>P*+lMs9?eP4-pT)dJ^&76tssaYZDM^zFE-8xw;Us~#;k6Q zW{rsLwF~LWm0X;bL`}Cd6Arz;{KUUEPto#JFpHpn8%q4tV4y$y*|yf%Bq!kRhfHls zi5xvcp3X&A?@ROElC{1Kn@U+HWxdzxav4^|0&jkFLBP;$VQ} zqp&4x{ww*Pce~Y?Tg6Vr8P&`uG}hmBfnpCbs-EN~M)8ivMvhDq|K?x$PyW|g_icrd z^YNkhb6^el4A|sL?Qh!*Sd@2a0 z&S{&hS4!`}O9Uo^pPF|4TI)(QT#pjm8b9?WzBSc3!WT$2yYydL^Rq%?fnX}Z&8wf6 zQ=~`3){m23&pm8Vh`-x1|D3y{^zc<<#(7x(diG zeLhuYi&roDmsN9=kz=OY*y5C;<@$e_?qyMN4c|3+EU>2J^GDS0-rXC@8hHAz2b5#q zeq$2&!Om>~IYu?v_HJ)RrJC9c_kDT@r>z!zhQcyj2=$;w887>u z7m>fn{AnQr9>rC;+k!;zjl!7}xGi8nahi|Qm6j({65HJmAHmF!snhE1j!ydD+{86} zGcuee1SQF5W&VqJ_fWlr6v0Wt0Gt8_xOSD94E{tLppuN{Z*rBCT)ST@l zs3>Oy;onuDMfV1d?Q66zddu33%~(hek0}*#ZhEo+Ut8L6n z#GrIz47M5AQn_#C3n6~`w1aB)h1c2#u%M1U1cKAnNgJahS%vnt=1+zPZz)rlZC`JK z`r=T>K=`_@ z_rBsx?3eolx$ysa`Il@YfxE%Hj2)cTmt9Zvy_TwDzXEp%10L2{(LlUVDwOsb(LVML zgQu2cd?0B{525bnL^&P*a|>TlIXddG-Im4uhehy70`3b-Ot!+oyvR=20&FSWx2m>Y z_*2sV{&WD(@ZQY(SBuYqcT%-GMe00$qv3|zyH{>421V`n@4k6N{rdPnEREO7F~s;K z_ky8=MYJb!hxw=aM|}0BKXnc^o zr5$*#2x4K@kv{@@FPShz<8f5%C=t&t|8jE+y>nZqbcw=dmc9GLm@PiAMm80(Aq1{fqCX|^NP~k&gI(D% z@MkbDV?k}O(#e}SSQt7p(5M!APlAZ&LL;WF>e|2>c`2AizBPy%eB}@dfApZ`WViJf zGuTyx-H;HW$WRI|dN(*Q-cB26_56{~ST`}X_cBF1H^q+NgZ!b?|3lMNheg$W-8(~f z3rLI7jYtfg5(0vh(jbVWFfh`CfFK}(NOuULbTiZd64D{v(%oI(<^4UMf8z7pIcJ}J zcC5Aby$WajKWW1WV6O@V)dp|Csst%WJUojzOMo_ICS*6~2h7dIwvQx2@rVVbH_lI) z0?h+!88V(gp9l@JeJ>FV-+HxBFj;XNHJIELA3Jsb=7;h&-yr92P{Tl4wLFz zPxN?00CA3>U=|>Qp3sU}BW#L6n+`xfXA%!Ac@f;^6!kmco5FZ&qhGGr%S^YvR3jhlW0!w~1SBlIxk0I%OexxWihT_@@8$HqL z`~$t2=K9fh18XX*#np`%<|#Wo4^B6-MI#^eX__z7Yi6`2uDMuB)1#XwNCf1M>wVba zqOla$tGWuaZT_=21QDq+NRP8Ggdq?tf+{RUQ)hSZRx1O}LT8@PG}iXHKRW%+mv5E` zmg%}+CKpx&`~1K1X6{c+Hg0QAWwx3RW`?CI(B}@SSy=b0CPo9w3&*3r5H#f2M{cj_ z%MHiXW=v}S>aFmxPZb7p-lYr-pBTc7^{at2(XfmlNME25I*mVXf}DeAnCfiRf2)+| z-`3qbk@42##ASK>lFFpsAkHqwGMqG*rKDCsT@zQk_4Ziq_Y}{HN7+comLXF-cxm58 z6wn%dq*v+YWD|$dKLovtmPYB?KS3at%)hovR4XKOKl&{39=xG@jE7dl(y%svFd&j!mo|8f5ooxH zkPF_CKN5pdVa&8qdeB5K6ncQS7%Rdyp@vM`zU9*6`cXQ{y=^`Rj7S{Z?hlZkFeT{G znTlc#Zr)Sxktmg~f`M|(P}lwMXjz(wec;ROX)!r2A=`)L@$qSCw&YKbZvO zbm#;$a|sr@EIN`+Cg>mar*$P{8qUO*Y~g9cA)Qj*Q~xe{s}3mbzRlyjr#k>4<)U(ce&1@_|yM;XX07chQ1PJp>g)*_Uo0KR2 zOz&=M^u5cF_bru-uMc5Qif|U`MaKv+ThhM;_isiHre|hmRjm`PR&Mq!AXcaVb~jdJ z!VY~4-A78m|D(eM!aW2)!YgbzL%G0M+BmA*gCOG&udDs3grH&Z_-oSX-UqA^0(p=b z#Mfmq99aTnh}`?NZB2VeCG}uQJCd5eSKmaIDn}{k_u!0-6ozQuc`DySKEDse5mgDddi^zn-=WW<}glYRyy;W67E z6RIp{C51e zvv1+8E)i$kt4I>u+1K;3ZZ92$fsgpB5<>w=D+-lvEw1#NWZ8;YO~0ICAe@(ckU~v7 z^|a$_KiL@>-HRrKwN>EsBaQ>rJFoTf9oE1R`(oY?)t(>TC2upQw5^kud8QK6#HM5r z|5(2<@mH^bVmu>(NABud8H*qp^?he^4IW@88;0DAq{Wgw=p7^CTrKpUndMcKp$Yx4 zKecah84q@`UrX?BB|cz1jjU4Br_f zTZWIjo~)sUog7}6tU}fUHZYb1CyhHS*=jQoNKc!qOXA#|p5;sWG>;(}v!`lJwOBQCIz(j80 z(j{J#F!IAueWxQYT_s~8&kqkg&}b2qa6(HBe~M~MZ7Nd!zx(NvkG0znUyk$Cd&A4R zyD<~IpMN{-`yS-tfd1J&sf^x^4Bq@-b4NqQlbG+0^X`c^abTl7IuE|Ot`gTw3!R~v z6ur!o;(Yv9>!p{FQD3mR57`dWDjbk(&+u+pD4N@paD8+U2V!{-oGUc379D^?U~6|j zPD^;fsk>R>t>Q`I%z3>D<1eX)ijotM-bX8md*}P!P-)9~Kj@qkPuV(To$wngTB6n+ zv!-sb_*fF#kI-C(I&1wzI~u&7uRSA37QF&53s z-}r)R#dUiWrOn>fo`PpihU|yHzR(>pBqummGdx@=DE6EAjHNsL_9Io*;w#LHC&&k) zqtmMFkkjbEr7d^8FP{-d1X|+bVy3Z32ZDXLrPXfu6O~qTFP#%QI5S}Q$m9LFJpWUSLmiq z4HClT^j<%UtwgycwETe1f8(q-i(9xn5hz_X^;%Y~aB^LiN0*3Qgqtt_NfLsgW=%1pL78k?|c1yliF#| za{prOv(+b*6I5U-{8$PNTz{e!4OW5k_E)pEfIa17OBCdHc7hHsM z3f)?czYTj`@MB)v)pf7`Lc>Qz-Mxpsb1+0lj=U$49eqFt- z`Sw#BW%AjHp4N!xQ7Ytv?*SF^wWruj5NC;)!>+V@fPt2J(39#f=OV~M$o*(Pu zLK{N|M9ZmxCiY4^o+v1z2Wvbo{)L^L?((U7p1`N)SZKqDyponh&{22m54v>6YyPY% zkl=B`V}0ci5!*whNtckMd{_u>fg|8l_3e;5@hMkTU3HC@#gO`Bc+a%QJStgbgiKMsrU3MbcEYo^-6|ls8R9%?2kq zKtfU!S16%<15S{*>GDCT2v^Y8i7y7e0tre(`;%1Wd~V#|1WZ*7;@#K&m$xj*v+F!emPbC^E+xkGb3Pu1C%_zTwNv2M660+?liyR7Fy)+N5v12;1a-c>1` zHg|98U=VQFBT5)k8q!?q;j=S7ds-ygr(t^Ew!@RKwyvld+l4y%c=&63*s5EFEwj5; zN&6{W>J95W&ZB2~1}k237o4Ib>~)KFKnMt|##=X6^pvHT^}T#`Z7^eT#Uos~!<^SG z9v|97Yz|%*w(%M0ou?KzFhsc3_bj_<35N9Z*f;LYNS*hdYMHv0Z2yR79%KCEpE;$L z%ypQXJj$yqwawBZ4Yte9(PSca%f&GL&iy4mW)=@}=-%rGS0Z4K(&TsWRcn*@p)0!x zi(b^H5x%gXZR*551{(b<){JoUvP76apk3x6#|jbOlu$gS%i$NEq7y1zGD&=&Ivrn; zypb(3JiNce8fN#oo$oU5qtcTbtJUS1Z?d)rV!&jQnpko4ekS1R z=hA-ISXUCaKl#5c3&3e4l|58nIo>pFwM_;PWBhyk#^&veocBSpql%tbm{`5bk)g=fRVWA}5^>jTnpm5XSK3;QN{!jFwYeYWc0+mgk7A)4=gLqjpc zrPdf^*LV^AYtIyJMM3y|q|$o|lI>5xjN9=>-B4UB*Gl9ROj?lx4DH1>PxvotD)Kk% znMZZt#6K@o)wQtqZ$S^|qQczCmz(jI=B3k&`f z;WgaO!+;F1RfI(Oj6lS4%!h@6NiY$5a<)e-gNZ;=m#O&r>mXSh#`jdFL#|Rk4k`vAYV#DP;&>-(IVa%F1j3G#3^B%IpxP8S@;pvO2V7& zv5lOaqS%)&X2r{v6#n(DSpjq{t{fP8hxsW)V68_C+H>O_rgB6LyqghJoP|ZmGbFJO6o>%ivLhgj&>Sg0X_P{2x3 zw%YbzrP<5AN-DQxF0+wT)sUW??;VG9$xhJ0KdADp2;`W%Er1+yr*8Th`6wO3C1Qt% z4;GrBKhp8m`GekGZ<)xd^C&~7mOMVm1hHQxEaXpI$6FGXBmXaK!Z-MPjzFPbmgk}{ z-oupb+af=;0}ktVqGXzTj|&((UO7fzp#GtTxj{tU<5=!^KZzBsD63f{DM2%2cZP@Q z^BWzGa&zd=Fre_+v+UBgp#tzO%dYfrl0i1%>?Z~(5PJTc4P^+Dqh4DoiepM*u1dZY zCvRi*Dj)GuO~FIR>;5aTiXBmyOIMh(%o1X>KFwRlBm5v8%=-Pz(X1iT!lw^<%gaz$ zTc1<=^NE?MHhYwI+rHqOwTB<{#K545Uyu@o?2h+<<uOQKn%IAQ2CgvLi%17dTw zlUB~RG6JOd$!Ld8-&gr<0JB)#1u^9KrJ78cLb2~F=+4bTB4!jmg!3k#G^-&0LV8Bf zqdnt`yX{(=0O^ReW#u{u9THz9UxU|f3!&rC!`gH9$mdMf8OBC&tc&}qY38irKaXw-oHIt-#+hHyLDCcEwjB`V|tIolTvch}7Fx+;X z+DhKH{zP#`5I{26rtT1mYb*AM<}LpDwPZ2kaTK(J$)-j%w*4$()9Vs}SwQseBnPC= z=SbiUAn3^gHT!2OFY_;1c<{T*h9--mv_K)i!GM&rm4`%KPYB9psDM#AOH1*c*xzrF zBeP!4?`Gc5ygM*5d1q219d|_)xB-dyo7!P=)hfRE%lDoCWM9Jdb(4s2yTZOg;`@WL z3@W(|tIF7dJzV5FR5>A~;Oz?pHo#$2y{88>)~XNdMmWE=tVDFZ@CJBD;!YJGTC)RN zr+@rg^Fi{<4x~dHN@1Ihc64sU$PDU^kEO10*xRd-q<+>fA=(lEMqel^de01`s7nHI zK&0OiphTMyg={yCb-d+XZZq8(QE{e+Njnth?!XWqPMU6GT3M5w1{f^I-W2R$Vwejq z#)BSXLZVs!_Q#GuV4JyG$5$D4m#>gpqWz7jhzr95;~5s*-{v3(r$T(KHMlqW+P%#^ z*SE1ItoasX(H?VKiFA-=daat-?m179&|k~=64k9edn^AFi6`U2pZP^lI%dE56vpzQ zF9XWV4YYy|F)Q>Bw*=(sTSMwEIglXm7(|n?fT?L?sm^2ywl<201fS=}@p zsPgv#vhkYEPp#co*IM^c7+W9t;UI5k8&l`5d#J0=X6g+?cDvh7=wkl0$r8Evvc`R- ziwH8d%z*ohH`*Up1U<)~n4z?rx*$i^(-1D*OJ1KeWqfQcK}_%KbYAA_jTqy;KYJkp zgrH*jFyZdf(@NYyn(dT8_H)8OInOpmp=@BSn-vXGrbgJjW2p+EGp*^BXgaXETgH31 z&+acBh)52d!T+N*(7AUtF|HqFUkcn8fx9I}0Us#PQ|hlrYqz3c0Wt7oy> z#X(2VAq&+n3Z2Vv=fD+S)`OIEOuU*@9$P2Q>`v4&tD-_rfh|jxbR5s4mPFX~LHV}H zsJOSz)hns(QCy5Ucg{3UWe@k@q5%f^P-fxv-ceH&2JSBH5QOG~76DaQD#VP7C+G_y zWbY6}mYKRV3JKo6Q~Pyc*jt^*frUO}O|4rRAaPe&Q@0uVs$jn28-6+Ur0_(EdJ+&! z_-~E#-Kg@KfPe<7SBc9po5W+W0rN{%5K@h!N_YX=JO<4LIwjtJx6XbfqAeKn>TRCc z4O*wK=O8ZZ%ZYU_@Aft|M9gJxy+*EjN@5?qJ4~hujvI)0*8TMoCfmF4`}vY zO$@y46g0Qsx#6+hj*H!&Oij-Sm--EQVT=LU8nwVG-IkM8i`Dl{uG<`#N;-$N24tH8 z_flYxX#-@q1{hv5>$k_<(1QVPd)~bJ&)ei2O-wW5(LY3}pVBXdO%V9Jp|e8_&6pFX zh2aqT(#2vy-I0Pyg3}+!BoGg1ncuA;?~_z`>h7zKz-9T+l>*IR#X|d-KgD52J~6uo z++`q|v(F&cMA*U+;;H9bz{aBT0ml}s2Hw0bIMEk?-paphJ}=!Nync|D4QpX0dgLmU z_u=DD=9MbJZW`PhMX7&H-yZY%C?X;nbN-D{LegvK zx)g)WbA|>Vy+#Edv4KoJ=1LpN?Y#p>jBa}5nEsNhu8mXwvY&KC(`zGj_mEBzwpxr5 znP7{{SAg=?p8778&O(-yN%39VmPj@bLWA+-?^|x3u{$pm$^$qMzzUuf7qXEa6AHYx z|9L|^-w$?Rf^Q=F-;Mia-Ib{BDuSwD=oAC1D?U=6nf&GZAx?i8GG&yE-%CJUZy#V- zM5(Q>%THA<4;RgZB<^C*&*8v#@B%#TKtwF%7iT5r+wL}LKjohE;`DDR-(f0z(SfHR z&02oRHWtY;Pj|Z?-XkOVYyZRPDg0U46g4C&Bt58jEs|N0H1P!hYKf40Y2kAt2LG=G z5L7|3Zf%OvVD{DJ{e4hE{BZek&SGV`JC0!UhTW%uGnW%h;1F*6C%TZu`t&M6FH(Tx zO)4RlMqdNB<)2}9oC(cJDpQhqkYjY+`q%4d5C_r#p8c*UOp-J9PB{p9xapXCo2PI^*i*8p&aJ5 zMVXYam$b!FF$a~eg3iLhYJ7J7`iUAo5}*kfW$ z-(ryHX~?txD1pz1RkCYdj)fUK-WMN|D8CdTepe``uxoGv7pJ%afCdl3@tVNc z#^fAR4Hw0)<0E@BP^5ILJl97#Ml_iEfSvvk7TRQOgODH4uDEWZ7%-kRtaa5It8*Uu z8r^f`ZqR~-DSY5|QANrql>eW(kl+zc+#(1o9T|`5fZ|P%f9!5d{xmjgYG6z~%J5qY zEXR~m5FH=S&cU?b;hwifLT2*!R)yA)viy49H^#nWZCvs)Kri?Tb}>az^hcu*)>*#X z`cOsg)-i>m%k&3Kk&jdyB)v>n$PBj62o#U9n|Ww-Hgul0cK79WZA!6Bzt)6k{}KJ| zlkx34tOb2`1)AuaGsmv=K>yw0xktFQ2bZ6AhPOB!mnL}B!8*U zrJ#5UEoTwppB!Gd=25|gde4t+jPI7Z4hv|Qs46~kz?hm?wE8Tkn9=34rp*487;Q?H zN@PAw+;niX)!v133aT+G-vwP8-4(aHjPF& zV_F1)(X8gPtXoIOcHKb(Zo*w#3!Q<)_@&h;a#pG#KHdoAAG+IiQekV#2tK(8sht*9 zA3X<@S6sEstbFX2mnLryEbU@rq4WOC^p!%UhqN<-^)|2}$T*!%#N2)p#UiS?z~HPs zSJ=Z;_oyT_2ChQ-;HidJD>D{aGUFS#n59h^P@Oy*)mfI!Hwco|Us4*(#&5R4>lU@c zEKvZePN=(Tsb&49m5$SA?Wg>IOU$rb=*TRuECBsusUXmE6bP5}e93pRaf8ZJsvYlU z?((Ra3=f92bnRQ3#0dVaMG3$E9IF9gM4o=6qM8v&!0LQ{F8=!sGbKYl>;tKsrCKFc z)SYP(b%4k7#OUoV5NFhoeF-)U^=R}}?{@ZK5pII4eCcNxjhHdw9&ZBU{hpjQW9GaD z`a3m&t46@{ktJ(~v%EM>GEhyXFLjBR+T;?>exB z1IfQ5I(FB0{XwonTJVmzq|`5yH%X^pvA@!%T=sTKn;>92eaI2F(Q` zuG<>lBgbfs?RFpyC8N&P7NM$_8nJ*x<#!{Qr)j-vO8O>4qRwdQ5K%>A= zH&@3G&F#mxEb^fNLb;PrPGAMgN&<-W)Y?#^`{m5ma(%Z1+Fb4pD-r6A$c68JE0Qdj zryQ0N*)-<`~PMCjVk%4;5`-yXlSy7GbFVdond90W-D{(ZO;?`i~SKz70x zgGPy0PMx7X?#Ss9PRr*PvU4bbf-#E=aOeFg3*jB*&HA+KaPC39g4W||RSq!`g0;2)3+DVrxjJ7Dwyb)D13!lCpY z2>F94UT1ZP3w1&v9(WO7cUQ@Kn%CPG$BLMp?U>&)|43LvgLDY6>Y`I*@#zoc&n0dV zn4wj4;wXcw&@c;s0h|pc4UJUHIoU$d5B4A|I8B?J_h^cvSm$2fVbv9^^~OVn=eavH zzR!D}&(K~4wmUg7EpGURDwxh4ad!UhxrttU+o1u)!$C9eN)T}T%XU}U>zXxTvIP?! zH}pp0Oi!%1_=x6*OI-C@AG4x1&2yjv1p=`A5$nL?Biy~nmq|~28PKVBhG4jHSgO=t z%v&A>VOu5iYfx4Sf!#FSexX%a@0%Ak^KcUk`9|4-BP*U(?-9ob+I|rLH?t$SxZGT{ zAfSpf4>*Xwnbx174Z_7|{mEMe2Udt4K%*gnX@n`z`f^P2zP--&L)@pEpb*U#cdlR2 zUOErfZXVS;25|gMZ6%@1e0~>_NvAK-Czw{b$*DEJdD`Lvw3pf%EQN4$sB`B3l+Z0W z6Z*NA*pspd7{tE+Q3&R7NguFiP^JW812a*=q0@raURz}r&)J!SBw+WrGlElSA z&G~VMeniEGRulP=#r3E9k)A!eGbA|%7cWxhAW$ToV;$(JT_^gq1VB zde03oWXljOAy8`5?Whw9?bx@eQOXd=ZqANo}F|JN-GW(+|~jH8kaz~i(w^8h{s^w;{oc(j>~) zs=T8NmhrfayV^Y>P-!lZt}vMqzgX4|*}?T|tAZ6T)1yy=&3k9{l(n&(H8xiwSaY3E0;Uv($_Y_uzv2tdl9 z8X8G!m4k%ugqs7YR<>Y<5!Mup2S-mt3F|Xpt+HajnZST!u%&}|XmnEUR6y|p^X1FN zg6UuKsgyvl3&p}NiuH`Pyz4Ha4J14VKo;0DSVIg-s0f^_JG^z>rrGOTdCdR53e+(Z z3ivXjWf~?`LZ@B))xYU7k znD*sjEJG10#@ z>OK66ML9u*N?}31ghgQF>pj=vQ;HUX9{6i6zt8-Qaau`_J?_(lfOb_0qi4lO-cdb- z^CDm>3yoczOg4tjtd72flySZqPuue#h!b6uuleDl;K`2c0MjWe)^t8B5bVX3zbi!o zt5gaS0L0L*0Vj&yLvj~7?Q}Pt@%q`wVZz8N;3sWxiNEj_gP?ZLa)t!}mcDI9&YL;( zD6K2LR|bXUWe%=|fLNVfbGlqqwj0bmi+kmX@xN`w=U+L|clG(u;1%9cCm7c5&2R?c zKG5aj1*5A9n4x%h_LRm3_7NZ$Vm8}oGt1>LpSp>T(+lYtgg}&n0_^C=o!9RS27J2gqx_py8s-l zxY&F8v_K&7!4Jaxil;c{($8}#fr))bNR+JY8poR#6))hiA_g~IW#=5EenhqWPZKOs zuKm{h;n?FtPcDQ(*t=)mYJxp^ZaN3{8#&RZHbdqU3QO@Y_@cZ^H38=ixLH|A`smew z#OLd3Q|>tsHo>jFQ+`6D?5d@3-|*QNncmH#$i060T}c8s#N$*)1O>da%gR*wIKlKY zAN3?1oEk|TYX6BZkPEySid6`Ou%d!QG*4`lY&R#T06L1NG`jCOOqyIUkOp8zOGJRO zZ+Ca|zo5!-)Z!vH**@tb@=Jo?WkEZ^Z2up>hYU#h_W_CaAcTPD5j8hA7QC0li6tvT z{W7S#XC1Jf0|!-*tz5RJz>b=oo1U(!?y@zqQ5LO^I>asICY0u5D1@0i5N(wwn}{mID&AzBxY|4U?C*aI-@UY zmG7{pp3~SkTHzW}Bnz)a3sE~E#KNCC7q6fJz=h9^yqJ8E_;yNi!w|`j6&0h2#o|F7 z?e;0s_dn&%yexxvGBOmznk-K9C7>vH-X2k|a&a8-_LMs((x;B6F)-lZYi`%al$|ai zEJNIWQgCGBgH54PN3HSQFZY4J+?#xDVokQAn@bP**aQP05?;%ke_G*r<3*tJ+uG?K zS8@fh%GTVM4cXC<-&!gFd-*kE=bxR@!eib^LA34eKpdzg`ZK=_hGc9{1l8|5&1cE$ z_?74J=l5_DOh#YC_MP|r_v_!D=wH*K8&Ga*^7Fy}6QmK}EduA}084Gv17daxOup|_)0m0Dy%C#2cOl7(U@fy> zFDANs)c8vO9$W%B)-c_HGYeUa5PzoU|Cd1#cR{*pO9|5l1QLtL-m8K_FDbCXize|g z1K{+x)J8#d-(G_)*|`@kE`S_o>Ogu6gYwQ1a89OJqJZrL-c> zCmy|@`OcXIT70K<5})tB152PuQ%}CcH8aUCh2B;6K7KtqHP511uw>aNa&@)3DaX!H z+TwYh&E6H4xURTmjXUt)fyBV&) z*%HSzGjRGu7jci}Ao!L5$pY3`qATEN1qD7*De^Ta>9=H7yU#rq`$R8Ib18+`rk^i@ zI?L?0SPzCuJZDw^CR*y(VxE92_EARQU1Xb?x*)Pagl&%gGnVo;DW` z&^C-rx79Mpf=`cn_2yD%qXxWEBtaSY7dLR7l_3 z`uXdjm7nSy*1q^Nr-D_RJ;Ps~0M#$Uw0InUCg~5=o5$|R!Yo^O!1}k7{ZTK$jhu@O zaAt_O7vHc;+Mh6EJd$;{md;x&S@67IAb~+e$O-(5q8j|(u|Zs#=fQ(XUZRg+jAo+r zzO_LdSG-~cB9FY7hN;9H;2?d}<5;t#@Ii#&BSLtG*wPpVB8zk=H$e8!B2_VX(k2Od zZ~@_KQnWov`1p)Q13`9p)#CU{CH{$?ujA-_*P2$9^=Z`d6t~pCcMO4D8ey!2&o!WzWSG-UVAW(#e2yiFd#v}2WdKgf zIFzb_NxvX$>RkHzRmXEcqxMiXPZln7H~6WQ;R!(fC&bBSow)zq4uhrAopg`KzUT_P z(aS7^N%(t~z-^}q)Q?*6n6S}U{0#nPZYu#SLT)+jif*p&$Q2QUxQEu*W_2h0m?i94=c3j;{cyKbE&DL*2?LEc*H9!m2BS4now$?S^ITL^ zbzdv6y*zwGdL0>1vLnf8;gQmH@G>o<7(f0u}vv+E}0c!bn8a^MOY##&y zXcvL49qKpi$k84DGa`Wov?9O28IyjeSLV__eiNLL*zmOf<;|x%o0kt&j%*6+Zd!Iu z(gGCjs}s*O6vGRBQGDs=vaxojrTAoGU}etLPMUwaOx+C@hyp8jSgr}E1{~{IQ3_bg zfgscB6r|HXjWP6Y858f_0vrRW>X_KU47L(TZ^@~E5zJ1CqMM3%P=(guh@c)e3Q>ky)R;$^^{lI)|TJo72N4N+T7#Lqd7z>Zt=J=~p}e zW)`=ln54B;iN*gF!o}eytPM4kj#d6(lY0lyxsMzK!siS0s6gfz0y9-RNvS>|rH9 zd9CkJ&#J8NJx0G?(lcG1o6UIVFG|G9s#YUm@cI)60e%e9!PE|=%~LL}i?|(9CyqJo z*-txl(9fHbbOo1eytHz8<3}P0;YqhZIAeEsCv_8UDDFox5K0h!uX__8GbbgDQ+l1 z`GP5LgZ93c<^HU|UwDJ0rWp7R>-Puebai}O?`BYSt|(gkK`XrgCjwI`a+CUVx7f*G(v$kf|LBJRpJPQDnpk|I5sSZ__k*vA59hoYb3 z?yHtF6<=gj!JN~bk{`KtBUM(Y~;wHpTjGLAf z4p^s64Yl_Uu!ulA74I6*kvOxjKubvuQG#RQK&;PkLMT6nk9|_i{7q=J7qz6Ui z8LOAW3}6GL>E6I(`t-K}u(la$(?_}&jnPjl5gawvXa=SGblw^ z$-*Dgbo@IWr7+L8z(kf#DU4JJ4wHaBUd$3zk4fEPE;ZaUc5-#de|m51bG3ZEGp^?I zxDtK<`TkXBCb8IAhOow~B~1gC4}ZWif=v+|qP?=}Bs0&*FsG4l4;eQ((p&Nby-A9q ztH^S1gnLmW9r6nwyNlT*01BX0@<4NWnJU)VUJ^JBvj4l{29dYJ03`;%FE-39hUZ^r zJn(W!PvxyY%@Owebb&nFcVcmbX{|G=m@(!3$UtOEmO{c`mM|AwZhHBMK!)~cMKOHY z!KN;Fpie{3{4z^h{D1QYvdiY#1S~#3SNrs9tMx)9moA~qG{NA2)U3kLC@UL8 zv9Vam9w(wNoev97Ck-78RRJV*m>S;kJRdW_tBDF|b+@Is0HNvo7dL`HnyZ`1SLi%) zX^L!q_8T~jk3G)@h!&=NlN@=TKw{2_yq221kOD}A_8Ay(BD@TDJYDCy^8fgz65Gn? zR5U3(PD9;lb_Bd*rep$g17XNPU|^VoN?<+K;gI<~mEk7f8^>fuP#_VBK&BpJwntpO|D$H*}q@$_NmKZ6RAk_rO3^JbW z?Q7SA8gL}Kd`wRYd%hHte2ybclCLXLI2AW0kqk$`l2lz7!Y{l>DOiX|xH(%wA<~{Q zvry~p1ZcdQAq{%(-_aFmP(Tb=(F+qlY!SmIW;U0^xD+>{Y}T2T3PXAB*0P1<6eNI=5>NAKyOcykRc6UQ! zJIVhoYWqWBX8A)SI!8g@E8wj8vztD>*tg2lq(jL3a%QhvLK37sPX)NEkMq~yBBOf=hb<$JK1X4GYF$xRyI%!1g$B@(-_kj z1w^TN#_$nS5#ccG$PVFhOR?F#MYP$%l8u+-bmi(^)=k4wTtNuBIDEhR-Z9_sMCyYF z52!eZsEZ~sN}%6!0h;rQ3O;S&dMqlSqwsT80;3be>3nu87DrEysDWe)15F0nGMWYp z#9^qa+JC##eEHUSYEvxvgNv>oLuMu-hF%ldOuIwIFRdm>2Xq%=Zp24cJ5 zLu$oh_ue6PpQFWspr~bZ{D(G|QZ)bB^8HXT1~H<3hT|*8*9R5C?PoT9+h%>+CSHg& z0dP;`rU(2wm6$aTzUN{LIKExOrLcGh7tDapV^$8{8}5FFel~N=5b|4aSNQN0BP65` zoNEf6!x+f;2@XEBleiy0?VJn6=p92K$>Dq_hdmWbGznKu0NXq#KDvc4re5t)Y%?bE-nPiw2O*4@qa|NbvMG&HpS z#Q_Ycywy@t>*qwroj}dmx|aDMalynoPD#a%ePEdlgtmG; zH5?|~Dl_~+z{)$boy&fb`#IZ>I*+zjAD>J$S=HC~^v~3CQ*;neI~bl^4#>YEYN)RM}-!`R$V1T#43R zGZymjzTHXt;G|+9VA#3e6}PS=J0U#QfBR4R*T&@SRBsWMDWimzAZ8a5xdI-dH!JXA4B&HNK~G;uk@{(Zxcc z^K|OhNfM#ts}e|WmF~*+Q!F8IIG~zdG)ZM^*yKwfCIF8kuL*4KNnAaulBAXZZ5R6R z75GclSs1chddhh~{I|t!lx=YexrE9iql(hw#&!^I)hPBndMas(*@zvgu=+K6IVlVJI-E^S-WwoPKSsKd*w|7}7)ivSse4F{fnS9=a|DIS2} zx7rXRAZnjdON$(TlsX$3SUGQg(4b^mKG*EQ0iR9%ONieAGSJ{M6SQ22D!&nOhjcl) z^X>G&N8EkddKwj53~DIDS`R*^J!&FKRR9lVh-DB9q`d(BpGTBHp$0y3{!Hcvfj|M) zX8g?X->HNBDQ_kzhmZ!S`v(^&q0x(hCKa;5m|*(i6g?Ha4d^$Mjg%4b{FTOz#@A;! z^0u5*fz2Gr=dNq?;3fIW_=yi#5!wI>ULh%x>&=BPVhWS~4;XqtB`Mjh%giUMlWBASV_|(B*2)1y@;>o>;-RuUadVfI% zJ4HT*%T|vu15zj}w%kB}`H3y8SBx7T_ceS=J(KnBktpzy3B%;`VXL0`LCnml=Q^%&ejPoll&08+u=-}=>r{BQmNpx_)2&V7|j zmS2q;a%G;8H5$H%24$;m7466Rp zV+wH^DB}byCAKk*nj+WB%v;OmL{In}KbO&C`}{7wRIue@uN5;GAVv$1RuVx-?z#3H zP%+$-2zaP6R=->WSz{mCwyW^BPCUso0e?Jl8eqQ@IwU#khBvfX#&9)xZGlL)xO`v3JO} z!Ri3j%<>@1vs>Dg|2fCSopW@6u4fH~PA~C$0HmB`&h_7SU4@P)sdDS#ug$r$am6I` zPZq^`=7_Jj;JK9ERTjfvSHlE$8kA_tx5R#6Xurwk?P}i_MFuL^;{BOt&lJA%5s63O zlk+i3vcPl}8b3omZ-GOok%;1f<0dAo*eAPvmMWZUY&ac^nSO)4gP+P7%*frIv2!wI z8S%=AUKb~0F9~&h%eV!;5?j8!6Z}5$U1%KQ90aGeu+U-mzP~0x|Ih&WGU-V@?~$GX z7VCeLZ^DzkUKaVj5fCP@#mmrr)3GyXNqAprql=!PmERP0-_#;sAq+zx2?5-`pTB5`=!>$JjYj9wo;SJ~^rMX%ikPhDs9< z2K9Y|Dv4`9V!L@Q*v_kvn3pzln=!!hFona)3|jFB%KTLF!ovurkiHn}kJp(ri?f4=JuJE zfh}j`4Y)STOY@_;ETWxKe5BR<*}F{Pr|U|JOhlg9NI*@Y}58MWy+e&QmC(}!Oq%>Q+82y~~ zr&nZ+AoWqnvo?C4!S>Nh%xjoHfTf<%gJ^h)w2_4KyZ$~6P*MF!v>>Hr{ z`rCwI0=(lwf~x$0elRifLe2_lRF@I-s{YG=1tjQ!pM^&os%6Ui2hOp7$=l+!I9lPc z)#jhM<(O^d`U2d)H&9f?*qVf?tg1aVGH`6LB2ukZqy}aZDZ$f@Nh3ywaRZv7RsKTY zvr^rhQ18vBMrp$X!nj`061GqLOsDisEAVBa;X0ADTP^ya{oN%?Si$z`{SG~V@@{`% zXd7S_p=;L zm+y_n^}Z=?EB!&lAgk88FZ!`BO7>+Z!hQRC%?{>0|YqH|EG=lCa# z-xX2peeI3c9R9iM>jQqbT*ny4fGACN*|G5d$a~AbsJ`!gcxWXBq@)ByLb@9yRk{Qu zm6n#FV+K*WyF-u?Y3UfG8>DlP?q-Mq=05oTd>{P&hWk7qUYz0Vz0TTeUF%wF?Oo|J zyfI1)=*hE15@V16;OE{Xrv=J<_yGeV*{LbrwiPm9t=S5m)%Xq^@5*hmKIf;KtGW9d z$2s7z;5?S?a>zO9TWnco30oh|i3yzV0} zszDD_gzUesIti#ODknZ8y{SUg@iE{B;QCfV zBUiQmAWgOAS8bHH@Z<1}&uI^99MuiCfHzceFebQFuL|QW+wJ1m(}KVr7uwo-vuA{BtMZL6H_9V zaPGP8apFAB#}lfR!UrdMFC}=L?~KpCKpqRVH-0QnYba^GRWf#?E!^LxBrIpQDEL%~ zIt$JMZ2WI(sb|)O9WmbqM-d0O1+Sjd)l3HyGBAbPZ^>p%Q~U3hG$M3=Y`W1%XN`C) zKrzA}a}V7_xH;?)k`tEpO+}pD-374UTQG~d`x%m$^%Z=_c!YRJ`xH&hrxAjR{z}O3 zooxLQciqj~fhN_I^do=_Ev0|g^H1CqKe|enRf@@U8{WxSyZkEN{DUFtc5~5fKXZD- z(CbXUyWW2(5s48W7SA)Ovp@{`)arNg>M&c3jbCF2SPXGefZO@_u)^Y7!U7h13x;#b zd-~lnV;670L5SfA*WNrdci5uAs`%ZRXyTZ4DG$*>3(%Jh7zIu2b~#s*-(L%GDvhu<-2fDF zZ>l#&8SX>&Ko>8-9t$P(0r2^yW04(%iN_;~pQAAefn!{a1XUa(e;pN5vyY+HoqK4=t17^0g#hr% z%+*-1m$2ng@#S14Tyw5#*A|(9Xf4BE* zPc+nPkA!0Rej${J4tootiO_vSm6+b%ISziAXOvqNS6)^g+d)pw|HB|a%W2Mvf z1vFiD^A%$cdGN4CXjyvGjb&7H?S-j_HvFp(m^G#c2!oV(Ifr+=Tx6*dPF5~QGdJtL z(W1w>xq+g(ji*c2yOohm8FY@vX zwBO>u(`6#GH@{;Y`ESC4TJa9&#w$7d^FPs#X&nW;8sR0`b32V@_0Q#~yOlR9)a!~g z8moo(>J&fad%`kxQ7~vzyxaR`VVy%EX#w3#4NIm^d5CZMl`^!)=8b&Kb$7!6aF$=l z;}nB;ViVXLQCv2ywW1Vay`J{s_T9+=1b_lW5=ZJs#QokaZL_a3*W+I~Rf1T|n<2DG zsUXxNOBk2(-K=_nv2UrK&DJ^?hWiKD(`5^+5Tuq!ay1^R?ZtB4DCNd$4n*4;T!es4 zxw%6*fgR|XVR6Y>gff6bVNkGI`?i&IFs^q{lc1o*p*Dbhcc)H3&2{6o^>}sC*THpI z!kgttWs9S4rLZ)u?t|LaNE%3I5DmUz}UVET{}IWp8W#!wqclZOakd2nsK4YBevTqJMI% zSmz{sUTdNk;vlYZ1@qFEhr%OI?g{6b%3th4gWA&*=&^_UI*)lZlWwo;hKq&|#C29(1bU9u(J#n+ ztu~8Px^*x)C^RJ=i~TN3IUd!pIOOG+RK}t&3}^l zshb*tUp$xjIt-XSlxbdA^}@izpZsrjs*qRz&OQ2ubP_d^LbK7O_l*{=iqm}PjYAYr z_=BAKgd$?`mA~vha%V%-;9bRH3U5u==U%kWdpNKlHcqeK(Gk$*Kl&tyL7{j%S-`IUiN@CpJq~Lz05_#yOUb7^{AO(Ad7VwtRO!6baG{)wO5~*@r5JK^4@i7 zVIxv)Ac6qt$GdsXn)ZbSapD6D+B-D;vpGLJ?{5P8X7t&=msfrbzv zQy(VC;#;MS^ix*1-LktqDC%lUzE}3?nYz*+m37~z7Dmt=U+6p!&T~yNt}8&C$l(bV zBxL>d`8v$QzWFVvb;?k#hc&@<+PI22kQ+UVFq4^$CJCU(CR~HjcguF`DMcU&>1|{8 z|1RcKLEn_2=LLmTD`X3wPmCKe(sQetr_x-RGF~#^0KQ&Ip!e8E6Tw}a9(eTu589ye zsrt1ozzD~%UJZ;yh;#O^5lC{v(|Y-Of#)6qbVi1wC)3fcth`U_1JB#$L?(~aS0~K1 za$R%O$ak$IVhXvOdk(je9Zk@ceN+M?c6mwb#WAp>zouH_uK0$S|2C^IoYbn(v;Lc-2_@m2#g)mL=AV!)wxY` zN`HTJm0z<>E-08_#QKJMHSum)z>Y zHUsuasmH9u#v04!UW{3cR>8Nh52N0@&+ zfb2=;n&%r&+fJ)Q^VQB}1}Zi#9*3t18C{MuDzeUylUmAS!7yD#%Jj|k^axr4VlNyH z*`_ie#P`-X6Wu5Y@P}PBB7(G{Rn&kTv|3rSivQDJ0JuaX)Ntz&6JP5@#oyoA6s}<# zZa+r^02Zs0!!@p*h(%-pTy7 z@v!#WDn-v$Ndxxw5Zb(+MF{2C6N=5c3b}ro9PN=Se8tq`unR+b0VT#Ggwftk!QY)d zm75imnWZ@Fc@gH?-UxCkYx9Apm0;l^vv zVt`@e*tQnK9nsyc%u#t(+e==Ip%G-MJ?TQj>`YbRS4fEz*jEEe1AhsQNV;;68j{-+ zn(FJqTdQ2(7Tz5_OA*JDwFXWbinru17cV?q34NTcU3~9e0HZ~aLg8v_#V&#n{$V$7 z9XuQ>wD^lQ`LC~YLVEVo;Y|3#X|6-p@K}XypKH{QymMwgaxC#@cCb3(qUB1v*hpWE z9{IJbK>?l>a{jyd?#O`yjx%5Jp>NunN@T?%@7}&0HZj0@=E|&xKA}Uj^FNWh;a1=) zkCB)ay;+ii^4<+;xMrV+6G_hlOf~U{Gvg%@!t?27px`(?>i9mls1NhhdK#5(z=#X= zuw2cZ7F=jtTN-^SE9$YxZ1`z%egsn}XqZG_DC+Xs`642s9f$-W9hCd93~TchtVn9E zYuDnF6a|!aNy8sT_s?X%n9PqK9Nxj>a9;Cm4t=HPYqZI~NOy6_-F%{%&p2%(@du4< zeXX~>(9IP7m4Wtwj?lfD08DmAmF?S`AFz`jEFDUaTaCFs*zYjx z`y3o0uvKC>hJCa8s*Vc3@yZZL7w*2WEC{YussoW#M?g66H-b_wg05evIm2xYE`?SW zd;)SGrIPGi-fYi)t^DZNv&#$Fe^s|0qO1j)kn(6+p5rddz&{Ywl^!uZ63IJMPkm({ z%gL9C&f#T6teFtk6lhq53++}P*1uS~8|BTK9-Y2k+}X_X6YPdm;7||*(Z8`rO&hnZ zpvV}q_85ezE9G)^@TZ8n{JnYA8q|K=yNvG;p3~>-Hv+mnQWLs$E6t;#as4iU*SKtc zpG~OGXRzB7T8ZAaS}7Q2j>qZ{+FQj6HFB*}s|=rr0oj!+wd^o5MJG zn)!bB%tCvSa%&s!*l|z)uaU(f33o{xP9{31X+J*-Z1d?P_J1b+RUv4AP^+3K=d@~+ z*xe%QwGrBp7asDRkZQRXnCpnxY$3g0yw=_hRl79l_QJ&Y63q8jG+au(zKvNApQaYI zqDAv2=IxLq{oEAfGNMkzh)PUZ#z(Y)`d9E4-aYTx{0jx$g~9pfdQz?mpZL6M)%mS2 zZctHq{@$bPVJd^|3aeg~B`mXAjD=u*&u;IA65il^`^OZ`lj~Zo_$2?c%iz=*%#eabBWyN)tY?nPCqD+b5=Lw)~JshBv_nfwGlXgHRVyro!2dvfo5hGj_PN@2XijI00V zyU1h>;Gxhx64|xi7|j_st(-|-b?W}2-Ao@8!2Z8l0BVGTSq;~3r>r{v?K~ekhm1i4 zz%SaG?|?X1sP>=$$vgP+pH?!a-)rTp{W~#`gW}c$?xV<{?<*hLiVM$)@O+Y6+lRP7 zLeZz=g_MZIgGR*g?j}UDd?~Sa?#lFsOEeb|&v{2wOKYX|c0b6q@EwrSv_85tTHYQt zKo>18VD~t^r`T%qEZzKi4CInpFp8*wm9}6AZ? zy@!@Be(yT_Q-~=)bvu5vd(RwFiM?X`FHidm!tQR05#b)ozc$oc!_0=nH*7jR{&wv4mZ%-;t0sF8C$-l z-YFMh0LN%;m6>|k5BWux^IuZtNgqSFoS@}r9^EBz`YjHR#+HiZh9!16x*8^-Rl@gHCf?Jw7w5JY!ovnGIdWZx>5VU3 zBs?tuyG40GLna!%0Aw5m})9$6D7kshPhwX=Y zqA9p{nYwQKdga9}zU8Cuaf_SE%$gamyT&c;a??Mv2 z|A?$~YK^1{B~`~|_H!z1xNB`acx~0~0_0Rwl$E0PWIu%4mq~&wab{IM(m5dlYo9H6 zXfNJgXc74Rth2fp%CtXn>24btwWUMu|JZr!+wcc*%bs60_ruC+!Ra~tmxjHeOOn7D zmt|9Uap+cl&9b8korB-3L^VYRLijWnyN{jNqs*>0mRV zx`9oyIKtPgIuX4b4YT=Yf5y>XVMpQlll)8t(u;DrnXcnq7z8bx*~9O9B0UcIg4rv= zhpZMuZ{t2_oKlP83l_*Px-m~22h?zMH^^*gu{g3}z12&0UAbcdB zk9#>9GzHAJ&2}4j zrve)nUxs;~PsRrxVo>&smCISPCb{|~FD=eggJZpFV9U|K`Ym?6(U$1VZrq)do)U$0 z)qMNH!6ZCk>hjsTfh_E%zo@#aGU;oJGThnm@J~~?SaL*3uJvs6wcQP7g*$sS_0f>U zx9su~J5+Yrb(Fm64dLPOMHHa%(tTa-!E&pVQu~|aO22@zfa%AQX2gQ$T>G%gHvj?d z66bS(0FZ@)x#Z2q$%PjtttDlv-bSzt9wiZ=gtA6ri5IQu zhD&Bj5e8WY|3SHiL$rk|xg#9dAw{=Z7w> zwrJugFV23}G3B~CRmEGqA3N^!NAIp&6Bj15ra;mpz7>eh4f?iUIH5a#r^bKhqepM< z0{+wygaVuv-BfP6hYjX;x;aJ2 zJgOV@gj412s!o+Rg+5D(=X562FKf2K8ks{`o9D2w=zA}gIt2ZZ+_IYw!cs@b!+)bvm}_pv+q4LO z@q9mgp+RcmhVAq|!X9+|$B~9am~O5Ns{p^qJ4t7yb<7el|FAi) zJFc8H>ZLay9QF;Jn%HE051*ElL_OVb9*hQwKQ_mv8v%(!Z@&J)&nYN+z4)Y8u#9W! z$eb0_eLE%n6Norh7hiDydOOszvTjjj+Sy`D6zn-%>9o??TwK0Rfklukdk;V-RUms> z72)9$lk^KW-wLv((!(X9_i~53GE@65r2KQKv(ec-wwtTFj&?)7P0p9B(hHQ^`4#AB zdrX-MHHhh*dh=~SU>#=zb&Wz5o3J55YCYLIXl4ZxI13qVm81^mfHDLs|?v=A> zjn+FJoFrT3ZJ{WyG>EApUFA>~5Ir_}a}yvvQ9da#4sUBeCY7~(MIEdT5Tm{lU_z=k zK)Ee9{1vunZ2xwv(=BCrbpSC=4TaMi<_6Fu`SRST=rhqE?35bYc(Ud8jih28fxwb| ze&>0L=#MZK(yJvi!FpA!g23rz6_yX|N5Ajr)C76fnFFVw>*Ave(l5)lyF)i=W*zaM z-Pr(CkK&rTdF?KD8|Fn-r!Jns8}R;adLdao{bq*1y3~%EUB5nI3#eC$Y%s+d&H+Oe)A1z*g8M#H^`DlfV#7fp)PGVi7O`a;Sp z!axGYI*Q6QAb#f9X>`V+=cWqKQK{4~2VEbwjTTZjs|A<>cB;}TcOz}_NkFu7r zW%0*NqWOaORG6D z#?U61>h_evl9MVwi&JX=?-;E|lyy$!p`5gyqSIv;`Jw(pJ` z5U44Mm~vRdDrycOG5TcbU|@jLRI--1@XN@<6||yy!gER_bXJ3Hd^Mpv?eY$jcIPG? zwCQ1W7iudQ?N?4>Dz6p}!~!naDyV$*&zwx{){M_m%`cRWVjY58_8>JklEzxcYb|BN z(Ptsb&w+VD$-wC?tD!2y3+WKcqIS%Y5IN6j0y<)Pf_-f(-Ki#F=r;-BjU zO=YXJQIM~hO=p(VwW1sjK#ig|FZLmTwF(sG^;X?K@^-5WLpno;opy}94MS#~M34Uz z-^LdT>aWqmy!=A&EqBls^b8*_#)aZQ!1v%OnDALeW38XNJ%aEEkM-nkifoMtHvMU) z4jf`Cmj4aduWVfHbX#tC9{L^DmwWokR$12r)@Zd94k7DOhtM>pEfXw$0Jon3a_QO!j+D$Eu@3t~;@tgT-%Ic6O?WF`_?~x!? z6GNaXAy6%2ZPgaMB|Kjed_dGNZQju2G8~4nssb*h?(y6^9TdS3;Q*KMUDz!vIU=fi zU8M*1sNtzoyZN7zO3;_1KB`~LcKSZ@Wbyu9$5=mJk=qmIUa$CWuM9+)o?Tm1_-h64 zJNKlb;aOS(P6n;Q&Cd>M*1=V{Pz0Up)Jm)4JU`Ou)k&6n)cj843saoCtiQ||+TjAa z_;Rwa5hDn7owWnTTpCbN)5B?RmZ;6-A{suC|AaXU{1>x=+_db}CX0^g&t2uGl?9pCb6x?C-@R7L$*77hv zcA!=3;$qei`Tq@>S*I^H^%h z?P0&O*jgk51v935EnR<)229IbY1XuhFAWkN16g2Q0}wE9c3#tBrm0tFc?p8*#;sSc zO>DC~MW`X0|3h8%pgxGkpmQ6W23g*_|K~oB$-W{Hr!hUWEAo^YiKI3Bc$u4fn(!j< zSLIckFrsEbYnv9*R8`Vy&Tz0_w7c3BAs#RV)bXJRcyD*+5VO3#q^l{9PJ=D$dW6oh z6eKBbKYe3z0|LlTo7b}WaTdF9u{^OpZl2^f9>zY0@`#J1Gw0K4AoO=4%*>-n)U7P7 zog!jh@fBWjI7%D!%7NSo1Hn>FCSVku98aEUpn#HR*KMGZ*>-0snl?FHbe2PGcF~%8B58SI%4cxDe=xt@ z4qP7$D$Rr^EjBt>#Vg#Icnr%LvwW=>I^N}28Hn696b>ScC%<2Yi=QyE(cX>mom90Lj{L<3jSxU?9`~?Nzr|2#-*a9dVuW~a1tc7x3*-30 zBg*Z?!HRWip!l%`5$mo1In6Udeh87X@0JAHD$F4TWi2#p{3CMhVxdHIz}n#X^_Pw- zop%hQzyaDPMl_J^`wZU$T5F|$IoHPu**!Nu6E;V>Xi;x1#-xSLIQ(*AFn)maMRh=f z+W#i1FJXxc^Hq;tL|Z_+>BtzrEn-!|AjWesQ95pVpZ@?KftSgE1tjrho)AbEUvnKT zrtg+7If(~`yFgf+pL9WTEyCJ=@IZ9^X|p%Eud=hJJlgKFW` z8?r<{lJ!TXOoK5_>XD%_!4)Y^S%xI)^a;WRC5lc!=uLs)E zIxep2%F?QR$zzSb%!)$=9Boe`I)wRRf3hO_`hj}7Ro#Pf--ACZU0Dqm3~IG0!i~p@ z8RJEJcrg~Pjyt=RuChH39Fmoxk;(ujWB1SQ{BY0Il=jHs@fyn9!E!(Oqgg)K!e|T4 zfaiSp@_7E9()043hnc?gEl}{sc4X0BYBL92e7>C-n(my$kp4&J$l{d9+igOAZlne{ zY>FB|U9q=)9dBr?q$_5cDEq-c|HFyrmJI1HAbZ>L$pXIK-4r02d%D*xv>aS-oNih1 zLr3{sZ^%F@T;;4xYmoWm(mh9o?5+Ps-= zQ$!2pxzpzMEjz_k+3^!9S|=hV=DxvWCH|8u%|@y=QHE?Iiq1*!l1;6g^n!p;*Fgj_ z9v3Vd97nMW;D+ZymsWw*0sOBWt(PA;H+B3ZZjEZ+i+&=+>E&3Lov0-|Z0IjfqqHfW zp?ufiwMBswzl79M5dJtQ*k5LUK`fw97 zLXY^aPl>tlhf_-nJ2+(2T^9u2%>TQtswMl+BBu{bQfqQ5W_A?A&PE#)&5sf~M(|O^ zO{iz$Y$oO7KFr(ys&WkYSYnS!IT55Qy(YH9W3cwz>iuQgK}hucA&{NlyWxuet2aV> zQUx#aVKNq@@UVI=eY9p2y$UcJ45nZFvA#nNBtSP$R`m|ZQ+ zE#UA(NuF4#$GL`DNuhgymJl{epAw27=LllCo|=ysWsse>MB~wJNzO_Ln!ZN^*SFEX zMR^&;7=)r&5PB&t0^d3C#SD24&=2_bE)H(k!)~LnsFUwzE8?>07`5;-&y!WNKEy{4 zclqmJ;`CewSh+tMeu<6KS>a@%;$h8gwv#o;Ya{^2m%x`GlnARXdH*oP}P@YA}1#E;LyiDV06)fZXVCF9@!G&PUo?+#6#t@~t=U@l6 z2kBJ{o%O-c^{AwFfy7vwds*bKi~J*U~<5xMx2 zsSG8O1CIka<-Y*rpfA}E!*@Q&(_ALtLWw^u>hE#_LEwivHN<=m_&jetf z4U-+m@OXiVYQ13xJs-U{Rmhi7f+CX{)1n*#Gt%I)XOwB3cj;59VNZa&{$1wz=w`Sa zCVQ#KgFLP%&*1j?;^pMv@? z>g+H7I#lj&j3Z|U6(kw%OAKR5VK#^2UKupHE(OswNr4H0(&zgtwg%|ai;=Wks#|J> z*DWonM43Ye7$9i|qWt3~A^-QVRp4P%Tf5awfCj_y37mO`;po6lsX82D_&|%nzstfG z$Tcvf3(ac#Ng~!ei2l9`7bU%P>MwmU8cFr9`vj;72dctfE9_D0^{HhYB2}K`@P_Gc z4I98ra`{!ibO;NV)_DZ|KVyFzGh5xlp4MqMBr?7%u&k5w99-sd?GN`Be*u2*-vode z0J!6G=6_1GXVhuineowCqWeWa$>^;6UqtJ;Z%86>ih!9+t?mlDWpCDJ@-7Sw@;&EMOm z%B29J%xuYB_LpOF54_obK1U<<^^$nC;Y;Y?BIgmYIdpef(q zZx>+h;eyPBIgnCH<25@8)1T5V7e5@->{}wxIb1*H4=!`X$mt&IdWz%Uut{sw_=oMEYl?JJ#@cq@$;*mgqU4~dFft-#=^LVDK82J zc7S~_le#)+m7S;XTs)dTrNeT>^5?rb9D(Md59&%vz{_kx3;*fcE3aa-R@K&Bb@?|c zSg5CU>uI-*_^etS!IMX-RP2*I!b+JK0uq8s+Ey(XNXIByaZFtfb^;tyRY%ko`tPZ)uF}wGgabxK(C9&}LjC(A_-%`-{)U{; zCvko`1+RF3&m6yA7R;6V-xweFv60@~aPxW}I|G++03MmVybcKeZ{Fhrq(9LpAvn&- zhpt>MU_3w=BNEn%)zAFj2NL&}ANn2~&Q{wQ5mNz(<-6N6XMeQrzF6*O{=_p<0WJqP zch;8&B;e zAEw%2PLJ)2@7WAp4gXIT>nRceBYySw9|B6ii#b_glqfthUpe~8=LB++Zl+xXxc3OI1S%45T4RYo z)O~deJ}D`oLR2DDp3KgVui)Vy&74kr95l0^$njW_3=eP3Q@lGbj$2#sdm_+d@u3 z&fHgewGh!8DK^t!v_kZrIFt_`0gp*Nx6^ZoX+5o{fJXOm3MS3VyIpUpD>(d#$&8`} ze}%)P1Kt(#I(ziiSK?zYDpC}GCY&P6V^8(&aBzGrJJ6m9b=#VL1J4wZnGz^L+lY}) zJ+nE1Pus4klPs#-iH_Z_L0BMx2Tis~%&3f&2PZEoqiF69<$6($s*I=^!O$DHQ!mQo zJRWlMRFju*G{0vHlMQL7| zg~o-|`lj{iB^vIy;I-8WQ9|A}GfK%_ofd6yv1?zw9p!Y`+Oc17~s*SE=$%1NDAUk8-!+#q*}AYc4K)dvogLpRB?k_Wqo! zV{GOg?xK4$L4`;?7hf5`Y@RY(usKs5NiO%?tYdjPoQdzP5iS=jaPAu$qL5dkmBL2$v6~Al7LzfE$Sxb)pqZl=O{8;9wICt(il6I6tK06&xXZ~C zK6HC-;{kaA!XPBEG`p?noHc|;E9@rA+#wPX$RW+(XFdIwl~ru%4Omju)^8aaF zVYEi)O?p;3xudZ_msxkRWZJnQZ~8gvHhk;?tT*g^;w^O~%UDU$+JlAzKYs+T= zI%Zw>mSwMyis19XmxI8$+3&B+mG%gHrFxL%O4B0=R=KV{KNRQU&+)~V9^sN_<7Hqi zq}4kp>=V$0W3zN^*w83WCuHXwUJ6eN43$Fc!w56@){Ey1Z<7(|cbzsq2#)&9`QE#X z@)xti!Z@|rWEo}+Oi*S#MvoVBdW)a_DNYl8S+h7dSm+cL@p;AhPpK#LN$85f5 zAJ8PL6aLZ6s3Z>m9Mxqw*6}`Zzmd?B;y4p>Yh%0|zqORRchT^K79Jd7kIaW5M+Ro6 zaSqD5t+baBUiL)36j%$|H4YC5WM2E>_oP8TrwCis@TxbE?rYoSuJWAS zX;^0%z-PY-utd2_g5A}SQ$0CbhK zwyp~ewlk=R^dv)e00Pe)PAR?k{s0tIHlmxL07J61wFxtW^xe#U88;*ggpUdiQENPH z2|GN)7I-JTE9r#+9UnW+`ZW~)$&r7pLvZ>69&mjpxrspF+m}*MDS7h?ud4N8w#Kd0GT+y9SfDK~Fpoq=@j777fgIRwQPNT-Vlx{zU_2)8e%$AFgSG^zz2 zubU*Kw|sPB8j7f>0)$BSR~Nb)&u-dd&6fRQz=Atr;>=IShohPLQvT~a&U$KN*qOE< zcGn#1^k3_~VJn%pg+AwQb;+b*ibg`j$bX1kOG9 zIXdLub{^J(ww{#+-w%3LCFN}+RfOj@qJ-RhXIwO2bu0Kp*FdkZ{pk$);%e#me$ZK^ zk0zs?goCGqGoi&`n-C7CvaStnmg&yw7#X-92v z+uX>=BAMgGg)O+pTPvvcgUY^DU546TSoU@ za4X~?$k8D(Ufgu%U>hVy&B`X4O^Kok_sUzFl0O4m(W{qCHgisk!kW6Nq5g1j#a0OK zj*))ITTG)bQI(+)2Tb$1#Ro`|y18Y%0PQh!i79H6&`(Q`XGgL>fEA9|SnHd(hV~MS z1KtMOCo&{}AWxnc3sl2|>M-swernkCQ;Q?SvqBEw);Yw_6vR-3*lIF)OUN$#gm`Yd zF9s|>#(1v=d)ua8Uf_(63vedyj<$3+QXS4F--KJ%kou^Krun|8bg$A|KL7e~XG?)w zMq4{t{vv$wjAYr~`^D2)2UCUWUmr^4s}0G&N#ukp_Tz$=Qobts9t3A$eZWprk!vXN zumsKrC{@o{XjI^M4_^M0wk4sz_RdkV9}RZRAuH@yOqe4$_U7H8JX%QjSf35UyKnPi z;eKXK4^{UY(SCx@Fw^?$r5c@wl{%0fVGB!9HgEbbP6UKHBqqY?o91RPSZ!{!Ck7g; zI*?N@9TsUlMR7$uR-e~@B-I4AaJck%;jJM?jio-{V7E4cH`cLhv8-AYEGXc&VFu1E z6X(gC6Nf@}-I++zB0g91wzp9SsOP2Es|75GE8R7NE=k1P;ZX%-!37UmaIxfDsmI8F zOd+-wh^Z?KKO^!^$G{cA^UckMuixxNpQ!g)= z@HFm9ZkcwkF`b|E%T0nwHU8Yq#6mi3(vD_}v=0gA;G{!}J!^!l&L>(cVh;WKf`1B1 z4IUzMkP&=Vq;2JULBr}EUzaE!r8&|(xt*~98iB@$nr4E`HE@1rqm=wWo=%-eI;>VG zwyOm3@!eF*T!0O^{+51vIC^Y(=!|F2U3K-e-`bs|(O~;mkzXAJS|59KuE{W`>qMRKPG#im-Q;(v*`kN6Gt}CfBw&|t zg2z#H&v$-FMAvs|rByxXJr3{=8tV;jD_D@bw5W0J)W|7gv2`!2OCHPA^*>E$4u|?a z|B$Z!t@)j$qJ)}4J3Qr7d#Whr5jVQllEveU&{=rWw^Nld!i00p#xS_jRI)d5jBb(K zY91?V>W7kZ!_=9Z z8=bFpL9xN-YgITjpG(yg(}H`2-13@leVn6(aFIJ`N36y|(v>nZ^)dTJj`ha35fMXB zwme#A%JntpVyr~hM-RayFVZ}02*9ND+InFY{fD8Y zyXdd%QLY!U0jump?U|!#Q(8@fH$_fyhk;wc1JKgvf{M1_j}$^)(tp)r8BW}~ z-Tv62!3xJ--3V(Mmvy|h5pcJWHhy!ebmHmCF0htwqULO21Bq^Hc^pL|^`p(jm#p}GzODikA2(Iz&)__)HG!WKiG*P9J= z$?=YXtt(8OfAnluBeN&5)Y^C@si+MUxum=g7<@n|CXb6&HUCI>MqRYXv<%o-ognGs z?`Yhl)6g$nmK|)a2gg|V+e2-h3}J}YQBi|3ppL(*YR^I2BblV5TM47B24G!F9MC{& zVAR;C^$pe}rmpPEsK;n9F+JQH#=nOrc@KBCe}T-5;m4SdZSehil%^)KN_+#4tbM~i z1thDf;Z7I|fJD%SHu~lemCSQ3gt#HVE_;M%>B&W>ivj!D8qQ6;XfUPGmR%ps9>*;y zyTI6s77BtaPl<0)GbiVtvq@<{5p3|N8!mN1duze|^YnLvpePJHN1yBv2ckUU$l_D5 z6S-zNAf5`YLL^JivE_qvU|YmDtj51g9)gw>=ZH;msH-$uEKec6Lf@jOiyd{B3vfV0 zg!T}u$)7XLTz_*WP4#pC}C2r3K`ms=(w}B{kmgpByqb9?^-;tu_BN{I;t&wMqT0@u{^g zn#=>Zo!^hn+Un1O2E``*m}_n6m5cAg;L#qqprwa5{J2_)ndbroi}2vjRlMs6lKri9 z(DD;jX{#m@arU3usp*yb$IKhE)H3`mCS-0S-=5-m{;&w%(wlKQM$k zSHo=#Df&68YUNrx?Uk=ojYdQ2idQOVN>SKenQteA0JKDDvx}n)Y;U)TaSmx)cBch# zI7WQ#Lmv_5F`-#o_Lk5mDWOsUkkmmT@WC(Mt&@hv{Zau)}AYWM92fAMbo z|JwWRw#oEam1>C;=83io4;I4bp$@ZrD@+MxKq*^|+;vV)NL@*#)^Q>AciuFYl<{GHe0TxT z27AoPdiK**1H#V7EN&)V5k?g*wm~%>!B)JdqX_yC=2%JlqTrOCp+l{ znXTLuqCfByg7f^_?(d!_!F<@GCBv!VArx`@rGKnoq+gOmqdQAC8hp4Xem+MDpu@~b zLsYtdo9T*(R-++O6Qr=J?Xj#WuDlRbf2L=T2ycpwyPcfK0VLT-GknWF(niHxP#I}l zzBA+ENAReAS**<5vh)*|UDufg`sC~t<{~lVMS@C#=#eqwridjnc6^X9_?e=Vf;{%p z-O+)2VQK$C@~f#nJd{u~{wWjf4ad+~^!GJ4gCz0%`I%dLxf#%gR(8h4oE6XHiixS- z9{f7$xVeh&bcpeTWz=SB96T*;=I2e)5kbM(%++(D&vHG@w;VW39X}@V@QLfOCj?KZ zo_Bep#1JRh3SZy{`2p#>?K*~MjznEyi1Cu4zJOMp&?{xy!J<(C1;iH)JQPy$;+kKj z+Qr14JBARby?5_(zcYV}m77N6puL7p{-)xZPb~#w&XEFaf2#;puJd^du=IiXhY*rl zup)9_VXNv#?ZfvGxfY-s=ayKW__{R-uT10~HUFM? z$ru~)9|X)X1OO#Fb6vFnd{k)~5dP(R8Th9{gn20wwW)Q>Er55Wn*BQ3pR0VIDcQJ< zZiFl@e3C^AyS}VC!mzv^8T9c5Gu2*h3oGM!KY!h>o{?!RzfmpQU2X8i!dY3fL>?B| z2!7L_3weAHw$D&ts3`Tj)pC;2?njUm1Aao>#U;+}CCd9)iHtC!s^pao6kvlm_kXFh zc)wcPNJBbiu%eJ$yh^Ig)XWecXv_i%77h#MnvxZ8zqsq` z3bXN_TwRE47<`&i{DkC+IH7Xjse3V_a%|8pUq{Hu_kaO9D*kx9J%iVFyzf5Op~y`5 z(?8DX5gpUbYq(kXYH;HwwfO93`}sZAaQyFbrAT(V*7U7AUkBFCUYaZ)Qz+n+q*i0k z0vs4!TN4Lq(7fL!sFd}8b%eCvnx#C9;j@eQUc7c{N?F97F1mkmWVcgyc3^*8C}-oO zc#PdKEz{onQ^O4XE4k9u>yg0FqAJ1G@BRRv8Q6lBAQM?_Z&K_Y#=IKp3FPr}xYvi$B>R`T^1EwuX&9SS#Z?mIR0Jx6CDp+zI8^(!;MF2{GV{ zqx}r0f7^LJ-}+Sa%;)sl&@LQjY=Q(5xMhJRn#p&M4B8jpNO|1W+Wt&#in4sAGWg+E~h`s_o+i`wF9hP* zR~EgAQ|p{a-xWP;^n!gPlBO&we00MLaGV<*J1mLQ@qep06!g-l=3JhBqoSEouy2@B zsL%trnM@?Y4Lph-I2#m_A7<?8fo=;j5tspAso;j#N<(h%+6v%@j;rA4A-+Q5x(0z~7p z9;2`K_Pq(bz}SSt51gUvDNSdn>N-gB3C)fON#1xvqMgh0aJpJp9)YN zy@mqVk1cQSipj55j?-BD_)Vy|=6_Y3_(#BMf8LqOHz>WSi_Zyu%8fj^hc6o$aljh( z!MkeX%{$3y#C|#axSTG!k>TNk$|~r+%ugu^Wqd6ucO;G0A~>xj{F~OeHis(mP5&|0 zsy6Cm#?Vh-W4!0Ui^i2TQ*sUS!c-q^mm~+wE1pdTzF_6?G(R4`?+2YcNfzebnb?9| z`iKtZTupzl62(*imUPJLzsSG{AN;%@U*(ua>t=p~oiwzqKW$O*1Kf2Wa;ViPpA5TT zSJ&SSf1&@v{f{x*nm4|`!3I?CwNhI*&*?yB>TuCGO--Z!EUx&?Hh+pAfKx%&Q=C;uZ9B`+sh9Rp~%kmjEv4c(H@gBX$x)vb|Z3#`o`hbTjT; zswtu+_c3R^o=^Hwi#QT9|5S9O{+8g>nD*G+i;I0F7V=VEyOpsZNHtb{wav#wzGDoA z@irZX*ILJ2F>yZ!IgF99#zkAR%?h?Ws6t-p{J`5(xCbCMs5c@IGVrU@Hh=Wx3z**V z9p@fB{-mL}t~|gL?&Bls&p`ARB#cTnhc&TKtoWuT6s#R^xfr@}!$o0b6}% z|1(ZIya=4m>Iy#U74OY@yS}`4;B@Z5%&!tx@rvao^ekVc2&7{}{VTDj>?)nZT}xN8 z;l)w2&&o#Ixn93mZ`2iLz{~sX8^ng0e_$c};Ifgcj&b+d!(cH1!1tj&{Tm6$qol7A zk;ZP9O$p($`kgssf_Owg7M2etdOH5<%7ucIprPj-ubho=FN7JWFzxeHPq5s$_t)G< zc$Q6f`p+h3OwUuf?Jl@_pRBuh`$N<91!r-~$Kjzb1sv>^kGxg<*7gamQwvc)MD3KO zk9_OoQ#sLAE`9@f3HiA4d0@P7JuYl+vQgznfZpoMorX>H-kAN-Nf0`CbbV@y5AeOc z;Gk{wNYjyIbK*4!6SEMGe+}Ihjy; z%CpH|ojj1%LMHetl2x)_4i<~slOA2!D4d&J0RR2+2Uu;d@}Z-w8oZtib<;$nKG_<9 zb_xHXG8)N~W5w>?OcZ6*^}X@@y1T21!CkmuQHDoxP3zI$QNlW(UD+9nY>-xxO1SM{ zJ33bqQjW%2*3T@i4q4;g#RuP>uX>(df2lFT&kc7c3r6-k=nsk3Ce;jwh(m(aj=pxT zpe}iR_JZsT4j569{8i(B%;b-~c6>1-nNuI0*kKgpW{}2K14$ycB6P3|>TBE>&W|iF zVFC*c740P)b4%v2IbE^oYmGz^{70_4Mh z5WhfrW1hvJ>&Sf(R>BKvqU)*X+jPST;|J)d<{+tykP_)2MP4{Pr+_xb0qpk(c|I*b zliH>3`B{d2n7*}%2TM7nL2aVH@z)M{g*ae<8ftN9scy?8EPlDew;ZH&iyAg=D zhHbvasQozSAp75FS` zTNv3Zu$~k7Yd)V@*5)jt-d{0X*gVI1UsHjyT59A^%$1H9Ks-rFH2WE%<<_?tWW5`H zOk$VERL)0{;5*o=%VW?<%B3qTaL7`vfJd%f8qeUr=JbzgB9n+(KUDG3RrT;%Cp#=o zE}?B0UTzM29Ijh3E&;F{LhbQK31|-k8tfiuP_5fd^iMb7b1)g4i8o~L$x0c)bkuG3 zmn*aMzCqolrbFfh*y?eJUb`dT0#oVxv?4n-t41P=6WIs8J8{0+)p4);U!|co#qk)% zhTaSH)b6{1KynTQE5UH8_||Dn^@PB}W&eK0$}_}xcLHRK7HFdue3PgO*6a|=%lp@J zHzu0C2jtx@?ffK>%v4%`Yek{)D~&(JM9Q0LBEzU=)%9))DuJ(fsGaoa{k4naodm)g zE4tA`&ZGA=uzyjTO!&eTNOp3s=a?fI679FOx@oBwXU~w?Qtfv^k`d4>FJKvyx>9>& z_|qHgi$G{d5|}&l<0CvxQY4sHsVpv*80g04t0lEmoibEIa?1~v@n*6I%6I=}fX$;Q zx$lF_Z*w0;O}wEiDyinRtrR^B+sAr zyE$e&6F=if8eYCkQ*A4G&Pt!;0I9A3|%S zaB{5xc1YV+;fzzdZ@q{UyouvtoI-U}ZBsa9tv>XeeD^VLMEi(Q2|gWi*-;cw_IV%% zoW7Y_{VniMWcOxt9fRDlIkz7UTzH5d2fX?_c>?}OdkrQJfYFTo1FFEig$>TQpN zL)V(lPSrAKe6NKODTtH6JMyg+t{S*e0HE_$SCTg{siJ7>;QXx2Lau%f_JG_{eWmh~ z#ai178^BekW2xnBygHnd-$mKSIu0^}7=r3ZYF)QyE|Q&FLjg)_*tl`Z*|k-C={-Jr zxw_Y;DoqFH^(Yn5I<6ERgc|8}~*ry|L4vsk|ed{E8i*au&{}J=C&f1# z=$?XAF`hQQ_s(u^5$cF*BXVE=IA_D=a}JYl5dVgV^9S_fY?F(XHA^N}3g9?*wSbCS z9C^2yjnLwKQ#`&dYj_&>#F#j>*#N74m2C zn_}TAj<%D@S6gUs_`?^1D|~F;YTNcQM*G@{L<@f|XF$N=g~C||ey_$=U`bn)pJS>e z(-@0w0Un;ooT~3$u?2Ol&bV7j<2Obys?UM*MSSWcXDU(85aM`d^b=!0QBvr*?+TXX z1lz5MeZ@1uJS`-%|=E zxb8$tS*WQh{zzQkIpwI9q}s7VX19ylEBXFyU!SxMRs$d8jJ!#M0`h5j4cofvk|1aH zjgX77!crE^RZDsP0=@xeQ~q<$f%RRn2lVoF~Z|2yJ-`8Bo6!Z zxWGeoTa-7!@;2CJqyZr*d;XIbApm8{#ToukS(maVL>95rRhe<6=_}G@jnVXs_4zLF zt5IzY8eY9pP2epsl}~)>pL1VeMRVjJwM;siwJih=3CmM`7o8m*3Y%9d4}4A{o!YeX zas+35SbWp~f?eE}K^~Sk@sye1*j9)W@36pS7plQ*MUGdF!^R8~qGM4v5w!F|W6Td^ ztZfds+QmO#8l822I9jfUG7h2ZE?|*K$20q?7?jrq73d zZ5P!DW^R0G!VHEaZ4Dlbe%Q{Unz$0Nm&|KfK!(dr%b;4KH*;bJO3OJngdd1r#-qgY zJ_eS(_bvuG`4j4bK|^mZoR9Lzmyq9*k{O{YS>XeIhOw+UaXa|025hvpCd`<8Lw9Wh zY@uFQjZ;~dTV9ez3waMW%n|gGb|+JjG@~H_DwoTI z4>TP8oDhTL8^rwjSR%49voTOqF;S(dJ(HV*RvXc2XVBZ`-&?9A56rL{oG78Wk|tB1 zXaBG&so&KeZlbNh0&IDoy^h9y!$Y~_p(K9H9#SCZ<|;-frZS>fBZAdt6jx9GY~rDQ z+bKEq*`7h%{AzmA!{{OY{vQX5p@2N@3EgT2k~daD?0Lg(H+2?7`08Q%M-Aa2Hw+l?W&>xBY_iNdz!H91U6ib(?tB;QrES zHFzi)&W+QTza?wTsdPhn66a~)uXJ+9Ca_J>c#q-LR9B9@=(-WwG3Zs?X#lbe0vC=0 z6&kk{9y05lc0-QjG&3Dm?$V-q%|c4%2ZwmcQ6JsB`k)pORqsx?%c>2Tfxc&YM1VY> zqt%9m+7Y7M-tKib{;Q3CCJ=I*Sv_;i!t5Wog5~ z3UE~{yKSG{vYfGT(!)G0!?u$3#7UJl|F&G{FR)QPm2|^>PvC9TTMpSH}hIhi_#4F$1_}u-PifPmC?_kLNSd=HXEpX=i?p>zQIJ(*K2H5eJnAqB*Lq zadNh)CT;eFt`IlQ{%&yc9w=Xc1o}-;*Sfn`F#N3uNUs}yeckx@^8SiRUG?&|d7s%o z{wN~`2IPkkuQsk}Ty>yM>Cg+ACv^8a1~_WBsvK)`yjNcEiZ?Cj9GYjGoU8%) z&L8RCm$Cp@o&lJkHZnr-=pHn_uX*-1Z&c=}dL5seJuDR& z`L&Jgh86)ufw617VJ0I$Py~-Ui!BE!2IqLEK}CuA{n%n=voca3RE2xRUSk8?2TWy; zBFUTo_^ZU`PW>Wa*`cQlVjIzxpSj}6Xx&Wf(z9Ek-;lB@-w{`nnZI^ zf6%m-GLnEDehC`{PKmU>w2=VPU)!|wpY|>+dZnNbmx>9%_qJZ-CyRTNOtY+%Y$tV? zFGdH=iC3gNzOUby1zIP#s0;JeRs9W;K`Jwc?O#?*XxpS%wSE%|x{8ob;IF)vOxwCp zuLR9H`4Rh<(QHR`n07uYc+f0=d2SjRInk-i`HHo_dDgpVjkshYJMH@#V#NSMj-rqj zcZES`dDjd~9a6gs9ToPskM0u&iLtB{=j#Z%ytF&Jz2(+62t$WSkd9R_E zJHB&3Q&FN~4j%e~ub4+n0%&4CQ~6szFvO;!Z{G;=y!3WrKQ@PmGdB5H>zq?S7(OHo z$)dPyW^hMk&K_lUx9Ykq`{+c^4G?)z$`26l{)TKfUI`CEmJfn%^s7Z}U-a?+qnG)IHu3q`>gNiGXaP4pLoJv; z2^gA$I-4p)zs{lt2#nuZ4Huc(Kr@1SKOyc8ck`!K_OHI`pv2n<=60qUbFC7iV(CtRNZ z;L(%5bw=QC*SmoN$9SuSR1wOZVYX-=^U>z|-*}JFL9ZGUI=zc}0835hUZduQ(DMUL z{EqU_bDu&Vtf&I0nh69i-uY2-V)x7lx%nN7{xsvEgW9q>Y-2JdDAVU-1hTcx44xX& z5di^88!46*ayf_(iSx57-V+{|tZ_=MC5!%53ccNVbbE6tbd6j^Hg>d6hq4Yubu0tL zVbD!cUi(8?&TF}-)rsMuwY+4+l(T6ly8z%p62*5?xzE~qeh&u%j^ikkvhhI zxO?o{f0P@!KVkS%V&hRj1NcXI#D1Ipvq*F(sQ~F%R%2D6Kz>s))Fi3A|H5OvNB?wF$zd{k|Xd5_EIwwKk`FAh^L!R{uLXAZ)E^bWRj&t86dWSan z@}NA=?7E3)B_^OSq9p=t(8|fnn*Yv?D|5_rTw*uaQ zsQqQ~+*0LQM*g}GbWUF8VN5P$4ieZF(Q=Q92-rLjO)gn3vg1A6-x1>(e&DG)8-MOx zT#>ChRJVvkzdxu+#miHP?(bCSc&8?eozAI%$cC%n&&_su-z&b-yZQ-^r7xkx*T#8S=m~ptnXaDHPK2G$9qb&LGs~}a;m$0=Z6GcT=8S^An( z=m!*tEZs3?BfN^(P+cZR2Gq)^dn7ouueaL0@pH6nClx@udD;Hn&PMEK82QO`(ofKJ ziT2(5b`K+iI*a8_-0^Ov;li0$e;Xtpz)*i^1fWw z;!pFn{xQpu0H!lyRH%Pc#AXy$s0>HNvtQG;6dN<;m{}#UmE3!W5Qa?Oy^tP8(0mfU z{#%REMyrjs=>y&F6sSWn0Gb`L2`&#}_Y>n*(`ZzjHL8`Qv~;)=p6G@hY_=t&~J zYlS*5aU=hNE!hgdj5SQ|C7CESk7fP!bL#HmYGy8Yodl)In;THB-%JlR>?Re=*SDc~ zj8>2?@O~!qfG64P;BU;PiyzUGr1$6PEjg=Ispo|sradXZ3cLZ9jaj|BET60S)D;HLr+s_d^AOZCMOpkC^3`%r$z30`;CUe*xey$P@%JZB6&@{V+ zFmZqf8K_Z`Z?BW5DqiiP(4Og_;(Wkh9n#Q7sjvjHk`p!DQ)xoTizo1i&+a_dQl zzz}xrjAf%#03a42lBp}2s1{c44k`(VZNY~NwiZDB^a6jT(R~UL_%=SAUc>lYn_@#} zFdqt-c`J3!*UV$FxpurZ#qPf#YJ#@xJ^@u^eVAPPFXImo~xOeHqou{&MSz^jm zxaT^om6hmFh>a?dOf>cJ6>E-Xl{M_vvy!x=98KICOb7LJUbjbpeYYv8xv>@QJ}7AZ zo@1`=Q$m&MMn`(fyM}3ZntFO3zIxmce{^qjS*ouFkI~TY&UxVUpR3}}Nj9GtE}k5U zTnJ=8^KkNf*ve)Kvytse+NvgW2KQ^DG$$(!;XgMAF$wr_V&z%y+|YX-bn_W$U4p^h zZSN8fR}xs!b_8gHW|PNK;5ldaQz?gnfz(E%gRf@E)XnpOK_x$MXWL#jb#H||4fL;- zWN%pFLGg$==iM=M?6)uW4NMf*xc0E& z$xuqOJQ<{&2ufwEdez4d+dq_sJ-_F`{@FfU>Tj^x!Lxkqau;vs3e^uexNJ?Jp>r3hv9DoFX)amhyS?IlTX zRz0Q)Mw4rn&%E;|>F@C;Ua57oB{ojG3PQP<&_MOXSo!o38L_$gY5DMhu(gTS$aDN{ zEg0o&g$qpYH8LObF;R|96#?f_>B&P}Gx4=I3~H&eh-c+vFpPF|+USnmQ#C*0ZMKs! zaO{V6Jk)7p*|?qec7PK6(l52`!hahkld+E4tBy$2E~uF>$Rx`k;(J?lZ{oPi$R`Z# zsW)~r-tN@D&L?UtS~Cv11)VPwp^`BcvOJW^MI2|?*vU1MX%%(69SV?n_mCv0Qwq7+ z=N}s%Xih2}TE!)vPa*Z97WbW-SFMFavcVQPzwPcR)WL8$=fWLC6mwLqzBbjd_bbl< zbVg<>k800%-iZL7QK99OacR%WNs3uymDK)q<MOEA~&%`2Wd-(hWfO?fZIwO2_)TZ+-tsAEt>{IC?T67ok=41OGYM z_e3Ic3o|b|>yY_BcZ0{2J?EORknbp5%b4WDe^ae&-R)gb%$+*sAj3KQ3J$5$6JinM zH?e=8nUO)BeHKfIxw;WZ!LdPW+1v{1IVudbg!+(pokt?= z^3jpd@qI0MFQOg6= self.data.shape[0]: + self.k = self.data.shape[0]-1 + self.nClusters = max(n_cluster, 2) + self.dims = dims + self.loadings = loadings + if self.dims > self.data.shape[0]: + self.dims = self.data.shape[0] + if self.dims is None and self.loadings is None: + raise ValueError("ERROR: Provide either value for atleast one: 'dims' or 'loadings'") + self.annMetric = ann_metric + self.annEfc = ann_efc + self.annEf = ann_ef + self.annNthreads = ann_nthreads + self.randState = rand_state + self.batchSize = self._handle_batch_size() + self.kmeansKwargs = kmeans_kwargs + clean_kmeans_kwargs(self.kmeansKwargs) + self.mu = mu + self.sigma = sigma + self.method = reduction_method + self.nCells, self.nFeats = self.data.shape + self.annIdx = self._init_ann() + self.clusterLabels: np.ndarray = np.repeat(-1, self.nCells) + self.kmeans = self._init_kmeans() + + self.reducer = None + + def _handle_batch_size(self): + batch_size = self.data.chunksize[0] # Assuming all chunks are same size + if self.dims >= batch_size: + self.dims = batch_size-1 # -1 because we will do PCA +1 + print(f"INFO: Number of PCA components reduced to batch size of {batch_size}") + if self.nClusters > batch_size: + self.nClusters = batch_size + print(f"INFO: Cluster number reduced to batch size of {batch_size}") + return batch_size + + def _init_ann(self): + idx = hnswlib.Index(space=self.annMetric, dim=self.dims) + idx.init_index(max_elements=self.nCells, ef_construction=self.annEfc, + M=self.dims, random_seed=self.randState) + idx.set_ef(self.annEf) + idx.set_num_threads(self.annNthreads) + return idx + + def _init_kmeans(self): + return MiniBatchKMeans( + n_clusters=self.nClusters, random_state=self.randState, + batch_size=self.batchSize, **self.kmeansKwargs) + + def iter_blocks(self, msg: str = ''): + for i in tqdm(self.data.blocks, desc=msg, total=self.data.numblocks[0]): + yield i.compute() + + def transform_z(self, a: np.ndarray): + return (a - self.mu) / self.sigma + + def transform_pca(self, a: np.ndarray): + return a.dot(self.loadings) + + def transform_lsi(self, a: np.ndarray): + return a.dot(self.loadings) + + def transform_ann(self, a: np.ndarray, k: int = None): + if k is None: + k = self.k + # Adding +1 to k because first neighbour will be the query itself + i, d = self.annIdx.knn_query(a, k=k+1) + return i[:, 1:], d[:, 1:] # Slicing to remove self-loop + + def estimate_partitions(self): + temp = [] + for i in self.iter_blocks(msg='Estimating seed partitions'): + temp.extend(self.kmeans.predict(self.reducer(i))) + self.clusterLabels = np.array(temp) + + def _fit_pca(self): + # We fit 1 extra PC dim than specified and then ignore the last PC. + self._pca = IncrementalPCA(n_components=self.dims + 1, batch_size=self.batchSize) + for i in self.iter_blocks(msg='Fitting PCA'): + self._pca.partial_fit(self.transform_z(i), check_input=False) + self.loadings = self._pca.components_[:-1, :].T + + def _fit_lsi(self): + self._lsiModel = LsiModel(vec_to_bow(self.data.blocks[0].compute()), num_topics=self.dims, + chunksize=self.data.chunksize[0]) + for n, i in enumerate(self.iter_blocks(msg="Fitting LSI model")): + if n == 0: + continue + self._lsiModel.add_documents(vec_to_bow(i)) + self.loadings = self._lsiModel.get_topics().T + + def fit(self): + if self.method == 'pca': + self.reducer = lambda x: self.transform_pca(self.transform_z(x)) + elif self.method == 'lsi': + self.reducer = self.transform_lsi + else: + raise ValueError("ERROR: Unknown reduction method") + if self.loadings is None: + if self.method == 'pca': + self._fit_pca() + elif self.method == 'lsi': + self._fit_lsi() + for i in self.iter_blocks(msg='Fitting ANN'): + a = self.reducer(i) + self.annIdx.add_items(a) + self.kmeans.partial_fit(a) + self.estimate_partitions() + + def refit_kmeans(self, n_clusters: int, **kwargs): + self.nClusters = n_clusters + self.kmeansKwargs = kwargs + clean_kmeans_kwargs(self.kmeansKwargs) + self.kmeans = self._init_kmeans() + for i in self.iter_blocks(msg='Fitting kmeans'): + self.kmeans.partial_fit(self.reducer(i)) + self.estimate_partitions() diff --git a/scarf/assay.py b/scarf/assay.py new file mode 100644 index 0000000..a345a5b --- /dev/null +++ b/scarf/assay.py @@ -0,0 +1,240 @@ +import numpy as np +import dask.array as daskarr +import zarr +from .metadata import MetaData +from .utils import show_progress +from .writers import dask_to_zarr + + +__all__ = ['Assay', 'RNAassay', 'ATACassay', 'ADTassay'] + + +def dummy_norm(counts: daskarr, **kwargs) -> daskarr: + return counts + + +def norm_lib_size(counts: daskarr, sf: float, n_counts: np.ndarray) -> daskarr: + return sf * counts / (n_counts.reshape(-1, 1)+1) + + +def norm_clr(counts: daskarr) -> daskarr: + f = np.exp(np.log1p(counts).sum(axis=0) / len(counts)) + return np.log1p(counts / f) + + +def norm_tf_idf(counts: daskarr, n_docs: int, n_feats_per_cell: np.ndarray, + n_cells_per_feat: np.ndarray) -> daskarr: + tf = counts / n_feats_per_cell.reshape(-1, 1) + idf = np.log2(n_docs / (n_cells_per_feat+1)) + return tf * idf + + +class Assay: + def __init__(self, fn: str, name: str, cell_data: MetaData): + self._fn = fn + self._z = zarr.open(fn, 'r+') + self.name = name + self.cells = cell_data + self.rawData = daskarr.from_zarr(self._z[self.name]['counts']) + self.feats = MetaData(self._z[self.name]['featureData']) + self.attrs = self._z[self.name].attrs + if 'percentFeatures' not in self.attrs: + self.attrs['percentFeatures'] = {} + self.annObj = None # Can be dynamically attached for debugging purposes + self.normMethod = dummy_norm + self.sf = None + self._ini_feature_props() + + def normed(self, cell_idx: np.ndarray = None, feat_idx: np.ndarray = None, **kwargs): + if cell_idx is None: + cell_idx = self.cells.active_index('I') + if feat_idx is None: + feat_idx = self.feats.active_index('I') + return self.normMethod(self.rawData[:, feat_idx][cell_idx, :].rechunk(self.rawData.chunksize)) + + @show_progress + def _ini_feature_props(self, force_recalc: bool = False) -> None: + if 'nCells' in self.feats.table.columns and \ + 'dropOuts' in self.feats.table.columns and force_recalc is False: + pass + else: + print(f"INFO: ({self.name}) Computing nCells and dropOuts", flush=True) + self.feats.add('nCells', (self.rawData > 0).sum(axis=0).compute(), overwrite=True) + self.feats.add('dropOuts', abs(self.cells.N - self.feats.fetch('nCells')), overwrite=True) + + @show_progress + def add_percent_feature(self, feat_pattern: str, name: str, verbose: bool = True) -> None: + if name in self.attrs['percentFeatures']: + if self.attrs['percentFeatures'][name] == feat_pattern: + if verbose: + print(f"INFO: Percentage feature {name} already exists. Not adding again") + return None + feat_idx = self.feats.get_idx_by_names(self.feats.grep(feat_pattern)) + if len(feat_idx) == 0: + print(f"WARNING: No matches found for pattern {feat_pattern}. Will not add/update percentage feature") + return None + print(f"Computing percentage of {name}", flush=True) + total = self.rawData[:, feat_idx].rechunk(self.rawData.chunksize).sum(axis=1).compute() + self.cells.add(name, 100 * total / self.cells.table['nCounts'], overwrite=True) + self.attrs['percentFeatures'] = {**{k: v for k, v in self.attrs['percentFeatures'].items()}, + **{name: feat_pattern}} + + def create_subset_hash(self, cell_key: str, feat_key: str): + cell_idx = self.cells.active_index(cell_key) + feat_idx = self.feats.active_index(feat_key) + return hash(tuple([hash(tuple(cell_idx)), hash(tuple(feat_idx))])) + + def select_and_normalize(self, cell_key: str, feat_key: str, batch_size: int) -> daskarr: + if cell_key not in self.cells.table or self.cells.table[cell_key].dtype != bool: + raise ValueError(f"ERROR: Either {cell_key} does not exist or is not bool type") + if feat_key not in self.feats.table or self.feats.table[feat_key].dtype != bool: + raise ValueError(f"ERROR: Either {feat_key} does not exist or is not bool type") + cell_idx = self.cells.active_index(cell_key) + feat_idx = self.feats.active_index(feat_key) + subset_hash = self.create_subset_hash(cell_key, feat_key) + subset_name = f"subset_{cell_key}_{feat_key}" + loc = f"{self.name}/{subset_name}" + if subset_name in self.attrs and self.attrs[subset_name] == subset_hash and loc in self._z: + print(f"INFO: Exact features already selected for assay {self.name}") + else: + # renormed parameter will not have an effect unless the norMethod uses it. But this is hard coded + dask_to_zarr(self.normed(cell_idx, feat_idx, renormed=True), self._z, loc, batch_size) + self.attrs[subset_name] = subset_hash + return daskarr.from_zarr(self._z[loc]) + + def __repr__(self): + f = self.feats.table['I'] + assay_name = str(self.__class__).split('.')[-1][:-2] + return f"{assay_name} {self.name} with {f.sum()}({len(f)}) features" + + +class RNAassay(Assay): + def __init__(self, fn: str, name: str, cell_data: MetaData): + super().__init__(fn, name, cell_data) + self.normMethod = norm_lib_size + self.sf = 10000 + + def normed(self, cell_idx: np.ndarray = None, feat_idx: np.ndarray = None, renormed: bool = False): + if cell_idx is None: + cell_idx = self.cells.active_index('I') + if feat_idx is None: + feat_idx = self.feats.active_index('I') + counts = self.rawData[:, feat_idx][cell_idx, :].rechunk(self.rawData.chunksize) + if renormed: + scalar = counts.sum(axis=1).reshape(-1, 1) + 1 + else: + scalar = self.cells.table['nCounts'].values[cell_idx] + return self.normMethod(counts, self.sf, scalar) + + def correct_var(self, n_bins: int = 200, lowess_frac: float = 0.1) -> None: + self.feats.remove_trend('c_var', 'avg', 'sigmas', n_bins, lowess_frac) + + def correct_dropouts(self, n_bins: int = 200, lowess_frac: float = 0.1) -> None: + self.feats.remove_trend('c_dropOuts', 'avg', 'dropOuts', n_bins, lowess_frac) + + @show_progress + def set_feature_stats(self, cell_key: str = 'I', feat_key: str = 'I', min_cells: int = 10) -> None: + subset_name = f"subset_{cell_key}_{feat_key}" + cell_idx = self.cells.active_index(cell_key) + + feat_idx = self.feats.active_index(feat_key) + subset_hash = hash(tuple([hash(tuple(cell_idx)), hash(tuple(feat_idx))])) + if subset_name in self.attrs and self.attrs[subset_name] == subset_hash: + print("INFO: Using cached feature stats data") + return None + + print(f"INFO: ({self.name}) Cells per features (1/3)", flush=True) + n_cells = (self.normed(cell_idx, feat_idx) > 0).sum(axis=0).compute() + self.feats.add('normed_n', n_cells, key=feat_key, overwrite=True) + self.feats.update(n_cells > min_cells, key=feat_key) + n_cells = self.feats.fetch('normed_n', key=feat_key) + feat_idx = self.feats.active_index(feat_key) + subset_hash = hash(tuple([hash(tuple(cell_idx)), hash(tuple(feat_idx))])) + + print(f"INFO: ({self.name}) Feature sum (2/3)", flush=True) + tot = self.normed(cell_idx, feat_idx).sum(axis=0).compute() + self.feats.add('normed_tot', tot, key=feat_key, overwrite=True) + self.feats.add('avg', tot / self.cells.N, key=feat_key, overwrite=True) + self.feats.add('nz_mean', tot / n_cells, key=feat_key, overwrite=True) + + print(f"INFO: ({self.name}) Feature variance (3/3)", flush=True) + sigmas = self.normed(cell_idx, feat_idx).var(axis=0).compute() + self.feats.add('sigmas', sigmas, key=feat_key, overwrite=True) + + self.attrs[subset_name] = subset_hash + return None + + def mark_hvgs(self, min_cells: int = 20, top_n: int = 500, + min_var: float = -np.Inf, max_var: float = np.Inf, + min_mean: float = -np.Inf, max_mean: float = np.Inf, + blacklist: str = "^MT-|^RPS|^RPL|^MRPS|^MRPL|^CCN|^HLA-|^H2-|^HIST"): + bl = self.feats.idx_to_bool(self.feats.get_idx_by_names(self.feats.grep(blacklist)), invert=True) + if min_var == -np.Inf: + if top_n < 1: + raise ValueError("ERROR: Please provide a value greater than 0 for `top_n` parameter") + idx = self.feats.multi_sift( + ['normed_n', 'nz_mean'], [min_cells, min_mean], [np.Inf, max_mean]) + idx = idx & self.feats.table['I'] & bl + min_var = self.feats.table[idx]['c_var'].sort_values(ascending=False).values[top_n] + hvgs = self.feats.multi_sift( + ['normed_n', 'nz_mean', 'c_var'], [min_cells, min_mean, min_var], [np.Inf, max_mean, max_var]) + hvgs = hvgs & self.feats.table['I'] & bl + print(f"INFO: {sum(hvgs)} genes marked as HVGs", flush=True) + self.feats.add('hvgs', hvgs, fill_val=False, overwrite=True) + + +class ATACassay(Assay): + def __init__(self, fn: str, name: str, cell_data: MetaData): + super().__init__(fn, name, cell_data) + self.normMethod = norm_tf_idf + + def normed(self, cell_idx: np.ndarray = None, feat_idx: np.ndarray = None, **kwargs): + if cell_idx is None: + cell_idx = self.cells.active_index('I') + if feat_idx is None: + feat_idx = self.feats.active_index('I') + return self.normMethod(self.rawData[cell_idx, :][:, feat_idx], n_docs=len(cell_idx), + n_feats_per_cell=self.cells.table['nFeatures'].values[cell_idx], + n_cells_per_feat=self.feats.table['nCells'].values[feat_idx]) + + @show_progress + def set_feature_stats(self, cell_key: str = 'I', feat_key: str = 'I') -> None: + subset_name = f"subset_{cell_key}_{feat_key}" + cell_idx = self.cells.active_index(cell_key) + + feat_idx = self.feats.active_index(feat_key) + subset_hash = hash(tuple([hash(tuple(cell_idx)), hash(tuple(feat_idx))])) + if subset_name in self.attrs and self.attrs[subset_name] == subset_hash: + print("INFO: Using cached feature stats data", flush=True) + return None + + print(f"INFO: ({self.name}) Calculating peak prevalence across cells", flush=True) + prevalence = self.normed(cell_idx, feat_idx).sum(axis=0).compute() + self.feats.add('prevalence', prevalence, fill_val=False, overwrite=True) + + self.attrs[subset_name] = subset_hash + return None + + @show_progress + def mark_top_prevalent_peaks(self, n_top: int = 1000): + if 'prevalence' not in self.feats.table: + raise ValueError("ERROR: Please 'run set_feature_stats' first") + if n_top >= self.feats.N: + raise ValueError(f"ERROR: n_top should be less than total number of features ({self.feats.N})]") + if type(n_top) != int: + raise TypeError("ERROR: n_top must a positive integer value") + idx = self.feats.table['prevalence'].sort_values(ascending=False)[:n_top].index + self.feats.add('top_peaks', self.feats.idx_to_bool(idx), fill_val=False, overwrite=True) + + +class ADTassay(Assay): + def __init__(self, fn: str, name: str, cell_data: MetaData): + super().__init__(fn, name, cell_data) + self.normMethod = norm_clr + + def normed(self, cell_idx: np.ndarray = None, feat_idx: np.ndarray = None, **kwargs): + if cell_idx is None: + cell_idx = self.cells.active_index('I') + if feat_idx is None: + feat_idx = self.feats.active_index('I') + return self.normMethod(self.rawData[cell_idx, :][:, feat_idx]) diff --git a/scarf/balanced_cut.py b/scarf/balanced_cut.py new file mode 100644 index 0000000..8b03885 --- /dev/null +++ b/scarf/balanced_cut.py @@ -0,0 +1,195 @@ +import numpy as np +import networkx as nx +import logzero +from logzero import logger +import logging +from typing import List, Dict + +formatter = logging.Formatter('(%(asctime)s) [%(levelname)s]: %(message)s', "%H:%M:%S") +logzero.formatter(formatter) +logzero.loglevel(logging.INFO) + +__all__ = ['BalancedCut'] + + +class BalancedCut: + def __init__(self, dendrogram: np.ndarray, max_size: int, min_size: int, + max_distance_fc: float): + self.nCells = dendrogram.shape[0] + 1 + self.graph = self._make_digraph(dendrogram) + self.maxSize = max_size + self.minSize = min_size + self.maxDistFc = max_distance_fc + self.branchpoints = self._get_branchpoints() + + @staticmethod + def _make_digraph(d: np.ndarray) -> nx.DiGraph: + """ + Convert dendrogram into directed graph + """ + g = nx.DiGraph() + n = d.shape[0] + 1 # Dendrogram contains one less sample + for i in d: + v = i[2] # Distance between clusters + i = i.astype(int) + g.add_node(n, nleaves=i[3], dist=v) + if i[0] <= d.shape[0]: + g.add_node(i[0], nleaves=0, dist=v) + if i[1] <= d.shape[0]: + g.add_node(i[1], nleaves=0, dist=v) + g.add_edge(n, i[0]) + g.add_edge(n, i[1]) + n += 1 + return g + + def _successors(self, start: int, min_leaves: int) -> List[int]: + """ + Get tree downstream of a node. + """ + q = [start] + d = [] + while len(q) > 0: + i = q.pop(0) + if self.graph.nodes[i]['nleaves'] > min_leaves: + d.append(i) + q.extend(list(self.graph.successors(i))) + return d[1:] + + def _get_mean_dist(self, start_node: int) -> np.float: + """ + Get mean distances in downstream tree of a node + """ + s_nodes = self._successors(start_node, -1) + return np.array([self.graph.nodes[x]['dist'] for x in s_nodes]).mean() + + def _are_subtrees_mergeable(self, s1: int, s2: int) -> bool: + n1, n2 = self.graph.nodes[s1]['nleaves'], self.graph.nodes[s2]['nleaves'] + if n1 > self.minSize and n2 > self.minSize: + d1, d2 = self.graph.nodes[s1]['dist'], self.graph.nodes[s2]['dist'] + if d1/d2 > self.maxDistFc or d2/d1 > self.maxDistFc: + logger.debug(f"Will not merge {s1} and {s2} because of high distance") + return False + else: + md1, md2 = self._get_mean_dist(s1), self._get_mean_dist(s2) + if md1/md2 > self.maxDistFc or md2/md1 > self.maxDistFc: + logger.debug(f"Will not merge {s1} and {s2} because of high distance of successors") + return False + return True + + def _get_branchpoints(self) -> \ + Dict[int, List[int]]: + """ + Aggregate leaves bottom up until target size is reached. + """ + n_leaves = int((self.graph.number_of_nodes() + 1) / 2) + leaves = {x: None for x in range(n_leaves)} + bps = {} + while len(leaves) > 0: + leaf, _ = leaves.popitem() + logger.debug(f"FRESH STEP: Leaf {leaf} plucked as base leaf") + cur = leaf + while True: + temp = next(self.graph.predecessors(cur)) + if temp in bps: + logger.debug(f"Will not climb to {temp} as already a branchpoint") + break + if self.graph.nodes[temp]['nleaves'] > self.maxSize: + logger.debug(f"Will not climb to {temp} because too many leaves exist") + break + s1, s2 = list(self.graph.successors(temp)) + if self._are_subtrees_mergeable(s1, s2) is False: + break + cur = temp + logger.debug(f"Aggregating from branch {cur} for leaf {leaf}") + bps[cur] = [leaf] + s = [cur] + while len(s) > 0: + i = s.pop() + if i in leaves: + bps[cur].append(i) + leaves.pop(i) + logger.debug(f"Leaf {i} plucked in aggregation step") + elif i in bps and i != cur: + logger.debug(f"Skipping branch {i} because its already taken") + elif self.graph.nodes[i]['nleaves'] >= self.maxSize and i != cur: + logger.debug(f"Skipping branch {i} to prevent greedy behaviour") + else: + s.extend(list(self.graph.successors(i))) + return bps + + def _valid_names_in_brachpoints(self) -> None: + leaves = [] + for i in self.branchpoints: + leaves.extend(self.branchpoints[i]) + n_leaves = len(leaves) + if n_leaves != self.nCells: + raise ValueError("ERROR: Not all leaves present in branchpoints. This bug must be reported") + minl = min(leaves) + if minl != 0: + raise ValueError(f"ERROR: minimum leaf label is {minl} rather than 0") + maxl = max(leaves) + if n_leaves != maxl+1: + raise ValueError(f"ERROR: maximum leaf label is {maxl} while total estimated leaves are {n_leaves}") + return None + + def get_clusters(self) -> np.ndarray: + """ + Make cluster label array from `get_branchpoints` output + """ + self._valid_names_in_brachpoints() + c = np.zeros(self.nCells).astype(int) + for n, i in enumerate(self.branchpoints, start=1): + c[self.branchpoints[i]] = n + if (c == 0).sum() > 0: + print(f"WARNING: {(c == 0).sum()} samples were not assigned a cluster") + c[c == 0] = -1 + return c + + # def get_sibling(g, node): + # sibs = g.successors(next(g.predecessors(node))) + # return [x for x in sibs if x != node][0] + + # def top_rsquared(d, cutoff): + # df = pd.DataFrame([list(range(len(d))), d.values], index=['pos', 'val']).T + # f ='pos~val' + # f ='val~pos' + # rs = [] + # for i in range(1, len(d)-2): + # rs.append(ols(formula=f, data=df[:-i]).fit().rsquared) + # rs = np.array(rs) + # pos = np.where((rs > cutoff) == True)[0][0] + # return 1+pos + + # def merge_branches(g, bps, nodes): + # bps = dict(bps) + # for i in nodes: + # if i not in bps: + # logger.debug(f"Ignoring {i} because not a branchpoint anymore") + # sib = get_sibling(g, i) + # if sib in bps: + # if sib in nodes: + # logger.debug(f"Merging two sibling candidates {i} and {sib}") + # bps[next(g.predecessors(i))] = bps[i] + bps[sib] + # del bps[i] + # del bps[sib] + # else: + # logger.debug(f"Branchpoint {sib} updated with values from {i}") + # bps[sib] = bps[i] + bps[sib] + # del bps[i] + # else: + # logger.debug(f"Not merging {i} because sibling not a branchpoint") + # return bps + + # def test(self): + # n = 30 + # np.random.seed(154) + # randz = ward(gamma.rvs(0.3, size=n * 4).reshape((n, 4))) + # + # graph = z_to_g(randz) + # branchpoints = get_branchpoints(graph, max_leaf=10, min_leaves=2, fc=1.5) + # clusters = bps_to_clusts(branchpoints) + # dend = dendrogram(randz) + # clusters[dend['leaves']] + # + # res = [9, 5, 5, 1, 1, 1, 1, 7, 7, 7, 3, 3, 3, 3, 3, 4, 4, 4, 2, 2, 2, 8, + # 8, 8, 6, 6, 6, 6, 6, 6] diff --git a/scarf/datastore.py b/scarf/datastore.py new file mode 100644 index 0000000..4c0282a --- /dev/null +++ b/scarf/datastore.py @@ -0,0 +1,439 @@ +import zarr +import os +import shutil +import numpy as np +from typing import List, Iterable, Union +import re +from dask import optimize +from scipy.stats import norm +from scipy.sparse import coo_matrix, csr_matrix, triu +from sklearn.decomposition import PCA +import sknetwork as skn +import pandas as pd +from .writers import create_zarr_dataset +from .metadata import MetaData +from .assay import Assay, RNAassay, ATACassay, ADTassay +from .utils import show_progress +from .ann import AnnStream +from .knn_utils import make_knn_graph +from .umap import fit_transform +from .plots import plot_qc, plot_mean_var, plot_scatter, shade_scatter +from uuid import uuid4 +from .balanced_cut import BalancedCut + + +__all__ = ['DataStore'] + + +def sanitize_hierarchy(z, assay_name) -> bool: + if assay_name in z: + if 'counts' not in z[assay_name]: + raise KeyError(f"ERROR: 'counts' not found in {assay_name}") + if 'featureData' not in z[assay_name]: + raise KeyError(f"ERROR: 'featureData' not found in {assay_name}") + else: + raise KeyError(f"ERROR: {assay_name} not found in zarr file") + return True + + +def rescale_array(a: np.ndarray, frac: float = 0.9) -> np.ndarray: + loc = (np.median(a) + np.median(a[::-1])) / 2 + dist = norm(loc, np.std(a)) + minv, maxv = dist.ppf(1 - frac), dist.ppf(frac) + a[a < minv] = minv + a[a > maxv] = maxv + return a + + +def clean_array(x, fill_val: int = 0): + x = np.nan_to_num(x, copy=True) + x[(x == np.Inf) | (x == -np.Inf)] = 0 + x[x == 0] = fill_val + return x + + +class DataStore: + + def __init__(self, zarr_loc: str, assay_types: dict = None, default_assay: str = 'RNA', + min_genes_per_cell: int = 200, min_cells_per_gene: int = 20, + auto_filter: bool = False, force_recalc: bool = False, + mito_pattern: str = None, ribo_pattern: str = None): + self._fn = zarr_loc + self._z = zarr.open(self._fn, 'r+') + # The order is critical here: + self.cells = self._load_cells() + self.assayNames = self._get_assay_names() + self.defaultAssay = self._set_default_assay_name(default_assay) + self._load_assays(assay_types) + # TODO: Reset all attrs, pca, dendrogram etc + self._ini_cell_props(min_genes_per_cell, min_cells_per_gene, force_recalc) + if mito_pattern is None: + mito_pattern = 'MT-' + if ribo_pattern is None: + ribo_pattern = 'RPS|RPL|MRPS|MRPL' + assay = self.__getattribute__(self.defaultAssay) + if type(assay) == RNAassay: + assay.add_percent_feature(mito_pattern, 'percentMito', verbose=False) + assay.add_percent_feature(ribo_pattern, 'percentRibo', verbose=False) + if auto_filter: + self.plot_cells_dists(cols=['percent*'], all_cells=True) + self.auto_filter_cells(attrs=['nCounts', 'nFeatures', 'percentMito', 'percentRibo']) + self.plot_cells_dists(cols=['percent*']) + + def _load_cells(self) -> MetaData: + if 'cellData' not in self._z: + raise KeyError("ERROR: cellData not found in zarr file") + return MetaData(self._z['cellData']) + + def _get_assay_names(self) -> List[str]: + assays = [] + for i in self._z.group_keys(): + if 'is_assay' in self._z[i].attrs.keys(): + sanitize_hierarchy(self._z, i) + assays.append(i) + return assays + + def _set_default_assay_name(self, assay_name: str) -> str: + if len(self.assayNames) > 1: + if assay_name is None: + raise ValueError("ERROR: You have more than one assay data. Please provide a name for default assay " + f"using 'default_assay' parameter. Choose one from: {' '.join(self.assayNames)}\n" + "Please note that names are case-sensitive.") + elif assay_name not in self.assayNames: + raise ValueError(f"ERROR: The provided default assay name: {assay_name} was not found. " + f"Please Choose one from: {' '.join(self.assayNames)}\n" + "Please note that names are case-sensitive.") + else: + if self.assayNames[0] != assay_name: + print(f"INFO: Default assay name was reset to {self.assayNames[0]}") + assay_name = self.assayNames[0] + return assay_name + + def _load_assays(self, assays: dict) -> None: + assay_types = {'RNA': RNAassay, 'ATAC': ATACassay, 'ADT': ADTassay} + print_options = '\n'.join(["{'%s': '"+x+"'}" for x in assay_types]) + caution_statement = "CAUTION: %s was set as a generic Assay with no normalization. If this is unintended " \ + "then please make sure that you provide a correct assay type for this assay using " \ + "'assay_types' parameter. You can provide assay type in one these ways:\n" + print_options + caution_statement = caution_statement + "\nIf you have more than one assay in the dataset then you can set" \ + "assay_types={'assay1': 'RNA', 'assay2': 'ADT'} " \ + "Just replace with actual assay names instead of assay1 and assay2" + if assays is None: + assays = {} + for i in self.assayNames: + if i in assays: + if assays[i] in assay_types: + assay = assay_types[assays[i]](self._fn, i, self.cells) + else: + print(f"WARNING: {assays[i]} is not a recognized assay type. Has to be one of " + f"{', '.join(list(assay_types.keys()))}\nPLease note that the names are case-sensitive.") + print(caution_statement % i) + assay = Assay(self._fn, i, self.cells) + else: + if i in assay_types: + # FIXME: Should this be silent? + assay = assay_types[i](self._fn, i, self.cells) + else: + print(caution_statement % i) + assay = Assay(self._fn, i, self.cells) + setattr(self, i, assay) + return None + + @show_progress + def _ini_cell_props(self, min_features: int, min_cells: int, force_recalc: bool = False) -> None: + # TODO: add adt data as well? + assay = self.__getattribute__(self.defaultAssay) + n_c = assay.rawData.sum(axis=1) + n_f = (assay.rawData > 0).sum(axis=1) + fn = (assay.rawData > 0).sum(axis=0) + n_c, n_f, fn = optimize(n_c, n_f, fn) + if 'nCounts' in self.cells.table.columns and \ + 'nFeatures' in self.cells.table.columns and force_recalc is False: + pass + else: + print(f"INFO: Computing nCounts", flush=True) + self.cells.add('nCounts', n_c.compute(), overwrite=True) + print(f"INFO: Computing nFeatures", flush=True) + self.cells.add('nFeatures', n_f.compute(), overwrite=True) + self.cells.update(self.cells.sift(self.cells.fetch('nFeatures'), + min_features, np.Inf)) + print(f"INFO: Filtering Features", flush=True) + assay.feats.update(fn.compute() > min_cells) + + def _col_renamer(self, from_assay: str, col_key: str, suffix: str) -> str: + if from_assay == self.defaultAssay: + if col_key == 'I': + ret_val = suffix + else: + ret_val = '_'.join(list(map(str, [col_key, suffix]))) + else: + if col_key == 'I': + ret_val = '_'.join(list(map(str, [from_assay, suffix]))) + else: + ret_val = '_'.join(list(map(str, [from_assay, col_key, suffix]))) + return ret_val + + def filter_cells(self, *, attrs: Iterable[str], lows: Iterable[int], + highs: Iterable[int]) -> None: + for i, j, k in zip(attrs, lows, highs): + if i not in self.cells.table.columns: + print(f"WARNING: {i} not found in cell metadata. Will ignore {i} for filtering") + continue + x = self.cells.sift(self.cells.table[i].values, j, k) + print(f"INFO: {len(x) - x.sum()} cells failed filtering for {i}") + self.cells.update(x) + + def auto_filter_cells(self, *, attrs: Iterable[str], min_p: float = 0.01, max_p: float = 0.99) -> None: + for i in attrs: + if i not in self.cells.table.columns: + print(f"WARNING: {i} not found in cell metadata. Will ignore {i} for filtering") + continue + a = self.cells.table[i] + dist = norm(np.median(a), np.std(a)) + self.filter_cells(attrs=[i], lows=[dist.ppf(min_p)], highs=[dist.ppf(max_p)]) + + def make_graph(self, *, from_assay: str = None, cell_key: str = 'I', feat_key: str = 'I', + reduction_method: str = 'auto', k: int = 11, n_cluster: int = 100, dims: int = None, + ann_metric: str = 'l2', ann_efc: int = 100, ann_ef: int = 5, ann_nthreads: int = 1, + rand_state: int = 4466, batch_size: int = None, + save_ann_obj: bool = False, save_raw_dists: bool = False, **kmeans_kwargs): + if from_assay is None: + from_assay = self.defaultAssay + assay = self.__getattribute__(from_assay) + reduction_method = reduction_method.lower() + if reduction_method not in ['pca', 'lsi', 'auto']: + raise ValueError("ERROR: Please choose either 'pca' or 'lsi' as reduction method") + if reduction_method == 'auto': + assay_type = str(assay.__class__).split('.')[-1][:-2] + if assay_type == 'ATACassay': + print("INFO: Using LSI for dimension reduction") + reduction_method = 'lsi' + else: + print("INFO: Using PCA for dimension reduction") + reduction_method = 'pca' + + if batch_size is None: + batch_size = assay.rawData.chunksize[0] + data = assay.select_and_normalize(cell_key, feat_key, batch_size) + # data is not guaranteed to have same indices as raw data and may be shifted. we handle this in the pipeline + mu = clean_array(data.mean(axis=0).compute()) + sigma = clean_array(data.std(axis=0).compute(), 1) + + loadings = None + loadings_name = reduction_method if cell_key == 'I' else cell_key + '_' + reduction_method + loadings_hash = hash((assay.create_subset_hash(cell_key, feat_key), dims)) + if loadings_name in assay.attrs and assay.attrs[loadings_name] == loadings_hash and \ + loadings_name in assay._z[from_assay]: + print("INFO: Loading cached component coefficients/loadings") + loadings = self._z[from_assay][loadings_name][:] + if dims is None and loadings is None: + raise ValueError("ERROR: No cached data found. Please provide a value for 'dims'") + + ann_obj = AnnStream(data, k, n_cluster, reduction_method, dims, loadings, + ann_metric, ann_efc, ann_ef, + ann_nthreads, rand_state, mu, sigma, **kmeans_kwargs) + if save_ann_obj: + assay.annObj = ann_obj # before calling fit, so that annObj can be diagnosed if an error arises + ann_obj.fit() + + if loadings is None: + g = create_zarr_dataset(self._z[from_assay], loadings_name, + (1000, 1000), 'f8', ann_obj.loadings.shape) + g[:, :] = ann_obj.loadings + assay.attrs[loadings_name] = loadings_hash + + self.cells.add(self._col_renamer(from_assay, cell_key, 'kmeans_cluster'), ann_obj.clusterLabels, + fill_val=-1, key=cell_key, overwrite=True) + kmeans_loc = 'kmeans_cluster_centers' if cell_key == 'I' else cell_key + '_kmeans_cluster_centers' + g = create_zarr_dataset(self._z[from_assay], kmeans_loc, + (1000, 1000), 'f8', ann_obj.kmeans.cluster_centers_.shape) + g[:, :] = ann_obj.kmeans.cluster_centers_ + + graph_loc = 'graph' if cell_key == 'I' else cell_key + '_graph' + if loadings is not None: + # This means that we used cached PCA data, which means that nothing changed upstream. + store = self._z[from_assay][graph_loc] + if store.attrs['k'] == k: + print("INFO: ANN index instantiated but will reuse the existing graph.") + return None + graph_dir = f"{self._fn}/{from_assay}/{graph_loc}" + if os.path.isdir(graph_dir): + shutil.rmtree(graph_dir, ignore_errors=True) + store = self._z[from_assay].create_group(graph_loc) + store.attrs['n_cells'] = ann_obj.nCells + store.attrs['k'] = ann_obj.k + store.attrs['self_uuid'] = uuid4().hex + make_knn_graph(ann_obj, batch_size, store, save_raw_dists=save_raw_dists) + return None + + def _load_graph(self, from_assay: str, cell_key: str, graph_format: str, + min_edge_weight: float = 0, symmetric: bool = True): + graph_loc = 'graph' if cell_key == 'I' else cell_key + '_graph' + if graph_loc not in self._z[from_assay]: + print(f"ERROR: {graph_loc} not found for assay {from_assay}. Run `make_graph` for assay {from_assay}") + return None + if graph_format not in ['coo', 'csr']: + raise KeyError("ERROR: format has to be either 'coo' or 'csr'") + store = self._z[from_assay][graph_loc] + n_cells = store.attrs['n_cells'] + # TODO: can we have a progress bar for graph loading. Append to coo matrix? + graph = coo_matrix((store['weights'][:], (store['edges'][:, 0], store['edges'][:, 1])), + shape=(n_cells, n_cells)) + if symmetric: + graph = triu((graph + graph.T)/2) + idx = graph.data > min_edge_weight + if graph_format == 'coo': + return coo_matrix((graph.data[idx], (graph.row[idx], graph.col[idx])), shape=(n_cells, n_cells)) + else: + return csr_matrix((graph.data[idx], (graph.row[idx], graph.col[idx])), shape=(n_cells, n_cells)) + + def _ini_embed(self, from_assay: str, cell_key: str): + pc = PCA(n_components=2).fit_transform(self._z[from_assay]['kmeans_cluster_centers'][:]) + pc[:, 0] = rescale_array(pc[:, 0]) + pc[:, 1] = rescale_array(pc[:, 1]) + clusters = self.cells.table[self._col_renamer(from_assay, 'I', 'kmeans_cluster')] + clusters = clusters[self.cells.table[cell_key]] + return np.array([pc[x] for x in clusters]).astype(np.float32) + + def run_umap(self, *, from_assay: str = None, cell_key: str = 'I', use_full_graph: bool = True, + min_edge_weight: float = 0, ini_embed: np.ndarray = None, + spread: float = 2.0, min_dist: float = 1, fit_n_epochs: int = 200, tx_n_epochs: int = 100, + random_seed: int = 4444, parallel: bool = False, **kwargs) -> None: + if from_assay is None: + from_assay = self.defaultAssay + if use_full_graph: + graph = self._load_graph(from_assay, cell_key, 'coo', min_edge_weight, symmetric=False) + else: + graph = self._load_graph(from_assay, 'I', 'csr', min_edge_weight, symmetric=False) + nodes = np.where(self.cells.table[self.cells.table.I][cell_key].values)[0] + graph = graph[nodes, :][:, nodes].tocoo() + if ini_embed is None: + ini_embed = self._ini_embed(from_assay, cell_key) + t = fit_transform(graph, ini_embed, spread=spread, min_dist=min_dist, + tx_n_epochs=tx_n_epochs, fit_n_epochs=fit_n_epochs, + random_seed=random_seed, parallel=parallel, **kwargs) + self.cells.add(self._col_renamer(from_assay, cell_key, 'UMAP1'), t[:, 0], key=cell_key, overwrite=True) + self.cells.add(self._col_renamer(from_assay, cell_key, 'UMAP2'), t[:, 1], key=cell_key, overwrite=True) + return None + + def run_clustering(self, *, from_assay: str = None, cell_key: str = 'I', + n_clusters: int = None, min_edge_weight: float = 0, balanced_cut: bool = False, + max_size: int = None, min_size: int = None, max_distance_fc: float = 2, + return_clusters: bool = False) -> Union[None, pd.Series]: + if from_assay is None: + from_assay = self.defaultAssay + if balanced_cut is False: + if n_clusters is None: + raise ValueError("ERROR: Please provide a value for n_clusters parameter. We are working on making " + "this parameter free") + else: + if n_clusters is not None: + print("INFO: Using balanced cut method for cutting dendrogram. `n_clusters` will be ignored.") + if max_size is None or min_size is None: + raise ValueError("ERROR: Please provide value for max_size and min_size") + graph = self._load_graph(from_assay, cell_key, 'csr', min_edge_weight=min_edge_weight, symmetric=True) + assay = self.__getattribute__(from_assay) + graph_loc = 'graph' if cell_key == 'I' else cell_key + '_graph' + graph_uuid = self._z[from_assay][graph_loc].attrs['self_uuid'] + params = [graph_uuid, min_edge_weight] # this should not be changed to a tuple. + # tuple are changed to list when saved as zarr attrs + if 'dendrogram' in self._z[from_assay][graph_loc] and \ + assay.attrs['dendrogram_hash'] == params: + dendrogram = self._z[from_assay][graph_loc]['dendrogram'][:] + print("INFO: Using existing dendrogram") + else: + paris = skn.hierarchy.Paris(engine='python') + dendrogram = paris.fit_transform(graph) + dendrogram[dendrogram == np.Inf] = 0 + g = create_zarr_dataset(self._z[from_assay][graph_loc], 'dendrogram', + (5000,), 'f8', (graph.shape[0] - 1, 4)) + g[:] = dendrogram + assay.attrs['dendrogram_hash'] = params + if balanced_cut: + labels = BalancedCut(dendrogram, max_size, min_size, max_distance_fc).get_clusters() + else: + labels = skn.hierarchy.straight_cut(dendrogram, n_clusters=n_clusters) + 1 + if return_clusters: + return pd.Series(labels, index=self.cells.table[cell_key].index[self.cells.table[cell_key]]) + else: + self.cells.add(self._col_renamer(from_assay, cell_key, 'cluster'), labels, + fill_val=-1, key=cell_key, overwrite=True) + + def plot_cells_dists(self, cols: List[str] = None, all_cells: bool = False, **kwargs): + plot_cols = ['nCounts', 'nFeatures'] + if cols is not None: + if type(cols) != list: + raise ValueError("ERROR: 'attrs' argument must be of type list") + for i in cols: + matches = [x for x in self.cells.table.columns if re.search(i, x)] + if len(matches) > 0: + plot_cols.extend(matches) + else: + print(f"WARNING: {i} not found in cell metadata") + if all_cells: + plot_qc(self.cells.table[plot_cols], **kwargs) + else: + if 'color' not in kwargs: + kwargs['color'] = 'coral' + plot_qc(self.cells.table[self.cells.table.I][plot_cols], **kwargs) + return None + + def plot_gene_mean_var(self, **kwargs): + assay = self.__getattribute__(self.defaultAssay) + if 'hvgs' not in assay.feats.table.columns: + raise KeyError(f"ERROR: HVGs have not been marked yet. Run 'mark_hvgs' first in {self.defaultAssay} assay.") + nzm, vf, nc = [assay.feats.fetch(x).astype('float') for x in ['nz_mean', 'c_var', 'nCells']] + plot_mean_var(nzm, vf, nc, assay.feats.fetch('hvgs'), **kwargs) + + def get_cell_vals(self, *, from_assay: str, cell_key: str, k: str, clip_fraction: float = 0): + cell_idx = self.cells.active_index(cell_key) + if k not in self.cells.table.columns: + assay = self.__getattribute__(from_assay) + feat_idx = assay.feats.get_idx_by_names([k], True) + if len(feat_idx) == 0: + raise ValueError(f"ERROR: {k} not found in {from_assay} assay.") + else: + if len(feat_idx) > 1: + print(f"WARNING: Plotting mean of {len(feat_idx)} features because {k} is not unique.") + vals = assay.normed(cell_idx, feat_idx).mean(axis=1).compute() + if clip_fraction > 0: + min_v = np.percentile(vals, 100 * clip_fraction) + max_v = np.percentile(vals, 100 - 100 * clip_fraction) + vals[vals < min_v] = min_v + vals[vals > max_v] = max_v + return pd.Series(vals, dtype=float) + else: + vals = self.cells.fetch(k, cell_key) + if vals.dtype == object: + vals = pd.Series(vals, dtype="category") + elif all(vals == vals.astype(int)): + vals = pd.Series(vals, dtype="category") + else: + vals = pd.Series(vals, dtype=float) + return vals + + def plot_umap(self, *, from_assay: str = None, cell_key: str = 'I', feat_assay: str = None, + color_by: str = None, clip_fraction: float = 0.01, shade: bool = False, + labels_kwargs: dict = None, legends_kwargs: dict = None, **kwargs): + if from_assay is None: + from_assay = self.defaultAssay + if feat_assay is None: + feat_assay = from_assay + if clip_fraction >= 0.5: + raise ValueError("ERROR: clip_fraction cannot be larger than or equal to 0.5") + x = self.cells.fetch(self._col_renamer(from_assay, cell_key, 'UMAP1'), cell_key) + y = self.cells.fetch(self._col_renamer(from_assay, cell_key, 'UMAP2'), cell_key) + if color_by is not None: + v = self.get_cell_vals(from_assay=feat_assay, cell_key=cell_key, k=color_by, + clip_fraction=clip_fraction) + else: + v = np.ones(len(x)) + df = pd.DataFrame({'UMAP 1': x, 'UMAP 2': y, 'v': v}) + if shade: + return shade_scatter(df, labels_kwargs=labels_kwargs, legends_kwargs=legends_kwargs, **kwargs) + else: + return plot_scatter(df, labels_kwargs=labels_kwargs, legends_kwargs=legends_kwargs, **kwargs) + + def __repr__(self): + x = ' '.join(self.assayNames) + return f"DataStore with {self.cells.N} cells containing {len(self.assayNames)} assays: {x}" diff --git a/scarf/knn_utils.py b/scarf/knn_utils.py new file mode 100644 index 0000000..c4eb53e --- /dev/null +++ b/scarf/knn_utils.py @@ -0,0 +1,45 @@ +from umap.umap_ import smooth_knn_dist, compute_membership_strengths +from .writers import create_zarr_dataset +from .ann import AnnStream + +__all__ = ['make_knn_graph'] + + +def make_knn_graph(ann_obj: AnnStream, chunk_size: int, store, + lc: float = 1, bw: float = 1.5, save_raw_dists: bool = False): + # bandwidth: Higher value will push the mean of distribution of graph edge weights towards right + # local_connectivity: Higher values will create push distribution of edge weights towards terminal values (binary + # like) Lower values will accumulate edge weights around the mean produced by bandwidth + + n_cells, n_neighbors = ann_obj.nCells, ann_obj.k + if save_raw_dists: + z_knn = create_zarr_dataset(store, 'indices', (chunk_size,), 'u8', + (n_cells, n_neighbors)) + z_dist = create_zarr_dataset(store, 'distances', (chunk_size,), 'f8', + (n_cells, n_neighbors)) + zge = create_zarr_dataset(store, 'edges', (chunk_size,), ('u8', 'u8'), + (n_cells * n_neighbors, 2)) + zgw = create_zarr_dataset(store, 'weights', (chunk_size,), 'f8', + (n_cells * n_neighbors,)) + last_row = 0 + val_counts = 0 + nsample_start = 0 + for i in ann_obj.iter_blocks(msg='Saving KNN graph'): + ki, kv = ann_obj.transform_ann(ann_obj.reducer(i)) + sigmas, rhos = smooth_knn_dist(kv, k=n_neighbors, + local_connectivity=lc, bandwidth=bw) + rows, cols, vals = compute_membership_strengths(ki, kv, sigmas, rhos) + rows = rows + last_row + start = val_counts + end = val_counts + len(rows) + last_row = rows[-1] + 1 + val_counts += len(rows) + nsample_end = nsample_start + len(ki) + if save_raw_dists: + z_knn[nsample_start:nsample_end, :] = ki + z_dist[nsample_start:nsample_end, :] = kv + zge[start:end, 0] = rows + zge[start:end, 1] = cols + zgw[start:end] = vals + nsample_start = nsample_end + return None diff --git a/scarf/metadata.py b/scarf/metadata.py new file mode 100644 index 0000000..d1233a0 --- /dev/null +++ b/scarf/metadata.py @@ -0,0 +1,147 @@ +import numpy as np +import re +import pandas as pd +from typing import List, Iterable +from .utils import fit_lowess +from .utils import load_zarr_table +from .writers import create_zarr_obj_array + + +__all__ = ['MetaData'] + + +class MetaData: + + def __init__(self, zgrp): + self._zgrp = zgrp + # TODO: think of a strategy that the datframe columns (except I) are made immutable + self.table = load_zarr_table(self._zgrp) + self.N = len(self.table) + + def active_index(self, key: str) -> np.ndarray: + if self.table[key].dtype != bool: + raise ValueError(f"ERROR: {key} is not of bool type") + return self.table.index[self.table[key]].values + + def fetch(self, k: str, key: str = 'I') -> np.ndarray: + """ + Get column values for only valid rows + """ + if k not in self.table.columns or key not in self.table.columns: + raise KeyError(f"ERROR: not found in MetaData") + return self.table[k].values[self.table[key].values] + + def add(self, k: str, v: np.array, fill_val=np.NaN, key: str = 'I', overwrite: bool = False) -> None: + """ + Add new column + """ + if k in ['I', 'ids', 'names']: + raise ValueError(f"ERROR: {k} is a protected name in MetaData class." + "Please choose any other name") + if k in self.table.columns and overwrite is False: + raise ValueError(f"ERROR: attribute {k} already exists. Set overwrite to True") + if len(v) == self.N: + self.table[k] = v + elif len(v) == self.table[key].sum(): + self.table[k] = self._expand(v, fill_val, key) + else: + raise ValueError("ERROR: Trying to add attr of incorrect length") + self._save(k) + return None + + def update(self, bool_arr: np.array, key: str = 'I') -> None: + """ + Update valid rows using a boolean array and 'and' operation + """ + if len(bool_arr) != self.N: + bool_arr = self._expand(bool_arr, False, key) + self.table[key] = self.table[key] & bool_arr + self._save(key) + return None + + @staticmethod + def sift(a: np.ndarray, min_v: float = -np.Inf, max_v: float = np.Inf) -> np.ndarray: + ret_val = ((a > min_v) & (a < max_v)) + return ret_val + + def multi_sift(self, cols: List[str], lows: Iterable, highs: Iterable) -> np.ndarray: + ret_val = self._and_bools([self.sift(self.table[i], j, k) for i, j, k + in zip(cols, lows, highs)]) + return ret_val + + def get_idx_by_names(self, names: List[str], all_names: bool = True) -> List[int]: + return self._get_idx([x.upper() for x in names], 'names', all_names) + + def get_idx_by_ids(self, ids: List[str], all_ids: bool = True) -> List[int]: + return self._get_idx([x.upper() for x in ids], 'ids', all_ids) + + def grep(self, pattern: str, only_valid=False) -> List[str]: + names = np.array(list(map(str.upper, self.table['names'].values))) + if only_valid: + names = names[self.table['I']] + return sorted(set([x for x in names + if re.match(pattern.upper(), x) is not None])) + + def remove_trend(self, name: str, x: str, y: str, n_bins: int = 200, + lowess_frac: float = 0.1) -> None: + a = fit_lowess(self.fetch(x).astype(float), + self.fetch(y).astype(float), + n_bins, lowess_frac) + self.add(name, a, overwrite=True) + + def idx_to_bool(self, idx, invert: bool = False) -> np.ndarray: + a = np.zeros(self.N, dtype=bool) + a[idx] = True + if invert: + a = ~a + return a + + @staticmethod + def _and_bools(bools: List[np.ndarray]) -> np.ndarray: + a = sum(bools) + a[a < len(bools)] = 0 + return a.astype(bool) + + def _get_idx(self, k, d, f) -> List[int]: + if isinstance(k, Iterable) and type(k) != str: + if f: + d = self.table[d].values + else: + d = self.fetch(d) + d = np.array([x.upper() for x in d]) + return sum([list(np.where(d == x)[0]) for x in k], []) + else: + raise TypeError("ERROR: Please provide the ID/names as list") + + def _expand(self, v: np.array, fill_val, key: str = 'I') -> np.ndarray: + """ + Makes sure that the array being added to the table is of the same shape. + If the array has same shape as the table then the input array is added straightaway. + It is assumed that the input array is in same order as table rows. + """ + a = np.empty(self.N) + k = self.table[key].values + a[k] = v + a[~k] = fill_val + return a.astype(type(v[0])) + + # def _reset(self) -> None: + # """ + # Set all rows to active + # """ + # self.active = np.array([True for _ in range(self.N)]).astype(bool) + # # TODO: also remove all the columns except three + + def _make_data_table(self) -> pd.DataFrame: + keys = ['I', 'ids', 'names'] + keys = keys + [x for x in self._zgrp.keys() if x not in keys] + return pd.DataFrame({x: self._zgrp[x][:] for x in keys}) + + def _save(self, key: str = None) -> None: + if key is not None and key in self.table: + create_zarr_obj_array(self._zgrp, key, self.table[key].values, self.table[key].dtype) + # FIXME: Why is following line here? + self._zgrp['I'] = self.table['I'].values + + def __repr__(self): + return f"MetaData of {self.table['I'].sum()}({self.N}) elements" diff --git a/scarf/plots.py b/scarf/plots.py new file mode 100644 index 0000000..e1dd5a3 --- /dev/null +++ b/scarf/plots.py @@ -0,0 +1,239 @@ +import matplotlib.pyplot as plt +import matplotlib as mpl +import seaborn as sns +import numpy as np +import pandas as pd +from typing import Tuple +from cmocean import cm +from IPython.display import display +from holoviews.plotting import mpl as hmpl +from holoviews.operation.datashader import datashade, dynspread +import holoviews as hv +import datashader as dsh + +__all__ = ['plot_qc', 'plot_mean_var', 'plot_graph_qc', 'plot_scatter', 'shade_scatter'] + +plt.style.use('fivethirtyeight') + + +def clean_axis(ax, ts=11, ga=0.4): + ax.xaxis.set_tick_params(labelsize=ts) + ax.yaxis.set_tick_params(labelsize=ts) + for i in ['top', 'bottom', 'left', 'right']: + ax.spines[i].set_visible(False) + ax.grid(which='major', linestyle='--', alpha=ga) + ax.figure.patch.set_alpha(0) + ax.patch.set_alpha(0) + return True + + +def plot_graph_qc(g): + _, axis = plt.subplots(1, 2, figsize=(12, 4)) + ax = axis[0] + x = np.array((g != 0).sum(axis=0))[0] + y = pd.Series(x).value_counts().sort_index() + ax.bar(y.index, y.values, width=0.5) + xlim = np.percentile(x, 99.5) + 5 + ax.set_xlim((0, xlim)) + ax.set_xlabel('Node degree') + ax.set_ylabel('Frequency') + ax.text(xlim, y.values.max(), f"plot is clipped (max degree: {y.index.max()})", + ha='right', fontsize=9) + clean_axis(ax) + ax = axis[1] + ax.hist(g.data, bins=30) + ax.set_xlabel('Edge weight') + ax.set_ylabel('Frequency') + clean_axis(ax) + plt.tight_layout() + plt.show() + + +def plot_qc(data: pd.DataFrame, color: str = 'steelblue', + fig_size: tuple = None, label_size: float = 10.0, title_size: float = 8.0, + scatter_size: float = 1.0, n_rows: int = 1, max_points: int = 10000): + n_plots = data.shape[1] + n_rows = max(n_rows, 1) + n_cols = max(n_plots//n_rows, 1) + if fig_size is None: + fig_size = (1+3*n_cols, 1+2.5*n_rows) + fig = plt.figure(figsize=fig_size) + for i in range(n_plots): + val = data[data.columns[i]].values + ax = fig.add_subplot(n_rows, n_cols, i+1) + sns.violinplot(val, ax=ax, linewidth=1, orient='v', alpha=0.6, + inner=None, cut=0, color=color) + val_dots = val + if len(val_dots) > max_points: + val_dots = data[data.columns[i]].sample(n=max_points).values + sns.stripplot(val_dots, jitter=0.4, ax=ax, orient='v', + s=scatter_size, color='k', alpha=0.4) + ax.set_ylabel(data.columns[i], fontsize=label_size) + ax.set_title('Min: %.1f, Max: %.1f, Median: %.1f' % ( + val.min(), val.max(), int(np.median(val))), fontsize=title_size) + clean_axis(ax) + plt.tight_layout() + plt.show() + + +def plot_mean_var(nzm: np.ndarray, fv: np.ndarray, n_cells: np.ndarray, hvg: np.ndarray, + ax_label_fs: float = 12, fig_size: Tuple[float, float] = (4.5, 4.0), + ss: Tuple[float, float] = (3, 30), cmaps: Tuple[str, str] = ('winter', 'magma_r')): + _, ax = plt.subplots(1, 1, figsize=fig_size) + nzm = np.log2(nzm) + fv = np.log2(fv) + ax.scatter(nzm[~hvg], fv[~hvg], alpha=0.6, c=n_cells[~hvg], cmap=cmaps[0], s=ss[0]) + ax.scatter(nzm[hvg], fv[hvg], alpha=0.8, c=n_cells[hvg], cmap=cmaps[1], s=ss[1], edgecolor='k', lw=0.5) + ax.set_xlabel('Log mean non-zero expression', fontsize=ax_label_fs) + ax.set_ylabel('Log corrected variance', fontsize=ax_label_fs) + clean_axis(ax) + plt.tight_layout() + plt.show() + + +def scatter_make_cmap(df, cmap=None, color_key=None): + v = df.columns[2] + if type(df[v].dtype) == pd.CategoricalDtype: + if color_key is None: + if cmap is None: + cmap = 'tab20' + pal = sns.color_palette(cmap, n_colors=df[v].nunique()).as_hex() + color_key = {i: j for i, j in zip(sorted(df[v].unique()), pal)} + else: + if type(color_key) is not dict: + raise TypeError("ERROR: colorkey need to be of dict type. With keys as categories in the `v` column.") + if len(set(list(color_key.keys())).union(df[v].unique())) != df[v].nunique(): + raise ValueError("ERROR: The keys in colorkey dict should contain all the categories in the v " + "column of data") + cmap = None + else: + if cmap is None: + cmap = cm.deep + color_key = None + return cmap, color_key + + +def scatter_ax_labels(ax, df, fontsize: float = 12, frame_offset: float = 0.05): + x = df.columns[0] + y = df.columns[1] + ax.set_xlabel(x, fontsize=fontsize) + ax.set_ylabel(y, fontsize=fontsize) + vmin, vmax = df[x].min(), df[x].max() + ax.set_xlim((vmin - abs(vmin * frame_offset), vmax + abs(vmax * frame_offset))) + vmin, vmax = df[y].min(), df[y].max() + ax.set_ylim((vmin - abs(vmin * frame_offset), vmax + abs(vmax * frame_offset))) + ax.set_xticks([]) + ax.set_yticks([]) + return None + + +def scatter_ax_legends(fig, ax, df: pd.DataFrame, color_key: dict, cmap, + show_ondata: bool = True, show_onside: bool = True, + fontsize: float = 12, n_per_col: int = 20, dummy_size: float = 0.01, + marker_scale: float = 70, lspacing: float = 0.1, cspacing: float = 1) -> None: + x, y, v = df.columns + if color_key is not None: + centers = df.groupby(v).median().T + for i in centers: + if show_ondata: + ax.text(centers[i][x], centers[i][y], i, fontsize=fontsize) + if show_onside: + ax.scatter([float(centers[i][x])], [float(centers[i][y])], c=color_key[i], + label=i, alpha=1, s=dummy_size) + if show_onside: + ncols = max(1, int(len(color_key)/n_per_col)) + ax.legend(ncol=ncols, loc=(1, 0), frameon=False, fontsize=fontsize, + markerscale=marker_scale, labelspacing=lspacing, columnspacing=cspacing) + else: + if df[v].nunique() < 2: + pass + elif fig is not None: + cbaxes = fig.add_axes([0.2, 1, 0.6, 0.05]) + norm = mpl.colors.Normalize(vmin=0, vmax=1) + cb = mpl.colorbar.ColorbarBase(cbaxes, cmap=cmap, norm=norm, + orientation='horizontal') + cb.set_label('Relative values', fontsize=8) + cb.ax.xaxis.set_label_position('top') + else: + print("WARNING: Not plotting the colorbar because fig object was not passed") + return None + + +def scatter_ax_cleanup(ax, spine_width: float = 0.5, spine_color: str = 'k', + displayed_sides: tuple = ('bottom', 'left')) -> None: + for i in ['bottom', 'left', 'top', 'right']: + spine = ax.spines[i] + if i in displayed_sides: + spine.set_visible(True) + spine.set_linewidth(spine_width) + spine.set_edgecolor(spine_color) + else: + spine.set_visible(False) + ax.figure.patch.set_alpha(0) + ax.patch.set_alpha(0) + ax.set_aspect('auto') + return None + + +def plot_scatter(df, in_ax=None, fig=None, width: float = 6, height: float = 6, + cmap=None, color_key: dict = None, + s: float = 10, lw: float = 0.1, edge_color='k', + labels_kwargs: dict = None, legends_kwargs: dict = None, **kwargs): + x, y, v = df.columns + if in_ax is None: + fig, ax = plt.subplots(1, 1, figsize=(width, height)) + else: + ax = in_ax + cmap, color_key = scatter_make_cmap(df, cmap, color_key) + if color_key is None: + if df[v].nunique() == 1: + kwargs['c'] = 'steelblue' + else: + df[v] = (df[v] - df[v].min()) / (df[v].max() - df[v].min()) + kwargs['c'] = df[v] + else: + kwargs['c'] = [color_key[x] for x in df[v].values] + ax.scatter(df[x].values, df[y].values, s=s, lw=lw, edgecolor=edge_color, cmap=cmap, **kwargs) + if labels_kwargs is None: + labels_kwargs = {} + scatter_ax_labels(ax, df, **labels_kwargs) + if legends_kwargs is None: + legends_kwargs = {} + scatter_ax_legends(fig, ax, df, color_key, cmap, **legends_kwargs) + scatter_ax_cleanup(ax) + if in_ax is None: + plt.show() + else: + return ax + + +def shade_scatter(df, fig_size: float = 7, width_px: int = 1000, height_px: int = 1000, + x_sampling: float = 0.5, y_sampling: float = 0.5, + spread_px: int = 1, min_alpha: int = 10, cmap=None, color_key: dict = None, + labels_kwargs: dict = None, legends_kwargs: dict = None): + x, y, v = df.columns + points = hv.Points(df, kdims=[x, y], vdims=v) + cmap, color_key = scatter_make_cmap(df, cmap, color_key) + if color_key is None: + if df[v].nunique() == 1: + agg = dsh.count(v) + else: + df[v] = (df[v] - df[v].min()) / (df[v].max() - df[v].min()) + agg = dsh.mean(v) + else: + agg = dsh.count_cat(v) + shader = datashade(points, aggregator=agg, cmap=cmap, color_key=color_key, + height=height_px, width=width_px, + x_sampling=x_sampling, y_sampling=y_sampling, min_alpha=min_alpha) + shader = dynspread(shader, max_px=spread_px) + renderer = hmpl.MPLRenderer.instance() + fig = renderer.get_plot(shader.opts(fig_inches=(fig_size, fig_size))).state + ax = fig.gca() + if labels_kwargs is None: + labels_kwargs = {} + scatter_ax_labels(ax, df, **labels_kwargs) + if legends_kwargs is None: + legends_kwargs = {} + scatter_ax_legends(fig, ax, df, color_key, cmap, **legends_kwargs) + scatter_ax_cleanup(ax) + display(fig) diff --git a/scarf/readers.py b/scarf/readers.py new file mode 100644 index 0000000..a30a41b --- /dev/null +++ b/scarf/readers.py @@ -0,0 +1,212 @@ +from abc import ABC, abstractmethod +from typing import Generator, Dict, List, Optional, Tuple +import numpy as np +import pandas as pd +import h5py +import os +import sparse +import gzip +from typing import IO + + +__all__ = ['CrH5Reader', 'CrDirReader', 'CrReader'] + + +def get_file_handle(fn: str) -> IO: + try: + if fn.rsplit('.', 1)[-1] == 'gz': + return gzip.open(fn, mode='rt') + else: + return open(fn, 'r') + except (OSError, IOError, FileNotFoundError): + raise FileNotFoundError("ERROR: FILE NOT FOUND: %s" % fn) + + +def read_file(fn: str): + fh = get_file_handle(fn) + for line in fh: + yield line.rstrip() + + +class CrReader(ABC): + def __init__(self, grp_names, file_type): + if file_type == 'rna': + self.autoNames = {'Gene Expression': 'RNA'} + elif file_type == 'atac': + self.autoNames = {'Peaks': 'ATAC'} + else: + raise ValueError("ERROR: Please provide a value for parameter 'file_type'.\n" + "The value can be either 'rna' or 'atac' depending on whether is is scRNA-Seq or " + "scATAC-Seq data") + self.grpNames: Dict = grp_names + self.nFeatures: int = len(self.feature_names()) + self.nCells: int = len(self.cell_names()) + self.assayFeats = self._make_feat_table() + self._auto_rename_assay_names() + + @abstractmethod + def _handle_version(self): + pass + + @abstractmethod + def _read_dataset(self, key: Optional[str] = None) -> List: + pass + + @abstractmethod + def consume(self, batch_size: int, lines_in_mem: int): + pass + + def _subset_by_assay(self, v, assay) -> List: + if assay is None: + return v + elif assay not in self.assayFeats: + raise ValueError("ERROR: Assay ID %s is not valid" % assay) + idx = self.assayFeats[assay] + return v[idx.start: idx.end] + + def _make_feat_table(self) -> pd.DataFrame: + s = self.feature_types() + span: List[Tuple] = [] + last = s[0] + last_n: int = 0 + for n, i in enumerate(s[1:], 1): + if i != last: + span.append((last, last_n, n)) + last_n = n + elif n == len(s) - 1: + span.append((last, last_n, n + 1)) + last = i + df = pd.DataFrame(span, columns=['type', 'start', 'end']) + df.index = ["assay%s" % str(x + 1) for x in df.index] + df['nFeatures'] = df.end - df.start + return df.T + + def _auto_rename_assay_names(self): + anames = list(map(str.upper, self.assayFeats.columns)) + main_name_k = list(self.autoNames.keys())[0] + main_name_v = list(self.autoNames.values())[0] + if main_name_v in anames: + print(f"INFO: {main_name_v} already present") + # Making sure that column name is in uppercase 'RNA' + newnames = list(self.assayFeats.columns) + newnames[anames.index(main_name_v)] = main_name_v + self.assayFeats.columns = newnames + else: + at = self.assayFeats.T['type'] == main_name_k + if at.sum() == 1: + main_assay = at[at].index[0] + else: + print('WARNING:') + main_assay = self.assayFeats.T[at].nFeatures.astype(int).idxmax() + self.rename_assays({main_assay: main_name_v}) + + def rename_assays(self, name_map: Dict[str, str]) -> None: + self.assayFeats.rename(columns=name_map, inplace=True) + + def feature_ids(self, assay: str = None) -> List[str]: + return self._subset_by_assay( + self._read_dataset('feature_ids'), assay) + + def feature_names(self, assay: str = None) -> List[str]: + return self._subset_by_assay( + self._read_dataset('feature_names'), assay) + + def feature_types(self) -> List[str]: + if self.grpNames['feature_types'] is not None: + return self._read_dataset('feature_types') + else: + default_name = list(self.autoNames.keys())[0] + return [default_name for _ in range(self.nFeatures)] + + def cell_names(self) -> List[str]: + return self._read_dataset('cell_names') + + +class CrH5Reader(CrReader): + def __init__(self, h5_fn, file_type: str = None): + self.h5obj = h5py.File(h5_fn, mode='r') + self.grp = None + super().__init__(self._handle_version(), file_type) + + def _handle_version(self): + root_key = list(self.h5obj.keys())[0] + self.grp = self.h5obj[root_key] + if root_key == 'matrix': + grps = {'feature_ids': 'features/id', + 'feature_names': 'features/name', + 'feature_types': 'features/feature_type', + 'cell_names': 'barcodes'} + else: + grps = {'feature_ids': 'genes', 'feature_names': 'gene_names', + 'feature_types': None, 'cell_names': 'barcodes'} + return grps + + def _read_dataset(self, key: Optional[str] = None): + return [x.decode('UTF-8') for x in self.grp[self.grpNames[key]][:]] + + def consume(self, batch_size: int, lines_in_mem: int): + s = 0 + for ind_n in range(0, self.nCells, batch_size): + i = self.grp['indptr'][ind_n:ind_n + batch_size] + e = i[-1] + if s != 0: + idx = np.array([s] + list(i)) + idx = idx - idx[0] + else: + idx = np.array(i) + n = idx.shape[0] - 1 + nidx = np.repeat(range(n), np.diff(idx).astype('int32')) + yield sparse.COO([nidx, self.grp['indices'][s: e]], self.grp['data'][s: e], + shape=(n, self.nFeatures)) + s = e + + def close(self) -> None: + self.h5obj.close() + + +class CrDirReader(CrReader): + def __init__(self, loc, file_type: str = None): + self.loc: str = loc.rstrip('/') + '/' + self.matFn = None + super().__init__(self._handle_version(), file_type) + + def _handle_version(self): + if os.path.isfile(self.loc + 'features.tsv.gz'): + self.matFn = self.loc + 'matrix.mtx.gz' + grps = {'feature_ids': ('features.tsv.gz', 0), + 'feature_names': ('features.tsv.gz', 1), + 'feature_types': ('features.tsv.gz', 2), + 'cell_names': ('barcodes.tsv.gz', 0)} + elif os.path.isfile(self.loc + 'genes.tsv'): + self.matFn = self.loc + 'matrix.mtx' + grps = {'feature_ids': ('genes.tsv', 0), + 'feature_names': ('genes.tsv', 1), + 'feature_types': None, 'cell_names': ('barcodes.tsv', 0)} + else: + raise IOError("ERROR: Couldn't find files") + return grps + + def _read_dataset(self, key: Optional[str] = None): + return [x.split('\t')[self.grpNames[key][1]] for x in + read_file(self.loc + self.grpNames[key][0])] + + def to_sparse(self, a): + idx = np.where(np.diff(a[:, 1]) == 1)[0] + 1 + return sparse.COO([a[:, 1] - a[0, 1], a[:, 0] - 1], a[:, 2], shape=(len(idx) + 1, self.nFeatures)) + + def consume(self, batch_size: int, lines_in_mem: int = int(1e5)) -> \ + Generator[List[np.ndarray], None, None]: + stream = pd.read_csv(self.matFn, skiprows=3, sep=' ', + header=None, chunksize=lines_in_mem) + start = 1 + dfs = [] + for df in stream: + if df.iloc[-1, 1] - start >= batch_size: + idx = df[1] < batch_size + start + dfs.append(df[idx]) + yield self.to_sparse(np.vstack(dfs)) + dfs = [df[~idx]] + start += batch_size + else: + dfs.append(df) + yield self.to_sparse(np.vstack(dfs)) diff --git a/scarf/umap.py b/scarf/umap.py new file mode 100644 index 0000000..f36ae98 --- /dev/null +++ b/scarf/umap.py @@ -0,0 +1,199 @@ +# Author: Leland McInnes +# Modified file +# +# License: BSD 3 clause + +import numpy as np +from sklearn.utils import check_random_state +from umap.umap_ import make_epochs_per_sample, find_ab_params, optimize_layout +import numba +import locale +from umap.utils import tau_rand_int +from tqdm import tqdm +locale.setlocale(locale.LC_NUMERIC, "C") + + +__all__ = ['fit', 'transform', 'fit_transform'] + + +def simplicial_set_embedding( + g, embedding, n_epochs, a, b, random_seed, repulsion_strength, + initial_alpha, negative_sample_rate, parallel): + g.data[g.data < (g.data.max() / float(n_epochs))] = 0.0 + g.eliminate_zeros() + epochs_per_sample = make_epochs_per_sample(g.data, n_epochs) + head = g.row + tail = g.col + rng_state = check_random_state(random_seed).randint( + np.iinfo(np.int32).min + 1, np.iinfo(np.int32).max - 1, 3).astype(np.int64) + if parallel: + embedding = optimize_layout_euclidean( + embedding, embedding, head, tail, n_epochs, + g.shape[1], epochs_per_sample, a, b, rng_state, + repulsion_strength, initial_alpha, negative_sample_rate) + else: + embedding = optimize_layout( + embedding, embedding, head, tail, n_epochs, g.shape[1], + epochs_per_sample, a, b, rng_state, repulsion_strength, initial_alpha, + negative_sample_rate, verbose=True) + return embedding + + +def fuzzy_simplicial_set(g, set_op_mix_ratio): + tg = g.transpose() + prod = g.multiply(tg) + res = ( + set_op_mix_ratio * (g + tg - prod) + + (1.0 - set_op_mix_ratio) * prod + ) + res.eliminate_zeros() + return res.tocoo() + + +def fit(graph, embedding, spread, min_dist, + set_op_mix_ratio, n_epochs, random_seed, + repulsion_strength, initial_alpha, negative_sample_rate, parallel): + a, b = find_ab_params(spread, min_dist) + sym_graph = fuzzy_simplicial_set(graph, set_op_mix_ratio) + embedding = simplicial_set_embedding( + sym_graph, embedding, n_epochs, a, b, random_seed, repulsion_strength, + initial_alpha, negative_sample_rate, parallel) + return embedding, a, b + + +def transform(graph, embedding, + a, b, n_epochs, random_seed, + repulsion_strength, initial_alpha, negative_sample_rate, parallel): + return simplicial_set_embedding( + graph, embedding, n_epochs, a, b, random_seed, + repulsion_strength, initial_alpha, negative_sample_rate, parallel) + + +def fit_transform(graph, ini_embed, spread: float, min_dist: float, tx_n_epochs: int, fit_n_epochs: int, + random_seed: int, set_op_mix_ratio: float = 1.0, repulsion_strength: float = 1.0, + initial_alpha: float = 1.0, negative_sample_rate: float = 5, parallel: bool = False): + e, a, b = fit(graph, ini_embed, + spread=spread, min_dist=min_dist, set_op_mix_ratio=set_op_mix_ratio, + n_epochs=fit_n_epochs, random_seed=random_seed, repulsion_strength=repulsion_strength, + initial_alpha=initial_alpha, negative_sample_rate=negative_sample_rate, parallel=parallel) + t = transform(graph, e, a, b, n_epochs=tx_n_epochs, random_seed=random_seed, + repulsion_strength=repulsion_strength, initial_alpha=initial_alpha, + negative_sample_rate=negative_sample_rate, parallel=parallel) + return t + + +@numba.njit() +def clip(val): + """Standard clamping of a value into a fixed range (in this case -4.0 to + 4.0) + Parameters + ---------- + val: float + The value to be clamped. + Returns + ------- + The clamped value, now fixed to be in the range -4.0 to 4.0. + """ + if val > 4.0: + return 4.0 + elif val < -4.0: + return -4.0 + else: + return val + + +@numba.njit( + "f4(f4[::1],f4[::1])", + fastmath=True, + cache=True, + locals={ + "result": numba.types.float32, + "diff": numba.types.float32, + "dim": numba.types.int32, + }, +) +def rdist(x, y): + """Reduced Euclidean distance. + Parameters + ---------- + x: array of shape (embedding_dim,) + y: array of shape (embedding_dim,) + Returns + ------- + The squared euclidean distance between x and y + """ + result = 0.0 + dim = x.shape[0] + for i in range(dim): + diff = x[i] - y[i] + result += diff * diff + + return result + + +def _optimize_layout_euclidean_single_epoch(head_embedding, tail_embedding, head, tail, + n_vertices, epochs_per_sample, a, b, rng_state, + gamma, dim, move_other, alpha, epochs_per_negative_sample, + epoch_of_next_negative_sample, epoch_of_next_sample, n): + for i in numba.prange(epochs_per_sample.shape[0]): + if epoch_of_next_sample[i] <= n: + j = head[i] + k = tail[i] + current = head_embedding[j] + other = tail_embedding[k] + dist_squared = rdist(current, other) + if dist_squared > 0.0: + grad_coeff = -2.0 * a * b * pow(dist_squared, b - 1.0) + grad_coeff /= a * pow(dist_squared, b) + 1.0 + else: + grad_coeff = 0.0 + for d in range(dim): + grad_d = clip(grad_coeff * (current[d] - other[d])) + current[d] += grad_d * alpha + if move_other: + other[d] += -grad_d * alpha + epoch_of_next_sample[i] += epochs_per_sample[i] + n_neg_samples = int( + (n - epoch_of_next_negative_sample[i]) / epochs_per_negative_sample[i] + ) + for p in range(n_neg_samples): + k = tau_rand_int(rng_state) % n_vertices + other = tail_embedding[k] + dist_squared = rdist(current, other) + if dist_squared > 0.0: + grad_coeff = 2.0 * gamma * b + grad_coeff /= (0.001 + dist_squared) * ( + a * pow(dist_squared, b) + 1 + ) + elif j == k: + continue + else: + grad_coeff = 0.0 + for d in range(dim): + if grad_coeff > 0.0: + grad_d = clip(grad_coeff * (current[d] - other[d])) + else: + grad_d = 4.0 + current[d] += grad_d * alpha + epoch_of_next_negative_sample[i] += ( + n_neg_samples * epochs_per_negative_sample[i] + ) + + +def optimize_layout_euclidean(head_embedding, tail_embedding, head, tail, n_epochs, + n_vertices, epochs_per_sample, a, b, rng_state, + gamma=1.0, initial_alpha=1.0, negative_sample_rate=5.0): + dim = head_embedding.shape[1] + move_other = head_embedding.shape[0] == tail_embedding.shape[0] + alpha = initial_alpha + epochs_per_negative_sample = epochs_per_sample / negative_sample_rate + epoch_of_next_negative_sample = epochs_per_negative_sample.copy() + epoch_of_next_sample = epochs_per_sample.copy() + optimize_fn = numba.njit(_optimize_layout_euclidean_single_epoch, fastmath=True) + for n in tqdm(range(n_epochs)): + optimize_fn(head_embedding, tail_embedding, head, tail, + n_vertices, epochs_per_sample, a, b, rng_state, + gamma, dim, move_other, alpha, epochs_per_negative_sample, + epoch_of_next_negative_sample, epoch_of_next_sample, n) + alpha = initial_alpha * (1.0 - (float(n) / float(n_epochs))) + return head_embedding diff --git a/scarf/utils.py b/scarf/utils.py new file mode 100644 index 0000000..a05822b --- /dev/null +++ b/scarf/utils.py @@ -0,0 +1,49 @@ +import pandas as pd +import numpy as np +from statsmodels.nonparametric.smoothers_lowess import lowess +from typing import Callable +from dask.diagnostics import ProgressBar +import functools + + +def fit_lowess(a, b, n_bins: int, + lowess_frac: float) -> np.array: + stats = pd.DataFrame({'a': a, 'b': b}).apply(np.log) + bin_edges = np.histogram(stats.a, bins=n_bins)[1] + bin_edges[-1] += 0.1 # For including last gene + bin_idx = [] + for i in range(n_bins): + idx = pd.Series((stats.a >= bin_edges[i]) & (stats.a < bin_edges[i + 1])) + if sum(idx) > 0: + bin_idx.append(list(idx[idx].index)) + bin_vals = [] + for idx in bin_idx: + temp_stat = stats.reindex(idx) + temp_gene = temp_stat.idxmin().b + bin_vals.append( + [temp_stat.b[temp_gene], temp_stat.a[temp_gene]]) + bin_vals = np.array(bin_vals).T + bin_cor_fac = lowess(bin_vals[0], bin_vals[1], return_sorted=False, + frac=lowess_frac, it=100).T + fixed_var = {} + for bcf, indices in zip(bin_cor_fac, bin_idx): + for idx in indices: + fixed_var[idx] = np.e ** (stats.b[idx] - bcf) + return np.array([fixed_var[x] for x in range(len(a))]) + + +def show_progress(func: Callable): + @functools.wraps(func) + def wrapper(*args, **kwargs): + pbar = ProgressBar() + pbar.register() + ret_val = func(*args, **kwargs) + pbar.unregister() + return ret_val + return wrapper + + +def load_zarr_table(zgrp) -> pd.DataFrame: + keys = ['I', 'ids', 'names'] + keys = keys + [x for x in zgrp.keys() if x not in keys] + return pd.DataFrame({x: zgrp[x][:] for x in keys}) diff --git a/scarf/writers.py b/scarf/writers.py new file mode 100644 index 0000000..e1bb169 --- /dev/null +++ b/scarf/writers.py @@ -0,0 +1,95 @@ +import zarr +from numcodecs import Blosc +from typing import Any, Tuple +import numpy as np +from tqdm import tqdm +from .readers import CrReader + +__all__ = ['CrToZarr', 'subset_assay_zarr', 'create_zarr_dataset', 'create_zarr_obj_array', 'dask_to_zarr'] + + +def create_zarr_dataset(g: zarr.hierarchy, name: str, chunks: tuple, + dtype: Any, shape: Tuple, overwrite: bool = True) -> zarr.hierarchy: + compressor = Blosc(cname='lz4', clevel=5, shuffle=Blosc.BITSHUFFLE) + return g.create_dataset(name, chunks=chunks, dtype=dtype, + shape=shape, compressor=compressor, overwrite=overwrite) + + +def create_zarr_obj_array(g: zarr.hierarchy, name: str, data, + dtype: str = None, overwrite: bool = True) -> zarr.hierarchy: + if dtype is None or dtype == object: + dtype = 'U' + str(max([len(x) for x in data])) + return g.create_dataset(name, data=data, chunks=False, + shape=len(data), dtype=dtype, overwrite=overwrite) + + +class CrToZarr: + def __init__(self, cr: CrReader, zarr_fn: str, chunk_sizes=(1000, 1000), dtype: str = 'uint32'): + self.cr = cr + self.fn = zarr_fn + self.chunkSizes = chunk_sizes + self.z = zarr.open(self.fn, mode='w') + self._ini_cell_data() + for assay_name in self.cr.assayFeats.columns: + g = self.z.create_group(assay_name) + g.attrs['is_assay'] = True + g.attrs['misc'] = {} + create_zarr_dataset(g, 'counts', chunk_sizes, dtype, + (self.cr.nCells, self.cr.assayFeats[assay_name]['nFeatures'])) + self._ini_feature_data(assay_name) + + def _ini_cell_data(self): + g = self.z.create_group('cellData') + create_zarr_obj_array(g, 'ids', self.cr.cell_names()) + create_zarr_obj_array(g, 'names', self.cr.cell_names()) + create_zarr_obj_array(g, 'I', [True for _ in range(self.cr.nCells)], 'bool') + + def _ini_feature_data(self, assay_name): + g = self.z[assay_name] + create_zarr_obj_array(g, 'featureData/ids', self.cr.feature_ids(assay_name)) + create_zarr_obj_array(g, 'featureData/names', self.cr.feature_names(assay_name)) + is_active_data = [True for _ in range(self.cr.assayFeats[assay_name].nFeatures)] + create_zarr_obj_array(g, 'featureData/I', is_active_data, 'bool') + + def dump(self, batch_size: int = 1000, lines_in_mem: int = 100000) -> None: + stores = [self.z["%s/counts" % x] for x in self.cr.assayFeats.columns] + spidx = [0] + list(self.cr.assayFeats.T.nFeatures.cumsum().values) + spidx = [(spidx[x - 1], spidx[x]) for x in range(1, len(spidx))] + s, e, = 0, 0 + n_chunks = self.cr.nCells//batch_size + 1 + for a in tqdm(self.cr.consume(batch_size, lines_in_mem), total=n_chunks): + e += a.shape[0] + a = a.todense() + for j in range(len(stores)): + stores[j][s:e] = a[:, spidx[j][0]:spidx[j][1]] + s = e + + +def subset_assay_zarr(zarr_fn: str, in_grp: str, out_grp: str, + cells_idx: np.ndarray, feat_idx: np.ndarray, + chunk_size: tuple): + z = zarr.open(zarr_fn, 'r+') + ig = z[in_grp] + og = z.create_dataset( + out_grp, chunks=chunk_size, dtype='uint32', shape=(len(cells_idx), len(feat_idx)), + compressor=Blosc(cname='lz4', clevel=5, shuffle=Blosc.BITSHUFFLE), overwrite=True) + pos_start, pos_end = 0, 0 + for i in tqdm(np.array_split(cells_idx, len(cells_idx) // chunk_size[0] + 1)): + pos_end += len(i) + og[pos_start:pos_end, :] = ig.get_orthogonal_selection((i, feat_idx)) + pos_start = pos_end + return None + + +def dask_to_zarr(df, z, loc, chunk_size): + re_df = df.rechunk(chunks=(chunk_size, df.shape[0])) + og = z.create_dataset( + loc, overwrite=True, chunks=(chunk_size, None), + shape=re_df.shape, dtype='float64', + compressor=Blosc(cname='lz4', clevel=5, shuffle=Blosc.BITSHUFFLE)) + pos_start, pos_end = 0, 0 + for i in tqdm(re_df.blocks, total=len(re_df.chunks[0]), desc=f"Writing data to {loc}"): + pos_end += len(i) + og[pos_start:pos_end, :] = i.compute() + pos_start = pos_end + return None diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..774e4ef --- /dev/null +++ b/setup.py @@ -0,0 +1,35 @@ +from setuptools import setup, find_packages +import os + + +def read(fname): + return open(os.path.join(os.path.dirname(__file__), fname)).read() + + +if __name__ == "__main__": + + CLASSIFIERS = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: BSD License", + "Natural Language :: English", + 'Operating System :: POSIX :: Linux', + "Programming Language :: Python :: 3", + 'Programming Language :: Python :: 3.7', + ] + KEYWORDS = ['store'] + VERSION = open('VERSION').readline().rstrip('\n') + setup( + name='scarf', + description='scarf', + long_description=read('README.rst'), + author='Parashar Dhapola', + author_email='parashar.dhapola@gmail.com', + url='https://github.com/karlssonlab/scarf', + license='BSD 3-Clause', + classifiers=CLASSIFIERS, + keywords=KEYWORDS, + install_requires=[x.strip() for x in open('requirements.txt')], + version=VERSION, + packages=find_packages(exclude=['data']), + include_package_data=False + )