
    ]ixj                        S SK r S SKrS SKrS SKJr  S SKJs  Jr  S SK	r
S SKrS SKJr   S&S\S\S\4S jjrS'S jr     S(S\S\S\S	\S
\S\S\S\S\S\4S jjrS rS rS r " S S\R0                  5      r " S S\R0                  5      r " S S\R6                  5      r " S S\R:                  5      r " S S\R0                  5      r " S S\R0                  5      r  " S  S!\R0                  5      r!S" r"S# r#S$ r$S)S% jr%g)*    N)	rearrangelengthdimshiftc                 Z   US-  S:X  d   eU[         R                  " XS9R                  SSS5      -   nUS-  n[         R                  " US-  US9R                  SSS5      nXTXvS-
  -  -  -  n[         R                  " [         R                  " U5      [         R
                  " U5      /SS9$ )N   r   device   r   )torcharangeviewcatcossin)	r   r   r   r
   
max_periodposhalf_dimadimphases	            O/mnt/rpi/tmp/demucs-venv-sys/lib/python3.13/site-packages/demucs/transformer.pycreate_sin_embeddingr      s     7a<<
%,,v5::2q!D
DCaxH<<q055aB?D$Q,"789E99IIeIIe	
      c                    U S-  S:w  a  [        SR                  U 5      5      e[        R                  " XU5      n[	        U S-  5      n [        R
                  " [        R                  " SU S5      [        R                  " U5      U -  * -  5      n[        R                  " SU5      R                  S5      n[        R                  " SU5      R                  S5      n[        R                  " Xv-  5      R                  SS5      R                  S5      R                  SUS5      USU S2SS2SS24'   [        R                  " Xv-  5      R                  SS5      R                  S5      R                  SUS5      USU S2SS2SS24'   [        R                  " X-  5      R                  SS5      R                  S5      R                  SSU5      XPSS2SS2SS24'   [        R                  " X-  5      R                  SS5      R                  S5      R                  SSU5      XPS-   SS2SS2SS24'   USSS24   R                  U5      $ )z
:param d_model: dimension of the model
:param height: height of the positions
:param width: width of the positions
:return: d_model*height*width position matrix
   r   zHCannot use sin/cos positional encoding with odd dimension (got dim={:d})r           r   N)
ValueErrorformatr   zerosintexpr   mathlog	unsqueezer   	transposerepeatr   to)	d_modelheightwidthr
   r   pediv_termpos_wpos_hs	            r   create_2d_sin_embeddingr1   %   s    {a++16'?
 	
 
We	,B'A+GyyS'1%$((:*>*H(IIH LLe$..q1ELLf%//2E		%"#--a3==a@GG6STU q{Aq 			%"#--a3==a@GG6STU q{Aq 			%"#--a3==a@GG1eT zz1a 			%"#--a3==a@GG1eT {Q1 dAg;>>&!!r   
batch_sizemean_normalizeaugmentmax_global_shiftmax_local_shift	max_scaler
   r   c
                 r   US-  S:X  d   eS[         R                  " U 5      R                  SSS5      -  n
U
R                  SUS5      n
U(       a  U
[         R                  " U
SSS9-  n
U(       a  [
        R                  R                  U* U7SUS/S9n[
        R                  R                  U* U7XS/S9n[
        R                  R                  [
        R                  " U5      * [
        R                  " U5      7SUS/S9nX-   U-   [
        R                  " U5      -  n
U
R                  U5      n
US-  n[         R                  " US-  US	9R                  SSS5      nXXS-
  -  -  -  n[         R                  " [         R                  " U5      [         R                  " U5      /SS
9R                  5       $ )Nr   r         ?r   r   T)r   keepdim)sizer	   r   )r   r   r   r(   nanmeannprandomuniformr%   r#   r)   r   r   r   float)r   r   r2   r3   r4   r5   r6   r7   r
   r   r   deltadelta_locallog_lambdasr   r   r   s                    r   create_sin_embedding_caperD   I   s    7a<<
V$))"a3
3C
**Q
A
&Cu}}Sa66		!! 00:q7I " 
 ii''.f!5L ( 
 ii''VVI	!2 2!Z9K ( 
 {[(BFF;,??
&&.CaxH<<q055aB?D$Q,"789E99IIeIIe	
  egr   c                 D    [         R                  " U 5      nXS S 2S 4   :  $ N)r   r   )r   r   s     r   get_causal_maskrG   v   s!    
,,v
CQWr   c           	         US;   d   eUS:X  aI  [         R                  " X[         R                  S9nSUSS2SU24'   [        XA-  U -  5      n	SUSU	2SS24'   US:X  a  [         R                  " X[         R                  S9n[         R                  " U5      SS2S4   n
X-  U
-  [         R                  " U* US-   5      -   R                  5       R                  SU S-
  5      nUR                  SU[         R                  " S[         R                  S9R                  U5      5        GOUS	:X  Ga0  [         R                  " US
-   U S
-   [         R                  S9n[         R                  " US
-   5      SS2S4   n
[         R                  " S[        S
U -  S-  S-   5      5      nXS-   -  S
-  R                  5       n[         R                  " UR                  S5      SS * U/5      nX-  U
-  U-   R                  5       R                  SU S-   5      nUR                  SU[         R                  " S[         R                  S9R                  U5      5        USS2SS24   nOTUS:X  aN  [         R                  " US9nUR                  U5        [         R                  " X-  XS9R                  X5      U:  nWR!                  U5      nU$ )zb
When the input of the Decoder has length T1 and the output T2
The mask matrix has shape (T2, T1)
)diagjmaskr>   globalrK   )dtypeTNrI   r   r   rJ   r         ?r   r>   r	   )	generatorr
   )r   r!   boolr"   r   longclampscatter_ones	expand_asr   flip	Generatormanual_seedrandreshaper)   )T1T2	mask_typesparse_attn_windowglobal_windowmask_random_seedsparsityr
   maskline_windowrowscolstgenes                 r   get_elementary_maskrg   {   sn    ====H{{24"&Q-,r12 $\k\1_F{{24||B4(Wt^ell,>+>@RUV@VWWTVU1b1f 	
 	auzz!5::>HHNO	g	{{2626<||BF#AtG,LLCRC! 345a%[1_!!#IIq	#2*+$"((*00BF;auzz!5::>HHNOAbD!B$J	h	f-)*JJrw$>FFrN 	
 776?DKr   c                     SSK Jn  UR                  S5      n	U	 V
s/ s H  n
[        U UU
UUUUU5      PM     nn
[        R
                  " U5      R                  SS9S:  nUR                  US   5      $ s  sn
f )z
Return a SparseCSRTensor mask that is a combination of elementary masks
mask_type can be a combination of multiple masks: for instance "diag_jmask_random"
r   )SparseCSRTensor_)axisN)xformers.sparseri   splitrg   r   stacksum
from_dense)rZ   r[   r\   r]   r^   r_   r`   r
   ri   
mask_typesra   	all_masks
final_masks                r   get_maskrt      s     0%J  D 			
    Y'+++3a7J%%j&677!s   A4c            	       Z   ^  \ rS rSr  S
S\S\S\S\4U 4S jjjr\S 5       rS r	S	r
U =r$ )ScaledEmbedding   num_embeddingsembedding_dimscaleboostc                    > [         TU ]  5         [        R                  " X5      U l        U R                  R
                  =R                  X4-  -  sl        X@l        g rF   )super__init__nn	Embedding	embeddingweightdatar{   )selfrx   ry   rz   r{   	__class__s        r   r~   ScaledEmbedding.__init__   sB     	nD""em3"
r   c                 H    U R                   R                  U R                  -  $ rF   )r   r   r{   )r   s    r   r   ScaledEmbedding.weight   s    ~~$$tzz11r   c                 >    U R                  U5      U R                  -  $ rF   )r   r{   r   xs     r   forwardScaledEmbedding.forward   s    ~~a 4::--r   )r{   r   )r9   g      @)__name__
__module____qualname____firstlineno__r"   r@   r~   propertyr   r   __static_attributes____classcell__r   s   @r   rv   rv      sY    
 

 
 	

 
 
 2 2. .r   rv   c                   B   ^  \ rS rSrSrSS\S\4U 4S jjjrS rSr	U =r
$ )	
LayerScale   zLayer scale from [Touvron et al 2021] (https://arxiv.org/pdf/2103.17239.pdf).
This rescales diagonaly residual outputs close to 0 initially, then learnt.
channelsinitc                    > [         TU ]  5         X0l        [        R                  " [
        R                  " USS95      U l        X R                  R                  SS& g)zl
channel_last = False corresponds to (B, C, T) tensors
channel_last = True corresponds to (T, B, C) tensors
T)requires_gradN)	r}   r~   channel_lastr   	Parameterr   r!   rz   r   )r   r   r   r   r   s       r   r~   LayerScale.__init__   sA    
 	(\\%++hd"KL
!

r   c                 n    U R                   (       a  U R                  U-  $ U R                  S S 2S 4   U-  $ rF   r   rz   r   s     r   r   LayerScale.forward   s1    ::>!::ag&**r   r   )r   F)r   r   r   r   __doc__r"   r@   r~   r   r   r   r   s   @r   r   r      s*    " "E " "+ +r   r   c                   4   ^  \ rS rSrU 4S jrU 4S jrSrU =r$ )MyGroupNormi  c                 &   > [         TU ]  " U0 UD6  g rF   )r}   r~   )r   argskwargsr   s      r   r~   MyGroupNorm.__init__  s    $)&)r   c                 f   > UR                  SS5      n[        TU ]	  U5      R                  SS5      $ )zP
x: (B, T, C)
if num_groups=1: Normalisation on all T and C together for each B
r   r   )r'   r}   r   )r   r   r   s     r   r   MyGroupNorm.forward  s1    
 KK1wq!++Aq11r    r   r   r   r   r~   r   r   r   r   s   @r   r   r     s    *2 2r   r   c                   p   ^  \ rS rSrSS\R
                  SSSSSSSSSS	S
SSSSS4U 4S jjrSS jrSrU =r	$ )MyTransformerEncoderLayeri     皙?r   Fh㈵>-C6?NrI   *     2   ffffff?c                   > XS.n[         TU ]  UUUUUU	UUUUS9
  Xl        UU l        U(       a"  U(       d  Xl        UU l        UU l        UU l        U(       a:  [        [        U5      U4SU	0UD6U l
        [        [        U5      U4SU	0UD6U l        S U l        U R                  U-  (       a  [        [        U5      US9U l        U
(       a  [        XS5      O[        R                   " 5       U l        U
(       a  [        XS5      O[        R                   " 5       U l        U(       aI  ['        XUUU(       a  UOSS9U l        U R+                  S[,        R.                  " S	S	5      5        UU l        g g )
Nr
   rL   )
r*   nheaddim_feedforwarddropout
activationlayer_norm_epsbatch_first
norm_firstr
   rL   eps
num_groupsnum_channelsTr   r   r   auto_sparsitysrc_maskr   )r}   r~   sparser   r\   r]   r^   r`   r   r"   norm1norm2norm_outr   r   r   Identitygamma_1gamma_2MultiheadAttention	self_attn__setattr__r   r!   r_   )r   r*   r   r   r   r   
group_normr   r   r   layer_scaleinit_valuesr
   rL   r   r\   r_   r]   r^   r   r`   r   factory_kwargsr   s                          r   r~   "MyTransformerEncoderLayer.__init__  sR   0 %+;+!)#! 	 	
 * !**<'%2"$DM$S_gd>dUcdDJ$S_gd>dUcdDJ??X%'3x=wWDM6AJwT2r{{} 	 7BJwT2r{{} 	 /[*7hQDN ZQ):;$4D! r   c           
      n   UR                   nUnUR                  u  pgnU R                  (       a  U R                  (       dz  Ub   eU R                  nUR                  S   U:w  aV  [        UUU R                  U R                  U R                  U R                  U R                  U5      nU R                  SU5        U R                  (       a  XPR                  U R                  U R                  U5      X#5      5      -   nXPR!                  U R#                  U R%                  U5      5      5      -   nU R&                  (       a  U R'                  U5      nU$ U R                  XPR                  U R                  XRU5      5      -   5      nU R%                  XPR!                  U R#                  U5      5      -   5      nU$ )z_
if batch_first = False, src shape is (T, B, C)
the case where batch_first=True is not covered
r   r   )r
   shaper   r   r   rt   r\   r]   r^   r_   r`   r   r   r   	_sa_blockr   r   	_ff_blockr   r   )	r   srcr   src_key_padding_maskr
   r   TBCs	            r   r   !MyTransformerEncoderLayer.forwardS  sg   
 ''a;;t11###}}H~~b!Q&#NN++&&))MM	   X6??LLtzz!}hM A LL

1!>??A}}MM!$  

LL=Q!RSSA 

1||DNN1,=>>?Ar   )r   r   r   r^   r_   r\   r   r   r   r   r   r]   r`   )NN)
r   r   r   r   Frelur~   r   r   r   r   s   @r   r   r     sS    
 66-A5F& &r   r   c                      ^  \ rS rSrSS\R
                  SSSSSSSSSS	S
SSSSS4S\S\S\S\S\S\S\S\S\S\4U 4S jjjr	SS jr
SS jrS rS rSrU =r$ )CrossTransformerEncoderLayeri|  r   r   r   Fr   rI   r   r   r   r   Nr*   r   r   r   r   r   r   r   r   r   c                   > UUS.n[         TU ]  5         Xl        UU l        U(       a!  U(       d  Xl        Xl        UU l        UU l        U   [        R                  " XUUS9U l
        [        R                  " X40 UD6U l        [        R                  " U5      U l        [        R                  " X140 UD6U l        Xl        U   U   U   U
(       aX  [#        [%        U
5      U4SU0UD6U l        [#        [%        U
5      U4SU0UD6U l        [#        [%        U
5      U4SU0UD6U l        OZ[        R,                  " U4SU0UD6U l        [        R,                  " U4SU0UD6U l        [        R,                  " U4SU0UD6U l        S U l        U R                   U-  (       a  [#        [%        U5      US9U l        U(       a  [1        XS5      O[        R2                  " 5       U l        U(       a  [1        XS5      O[        R2                  " 5       U l        [        R                  " U5      U l        [        R                  " U5      U l        [=        U[>        5      (       a  U RA                  U5      U l!        OXPl!        U(       aP  [        XUUU(       a  UOSS9U l
        U(       d.  U RE                  S[F        RH                  " S	S	5      5        Xl%        g g g )
Nr   )r   r   r   r   Tr   r   ra   r   )&r}   r~   r   r   r\   r]   r^   r`   r   r   
cross_attnLinearlinear1Dropoutr   linear2r   r   r"   r   r   norm3	LayerNormr   r   r   r   r   dropout1dropout2
isinstancestr_get_activation_fnr   r   r   r!   r_   )r   r*   r   r   r   r   r   r   r   r   r   r   r   r\   r_   r]   r^   r`   r   r
   rL   r   r   r   s                          r   r~   %CrossTransformerEncoderLayer.__init__}  sM   0 %+U;* !**<'%2"$DM//GF yyL^Lzz'*yyL^L$$S_gd>dUcdDJ$S_gd>dUcdDJ$S_gd>dUcdDJgT>T^TDJgT>T^TDJgT>T^TDJ??X%'3x=wWDM 7BJwT2r{{} 	 7BJwT2r{{} 	 

7+

7+ j#&&"55jADO(O0[*7hQ@DO !  Q):;(8% !	 r   c           
         UR                   nUR                  u  pVnUR                  u  pnU R                  (       a  U R                  (       d  Ub   eU R                  nUR                  S   U:w  d  UR                  S   U:w  aV  [        UUU R                  U R                  U R                  U R                  U R                  U5      nU R                  SU5        U R                  (       a  XR                  U R                  U R                  U5      U R!                  U5      U5      5      -   n	XR#                  U R%                  U R'                  U	5      5      5      -   n	U R(                  (       a  U R)                  U	5      n	U	$ U R                  XR                  U R                  XU5      5      -   5      n	U R!                  XR#                  U R%                  U	5      5      -   5      n	U	$ )zk
Args:
    q: tensor of shape (T, B, C)
    k: tensor of shape (S, B, C)
    mask: tensor of shape (T, S)

r   ra   )r
   r   r   r   ra   rt   r\   r]   r^   r_   r`   r   r   r   	_ca_blockr   r   r   r   r   r   )
r   qkra   r
   r   r   r   Sr   s
             r   r   $CrossTransformerEncoderLayer.forward  sx    ''a''a;;t11<<99Dzz"~"djjn&9NN++&&))MM	   .??LL

1tzz!}d!STTALL

1!>??A}}MM!$
  

1||DNN1,FGGHA

1||DNN1,=>>?Ar   c                 L    U R                  XX#SS9S   nU R                  U5      $ )NF)	attn_maskneed_weightsr   )r   r   )r   r   r   r   r   s        r   r   &CrossTransformerEncoderLayer._ca_block  s*    OOA!uOMaP}}Qr   c           	          U R                  U R                  U R                  U R                  U5      5      5      5      nU R	                  U5      $ rF   )r   r   r   r   r   r   s     r   r   &CrossTransformerEncoderLayer._ff_block  s9    LLdoodll1o&FGH}}Qr   c                     US:X  a  [         R                  $ US:X  a  [         R                  $ [        SR	                  U5      5      e)Nr   geluz&activation should be relu/gelu, not {})r   r   r  RuntimeErrorr    )r   r   s     r   r   /CrossTransformerEncoderLayer._get_activation_fn  s;    66M6!66MCJJ:VWWr   )r   r   r   r   r   r   r   r   r^   r   r   r_   r\   r   r   r   r   r   r   r]   r`   rF   )r   r   r   r   r   r   r"   r@   rO   r~   r   r   r   r   r   r   r   s   @r   r   r   |  s    
  $66 $!!  -S9S9 S9 	S9
 S9 S9 S9 S9 S9 S9 S9 S9j$N 
 X Xr   r   c            ?         ^  \ rS rSrSSSSSSSS	SSSSS
SSSS	SSS	S	/ SQSSSSSSSS4S\S\S\S\S\S\S\S\S\S\S\S\S \S!\S"\S#\R                  \   S$\S%\S&\S'\S(\S)\S*\
S+\S,\S-\S.\S/\S0\S1\S2\4>U 4S3 jjjrS4 rS5 rS6 rS7rU =r$ )8CrossTransformerEncoderi  r   g      @      Fr   i  T     @Nr   r9   )g     @r9   gffffff?rI   r   r   r   r   r   embhidden_scale	num_heads
num_layerscross_firstr   max_positionsnorm_innorm_in_groupr   r   r   r   weight_decaylrr   r  sin_random_shiftweight_pos_embedcape_mean_normalizecape_augmentcape_glob_loc_scalesparse_self_attnsparse_cross_attnr\   r_   r]   r^   r   r`   c                    > [         T&U ]  5          X-  S:X  d   e[        X-  5      n XPl        U(       a  SOSU l        X l        Xl        Xl        UU l        UU l	        US:X  a  UU l
        UU l        UU l        US:X  a  [        XSS9U l        UU l        U(       a  [         R"                  O[         R$                  n!U   U   U	(       a7  [&        R(                  " U5      U l        [&        R(                  " U5      U l        OpU
(       a5  [/        [        U
5      U5      U l        [/        [        U
5      U5      U l        O4[&        R0                  " 5       U l        [&        R0                  " 5       U l        [&        R2                  " 5       U l        [&        R2                  " 5       U l        0 SU_SU_S	U _S
U_SU!_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SS_n"[9        U"5      n#U#R;                  SU05        [9        U"5      n$U$R;                  SU05        [=        U5       H  n%U%S-  U R                  :X  aJ  U R4                  R?                  [A        S0 U#D65        U R6                  R?                  [A        S0 U#D65        M`  U R4                  R?                  [C        S0 U$D65        U R6                  R?                  [C        S0 U$D65        M     g )Nr   r   capescaledg?)rz   r*   r   r   r   r   r   r   r   r   r\   r_   r]   r^   r`   r   r   Tr   r   r   )"r}   r~   r"   r  classic_parityr	  r   r  r  r  r  r  r  rv   position_embeddingsr  r   r  r   r   r   r  	norm_in_tr   r   
ModuleListlayerslayers_tdictupdaterangeappendr   r   )'r   r   r	  r
  r  r  r  r   r  r  r  r   r   r   r   r  r  r   r  r  r  r  r  r  r  r  r\   r_   r]   r^   r   r`   
hidden_dimr   kwargs_commonkwargs_classic_encoderkwargs_cross_encoderidxr   s'                                         r   r~    CrossTransformerEncoder.__init__  s   D 		!###+,
$ $/aA$( 0 0&=':D$ ,D':D$(?'6}QT'UD$'+QVV
<<,DL\\#.DN&s='93?DL(]);SADN;;=DL[[]DN mmo
s
Y
 z
 w	

 *
 *
 *
 
 ;
 
  0
 !"4
 ]
 
 ]
  4!
& "&m!4%%&'
 	  $M2##'%
 	 $CQw$---""#<#V?U#VW$$-G0FG
 ""#?#WBV#WX$$0H3GH %r   c                    UR                   u  p4pV[        XEXaR                  U R                  5      n[	        US5      n[	        US5      nU R                  U5      nXR                  U-  -   nUR                   u  p4n[	        US5      nU R                  XXAR                  5      n	[	        U	S5      n	U R                  U5      nX R                  U	-  -   n[        U R                  5       Hn  n
U
S-  U R                  :X  a,  U R                  U
   " U5      nU R                  U
   " U5      nMB  UnU R                  U
   " X5      nU R                  U
   " X+5      nMp     [	        USUS9n[	        US5      nX4$ )Nzb c fr t1 -> b (t1 fr) czb c t2 -> b t2 czt2 b c -> b t2 cr   zb (t1 fr) c -> b c fr t1)t1zb t2 c -> b c t2)r   r1   r
   r   r   r  r  _get_pos_embeddingr  r%  r  r  r!  r"  )r   r   xtr   r   FrrZ   
pos_emb_2dr[   pos_embr+  old_xs               r   r   CrossTransformerEncoder.forward  s]   wwb,2xx

 z+EF
a34LLO%%
2288br-.))"HH=G%78^^B'''11)CQw$---KK$Q']]3'+KK$Q+]]3'2 * a3;r-.ur   c                 R   U R                   S:X  a:  [        R                  " U R                  S-   5      n[	        XXTU R
                  S9nU$ U R                   S:X  a  U R                  (       aY  [        UUUUU R
                  U R                  U R                  U R                  S   U R                  S   U R                  S   S9
nU$ [        UUUUU R
                  U R                  SS	9n U$ U R                   S
:X  a,  [        R                  " XS9nU R                  U5      S S 2S 4   nW$ )Nr   r   )r   r
   r   r  r   r   )r
   r   r3   r4   r5   r6   r7   F)r
   r   r3   r4   r  r	   )r	  r>   	randranger  r   r   trainingrD   r  r  r  r   r   r  )r   r   r   r   r
   r   r3  r   s           r   r/  *CrossTransformerEncoder._get_pos_embedding  s'   88u$$T%:%:Q%>?E*ET__G@ ; XX}}3!##'#;#; --%)%=%=a%@$($<$<Q$?"66q96  4!##'#;#;! 	 XX!,,q0C..s3AtG<Gr   c                     [        U R                  5       5      U R                  S.nU R                  b  U R                  US'   U$ )N)paramsr  r  )list
parametersr  r  )r   groups     r   make_optim_group(CrossTransformerEncoder.make_optim_group  s:     12DDUDUV77''E$Kr   )r  r  r  r  r	  r!  r"  r  r   r  r  r  r  r  r  r  )r   r   r   r   r"   r   r@   rO   tpOptionalr<  r~   r   r/  r?  r   r   r   s   @r   r  r    s    !!!# #!!%! !"%$(!$6!&"' ""%#Aww w 	w
 w w w w w w w w w w w  !w" KK#w$ %w& 'w( )w*  +w, "-w. /w0 "1w2 3w4  5w6 7w8 9w:  ;w< =w> ?w@ Aw wr<#J r   r  c                   N   ^  \ rS rSr        SU 4S jjr    SS jrSrU =r$ )r   i  c                   > [         TU ]  5         U
c   S5       eX l        [        R                  R                  XUS9U l        [        R                  R                  XUS9U l        [        R                  R                  XUS9U l        [        R                  R                  U5      U l
        [        R                  R                  XU5      U l        [        R                  R                  U5      U l        Xl        Xl        g )Nzsanity check)bias)r}   r~   r  r   r   r   r   r   vr   	attn_dropproj	proj_dropr   r   )r   	embed_dimr  r   rE  add_bias_kvadd_zero_attnkdimvdimr   r   r   s              r   r~   MultiheadAttention.__init__  s     	(8.8("DADADA))'2HHOOI$?	))'2&*r   c                    U R                   (       d9  UR                  SSS5      nUR                  SSS5      nUR                  SSS5      nUR                  u  pn
UR                  u  pn
U R                  U5      R	                  XU R
                  XR
                  -  5      R                  SSSS5      nUR                  SS5      nU R                  U5      R	                  XU R
                  XR
                  -  5      R                  SSSS5      nUR                  SS5      nU R                  U5      R	                  XU R
                  XR
                  -  5      R                  SSSS5      nUR                  SS5      nU R                  (       a  Ub   e[        XXR                  S9nO[        XXU R                  S9nUR	                  XR
                  XU R
                  -  5      nUR                  SS5      R	                  XU
5      nU R                  U5      nU R                  U5      nU R                   (       d  UR                  SSS5      nUS 4$ )Nr   r   r      )r`   )r   )r   permuter   r   rY   r  flattenr   rF  r   dynamic_sparse_attentionscaled_dot_product_attentionrG  r'   rH  rI  )r   querykeyvaluekey_padding_maskr   r   average_attn_weightsr   N_qr   N_kr   r   rF  r   s                   r   r   MultiheadAttention.forward  s    MM!Q*E++aA&CMM!Q*EKK	II	 FF5MWQT^^Q..-@AWQ1a  	

 IIaOFF3KWQT^^Q..-@AWQ1a  	

 IIaOFF5MWQT^^Q..-@AWQ1a  	

 IIaO$$$(q;M;MNA,Q1XAIIa4>>.ABKK1%%aa0IIaLNN1		!Q"A$wr   )	rG  r   r   r   r  rH  rI  r   rF  )r   TFFNNFN)NTNTr   r   s   @r   r   r     s:    
 +< !1 1r   r   c                     SSK Jn  XR                  S5      S-  -  n U" XR                  SS5      U5      n[        R
                  R                  R                  US5      nU$ )Nr   )masked_matmulr   rM   r   )xformers.opsr_  r;   r'   r   r   
functionalsoftmax)r   r   att_maskr_  atts        r   scaled_query_key_softmaxre  #  sR    *	VVBZCA
;;r2.
9C
((


%
%c2
.CJr   c                 2    [        XUS9nU" U5      nXR-  nU$ )N)rc  )re  )r   r   rF  rc  r   rd  ys          r   rU  rU  +  s#    
"1(
;C
#,CAHr   c                     [         R                  " SX5      n[         R                  " X"* /SS9nUR                  SS9nUR	                  SSS5      R                  5       R                  5       $ )Nzbtf,bfhi->bhtir   r   r   r   r   )r   einsumr   argmaxrR  byte
contiguous)r   Rqqbucketss       r   _compute_bucketsrp  2  s\    	&	-B	B9"	%BiiBiG??1a#((*5577r   c           	         SSK JnJn  SnSn	XU4 V
s/ s H  oR                  5       PM     sn
u  pn[        R
                  " 5          [        R                  " SU R                  S   XS-  U R                  S9n[        X5      n[        X5      nU" XX45      u  pS S S 5        U" XUWWU5      $ s  sn
f ! , (       d  f       N= f)	Nr   )find_locations!sparse_memory_efficient_attention    r   r   r   r   r	   )
r`  rr  rs  rl  r   no_gradrandnr   r
   rp  )rV  rW  rX  r`   infer_sparsity	attn_biasrr  rs  n_hashes	proj_sizer   rm  bucket_query
bucket_keyrow_offsetscolumn_indicess                   r   rT  rT  :  s    NHI27e1DE1DA1DEE	KK5;;r?H1nU\\Z'1%c-
&4h'@#	 
 -E;	C C F	s   B.AB33
C)r   cpu'  )r  r  )r   r   r9   r  r  )TN)&r>   typingrA  r   torch.nnr   torch.nn.functionalra  r   numpyr=   r$   einopsr   r"   r   r1   rO   r@   r   rD   rG   rg   rt   Modulerv   r   	GroupNormr   TransformerEncoderLayerr   r   r  r   re  rU  rp  rT  r   r   r   <module>r     si            EJ"%$!"T " **	* * 	*
 * * * * * *Z
4n"8J.bii .*+ +,
2",, 
2j : : jZLX299 LXdAbii ANK K\8Cr   