
    ]irB                        S SK r S SKrS SKrS SKrS SKJr  S SKJr  SSK	J
r
  SSKJrJr  SSKJr   " S S	\R                   5      rS
 rS r " S S\R                   5      r " S S\R                   5      r " S S\R                   5      rg)    N)nn)
functional   )capture_init)center_trimunfold)
LayerScalec                   6   ^  \ rS rSrSrSU 4S jjrS rSrU =r$ )BLSTM   z
BiLSTM with same hidden units as input dim.
If `max_steps` is not None, input will be splitting in overlapping
chunks and the LSTM applied separately on each chunk.
c                    > [         TU ]  5         Ub  US-  S:X  d   eX0l        [        R                  " SX!US9U l        [        R                  " SU-  U5      U l        X@l        g )N   r   T)bidirectional
num_layershidden_size
input_size   )	super__init__	max_stepsr   LSTMlstmLinearlinearskip)selfdimlayersr   r   	__class__s        J/mnt/rpi/tmp/demucs-venv-sys/lib/python3.13/site-packages/demucs/demucs.pyr   BLSTM.__init__   sZ     IMQ$666"GG$6_bc	iiC-	    c           	      N   UR                   u  p#nUnSnU R                  ba  X@R                  :  aR  U R                  nUS-  n[        XU5      n	U	R                   S   n
SnU	R                  SSSS5      R	                  SX75      nUR                  SSS5      nU R                  U5      S   nU R                  U5      nUR                  SSS5      nU(       a  / nUR	                  USUW5      n	WS-  n[        W
5       Hp  nUS:X  a!  UR                  U	S S 2US S 2S U* 24   5        M*  XS-
  :X  a   UR                  U	S S 2US S 2US 24   5        MR  UR                  U	S S 2US S 2X* 24   5        Mr     [        R                  " US5      nUSS U24   nUnU R                  (       a  X-   nU$ )	NFr   Tr   r      .)shaper   r   permutereshaper   r   rangeappendtorchcatr   )r   xBCTyframedwidthstrideframesnframesoutlimitks                 r    forwardBLSTM.forward"   s   ''a>>%!nn*<NNEaZFAf-Fll1oGFq!Q*222q@AIIaAIIaLOKKNIIaACYYq"a/FaKE7^6JJvaAww&678A+%JJvaAuvo67JJvaAuV|&;<= $ ))C$Cc2A2g,CA99Ar"   )r   r   r   r   )r   NF)	__name__
__module____qualname____firstlineno____doc__r   r:   __static_attributes____classcell__r   s   @r    r   r      s    
! !r"   r   c                     U R                   R                  5       R                  5       nX!-  S-  nU R                   =R                  U-  sl        U R                  b   U R                  =R                  U-  sl        gg)zTRescale initial weight scale. It is unclear why it helps but it certainly does.
          ?N)weightstddetachdatabias)conv	referencerG   scales       r    rescale_convrN   F   s_     ++//

"
"
$C_s"EKKyy		% r"   c                     U R                  5        H]  n[        U[        R                  [        R                  [        R
                  [        R                  45      (       d  MR  [        X!5        M_     g N)modules
isinstancer   Conv1dConvTranspose1dConv2dConvTranspose2drN   )modulerL   subs      r    rescale_modulerY   P   sE    ~~cBIIr'9'9299bFXFXYZZ(  r"   c            	       P   ^  \ rS rSrSr   S
S\S\S\S\4U 4S jjjrS rS	r	U =r
$ )DConvV   z
New residual branches in each encoder layer.
This alternates dilated convolutions, potentially with LSTMs and attention.
Also before entering each residual branch, dimension is projected on a smaller subspace,
e.g. of dim `channels // compress`.
channelscompressdepthinitc                 L  > [         TU ]  5         US-  S:X  d   eXl        X l        [	        U5      U l        US:  nS nU(       a  S n[        X-  5      nU
(       a  [        R                  nO[        R                  n[        R                  " / 5      U l        [        U R
                  5       H  nU(       a  SU-  OSnUUS-  -  n[        R                  " XUUUS9U" U5      U" 5       [        R                  " USU-  S5      U" SU-  5      [        R                  " S5      [        X5      /nU(       a  UR!                  S[#        XUS95        U	(       a  UR!                  S[%        USS	S
S95        [        R&                  " U6 nU R                  R)                  U5        M     g)ax  
Args:
    channels: input/output channels for residual branch.
    compress: amount of channel compression inside the branch.
    depth: number of layers in the residual branch. Each layer has its own
        projection, and potentially LSTM and attention.
    init: initial scale for LayerNorm.
    norm: use GroupNorm.
    attn: use LocalAttention.
    heads: number of heads for the LocalAttention.
    ndecay: number of decay controls in the LocalAttention.
    lstm: use LSTM.
    gelu: Use GELU activation.
    kernel: kernel size for the (dilated) convolutions.
    dilate: if true, use dilation, increasing with the depth.
r   r   r   c                 ,    [         R                  " 5       $ rP   r   Identityds    r    <lambda> DConv.__init__.<locals>.<lambda>y   s
    BKKMr"   c                 0    [         R                  " SU 5      $ )Nr   r   	GroupNormre   s    r    rg   rh   {   s    Q 2r"   )dilationpaddingr$   )headsndecay   T)r   r   r   N)r   r   r]   r^   absr_   intr   GELUReLU
ModuleListr   r)   rS   GLUr	   insert
LocalStater   
Sequentialr*   )r   r]   r^   r_   r`   normattnrn   ro   r   gelukerneldilatenorm_fnhiddenactrf   rl   rm   modslayerr   s                        r    r   DConv.__init__]   s[   ( 	zQ  Z
 *2GX() ''C''CmmB'tzz"A!'qAvQH&A+.G		(FXwW		&!h,2H%rvvay8*D Az&fMNAuVA4PQMM4(EKKu% #r"   c                 @    U R                    H  nX" U5      -   nM     U$ rP   )r   )r   r-   r   s      r    r:   DConv.forward   s"    [[EE!HA !r"   )r]   r^   r_   r   )r   r   -C6?TFr   r   FTr$   T)r<   r=   r>   r?   r@   rr   floatr   r:   rA   rB   rC   s   @r    r[   r[   V   sG     Z^LP"&8& 8& 8&# 8&QV 8& 8&t r"   r[   c            	       J   ^  \ rS rSrSrS
S\S\S\S\4U 4S jjjrS rS	rU =r	$ )rx      zLocal state allows to have attention based only on data (no positional embedding),
but while setting a constraint on the time window (e.g. decaying penalty term).

Also a failed experiments with trying to provide some frequency based attention.
r]   rn   nfreqsro   c                   > [         TU ]  5         X-  S:X  d   X45       eX l        X0l        X@l        [
        R                  " XS5      U l        [
        R                  " XS5      U l        [
        R                  " XS5      U l	        U(       a  [
        R                  " XU-  S5      U l
        U(       a  [
        R                  " XU-  S5      U l        U R                  R                  =R                  S-  sl        U R                  R                  c   eSU R                  R                  R                  S S & [
        R                  " XU-  -   US5      U l        g )Nr   r   g{Gz?)r   r   rn   r   ro   r   rS   contentquerykeyquery_freqsquery_decayrF   rI   rJ   proj)r   r]   rn   r   ro   r   s        r    r   LocalState.__init__   s   1$7x&77$
yyQ7YYx15
99X3!yy6>1ED!yy6>1ED##((D0(##((444,.D!!&&q)IIh71E	r"   c                    UR                   u  p#nU R                  n[        R                  " XAR                  UR
                  S9nUS S 2S 4   US S S 24   -
  nU R                  U5      R                  X%SU5      nU R                  U5      R                  X%SU5      n	[        R                  " SX5      n
XR                   S   S-  -  n
U R                  (       a  [        R                  " SU R                  S-   UR                  UR
                  S9n[        R                  " S[        R                  -  U-  UR                  SSS5      -  5      nU R                  U5      R                  X%SU5      U R                  S-  -  nU
[        R                  " SX5      -  n
U R                  (       a  [        R                  " SU R                  S-   UR                  UR
                  S9nU R!                  U5      R                  X%SU5      n[        R"                  " U5      S-  nUR                  SSS5      * UR%                  5       -  U R                  S-  -  nU
[        R                  " SUU5      -  n
U
R'                  [        R(                  " XJR                  [        R*                  S9S5        [        R,                  " U
SS	9nU R/                  U5      R                  X%SU5      n[        R                  " S
UU5      nU R                  (       a1  [        R                  " SUW5      n[        R0                  " UU/S5      nUR3                  USU5      nXR5                  U5      -   $ )N)devicedtyper%   zbhct,bhcs->bhtsr   rE   r   zfts,bhfs->bhtsir   zbhts,bhct->bhcszbhts,fts->bhfs)r&   rn   r+   aranger   r   r   viewr   einsumr   cosmathpir   ro   r   sigmoidrq   masked_fill_eyeboolsoftmaxr   r,   r(   r   )r   r-   r.   r/   r0   rn   indexesdeltaquerieskeysdotsperiodsfreq_kernelfreq_qdecaysdecay_qdecay_kernelweightsr   resulttime_sigs                        r    r:   LocalState.forward   s   ''a

,,qA4 747#33**Q-$$Qr15xx{"a0||-t=

1s"";;ll1dkkAoahhaggVG))AK%$7',,r1a:P$PQK%%a(--aA>PSASSFELL!1;GGD;;\\!T[[1_QXXQWWUF&&q)..qQ?GmmG,q0G#[[Q22UYY[@4;;PSCSSLELL!1<IID 	%))AkkLdS--!,,,q/&&qQ7/'B;;||$4g{KHYY115F2q)99V$$$r"   )	r   rn   r   ro   r   r   r   r   r   )r   r   r   )
r<   r=   r>   r?   r@   rr   r   r:   rA   rB   rC   s   @r    rx   rx      s>    
F FS Fc Fs F F&"% "%r"   rx   c                      ^  \ rS rSr\                        SU 4S jj5       rS rS rSU 4S jjrSr	U =r
$ )	Demucs   c                   >^ [         T&U ]  5         X l        Xl        Xl        Xl        Xl        XPl        UU l        X0l	        UU l
        UU l        UU l        [        R                  " 5       U l        [        R                  " 5       U l        [        R                  " 5       U l        U(       a  [        R$                  " SS9nSnO[        R&                  " 5       nSnU(       a  [        R(                  nO[        R&                  nUnSn[+        U5       GH  nS n UU:  a  U4S jn / n!U![        R,                  " UX8U	5      U " U5      U" 5       /-  n!UU:  n"UU:  n#US-  (       a  U![/        UUUUU"U#S9/-  n!U(       a*  U![        R,                  " UUU-  S5      U " UU-  5      U/-  n!U R                  R1                  [        R2                  " U!6 5        / n$US:  a  Un%O[5        U R                  5      U-  n%U(       a/  U$[        R,                  " UUU-  SU
-  S-   U
S9U " UU-  5      U/-  n$US-  (       a  U$[/        UUUUU"U#S9/-  n$U$[        R6                  " UU%XUS9/-  n$US:  a  U$U " U%5      U" 5       /-  n$U R                   R9                  S[        R2                  " U$6 5        Un[;        XC-  5      nGM     UnU(       a  [=        X75      U l        OS	U l        U(       a  [A        U US
9  g	g	)a2  
Args:
    sources (list[str]): list of source names
    audio_channels (int): stereo or mono
    channels (int): first convolution channels
    depth (int): number of encoder/decoder layers
    growth (float): multiply (resp divide) number of channels by that
        for each layer of the encoder (resp decoder)
    depth (int): number of layers in the encoder and in the decoder.
    rewrite (bool): add 1x1 convolution to each layer.
    lstm_layers (int): number of lstm layers, 0 = no lstm. Deactivated
        by default, as this is now replaced by the smaller and faster small LSTMs
        in the DConv branches.
    kernel_size (int): kernel size for convolutions
    stride (int): stride for convolutions
    context (int): kernel size of the convolution in the
        decoder before the transposed convolution. If > 1,
        will provide some context from neighboring time steps.
    gelu: use GELU activation function.
    glu (bool): use glu instead of ReLU for the 1x1 rewrite conv.
    norm_starts: layer at which group norm starts being used.
        decoder layers are numbered in reverse order.
    norm_groups: number of groups for group norm.
    dconv_mode: if 1: dconv in encoder only, 2: decoder only, 3: both.
    dconv_depth: depth of residual DConv branch.
    dconv_comp: compression of DConv branch.
    dconv_attn: adds attention layers in DConv branch starting at this layer.
    dconv_lstm: adds a LSTM layer in DConv branch starting at this layer.
    dconv_init: initial scale for the DConv branch LayerScale.
    normalize (bool): normalizes the input audio on the fly, and scales back
        the output by the same amount.
    resample (bool): upsample x2 the input and downsample /2 the output.
    rescale (float): rescale initial weights of convolutions
        to get their standard deviation closer to `rescale`.
    samplerate (int): stored as meta information for easing
        future evaluations of the model.
    segment (float): duration of the chunks of audio to ideally evaluate the model on.
        This is used by `demucs.apply.apply_model`.
r   r   r   r   c                 ,    [         R                  " 5       $ rP   rc   re   s    r    rg   !Demucs.__init__.<locals>.<lambda>F  s
    r"   c                 2   > [         R                  " TU 5      $ rP   rj   )rf   norm_groupss    r    rg   r   H  s    BLLa$@r"   )r_   r`   r^   r{   r   )rm   N)rL   )!r   r   audio_channelssourceskernel_sizecontextr4   r_   resampler]   	normalize
sampleratesegmentr   ru   encoderdecoderskip_scalesrv   rt   rs   r)   rS   r[   r*   ry   lenrT   rw   rr   r   r   rY   )'r   r   r   r]   growthr_   rewritelstm_layersr   r4   r   r|   glunorm_startsr   
dconv_modedconv_depth
dconv_comp
dconv_attn
dconv_lstm
dconv_initr   r   rescaler   r   
activationch_scaleact2in_channelsrm   indexr   encoder{   r   decodeout_channelsr   s'                 `                       r    r   Demucs.__init__   s   X 	,&
  "$}}}}==?AJHJH77D77D$5\E-G#@F		+xfE! F
 J&DJ&DA~5:*44dL M MIIh8(;Q?Hx/0*> > LLv 67Fqy*"4<<0>AIIh8(;Q[1_V]^Hx/0*> > A~5:*44dL M Mr))(L"G= > >Fqy7<0$&99LL2==&#9:"K6,-HQ "T h4DIDI473 r"   c                    U R                   (       a  US-  n[        U R                  5       HA  n[        R                  " XR
                  -
  U R                  -  5      S-   n[        SU5      nMC     [        U R                  5       H"  nUS-
  U R                  -  U R
                  -   nM$     U R                   (       a  [        R                  " US-  5      n[        U5      $ )a(  
Return the nearest valid length to use with the model so that
there is no time steps left over in a convolution, e.g. for all
layers, size of the input - kernel_size % stride = 0.

Note that input are automatically padded if necessary to ensure that the output
has the same length as the input.
r   r   )	r   r)   r_   r   ceilr   r4   maxrr   )r   length_idxs       r    valid_lengthDemucs.valid_lengthx  s     ==aKFtzz"AYY)9)9 9T[[HIAMFF^F # $CqjDKK/$2B2BBF % ==YYvz*F6{r"   c                    UnUR                   S   nU R                  (       a;  UR                  SSS9nUR                  SSS9nUR                  SSS9nX%-
  SU-   -  nOSnSnU R	                  U5      U-
  n[
        R                  " X'S-  XwS-  -
  45      nU R                  (       a  [        R                  " USS5      n/ nU R                   H  n	U	" U5      nUR                  U5        M     U R                  (       a  U R                  U5      nU R                   H)  n
UR                  S5      n[        X5      nU
" X+-   5      nM+     U R                  (       a  [        R                  " USS5      nX&-  U-   n[        X#5      nUR!                  UR#                  S5      [%        U R&                  5      U R(                  UR#                  S5      5      nU$ )Nr%   r   T)r   keepdimgh㈵>r   r   )r&   r   meanrG   r   Fpadr   juliusresample_fracr   r*   r   r   popr   r   sizer   r   r   )r   mixr-   r   monor   rG   r   savedr   r   r   s               r    r:   Demucs.forward  s   >>88480D99T92D((r4(0CdSj)ADC!!&)F2EE!qj%1*"456==$$Q1-AllFq	ALLO # 99		!AllF99R=Dt'Dqx A #
 ==$$Q1-AGdN"FF166!9c$,,/1D1DaffRjQr"   c                    > [        U R                  5       HJ  nS HA  nS H8  nU SU SU 3nU SU SU 3nXq;   d  M  Xa;  d  M%  UR                  U5      X'   M:     MC     ML     [        TU ]  XS9  g )N)r   r   )rJ   rF   .z.3.z.2.)strict)r)   r_   r   r   load_state_dict)	r   stater   r   abnewoldr   s	           r    r   Demucs.load_state_dict  s    $C++ACqS,CCqS,C|(8%*YYs^
	 , , % 	5r"   )r   r]   r   r   r_   r   r   r   r   r   r   r   r   r   r4   )r   @   g       @   Tr      r   r   TTr   r   r   r   r   r   r   r   TTg?iD  (   )T)r<   r=   r>   r?   r   r   r   r:   r   rA   rB   rC   s   @r    r   r      sx     !" !EY4 Y4v.%N	6 	6r"   r   )r   typingtpr   r+   r   torch.nnr   r   statesr   utilsr   r   transformerr	   Moduler   rN   rY   r[   rx   r    r"   r    <module>r     ss         $   & #/BII /d )DBII DN;% ;%|d6RYY d6r"   