
    ]i)9                     T   S r SSKJrJr  SSKJrJr  SSKrSSKJ	r	  SSK
r
SSKrSSKrSSKrSSKrSSKJr  SSKrSSKJr  SS	KJrJr  SS
KJr  SSKJr  SSKJr  SSKJrJ r   \RB                  " \"5      r# " S S5      r$ " S S5      r%S\&4S jr'\ " S S5      5       r( " S S5      r)g)z)Scheduling and job monitoring utilities.
    )contextmanager	ExitStack)	dataclassfieldN)Path)SlurmJob   )git_save)SlurmConfigSubmitRules)get_distrib_spec)DecoratedMain)try_load)XP_get_sigc                   F    \ rS rSrS\S\R                  \   4S jrS r	Sr
g)_SubmitItTarget"   mainargvc                     UR                  U5      U l        [        5       n[        UR                  5      [
        R                  S'   U[        R                  SS & U" 5         g )NRANKr	   )	get_xpxpr   strrankosenvironsysr   )selfr   r   specs       F/mnt/rpi/tmp/demucs-venv-sys/lib/python3.13/site-packages/dora/shep.py__call___SubmitItTarget.__call__#   sF    ++d#! !^

6    c                    [        5       R                  S:X  aM  U R                  R                  R	                  5       (       a$  U R                  R                  R                  5         [        R                  R                  " U /UQ70 UD6$ )Nr   )	r   r   r   rendezvous_fileexistsunlinksubmitithelpersDelayedSubmission)r    argskwargss      r"   
checkpoint_SubmitItTarget.checkpoint,   sc    ""a'ww&&--//''..011$HHHHr%   )r   N)__name__
__module____qualname____firstlineno__r   tpSequencer   r#   r/   __static_attributes__ r%   r"   r   r   "   s$    ] "++c2B Ir%   r   c                   h    \ rS rSrSrS\4S jr\S\4S j5       r	SS jr
SS jr\S	 5       rS
 rSrg)Sheep4   zO
A Sheep is a specific run for a given XP. Sheeps are managed
by the Shepherd.
r   c                     Xl         S U l        S U l        U R                  R	                  5       (       aA  [        U R                  5      n[        U[        5      (       a  Uu  U l        U l        g X l        g g N)r   job_other_jobs	_job_filer(   r   
isinstancetuple)r    r   contents      r"   __init__Sheep.__init__9   s]    37DH>>  ""t~~.G'5))-4*$*" #r%   returnc                     U R                   R                  U R                   R                  R                  R                  -  $ r=   )r   folderdorashepjob_filer    s    r"   r@   Sheep._job_fileE   s)    ww~~ 1 1 : :::r%   c                 8   U R                   c  gU R                   R                  R                  U R                   R                  U5      nUS:X  a4  U R                  (       a#  [        S U R                   5       5      (       a  SnUR                  S5      (       a  gU$ )z1Return the current state of the `Sheep`.
        NUNKNOWNc              3   >   #    U  H  oR                   S :g  v   M     g7f)rO   N)state).0r>   s     r"   	<genexpr>Sheep.state.<locals>.<genexpr>P   s     F5Ec99	)5Es   MISSING	CANCELLED)r>   watcher	get_statejob_idr?   any
startswith)r    moderQ   s      r"   rQ   Sheep.stateI   s|     88  **488??DAI$"2"2FT5E5EFFF "K((r%   c                     U R                   c  gU R                   R                  R                  U R                   R                  U5      $ )zDReturn True if the job is no longer running on the cluster.
        T)r>   rW   is_donerY   )r    r\   s     r"   r_   Sheep.is_doneY   s6     88xx''>>r%   c                 ~    U R                   b0  U R                  R                  U R                   R                   S3-  $ g)z)Return the path to the main log.
        Nz
_0_log.out)r>   r   r*   rY   rL   s    r"   log	Sheep.log`   s6     8877##(9&DDDr%   c                     SU R                   R                   SU R                  5        S3nU R                  b  USU R                  R                   S3-  nUSU R                   R
                   S3-  nU$ )NzSheep(z, state=z, zsid=zargv=))r   sigrQ   r>   rY   r   )r    outs     r"   __repr__Sheep.__repr__h   sj    tww{{m8DJJL><88T$((//*"--Ctww||nA&&
r%   )r?   r>   r   N)standard)r1   r2   r3   r4   __doc__r   rD   propertyr   r@   rQ   r_   rb   rh   r7   r8   r%   r"   r:   r:   4   sQ    
#2 
# ;4 ; ; ?  r%   r:   xc                     g)z7No logging logging function, passed to `Shepherd`.
    Nr8   )rm   s    r"   no_logro   q   s     	r%   c                   P    \ rS rSr% \\S'   \" \S9r\	R                  \   \S'   Srg)	_JobArrayw   slurm_config)default_factorysheepsr8   N)r1   r2   r3   r4   r   __annotations__r   listru   r5   Listr:   r7   r8   r%   r"   rq   rq   w   s    "48FBGGEN8r%   rq   c                      \ rS rSrSr\4S\S\R                  \	/S4   4S jjr
S\R                  \	   S\4S	 jrS
\	S\R                  \   4S jrS\	S\R                  \   4S jrS r\S\4S j5       rS\S\S\4S jrS\R0                  4S jrS r\S\4S j5       r\S\4S j5       r\S\4S j5       rS\R@                  \   4S jr!S\	S\S\S\RD                  4S jr#S r$\S\	4S  j5       r%S!\&4S" jr'S#r(g)$Shepherd}   z
Takes care of the little jobs.

Args:
    main (DecoratedMain): main function decorated by Dora.
    log (callable): log function, if provided should take a single string
        argument.
r   rb   Nc                    Xl         U R                  R                  SSS9  U R                  R                  SSS9  U R                  R                  SSS9  X l        SU l        S U l        / U l        / U l	        U R                  5         g )NT)exist_okparentsF)r   _by_idmkdir_orphans_arraysrb   _in_job_array_existing_git_clone
_to_cancel
_to_submit_check_orphans)r    r   rb   s      r"   rD   Shepherd.__init__   s{    	46T48D$7#(6: 68.0r%   r   rF   c                 |    [        U[        5      (       a   eU R                  R                  U5      n[	        U5      $ )z
Given a list of of arguments, return the matching `Sheep`,
which will contain both information on the `dora.xp.XP`, and on
the latest job associated with that XP.
)rA   r   r   r   r:   )r    r   r   s      r"   get_sheep_from_argvShepherd.get_sheep_from_argv   s5     dC((((YYd#Ryr%   rf   c                 N    U R                   R                  U5      n[        U5      $ )zR
Returns a `Sheep` given the XP signature, if any exists, otherwise
returns None.
)r   get_xp_from_sigr:   )r    rf   r   s      r"   get_sheep_from_sigShepherd.get_sheep_from_sig   s"    
 YY&&s+Ryr%   rY   c                     U R                   U-  nUR                  5       (       a@  UR                  5       R                  nU R                  R                  U5      n[        U5      $ g)z]
Returns the `Sheep` associated with the given `job_id`. If no sheep
is found, returns None.
N)r   
is_symlinkresolvenamer   r   r:   )r    rY   linkrf   r   s        r"   get_sheep_from_job_idShepherd.get_sheep_from_job_id   sQ    
 {{V#??,,.%%C**3/B9r%   c                 @    [         R                  R                  5         g)z2
Force an update of all job states with submitit.
N)r   rW   updaterL   s    r"   r   Shepherd.update   s     	!r%   rs   c              #      #    U R                   (       a   eU R                  R                  [        U5      5        SU l          Sv   SU l         g! SU l         f = f7f)z*Context manager to launch XP in job array.TNF)r   r   appendrq   )r    rs   s     r"   	job_arrayShepherd.job_array   sM      %%%%y67!	'!&DDs   ?AA A	AAsheeprulesc                 D   UR                   Gb	  UR                  5       nUS:X  aE  UR                  (       a3  [        R	                  SUR                   R
                   35        SUl         OUS;   aF  [        R	                  SUR                   R
                   S35        UR                  (       a  SUl         ObUR                  (       aQ  [        R	                  SUR                   R
                   SU 35        U R                  UR                   5        SUl         UR                   c|  U R                  (       d$  U R                  R                  [        U5      5        X R                  S	   R                  :X  d   eU R                  S	   R                  R                  U5        gg)
z
Decides whether to schedule a new job for the given sheep, based on the rules
given in `rules`.
Jobs are actually only scheduled once the `commit()` method is called.
N	COMPLETEDz"Ignoring previously completed job )FAILEDrV   OUT_OF_MEMORYTIMEOUTrU   	NODE_FAILzPrevious job z failed or was canceledzCancelling previous job z with status )r>   rQ   replace_doneloggerdebugrY   retryreplacecancel_lazyr   r   r   rq   rs   ru   )r    r   rs   r   rQ   s        r"   maybe_submit_lazyShepherd.maybe_submit_lazy   s<    99 KKME#%%LL#EeiiFVFVEW!XY $EI ( (}UYY-=-=,>>UVW;; $EI==LL#;EII<L<L;M][`Za!bc$$UYY/ $EI99%%&&y'>???2#6#C#CCCCOOB&&--e4	 r%   r>   c                 :    U R                   R                  U5        g)zM
Cancel a job. The job is actually cancelled only when `commit()` is called.
N)r   r   )r    r>   s     r"   r   Shepherd.cancel_lazy   s     	s#r%   c                    U R                   (       a"  U R                  U R                   5        / U l         SU l        U R                  (       a@  U R                  R	                  S5      nU R                  U5        U R                  (       a  M?  gg)z]
Commit all changes registered so far with either `maybe_submit_lazy()`
and `cancel_lazy()`.
Nr   )r   _cancelr   r   pop_submit)r    r   s     r"   commitShepherd.commit   s\    
 ??LL) DO#' oo++A.ILL# ooor%   c                     U R                   R                  R                  U R                   R                  R                  R                  -  $ r=   )r   rI   dirrJ   by_idrL   s    r"   r   Shepherd._by_id   s/    yy~~!!DIINN$7$7$=$===r%   c                     U R                   R                  R                  U R                   R                  R                  R                  -  $ r=   )r   rI   r   rJ   orphansrL   s    r"   r   Shepherd._orphans   s/    yy~~!!DIINN$7$7$?$???r%   c                     U R                   R                  R                  U R                   R                  R                  R                  -  $ r=   )r   rI   r   rJ   arraysrL   s    r"   r   Shepherd._arrays   s/    yy~~!!DIINN$7$7$>$>>>r%   jobsc                     S/U Vs/ s H  o"R                   PM     sn-   n[        R                  SSR                  U5      5        [        R
                  " USS9  g s  snf )Nscancelz
Running %s Tcheck)rY   r   r   joinsprun)r    r   r>   
cancel_cmds       r"   r   Shepherd._cancel   sJ    [$#?$3JJ$#??
\388J#78
z& $@s   Ar   rH   c                    S[         R                  S'   [        UR                  5      n[        R
                  " X$R                  S5      S9nUR                  nUS:  a  US-  S:w  a  [        S5      eUS-  US'   SnOUnS	US'   UR                  nU(       a  UR                  U-  n	U	 S
3US'   SU 3US'   UR                  (       a$  S	US'   UR                  c  XsR                  -  US'   O XtS'   UR                  c  UR                  US'   US	 US	 US	 US	 [        R                  SU5        UR                  " SUSS.UD6  U$ )N1SLURM_KILL_BAD_EXITmax_num_timeout)rH   r      r   z.Can only take <= 8 gpus, or multiple of 8 gpusnodesr	   GBmemzgpu:gresntasks_per_nodecpus_per_taskgpusmem_per_gpucpus_per_gpuone_task_per_nodezSlurm parameters %rT)job_namestderr_to_stdoutr8   )r   r   dict__dict__r*   SlurmExecutorr   r   
ValueErrorr   r   r   r   r   r   update_parameters)
r    r   rH   rs   r.   executorr   gpus_per_noder   r   s
             r"   _get_submitit_executorShepherd._get_submitit_executor  sw   ,/

()l++,))::6G+HJ  !8ax1} !QRR"aiF7OM MF7O"..**]:C"e2JF5M/v))()F$%))1*7:S:S*S'(5$%))1*6*C*C'6N=!>"&'*F3"" 	!	 	 r%   c                 ,   U R                   R                  5        H  nUR                  n[        R	                  SU S35        [
        R                  " SS[        R                  " 5       SUSSS/S	S	S
9nUR                  R                  5       R                  5       R                  S5       Vs/ s H  oD(       d  M  UPM     nnU(       a2  [        R	                  SU S35        [
        R                  " S/U-   S	S9  UR                  5         M     gs  snf )zCheck for orphaned jobs.zFound dirty tag z`, meaning a job might have been scheduled but Dora or Slurm crashed before the job id was saved.squeuez-uz-nz-oz%iz-hT)capture_outputr   
zFound orphan job ids z, will cancelr   r   N)r   iterdirr   r   warningr   r   r   getloginstdoutdecodestripsplitr)   )r    dirtyr   proclineidss         r"   r   Shepherd._check_orphans-  s    ]]**,E::DNN-dV 4T T U668T2;;=$dDRVW)-T;D$(KK$6$6$8$>$>$@$F$Ft$LU$LDPT4$LCU!6se=IJ	{S(5LLN - Vs   -
D;Dc              #      #    U R                   U-  nUR                  5          Sv   UR                  5         g! UR                  5         f = f7f)z,Context manager to enter a potential orphan.N)r   touchr)   )r    r   tokens      r"   _enter_orphanShepherd._enter_orphan;  s8      $	LLNELLNs    A8 AA

Ar   c           
      
  ^ UR                   nUR                  nU(       d  g [        U5      S:  nUS   nU R                  R	                  UR
                  5        UR
                  R                  R                  m[        U4S jU 5       5      (       d   S5       eU(       a9  [        [        U Vs/ s H  ofR
                  R                  PM     sn5      5      nOUR
                  R                  nU(       a  U R                  R                  S-   U-   nOU R                  R                  S-   U-   nU(       a  U R                  U-  n	OUR
                  R                  n	U	R                  SS9  U He  nUR
                  n
U R                  R	                  U
5        U
R                   R#                  5       (       d  MK  U
R                   R%                  5         Mg     U R'                  XU5      n/ nT(       a2  U R(                  c%  [        R*                  " U R                  5      U l        U R-                  U5         [/        5        nT(       a>  U R(                  c   eUR1                  [        R2                  " U R(                  5      5        U(       a  UR1                  UR5                  5       5        UR                    H  nT(       a:  U R(                  c   e[        R6                  " UR
                  U R(                  5        UR9                  UR;                  [=        5       U R                  UR
                  R>                  5      5        M     S S S 5        [A        X,5       GH  u  pn[B        RD                  " X4[G        URH                  S	5      5        [J        RM                  S
URN                  5        Xl(        Xl)        U RT                  URN                  -  nUnURW                  UR
                  RX                  R[                  5       5        U(       an  UR
                  RX                  U	R                  -  nUR#                  5       (       a%  UR[                  5       U	R[                  5       :X  d   eOURW                  U	5        UR
                  R\                  nUR#                  5       (       a  UR%                  5         URW                  U	5        U R                  R_                  UR
                  5      nU Ra                  SURN                   SUR
                  R                   SU 35        GM     S S S 5        g s  snf ! , (       d  f       GN= f! , (       d  f       g = f)Nr	   r   c              3   h   >#    U  H'  oR                   R                  R                  T:H  v   M)     g 7fr=   )r   rI   r
   )rR   otheruse_git_saves     r"   rS   #Shepherd._submit.<locals>.<genexpr>O  s"     Nve88==))\9vs   /2z?All jobs inside an array must have the same value for git_save._array__T)r}   wbzCreated job with id %szScheduled job z for sheep /)1ru   rs   lenr   init_xpr   rI   r
   allr   sortedrf   r   r   _xp_submititr   r'   r(   r)   r   r   get_new_cloner   r   enter_contextenter_clonebatchassign_cloner   submitr   r   zippickledumpopenr@   r   r   rY   r>   r?   r   
symlink_torH   r   _latest_submititget_namerb   )r    r   ru   rs   is_arrayfirstr   name_sigr   submitit_folderr   r   r   stackr>   r   submitit_linklatestr   s                     @r"   r   Shepherd._submitE  s   !! --v;?q			%((#xx}}--NvNNN 	PO	PN &'I&&'I JKHxx||H99>>I-8D99>>C'(2D"llT1O#hh33Ot,EBIIb!!!((**""))+	  ..tlS&(D44<'/'='=dii'HD$%33???''(<(<T=U=U(VW''(89&--E##77CCC --ehh8P8PQKK0A499ehhmm \]	 .  "&/
SKeoot)DE5szzB	$(!{{SZZ/ 7 7 9: &+XX__7K7K%KM$++--,446/:Q:Q:SSSS%00A22==??MMO!!/2yy))%((3>#**[aPTvVW/ 0 &%1 (J2  &%s,   0T46UDT9	G"U9
U	U
U)r   r   r   r   rb   r   ))r1   r2   r3   r4   rk   ro   r   r5   Callabler   rD   r6   r:   r   Optionalr   r   r   r   r   r   r   r   r*   r   r   r   rl   r   r   r   r   rx   r   r   r   r   r   rq   r   r7   r8   r%   r"   rz   rz   }   s    MS ] cUD[1I C(8 U c bkk%.@ 
C 
BKK4F 
" 'k ' '5u 5K 5P[ 5:$x00 $$ > > > @$ @ @ ? ? ?'BGGH- '
&3 & &-8&=E=S=S&P #  JX JXr%   rz   )*rk   
contextlibr   r   dataclassesr   r   loggingpathlibr   r  r   
subprocessr   r   typingr5   r*   r    r
   confr   r   distribr   r   r   utilsr   r   r   r   	getLoggerr1   r   r   r:   r   ro   rq   rz   r8   r%   r"   <module>r+     s    0 (    	  
     * %    
		8	$I I$: :z	c 	 9 9 9
RX RXr%   