
    .h8                        d Z ddlZddlZddlZddlZddlZddlZddlmZmZ ddl	m	Z	m
Z
 ddlmZ ddlZddlZddlmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZF  G d d      ZGy)zz
Train a model on a dataset.

Usage:
    $ yolo mode=train model=yolo11n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16
    N)copydeepcopy)datetime	timedelta)Path)distributed)nnoptim)__version__)get_cfgget_save_dir)check_cls_datasetcheck_det_dataset)load_checkpoint)DEFAULT_CFGGIT
LOCAL_RANKLOGGERRANKTQDMYAML	callbacks	clean_urlcolorstremojis)check_train_batch_size)	check_amp
check_filecheck_imgszcheck_model_file_from_stem
print_args)ddp_cleanupgenerate_ddp_command)get_latest_run)plot_results)	TORCH_2_4EarlyStoppingModelEMAattempt_compileautocast$convert_optimizer_state_dict_to_fp16
init_seeds	one_cycleselect_devicestrip_optimizertorch_distributed_zero_firstunset_deterministicunwrap_modelc                   2   e Zd ZdZeddfdZdefdZdefdZdefdZ	d Z
d	 Zd
 Zd Zd Zd-dZd.dZd/defdZd Zd Zd Zd Zd Zd Zd Zd Zd0dZd Zd1dZd2dZd3dZd Z d Z!d  Z"d! Z#d" Z$d# Z%d$ Z&d/d%Z'd& Z(d' Z)d( Z*d) Z+d* Z,d+ Z-d4d,Z.y)5BaseTrainera  
    A base class for creating trainers.

    This class provides the foundation for training YOLO models, handling the training loop, validation, checkpointing,
    and various training utilities. It supports both single-GPU and multi-GPU distributed training.

    Attributes:
        args (SimpleNamespace): Configuration for the trainer.
        validator (BaseValidator): Validator instance.
        model (nn.Module): Model instance.
        callbacks (defaultdict): Dictionary of callbacks.
        save_dir (Path): Directory to save results.
        wdir (Path): Directory to save weights.
        last (Path): Path to the last checkpoint.
        best (Path): Path to the best checkpoint.
        save_period (int): Save checkpoint every x epochs (disabled if < 1).
        batch_size (int): Batch size for training.
        epochs (int): Number of epochs to train for.
        start_epoch (int): Starting epoch for training.
        device (torch.device): Device to use for training.
        amp (bool): Flag to enable AMP (Automatic Mixed Precision).
        scaler (amp.GradScaler): Gradient scaler for AMP.
        data (str): Path to data.
        ema (nn.Module): EMA (Exponential Moving Average) of the model.
        resume (bool): Resume training from a checkpoint.
        lf (nn.Module): Loss function.
        scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
        best_fitness (float): The best fitness value achieved.
        fitness (float): Current fitness value.
        loss (float): Current loss value.
        tloss (float): Total loss value.
        loss_names (list): List of loss names.
        csv (Path): Path to results CSV file.
        metrics (dict): Dictionary of metrics.
        plots (dict): Dictionary of plots.

    Methods:
        train: Execute the training process.
        validate: Run validation on the test set.
        save_model: Save model training checkpoints.
        get_dataset: Get train and validation datasets.
        setup_model: Load, create, or download model.
        build_optimizer: Construct an optimizer for the model.

    Examples:
        Initialize a trainer and start training
        >>> trainer = BaseTrainer(cfg="config.yaml")
        >>> trainer.train()
    Nc                 N
   |j                  dd      | _        t        ||      | _        | j	                  |       t        | j                  j                        | _        dt        | j                        v rt        j                  d      nt        | j                        | j                  _        d| _
        d| _        i | _        t        | j                  j                  dz   t        z   | j                  j                          t#        | j                        | _        | j$                  j&                  | j                  _        | j$                  dz  | _        t        dv rw| j(                  j+                  d	d	
       t        | j$                        | j                  _        t-        j.                  | j$                  dz  t1        | j                               | j(                  dz  | j(                  dz  c| _        | _        | j                  j6                  | _        | j                  j8                  | _        | j                  j<                  xs d| _        d| _        t        dk(  rtA        t1        | j                               | j                  jB                  dv rd| j                  _"        tG        | j                  jH                        | _$        tK        tL              5  | jO                         | _(        ddd       d| _)        d| _*        d| _+        d| _,        d| _-        d| _.        d| _/        dg| _0        | j$                  dz  | _1        | jb                  je                         r0| j                  jf                  s| jb                  ji                          g d| _5        d| _6        |xs to        jp                         | _7        ts        | j                  j                  t              rNtu        | j                  j                        r/tu        | j                  j                  jw                  d            }nts        | j                  j                  tx        tz        f      r tu        | j                  j                        }n>| j                  j                  dv rd}n#t|        j~                  j                         rd}nd}|dkD  xr dt        j                  v| _B        || _C        t        dv r4| j                  s'to        j                  |        | j                  d       yyy# 1 sw Y   *xY w)a  
        Initialize the BaseTrainer class.

        Args:
            cfg (str, optional): Path to a configuration file.
            overrides (dict, optional): Configuration overrides.
            _callbacks (list, optional): List of callback functions.
        sessionNcudaCUDA_VISIBLE_DEVICES   )deterministicweights   r   Tparentsexist_okz	args.yamlzlast.ptzbest.ptd   r   r=   >   cpumpsLosszresults.csv)r   r9      ,r   on_pretrain_routine_start)Fpophub_sessionr   argscheck_resumer.   devicestrosgetenv	validatormetricsplotsr,   seedr   r:   r   save_dirnamewdirmkdirr   savevarslastbestsave_periodbatch
batch_sizeepochsstart_epochr!   typeworkersr    modelr0   r   get_datasetdataemalf	schedulerbest_fitnessfitnesslosstloss
loss_namescsvexistsresumeunlinkplot_idxnan_recovery_attemptsr   get_default_callbacks
isinstancelensplittuplelisttorchr7   is_availableenvironddp
world_sizeadd_integration_callbacksrun_callbacks)selfcfg	overrides
_callbacksr~   s        X/var/www/html/ai-service/venv/lib/python3.12/site-packages/ultralytics/engine/trainer.py__init__zBaseTrainer.__init__r   s    %==D9C+	)$#DII$4$45@F#dkkJZ@Z299%;<`cdhdodo`p		
499>>A%,DII<S<ST %TYY/++		MMI-	7?IIOOD4O8!$T]]!3DIIIIdmmk14		?C#yy94dii)6K	499900))//ii&&-#2:tDII' ;;~- !DII 0		@
)*5 	+((*DI	+   !	
!(===088??TYY%5%5HHOO!%&" $Hy'F'F'Hdii&&,TYY5E5E1FTYY--33C89J		((5$-8TYY--.JYY/JZZ$$&JJ>Dl"**&D$7?488//5:; $,?M	+ 	+s   TT$eventc                 @    | j                   |   j                  |       y)z7Append the given callback to the event's callback list.N)r   appendr   r   callbacks      r   add_callbackzBaseTrainer.add_callback   s    u$$X.    c                 $    |g| j                   |<   y)zPOverride the existing callbacks with the given callback for the specified event.N)r   r   s      r   set_callbackzBaseTrainer.set_callback   s    !)
ur   c                 V    | j                   j                  |g       D ]
  } ||         y)z>Run all existing callbacks associated with a particular event.N)r   getr   s      r   r   zBaseTrainer.run_callbacks   s)    **5"5 	HTN	r   c                 p   | j                   r| j                  j                  r&t        j                  d       d| j                  _        | j                  j
                  dk  r)t        d| j                   d| j                  dz   d      t        |       \  }}	 t        j                  t        d       d	d
j                  |              t        j                  |d       	 t        | t!        |             y| j#                          y# t        $ r}|d}~ww xY w# t        | t!        |             w xY w)zIAllow device='', device=None on Multi-GPU systems to default to device=0.zI'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'F      ?zuAutoBatch with batch<1 not supported for Multi-GPU training, please specify a valid batch size multiple of GPU count z, i.e. batch=   .zDDP:z debug command  T)checkN)r}   rJ   rectr   warningr]   
ValueErrorr~   r#   infor   join
subprocessrun	Exceptionr"   rM   	_do_train)r   cmdfilees       r   trainzBaseTrainer.train   s    88yy~~jk!&		yy$ OOSN__lmqm|m|  @A  nA  mB  BCD  -T2IC-x/0OPs$/ D#d), NN   D#d),s%   A	D 	DDDD D5c                      j                   j                  r1t        d j                   j                   j                         _        n
 fd _        t        j                  j                   j                   j
                         _
        y)z,Initialize training learning rate scheduler.r9   c                     t        d| j                  z  z
  d      dj                  j                  z
  z  j                  j                  z   S )Nr9   r   r   )maxr_   rJ   lrf)xr   s    r   <lambda>z.BaseTrainer._setup_scheduler.<locals>.<lambda>   s?    ADKK$7 ;sTYY]]?R SVZV_V_VcVc c r   )	lr_lambdaN)rJ   cos_lrr-   r   r_   rg   r
   lr_schedulerLambdaLR	optimizerrh   r   s   `r   _setup_schedulerzBaseTrainer._setup_scheduler   sX    99499==$++>DGcDG++44T^^tww4Wr   c                 <   t         j                  j                  t               t        j                  dt              | _        dt
        j                  d<   t        j                  t        j                         rdndt        d      t        | j                         y	)
zGInitialize and set the DistributedDataParallel parameters for training.r7   1TORCH_NCCL_BLOCKING_WAITncclglooi0*  )seconds)backendtimeoutrankr~   N)rz   r7   
set_devicer   rL   rN   r|   distinit_process_groupis_nccl_availabler   r~   r   s    r   
_setup_ddpzBaseTrainer._setup_ddp   se    

d#ll64014

-."446FFe,		
r   c           	      4   | j                         }| j                  j                  | j                        | _        | j	                          t        | j                  | j                  | j                  j                        | _        t        | j                  j                  t              r| j                  j                  nDt        | j                  j                  t              rt        | j                  j                        ng }dg}|D cg c]  }d| d
 c}|z   }|| _        | j                  j                         D ]~  \  }t        fd|D              r!t!        j"                  d d       d|_        ;|j$                  rH|j&                  j(                  s_t!        j*                  d	 d
       d|_         t-        j.                  | j                  j0                        j                  | j                        | _        | j0                  rjt2        dv rbt4        j6                  j9                         }t-        j.                  t;        | j                        | j                        | _        |t4        _        t2        dkD  r>| j<                  dkD  r/t?        j@                  | j0                  j                         d       tC        | j0                        | _        tD        r+t,        j0                  jG                  d| j0                        n3t,        jH                  j0                  jG                  | j0                        | _%        | j<                  dkD  r6tL        jN                  jQ                  | j                  t2        gd      | _        tS        t        tU        | j                  d      r$| j                  jV                  jS                         nd      d      }tY        | j                  jZ                  ||d      | j                  _-        || _+        | j\                  dk  r/t2        dk(  r&| j_                         x| j                  _0        | _.        | j\                  tS        | j<                  d      z  }	| jc                  | jd                  d   |	tf        d      | _4        t2        dv r| jc                  | jd                  jk                  d      xs | jd                  jk                  d      | j                  jl                  dk(  r|	n|	dz  dd      | _7        | jq                         | _9        | jr                  jt                  jv                  | jy                  d      z   }
t{        t}        |
dgt        |
      z              | _:        t        | j                        | _A        | j                  j                  r| j                          tS        t        | j                  j                  | j\                  z        d      | _F        | j                  j                  | j\                  z  | j                  z  | j                  j                  z  }t        j                  t        | jh                  j                        tS        | j\                  | j                  j                        z        | j                  z  }| j                  | j                  | j                  j                  | j                  j                  | j                  j                  ||      | _M        | j                          t        | j                  j                         dc| _S        | _T        | j                  |       | j                  dz
  | j                  _X        | j                  d!       y"c c}w )#z8Build dataloaders and optimizer on correct rank process.)rL   modez.dflzmodel.r   c              3   &   K   | ]  }|v  
 y wN ).0r   ks     r   	<genexpr>z+BaseTrainer._setup_train.<locals>.<genexpr>  s     6a166s   zFreezing layer ''Fz/setting 'requires_grad=True' for frozen layer 'zE'. See ultralytics.engine.trainer for customization of frozen layers.Tr<   )rL   r=   r9   r   )srcr7   )enabled)
device_idsfind_unused_parametersstride    )r   floormax_dimr   )r^   r   r   valtestobbrE   )prefix)rc   rU   lrmomentumdecay
iterations)patienceon_pretrain_routine_endN)Zsetup_modelrc   torL   set_model_attributesr)   rJ   compileru   freezery   intrangefreeze_layer_namesnamed_parametersanyr   r   requires_graddtypeis_floating_pointr   rz   tensorampr   r   default_callbacksr   r   r~   r   	broadcastboolr&   
GradScalerr7   scalerr	   parallelDistributedDataParallelr   hasattrr   r   imgszr^   
auto_batchr]   get_dataloaderre   r   train_loaderr   tasktest_loaderget_validatorrP   rQ   keyslabel_loss_itemsdictziprv   r(   rf   rR   plot_training_labelsroundnbs
accumulateweight_decaymathceildatasetr_   build_optimizerr   lr0r   r   r'   r   stopperstopresume_trainingr`   rh   
last_epochr   )r   ckptfreeze_listalways_freeze_namesr   r   vcallbacks_backupgsr^   metric_keysr  r   r   s                @r   _setup_trainzBaseTrainer._setup_train  s   !ZZ]]4;;/
!!# %TZZ$))J[J[\

 $))**D1 II $))**C0 tyy''( 	  &h5@AqcmADWW"4JJ//1 
	'DAq6#566.qc34"'__)B)BEaS IY Y #'
	' <<		.11$++>88(::??A||Idjj$9$++NDH*:I'"91,NN488<<>q1>>GEII   :UZZ^^MfMfosowowMfMx 	 ??Q<<TZZUYTZsw<xDJ 

H0MTZZ&&**,SUVXZ[%diioobTUV		 ??Q42:040AADIIOdo __DOOQ(??
 //IIg:JW 0 
 7?#22		e$=		f(=)-5)@:jSTn	  3  D "//1DN..00558M8MUZ8M8[[KK!s;7G1G HIDL

+DHyy))+ eDIIMMDOO$CDaHyy--?$//QTXT]T]TaTaaYYs4#4#4#<#<=DOOUYU^U^UbUb@ccdgkgrgrr
--**$$yy}}YY''! . 
 	"/9K9K"LediT"$($4$4q$8!45W Bs   ^c                 H   | j                   dkD  r| j                          | j                          t        | j                        }| j
                  j                  dkD  r,t        t        | j
                  j                  |z        d      nd}d}d| _	        t        j                         | _        t        j                         | _        | j                  d       t        j                  d| j
                  j                    d| j
                  j                    d	| j                  j"                  | j                   xs dz   d
t%        d| j&                         d	| j
                  j                  r| j
                  j                   dn| j(                   dz          | j
                  j*                  rJ| j(                  | j
                  j*                  z
  |z  }| j,                  j/                  ||dz   |dz   g       | j0                  }| j2                  j5                          	 || _        | j                  d       t9        j:                         5  t9        j<                  d       | j>                  jA                          ddd       | jC                          tD        dk7  r%| j                  jF                  jI                  |       tK        | j                        }|| j(                  | j
                  j*                  z
  k(  r*| jM                          | j                  jO                          tD        dv rCt        j                  | jQ                                tS        tK        | j                        |      }d| _*        |D ]  \  }}| j                  d       |||z  z   }	|	|k  r d|g}
t        dtW        tY        jZ                  |	|
d| j
                  j\                  | j^                  z  g      j                                     | _0        tK        | j2                  jb                        D ]  \  }}tY        jZ                  |	|
|dk(  r| j
                  jd                  nd|d   | jg                  |      z  g      |d<   d|v sWtY        jZ                  |	|
| j
                  jh                  | j
                  jj                  g      |d<    tm        | jn                        5  | jq                  |      }| j
                  jr                  rB| ju                  |d         }tw        | jt                        jy                  ||      \  }| _=        n| ju                  |      \  }| _=        |j}                         | _<        tD        dk7  r| xjx                  | j                   z  c_<        | jT                  | jz                  n!| jT                  |z  | jz                  z   |dz   z  | _*        ddd       | j~                  j                  | jx                        j                          |	|z
  | j`                  k\  r| j                          |	}| j
                  j                  rt        j                         | j                  z
  | j
                  j                  dz  kD  | _C        tD        dk7  r8tD        dk(  r| j                  ndg}t        j                  |d       |d   | _C        | j                  r n7tD        dv rt        | jT                  j                        r| jT                  j                  d   nd}|j                  ddd|z   z  z   |dz    d| j(                   | j                         ddg|dkD  r| jT                  nt        j                  | jT                  d      |d    j                  d   |d   j                  d   z         | j                  d!       | j
                  j                  r |	| j,                  v r| j                  ||	       | j                  d"        tK        | j2                  jb                        D ci c]  \  }}d#| |d    c}}| _M        | j                  d$       tD        dv r|dz   | j(                  k\  }| j                  j                  | jt                  g d%&       | j
                  j                  s$|s"| j                  j                  s| j                  r/| j                  d'(       | j                         \  | _U        | _V        | j                  |      r d| _X        tD        dv r| j                  i | j                  | jT                        | j                  | j                  )       | xj                  | j                  |dz   | j                        xs z  c_C        | j
                  j                  rN| xj                  t        j                         | j                  z
  | j
                  j                  dz  kD  z  c_C        | j
                  j                  sr!| j                          | j                  d*       t        j                         }|| j                  z
  | _	        || _        | j
                  j                  r|| j                  z
  || j0                  z
  dz   z  }t        j                  | j
                  j                  dz  |z        x| _        | j
                  _        | j                          | j6                  | j>                  _`        | xj                  || j(                  k\  z  c_C        | j                  d+       | j                  d'       tD        dk7  r8tD        dk(  r| j                  ndg}t        j                  |d       |d   | _C        | j                  rn|dz  }	tD        dv rt        j                         | j                  z
  }t        j                  d,|| j0                  z
  dz    d-|dz  d.d/       | j                          | j
                  j                  r| j                          | j                  d0       | j                          t                | j                  d1       y# 1 sw Y   
	xY w# 1 sw Y   xY wc c}}w )2z.Train the model with the specified world size.r9   r   rA   r=   Non_train_startzImage sizes z train, z val
Using z' dataloader workers
Logging results to boldz
Starting training for z	 hours...z
 epochs...rE   on_train_epoch_startignorer<   )totalon_train_batch_start        
initial_lrr   r   imgi  z%11s%11sz%11.4g/z.3gGclson_batch_endon_train_batch_endzlr/pgon_train_epoch_end)yamlncrJ   namesr   class_weights)includeg      ?)	threshold)rQ   on_model_saveon_fit_epoch_end
z epochs completed in z.3fz hours.on_train_endteardown)dr~   r   r  rv   r   rJ   warmup_epochsr   r   
epoch_timetimeepoch_time_starttrain_time_startr   r   r   r   num_workersr   rT   r_   close_mosaicrr   extendr`   r   	zero_gradepochwarningscatch_warningssimplefilterrh   step_model_trainr   sampler	set_epoch	enumerate_close_dataloader_mosaicresetprogress_stringr   rl   r   npinterpr   r^   r   param_groupswarmup_bias_lrrg   warmup_momentumr   r*   r   preprocess_batchr   rc   r2   rk   
loss_itemssumr   scalebackwardoptimizer_stepr  r   broadcast_object_listshapeset_description_get_memoryrz   	unsqueezerR   plot_training_samplesr   rf   update_attrr   r  possible_stop_clear_memoryvalidaterQ   rj   _handle_nan_recoveryrs   save_metricsr   rX   
save_modelr  r  r   r
  
final_evalplot_metricsr1   )r   nbnwlast_opt_stepbase_idxr7  pbarir]   nixijr   predsrk   broadcast_listloss_lengthirfinal_epochtmean_epoch_timer   s                         r   r   zBaseTrainer._do_trainc  s=
   ??QOO""#>Bii>U>UXY>YStyy..34c:_a $		 $		+,499??+8DIIOO3D E&&22doo6JKL M""*64=="A!B C%& JNDIINN+;9)E`d`k`k_llv]wy	
 99!!dii&<&<<BHMM  (HqL(Q,!GH    "DJ56((* &%%h/##%& rz!!))33E:T../Dtyy'='==>--/!!'')wD0023Id&7&78CDJ  >95""#9:e^8RB&)!S2rAtyy}}W[WfWfGfCg1h1n1n1p-q&rDO )$..*E*E F o1"$))aTYY%=%=SRST`RadhdkdklqdrRr$s#$ &?,.IIb"tyy?X?XZ^ZcZcZlZl>m,nAjMo dhh' y 11%8Eyy(( $

5< 80<TZZ0H0M0MeUZ0[-do04

50A-do $
DIrz		T__4	48JJ4FTZZZ[^^b^m^mMmrsvwrwLxDJy !!$)),557%8'')$&M yy~~%)YY[43H3H%HTYY^^^bMb$c	2:;?19dii$-ON 66~qI(6q(9DI99! 7?9<TZZ=M=M9N$**"2"21"5TUK((#h!k/&BB$qyk4;;-8#//1#6a8 -8!OdjjQUQ[Q[]^A_ "%L..q1	
 "%L..r2	 &&~6yy2+>225"=""#78}>9@ ;DDNND_D_:`aQrd|QtW,aDG34w#ai4;;6$$TZZ9s$t 99==K4<<3M3MQUQZQZ&&&515.DL$, ((/)*D&w!!*jT-B-B4::-N*jRVR^R^*jbfbibi*j!k		T\\%!)T\\BQkQ	99>>II$))+0E0E"E$))..[_J_!``I 99>>[OO%&&7 		A$"7"77DO$%D!yy~~#$t'<'<#<IYIYAY\]A]"^15499>>D;PSb;b1ccdii.%%',0JJ)		Udkk11	12s# rz/3qy$))d!C**>1=*1-	yyQJEI L 7?iikD$9$99GKK"UT%5%559::OPWZ^P^_bOccjklOOyy!!#~.:&[& &By y` bs   0rC?r4rrr	c                     t        | j                  | j                  j                  | j                  | j
                  |      S )zJCalculate optimal batch size based on model and device memory constraints.)rc   r   r   r]   max_num_obj)r   rc   rJ   r   r   r^   )r   rn  s     r   r   zBaseTrainer.auto_batch  s2    %**))////#
 	
r   c                    d\  }}| j                   j                  dk(  rFt        j                  j	                         }|rt        d      j                         j                  dz  S | j                   j                  dk7  rSt        j                  j                         }|r3t        j                  j                  | j                         j                  }|r|dkD  r||z  S dS |dz  S )zJGet accelerator memory utilization in GB or as a fraction of total memory.)r   r   rC   psutilrA   rB   r   i   @)rL   ra   rz   rC   driver_allocated_memory
__import__virtual_memorypercentr7   memory_reservedget_device_propertiestotal_memory)r   fractionmemoryr  s       r   rQ  zBaseTrainer._get_memory  s    ;;u$YY668F!(+::<DDsJJ[[&ZZ//1F

88ERR9AEAI%W1WQVWr   r(  c                 p   |r0d|cxk  rdk  sJ d        J d       | j                  d      |k  ryt        j                          | j                  j                  dk(  rt
        j                  j                          y| j                  j                  dk(  ryt
        j                  j                          y)	zIClear accelerator memory by calling garbage collector and emptying cache.r   r9   z"Threshold must be between 0 and 1.T)rx  NrC   rB   )	rQ  gccollectrL   ra   rz   rC   empty_cacher7   )r   r(  s     r   rV  zBaseTrainer._clear_memory$  s    	&Q&L(LL&L(LL&.);


;;u$II!!#[[&JJ""$r   c                     ddl }	 |j                  | j                  d      j                  d      S # t        $ r i cY S w xY w)z0Read results.csv into a dictionary using polars.r   N)infer_schema_lengthF)	as_series)polarsread_csvrn   to_dictr   )r   pls     r   read_results_csvzBaseTrainer.read_results_csv2  sD    	;;txxT;BJJUZJ[[ 	I	s   ,3 A Ac                    | j                   j                          | j                   j                         D ]S  \  }t        t	        fd| j
                              s)t        |t        j                        sD|j                          U y)zSet model in training mode.c                     | v S r   r   )fns    r   r   z*BaseTrainer._model_train.<locals>.<lambda>@  s    AF r   N)
rc   r   named_modulesr   filterr   ru   r	   BatchNorm2deval)r   mr  s     @r   r<  zBaseTrainer._model_train;  sc    

JJ,,. 	DAq6*D,C,CDE*UVXZXfXfJg	r   c                    ddl }|j                         }t        j                  | j                  | j
                  dt        t        | j                  j                              j                         | j                  j                  t        t        | j                  j                                     | j                  j                         t        | j                         i | j"                  d| j$                  i| j'                         t)        j*                         j-                         t.        t1        t2        j4                        t2        j6                  t2        j8                  t2        j:                  dddd|       |j=                         }| j>                  jA                  dd	       | jB                  jE                  |       | j
                  | j$                  k(  r| jF                  jE                  |       | jH                  dkD  rJ| j                  | jH                  z  dk(  r-| j>                  d
| j                   dz  jE                  |       yyy)z9Save model training checkpoints with additional metadata.r   Nrj   )rootbranchcommitoriginz*AGPL-3.0 (https://ultralytics.com/license)zhttps://docs.ultralytics.com)r7  ri   rc   rf   updatesr   r   
train_argstrain_metricstrain_resultsdateversiongitlicensedocsTr>   r7  .pt)%ioBytesIOrz   rX   r7  ri   r   r2   rf   halfr  r+   r   
state_dictr   rY   rJ   rQ   rj   r  r   now	isoformatr   rM   r   r  r  r  r  getvaluerV   rW   rZ   write_bytesr[   r\   )r   r  bufferserialized_ckpts       r   rZ  zBaseTrainer.save_modelC  s    

 $ 1 1TXX\\ :;@@B88++A(4>>KdKdKfBgh++002"499o!NDLL!NY4M!N!%!6!6!8 002&M!jj!jj!jj	 H6), /	
2 !//+ 			t4		o.,II!!/2q tzzD4D4D'D'IYY5C00==oN (J r   c           	         	 | j                   j                  dk(  r!t        | j                   j                        }n| j                   j                  j	                  dd      d   dk(  roddl}ddlm} |j                   || j                   j                              }t        |      | j                   _        t        | j                   j                        }nz| j                   j                  j	                  dd      d   d	v s| j                   j                  d
v r7t        | j                   j                        }d|v r|d   | j                   _        | j                   j                  r!t!        j"                  d       ddid<   d|d<   S # t        $ r=}t        t        dt        | j                   j                         d|             |d}~ww xY w)z
        Get train and validation datasets from data dictionary.

        Returns:
            (dict): A dictionary containing the training/validation/test dataset and category names.
        classifyr   r9   r=   ndjsonr   N)convert_ndjson_to_yolo>   ymlr#  >   r   posedetectsegment	yaml_filez	Dataset 'u   ' error ❌ z)Overriding class names with single class.itemr%  r$  )rJ   r   r   re   rsplitasyncioultralytics.data.converterr  r   rM   r   r   RuntimeErrorr   r   
single_clsr   r   )r   re   r  r  	yaml_pathr   s         r   rd   zBaseTrainer.get_datasetl  sn   	fyy~~+(8&&sA.r2h>M#KK(>tyy~~(NO	!$Y		(8&&sA.r2oE \ J )8$&%)+%6DIIN 99KKCDKDMDJ  	fv	)DIINN2K1LLYZX[&\]^dee	fs   EF
 
	G8GGc                    t        | j                  t        j                  j                        ry| j                  d}}d}t        | j                        j                  d      r%t        | j                        \  }}|j                  }nLt        | j                  j                  t
        t        f      r"t        | j                  j                        \  }}| j                  ||t        dk(        | _        |S )z
        Load, create, or download model for any task.

        Returns:
            (dict): Optional checkpoint to resume training from.
        Nr  r=   )r   r;   verbose)ru   rc   rz   r	   ModulerM   endswithr   r#  rJ   
pretrainedr   	get_modelr   )r   r   r;   r  _s        r   r   zBaseTrainer.setup_model  s     djj%((//2zz4Wtzz?##E*+DJJ7MGT,,C		,,sDk:()=)=>JGQ^^Wdbj^Q
r   c                    | j                   j                  | j                         t        j                  j
                  j                  | j                  j                         d       | j                   j                  | j                         | j                   j                          | j                  j                          | j                  r&| j                  j                  | j                         yy)zVPerform a single step of the training optimizer with gradient clipping and EMA update.g      $@)max_normN)r   unscale_r   rz   r	   utilsclip_grad_norm_rc   
parametersr;  updater6  rf   r   s    r   rM  zBaseTrainer.optimizer_step  s    T^^,&&tzz'<'<'>&N(  "88HHOODJJ' r   c                     |S )zQAllow custom preprocessing model inputs and ground truths depending on task type.r   )r   r]   s     r   rH  zBaseTrainer.preprocess_batch  s    r   c                     | j                  |       }|j                  d| j                  j                         j	                         j                                }| j                  r| j                  |k  r|| _        ||fS )z
        Run validation on val set using self.validator.

        Returns:
            metrics (dict): Dictionary of validation metrics.
            fitness (float): Fitness score for the validation.
        rj   )rP   rH   rk   detachrB   numpyri   )r   rQ   rj   s      r   rW  zBaseTrainer.validate  sm     ..&++i$))*:*:*<*@*@*B*H*H*J)JK  D$5$5$? 'Dr   c                     t        d      )z>Get model and raise NotImplementedError for loading cfg files.z3This task trainer doesn't support loading cfg filesNotImplementedError)r   r   r;   r  s       r   r  zBaseTrainer.get_model  s    !"WXXr   c                     t        d      )zGReturn a NotImplementedError when the get_validator function is called.z1get_validator function not implemented in trainerr  r   s    r   r   zBaseTrainer.get_validator      !"UVVr   c                     t        d      )z5Return dataloader derived from torch.data.Dataloader.z2get_dataloader function not implemented in trainerr  )r   dataset_pathr^   r   r   s        r   r   zBaseTrainer.get_dataloader  s    !"VWWr   c                     t        d      )zBuild dataset.z1build_dataset function not implemented in trainerr  )r   img_pathr   r]   s       r   build_datasetzBaseTrainer.build_dataset  r  r   c                     |d|iS dgS )z
        Return a loss dict with labelled training loss items tensor.

        Note:
            This is not needed for classification but necessary for segmentation & detection
        rk   r   )r   rI  r   s      r   r   zBaseTrainer.label_loss_items  s     (2'=
#KF8Kr   c                 @    | j                   d   | j                  _        y)z/Set or update model parameters before training.r%  N)re   rc   r%  r   s    r   r   z BaseTrainer.set_model_attributes  s    99W-

r   c                      y)z-Build target tensors for training YOLO model.Nr   )r   rf  targetss      r   build_targetszBaseTrainer.build_targets      r   c                      y)z-Return a string describing training progress. r   r   s    r   rB  zBaseTrainer.progress_string  s    r   c                      y)z+Plot training samples during YOLO training.Nr   )r   r]   rc  s      r   rS  z!BaseTrainer.plot_training_samples  r  r   c                      y)z$Plot training labels for YOLO model.Nr   r   s    r   r   z BaseTrainer.plot_training_labels  r  r   c           	         t        |j                               t        |j                               }}t        |      dz   }t	        j                         | j
                  z
  }| j                  j                  j                  dd       | j                  j                         rdn'd|z  t        ddg|z         z  j                  d      d	z   }t        | j                  d
d      5 }|j                  |d|z  t        | j                  dz   |g|z         z  j                  d      z   d	z          ddd       y# 1 sw Y   yxY w)z$Save training metrics to a CSV file.rE   Tr>   r  z%s,r7  r0  rF   r+  azutf-8)encodingz%.6g,r9   N)ry   r   valuesrv   r0  r2  rn   parentrW   ro   rx   rstripopenwriter7  )r   rQ   r   valsr  rk  sr  s           r   rY  zBaseTrainer.save_metrics  s   ',,.)40@+AdL1IIK$///dT:((//#B519ugv=NQU=U7V+V*^*^_b*cfj*j$((C'2 	^aGGA1udjj1na-@4-G'HHPPQTUUX\\]	^ 	^ 	^s   %AD66D?c                 F    t        | j                  | j                         y)zPlot metrics from a CSV file.)r   on_plotN)r%   rn   r  r   s    r   r\  zBaseTrainer.plot_metrics  s    $((DLL9r   c                 b    t        |      }|t        j                         d| j                  |<   y)z2Register plots (e.g. to be consumed in callbacks).)re   	timestampN)r   r0  rR   )r   rU   re   paths       r   r  zBaseTrainer.on_plot  s$    Dz$(tyy{C

4r   c                 8   i }| j                   | j                  fD ]  }|j                         s|| j                   u rt        |      }.|| j                  u s=d}t        |||v r|||   ind       t	        j
                  d| d       | j                  j                  | j                  j                  _        d| j                  j                  _	        | j                  |      | _
        | j                  j                  dd       | j                  d	        y)
zHPerform final evaluation and validation for object detection YOLO model.r  N)r  z
Validating z...F)rc   rj   r*  )rZ   r[   ro   r/   r   r   rJ   rR   rP   r   rQ   rH   r   )r   r  r  r   s       r   r[  zBaseTrainer.final_eval  s    DII% 	;Axxz		>*1-D$))^'A#AqDy47|dSKK-s# 6704		DNN''-27DNN''/#'>>>#:DLLL$$Y5&&'9:	;r   c                    | j                   j                  }|r	 t        |t        t        f      xr t	        |      j                         }t	        |rt        |      n	t                     }t        |      d   j                   }t        |d   t              s5t	        |d         j                         s| j                   j                  |d<   d}t        |      | _         t        |      x| j                   _        | j                   _        dD ]!  }||v st        | j                   |||          # 	 || _        y|| _        y# t        $ r}t        d      |d}~ww xY w)zCCheck if resume checkpoint exists and update arguments accordingly.r   re   T)r   r]   rL   r4  zzResume checkpoint not found. Please pass a valid checkpoint to resume from, i.e. 'yolo train resume model=path/to/last.pt'N)rJ   rp   ru   rM   r   ro   r   r$   r   r   re   r   rc   setattrr   FileNotFoundError)r   r   rp   ro   rZ   	ckpt_argsr   r   s           r   rK   zBaseTrainer.check_resume  s2   !!#FS$K8RT&\=P=P=R&Jv.n>NO ,D1!499	!)F"3T:4	RXHYCZCaCaCc(,		If%#I.	58Y>		$))"2 <A I~		1il;< f  'E s   C7E  E   	E	EEc                    |j                  d      | j                  j                  |d          |j                  d      | j                  j                  |d          | j                  r|j                  d      rrt        | j                        | _        | j                  j                  j                  |d   j                         j                                |d   | j                  _	        |j                  dd      | _
        y)z>Load optimizer, scaler, EMA, and best_fitness from checkpoint.r   Nr   rf   r  ri   r  )r   r   load_state_dictr   rf   r(   rc   floatr  r  ri   )r   r  s     r   _load_checkpoint_statez"BaseTrainer._load_checkpoint_state2  s    88K ,NN**4+<=88H)KK''X788

+DHHHLL((e):):)<)G)G)IJ#IDHH HH^S9r   c                    | j                   duxr | j                   j                          }| j                  duxr  t        j                  | j                         }| j                  xr  | j                  dkD  xr | j                  dk(  }t
        dv xr
 |xr |xs |}|rdn|rdnd}t
        dk7  r)t
        dk(  r|ndg}t        j                  |d       |d   }|sy|| j                  k(  s| j                  j                         st        j                  | d	       y| xj                  d
z  c_        | j                  dkD  rt        d| j                   d      t        j                  | d| j                   d       | j                          t!        | j                        \  }}	|	d   j#                         j%                         }
t'        d |
j)                         D              st        d| j                   d      t+        | j,                        j/                  |
       | j1                  |	       ~	~
|d
z
  | j2                  _        y)zUDetect and recover from NaN/Inf loss and fitness collapse by loading last checkpoint.Nr   r<   zLoss NaN/InfzFitness NaN/InfzFitness collapser=   Fz- detected but can not recover from last.pt...r9      z#Training failed: NaN persisted for z epochsz detected (attempt z/3), recovering from last.pt...rf   c              3      K   | ]B  }t        |t        j                        st        j                  |      j	                          D y wr   )ru   rz   Tensorisfiniteall)r   r  s     r   r   z3BaseTrainer._handle_nan_recovery.<locals>.<genexpr>U  s3     fq*UVX]XdXdJe5>>!$((*fs
   A
(A
zCheckpoint z" is corrupted with NaN/Inf weightsT)rk   r  rj   rC  ri   r   r   rN  r`   rZ   ro   r   r   rs   r  r<  r   r  r  r  r  r2   rc   r  r  rh   r
  )r   r7  loss_nanfitness_nanfitness_collapse	corruptedreasonrg  r  r  	ema_states              r   rX  z BaseTrainer._handle_nan_recovery>  s
   99D(E1C1C1E-Ell$.Pr{{4<<7P3P,,\1B1BQ1F\4<<[\K\GOVVk6UEU	#+k1BWi2:+/19i$?N&&~q9&q)ID$$$DII,<,<,>NNfX%RST""a'"%%)!DTE_E_D``ghii&!4T5O5O4PPopq!$)),4K%%'224	fI4D4D4FffTYYK7YZ[[TZZ 00;##D))$)AI!r   c           	         || j                   sy|j                  dd      dz   }|dkD  sBJ | j                  j                   d| j                   d| j                  j                   d       t        j                  d	| j                  j                   d
|dz    d| j                   d       | j                  |k  rMt        j                  | j                   d|d    d| j                   d       | xj                  |d   z  c_        | j                  |       || _        || j                  | j                  j                  z
  kD  r| j                          yy)z7Resume YOLO training from given epoch and best fitness.Nr7  r=   r9   r   z training to zf epochs is finished, nothing to resume.
Start a new training without resuming, i.e. 'yolo train model=r   zResuming training z from epoch z to z total epochsz has been trained for z epochs. Fine-tuning for z more epochs.)rp   r   rJ   rc   r_   r   r   r  r`   r4  r@  )r   r  r`   s      r   r	  zBaseTrainer.resume_training]  sB   <t{{hhw+a/Q 	
yy}T[[M :MMQYY__L]]^`	
 	((9kTUoEVVZ[_[f[fZggtuv;;$KK::,4T']OC\]a]h]h\iivw KK4=(K##D)&$++		(>(>>?))+ @r   c                 X   t        | j                  j                  d      rd| j                  j                  _        t        | j                  j                  d      rOt	        j
                  d       | j                  j                  j                  t        | j                               yy)z5Update dataloaders to stop using mosaic augmentation.mosaicFr4  zClosing dataloader mosaic)hypN)	r   r   r  r  r   r   r4  r   rJ   r   s    r   r@  z$BaseTrainer._close_dataloader_mosaicq  sy    4$$,,h7/4D%%,4$$,,n=KK34%%22tDII2G >r   c                    g g g f}t        d t        j                  j                         D              }|dk(  rt	        j
                  t        d       d| j                  j                   d| j                  j                   d       | j                  j                  dd      }	t        d	d
|	z   z  d      }
|dkD  rdnd|
df\  }}}d| j                  _        |j                         D ]|  \  }}|j                  d      D ]b  \  }}|r| d| n|}d|v r|d   j!                  |       *t#        ||      sd|v r|d   j!                  |       O|d   j!                  |       d ~ h d}|D ci c]  }|j%                         | c}j                  |j%                               }|dv r/ t'        t(        |t(        j*                        |d   ||dfd      }nU|dk(  rt)        j,                  |d   ||      }n4|dk(  rt)        j.                  |d   ||d !      }nt1        d"| d#| d$      |j3                  |d   |d%       |j3                  |d   dd%       t	        j
                  t        d       d&t5        |      j6                   d'| d(| d)t9        |d          d*t9        |d          d+| d,t9        |d          d-       |S c c}w ).a  
        Construct an optimizer for the given model.

        Args:
            model (torch.nn.Module): The model for which to build an optimizer.
            name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
                based on the number of iterations.
            lr (float, optional): The learning rate for the optimizer.
            momentum (float, optional): The momentum factor for the optimizer.
            decay (float, optional): The weight decay for the optimizer.
            iterations (float, optional): The number of iterations, which determines the optimizer if
                name is 'auto'.

        Returns:
            (torch.optim.Optimizer): The constructed optimizer.
        c              3   0   K   | ]  \  }}d |v s|  yw)NormNr   )r   r   r  s      r   r   z.BaseTrainer.build_optimizer.<locals>.<genexpr>  s     DA!1Ds   autoz
optimizer:z' 'optimizer=auto' found, ignoring 'lr0=z' and 'momentum=zJ' and determining best 'optimizer', 'lr0' and 'momentum' automatically... r$  
   {Gz?      i'  )SGDr  ?AdamWr  r  F)recurser   biasrE   logit_scaler9   r   >   r  Adamr	  r  NAdamRAdamAdamaxRMSProp>   r  r  r  r  r  g+?)r   betasr  r  )r   r   r  T)r   r   nesterovzOptimizer 'z,' not found in list of available optimizers zX. Request support for addition optimizers at https://github.com/ultralytics/ultralytics.)paramsr  r   z(lr=z, momentum=z) with parameter groups z weight(decay=0.0), z weight(decay=z), z bias(decay=0.0))rx   r	   __dict__itemsr   r   r   rJ   r  r   re   r   r   rF  r  r   r   ru   lowergetattrr
   r  RMSpropr  r  add_param_groupra   __name__rv   )r   rc   rU   r   r   r   r   gbnr$  lr_fitmodule_namemodule
param_nameparamfullname
optimizersr   r   s                      r   r  zBaseTrainer.build_optimizery  s   " BJD!2!2!4DD6>KKL)* +!!%/?		@R@R?S TWX
 tR(B9B/3F7AE7I!3PWY_adOeD"h'*DII$#(#6#6#8 		'K%+%<%<U%<%K '!
E<Gk]!J<8ZX%aDKK&+}/HaDKK&aDKK&'		' ]
&01	1155djjlC@@8tUZZ81"XW\L]lopIYadrHEIU]		!A$24PI%dV#OPZ| \i i 
 	!!QqT5"IJ!!QqT3"GH%&aY(@(@'AbTU]T^^v1Q4yk-c!A$i[ugSQTUVWXUYQZP[[km	
 ' 2s   .K)r   )Fr   )NNT)   r   r   )r   N)Nr   )r	  gMbP?r  gh㈵>g     j@)/r"  
__module____qualname____doc__r   r   rM   r   r   r   r   r   r   r  r   r   rQ  r  rV  r  r<  rZ  rd   r   rM  rH  rW  r  r   r   r  r   r   r  rB  rS  r   rY  r\  r  r[  rK   r  rX  r	  r@  r  r   r   r   r4   r4   ?   s	   0d '$4 T<l/# /+# +3 
6X

]6~h'T
X%u %'OR"H(( YWXWL.
^:D
;"@
:>,(H=r   r4   )Hr/  r{  r  rN   r   r0  r8  r   r   r   r   pathlibr   r  rC  rz   r   r   r	   r
   ultralyticsr   ultralytics.cfgr   r   ultralytics.data.utilsr   r   ultralytics.nn.tasksr   ultralytics.utilsr   r   r   r   r   r   r   r   r   r   r   ultralytics.utils.autobatchr   ultralytics.utils.checksr   r   r   r    r!   ultralytics.utils.distr"   r#   ultralytics.utils.filesr$   ultralytics.utils.plottingr%   ultralytics.utils.torch_utilsr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r4   r   r   r   <module>r<     s    
  	     (    %  # 1 G 0    ? o o D 2 3   "w wr   