@@ -836,14 +836,12 @@ def pretrain(

    # Model, optimizer, and learning rate.
    timers('model-and-optimizer-setup', log_level=0).start(barrier=True)
-    app_metrics['app_build_optimizer_start_time'] = one_logger_utils.get_timestamp_in_ms()
    model, optimizer, opt_param_scheduler = setup_model_and_optimizer(
        model_provider, model_type, checkpointing_context=checkpointing_context
    )

    timers('model-and-optimizer-setup').stop()
    print_datetime('after model, optimizer, and learning rate ' 'scheduler are built')
-    app_metrics['app_build_optimizer_finish_time'] = one_logger_utils.get_timestamp_in_ms()
    config = get_model_config(model[0])

    # Data stuff.
@@ -1234,6 +1232,7 @@ def setup_model_and_optimizer(
    model = get_model(model_provider_func, model_type)
    unwrapped_model = unwrap_model(model)

+    one_logger and one_logger.log_metrics({"app_build_optimizer_start_time": one_logger_utils.get_timestamp_in_ms()})
    kwargs = {}
    for f in dataclasses.fields(OptimizerConfig):
        if hasattr(args, f.name):
@@ -1252,6 +1251,7 @@ def setup_model_and_optimizer(
        default_skip_embedding_weight_decay=args.embedding_init_method_std is not None,
    )
    opt_param_scheduler = get_optimizer_param_scheduler(optimizer)
+    one_logger and one_logger.log_metrics({"app_build_optimizer_finish_time": one_logger_utils.get_timestamp_in_ms()})

    if args.moe_use_upcycling:
        torch.distributed.barrier()
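
The added `one_logger and one_logger.log_metrics({...})` lines rely on Python's short-circuiting `and`: when `one_logger` is `None` (metrics logging disabled), the `log_metrics()` call is skipped entirely. Below is a minimal standalone sketch of that guard pattern, using a hypothetical stub logger and a local timestamp helper in place of Megatron-LM's `one_logger` client and `one_logger_utils`; it is not the project's actual implementation.

import time

class _StubOneLogger:
    # Hypothetical stand-in for the one_logger client; only the
    # log_metrics() call used in the diff above is modeled here.
    def log_metrics(self, metrics):
        print(metrics)

def _get_timestamp_in_ms():
    # Local stand-in for one_logger_utils.get_timestamp_in_ms().
    return round(time.time() * 1000)

one_logger = _StubOneLogger()  # set to None to disable metrics logging

# Short-circuit guard: if one_logger is None, log_metrics() is never called.
one_logger and one_logger.log_metrics(
    {"app_build_optimizer_start_time": _get_timestamp_in_ms()}
)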