# Import custom modules
"""
Train the multi-task Vision Transformer (ViT) model for keypoint detection,
pose estimation, and depth estimation.

Parameters:
- config (Config): configuration instance containing the model, training, and path settings.
"""
# Set up the device
print(f"Using device: {device}")

# Set up the logger
logger.info("Starting Feature Extractor training")
logger.info(f"Configuration: {config.to_dict()}")

# Ensure the checkpoint directory exists

# Get the data loader
logger.info(f"Data loader initialized, number of batches: {len(data_loader)}")

# Initialize the model
logger.info("Model initialized and moved to device")

# Define the loss function
logger.info("Loss function initialized")

# Define the optimizer
logger.info(f"Optimizer initialized: {config.training.optimizer_type}")

# Load a checkpoint (if one exists)
logger.info(f"Model loaded from checkpoint, starting epoch: {start_epoch}, best loss: {best_loss:.4f}")

# Set up the learning rate scheduler (optional)

# Start training
logger.info(f"Starting epoch {epoch + 1}/{config.training.num_epochs}")

# Fetch the data and move it to the device
# Assume every sample performs all tasks; task_ids 0, 1, and 2 denote the different tasks.
# For simplicity, each sample is assumed to correspond to a single task ID; adjust as needed.
# For example, if each sample performs several tasks at once, adapt the model and the training procedure.
# All samples are assigned to task 0

# Forward pass
# Compute the loss
# Backward pass and optimization
# Accumulate the loss
# Update the progress bar description

# Adjust the learning rate

# Compute the average loss
logger.info(f"Epoch {epoch + 1} finished, average loss: {avg_loss:.4f}")

# Save the best model
logger.info(f"New best model saved, epoch {epoch + 1}, loss: {best_loss:.4f}")

# Save a checkpoint at the end of every epoch
logger.info(f"Checkpoint saved: {checkpoint_filename}")

logger.info("Feature Extractor training completed")
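# The comments above outline the Feature Extractor training loop, but the loop body
# itself did not survive. Below is a minimal, hypothetical sketch of one training
# epoch under these assumptions: a PyTorch model exposing model(images, task_ids),
# a combined multi-task criterion(outputs, targets), batches shaped as
# {"image": ..., "targets": {...}}, and the epoch-average loss logged above under
# the assumed name avg_loss. None of these names come from the original implementation.
import torch
from tqdm import tqdm


def train_one_epoch(model, data_loader, criterion, optimizer, scheduler,
                    device, epoch, num_epochs):
    model.train()
    running_loss = 0.0
    progress = tqdm(data_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")
    for batch in progress:
        # Fetch the data and move it to the device (batch keys are assumptions)
        images = batch["image"].to(device)
        targets = {k: v.to(device) for k, v in batch["targets"].items()}
        # All samples are assigned to task 0, as in the comments above
        task_ids = torch.zeros(images.size(0), dtype=torch.long, device=device)

        # Forward pass and combined multi-task loss
        outputs = model(images, task_ids)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the loss and update the progress bar description
        running_loss += loss.item()
        progress.set_postfix(loss=f"{loss.item():.4f}")

    # Adjust the learning rate once per epoch (optional scheduler)
    if scheduler is not None:
        scheduler.step()

    # Return the average loss for best-model selection and checkpointing
    return running_loss / max(len(data_loader), 1)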
""" # 设置设备 # Setting up the device print(f"使用设备: {device}") print(f"Utilization equipment: {device}") # 设置日志记录器 # Setting up the logger logger.info("开始训练 Semantic Encoder 模型") logger.info("Start Training Semantic Encoder Models") logger.info(f"配置: {config.to_dict()}") logger.info(f"configure: {config.to_dict()}") # 确保检查点目录存在 # Ensure checkpoint directory exists # 获取数据加载器 # Get Data Loader logger.info(f"数据加载器已初始化,批次数: {len(data_loader)}") logger.info(f "Data loader initialized, batch count: {len(data_loader)}") # 初始化模型 # Initialize the model logger.info("模型已初始化并移动到设备") logger.info("Model initialized and moved to device") # 定义损失函数 # Define the loss function # 由于 CombinedLoss 在 semantic_encoder.py 中未完全集成,这里直接使用 SemanticEncoder 的 compute_loss # Since CombinedLoss is not fully integrated in semantic_encoder.py, we use SemanticEncoder's compute_loss directly here # 并结合对抗性损失的实现 # and combines it with the adversarial loss implementation logger.info("损失函数已初始化") logger.info(“Loss function initialized”) # 定义优化器 # Define the optimizer logger.info("优化器已初始化: Adam") logger.info("Optimizer initialized: Adam") # 加载检查点(如果存在) # Load checkpoints (if present) logger.info(f"从检查点加载模型,开始训练的纪元数: {start_epoch}, 最佳损失: {best_loss:.4f}") logger.info(f “Loading model from checkpoint, number of epochs to start training: {start_epoch}, best loss: {best_loss:.4f}”) # 设置学习率调度器(可选) # Set the learning rate scheduler (optional) # 开始训练 # Start training logger.info(f"开始训练纪元 {epoch + 1}/{config.training.num_epochs}") logger.info(f"Beginning of Training Era {epoch + 1}/{config.training.num_epochs}") # 获取数据并移动到设备 # Fetch data and move it to the device # 训练判别器 # Training discriminators # 真实样本 # Real samples # 生成样本 # Generate samples # 判别器总损失 # Total discriminator losses # 训练生成器(CVAE) # Training generator (CVAE) # 生成样本重新判别 # Generate samples for reclassification # 对抗性损失,目标是使D_fake接近1 # Adversarial loss with the goal of bringing D_fake closer to 1 # CVAE的损失 # CVAE losses # 综合损失 # Comprehensive losses # 累积损失 # Accumulated losses # 更新进度条描述 # Update the progress bar description # 调整学习率 # Adjusted learning rate # 计算平均损失 # Calculate average loss logger.info(f"纪元 {epoch + 1} 结束,平均损失: logger.info(f"millennium {epoch + 1} close,average loss: # 保存最佳模型 # Preservation of the best models logger.info(f"新最佳模型已保存,纪元 {epoch + 1}, 损失: {best_loss:.4f}") logger.info(f “New best model saved, epoch {epoch + 1}, loss: {best_loss:.4f}”) # 每个纪元结束后保存一个检查点 # Save a checkpoint at the end of each epoch logger.info(f"检查点已保存: {checkpoint_filename}") logger.info(f"Checkpoint saved: {checkpoint_filename}") logger.info("Semantic Encoder 训练完成") logger.info("Semantic Encoder Training completed") """ 使用训练好的语义编码器和特征提取器,从原始2D图像生成3D模型。 参数: - config (Config): 配置实例,包含模型、训练和路径配置。 """ """ Generates a 3D model from a raw 2D image using a trained semantic coder and feature extractor. Parameters. - config (Config): configuration instance with model, training and path configuration. 
""" # 设置设备 # setup device print(f"使用设备: {device}") print(f "Using device: {device}") # 设置日志记录器 # Set up the logger logger.info("开始推理") logger.info("Start reasoning") logger.info(f"配置: {config.to_dict()}") logger.info(f"configure: {config.to_dict()}") # 加载模型 # Load model # 加载特征提取器 # Load the feature extractor logger.info(f"特征提取器模型已加载: {feature_extractor_checkpoint}") logger.info(f"Feature extractor model loaded: {feature_extractor_checkpoint}") logger.error(f"特征提取器检查点不存在: logger.error(f"Feature extractor checkpoint does not exist: # 加载语义编码器 # Load semantic encoder logger.info(f"语义编码器模型已加载: {semantic_encoder_checkpoint}") logger.info(f"Semantic coder model loaded: {semantic_encoder_checkpoint}") logger.error(f"语义编码器检查点不存在: logger.error(f"Semantic coder checkpoint does not exist: # 获取推理数据加载器 # Get inference data loader batch_size=1, # 推理时通常使用批大小为1 batch_size=1, # Reasoning is usually done with a batch size of 1 logger.info(f"推理数据加载器已初始化,样本数: {len(infer_data_loader)}") logger.info(f"The inference data loader is initialized and the number of samples: {len(infer_data_loader)}") # 初始化3D重建所需的模型(ViT-扩散模型等) # Initialize models needed for 3D reconstruction (ViT-diffusion models, etc.) # 这里假设有一个名为 ViTDiffusionModel 的模型,您需要根据具体实现进行调整 # Assume here that there is a model called ViTDiffusionModel that you need to adapt to your specific implementation # from vit_diffusion import ViTDiffusionModel # 假设您有一个 ViT-扩散模型 # from vit_diffusion import ViTDiffusionModel # Assuming you have a ViT-diffusion model # logger.info(f"ViT-扩散模型已加载: {vit_diffusion_checkpoint}") # logger.info(f"ViT-diffusion model loaded: {vit_diffusion_checkpoint}") # logger.error(f"ViT-扩散模型检查点不存在: {vit_diffusion_checkpoint}") # logger.error(f"ViT-diffusion model checkpoints do not exist: {vit_diffusion_checkpoint}") # 进行推理 # Reasoning for batch in tqdm(infer_data_loader, desc="推理"): for batch in tqdm(infer_data_loader, desc="inference"): # 特征提取器前向传播 # Feature extractor forward propagation # 假设任务ID为0 # Assuming a task ID of 0 # 语义编码器前向传播 # Semantic coder forward propagation # 语义数据 y' 可以通过 y_pred_one_hot 传输到接收端 # Semantic data y' can be transmitted to the receiver via y_pred_one_hot # 假设在本地进行传输模拟 # Transmission simulation is assumed to be done locally # 接收端解码 # Decoding at the receiver # 这里 recon_x_decoded 为重构的特征,可以进一步用于3D重建 # Here recon_x_decoded is a reconstructed feature that can be further used for 3D reconstruction # 使用 ViT-扩散模型进行3D重建 # 3D reconstruction using the ViT-Diffusion Model # 假设 ViTDiffusionModel 已经定义并加载 # Assuming ViTDiffusionModel is defined and loaded # 使用Marching Cubes算法生成网格模型 # Use the Marching Cubes algorithm to generate mesh models # 由于 ViTDiffusionModel 未定义,这里仅示例化流程 # As ViTDiffusionModel is undefined, only the process is exemplified here logger.info("推理步骤完成,但ViT-扩散模型和3D重建尚未实现。") logger.info("Inference step completed, but ViT-diffusion modeling and 3D reconstruction not yet realized。") # 示例保存生成的语义标签和重构特征 # Example to save generated semantic tags and reconstructed features logger.info(f"推理结果已保存到 {output_dir}") logger.info(f"The results of the reasoning have been saved to {output_dir}") logger.info("推理完成") logger.info("Reasoning complete") choices=['train_feature_extractor', 'train_semantic_encoder', 'infer'], help="选择要执行的操作:训练特征提取器(train_feature_extractor)、训练语义编码器(train_semantic_encoder)、推理(infer)") parser.add_argument('--config', type=str, default='config.json', help="配置文件路径,默认为 'config.json'") choices=['train_feature_extractor', 'train_semantic_encoder', 'infer'], help="Select the action to perform: train the feature 
        choices=['train_feature_extractor', 'train_semantic_encoder', 'infer'],
        help="Action to perform: train the feature extractor (train_feature_extractor), "
             "train the semantic encoder (train_semantic_encoder), or run inference (infer)")
parser.add_argument('--config', type=str, default='config.json',
                    help="Path to the configuration file, defaults to 'config.json'")

# Load the configuration
raise FileNotFoundError(f"Configuration file does not exist: {args.config}")

# Save the current configuration (optional)

# Execute the corresponding function based on the selected action
raise ValueError(f"Unknown action: {args.action}")
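# The surviving fragments above (the action choices, the --config argument, and the
# FileNotFoundError / ValueError messages) suggest a main entry point along the
# following lines. This is a hypothetical reconstruction: the dispatched function
# names are taken from the action choices, the --action flag spelling is inferred
# from args.action, and Config.from_json is an assumed helper rather than a
# confirmed API.
import argparse
import os

# from config import Config                              # assumed project imports
# from train import train_feature_extractor, train_semantic_encoder, infer

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Multi-task ViT training and inference")
    parser.add_argument('--action', type=str, required=True,
                        choices=['train_feature_extractor', 'train_semantic_encoder', 'infer'],
                        help="Action to perform: train the feature extractor (train_feature_extractor), "
                             "train the semantic encoder (train_semantic_encoder), or run inference (infer)")
    parser.add_argument('--config', type=str, default='config.json',
                        help="Path to the configuration file, defaults to 'config.json'")
    args = parser.parse_args()

    # Load the configuration
    if not os.path.exists(args.config):
        raise FileNotFoundError(f"Configuration file does not exist: {args.config}")
    config = Config.from_json(args.config)  # assumed constructor; adjust to the real Config API

    # Execute the corresponding function based on the selected action
    if args.action == 'train_feature_extractor':
        train_feature_extractor(config)
    elif args.action == 'train_semantic_encoder':
        train_semantic_encoder(config)
    elif args.action == 'infer':
        infer(config)
    else:
        raise ValueError(f"Unknown action: {args.action}")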