BLIP2源码解析

2024-01-29

.
├── LAVIS
│   ├── CODEOWNERS
│   ├── CODE_OF_CONDUCT.md
│   ├── LICENSE.txt
│   ├── MANIFEST.in
│   ├── README.md
│   ├── SECURITY.md
│   ├── app
│   ├── assets
│   ├── bert-base-uncased
│   ├── bert.zip
│   ├── coco14
│   ├── dataset_card
│   ├── docs
│   ├── evaluate.py
│   ├── examples
│   ├── lavis
│   ├── output
│   ├── projects
│   ├── pyproject.toml
│   ├── requirements.txt
│   ├── run_scripts
│   ├── salesforce_lavis.egg-info
│   ├── setup.py
│   ├── tests
│   ├── train.py
│   └── vg
└── opt-2.7b
    ├── README.md
    ├── config.json
    ├── flax_model.msgpack
    ├── generation_config.json
    ├── merges.txt
    ├── pytorch_model.bin
    ├── special_tokens_map.json
    ├── tf_model.h5
    ├── tokenizer_config.json
    └── vocab.json

第一阶段

# Launch stage-1 pre-training with one worker process through the PyTorch distributed launcher.
# Backslashes keep this a single command; without them each line would run as a separate command.
python -m torch.distributed.run \
  --nproc_per_node=1 \
  --master_port=2564 \
  train.py \
  --cfg-path lavis/projects/blip2/train/pretrain_stage1.yaml

配置文件如下：lavis/projects/blip2/train/pretrain_stage1.yaml

# Model section of the stage-1 pre-training config.
# NOTE(review): per-key notes are inferred from the key names — confirm against the code that reads this file.
model:
  arch: blip2
  model_type: pretrain
  load_pretrained: False #pretrain from scratch
  # presumably keeps the vision transformer weights fixed during training — TODO confirm
  freeze_vit: True


# Datasets section: each entry configures an image processor and a text processor
# for the "train" split.
datasets:
  coco_caption:
    vis_processor:
        train:
          # NOTE(review): this entry names "blip2_image_train" while vg_caption below
          # names "blip_image_train" — confirm the difference is intentional.
          name: "blip2_image_train"
          image_size: 224
    text_processor:
        train:
          name: "blip_caption"
    # Disabled override; presumably points the builder at a local image directory — TODO confirm.
    # build_info:
    #     images:
    #         storage: '/sqy/LAVIS/coco14/images/'
  vg_caption: # name of the dataset builder
    vis_processor:
        train:
          name: "blip_image_train"
          image_size: 224
    text_processor:
        train:
          name: "blip_caption"
    # Disabled override; presumably points the builder at a local image directory — TODO confirm.
    # build_info:
    #     images:
    #         storage: '/sqy/LAVIS/vg/images/'

# Run section: task selection, optimisation schedule, batch sizes, output location
# and distributed settings for the stage-1 run.
run:
  task: image_text_pretrain
  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  # NOTE(review): exponent literals without a decimal point (1e-4) are typed
  # differently by different YAML loaders (float vs. string) — confirm the
  # loader used by train.py reads these three values as floats.
  init_lr: 1e-4
  min_lr: 1e-5
  warmup_lr: 1e-6

  weight_decay: 0.05
  max_epoch: 3
  batch_size_train: 2
  batch_size_eval: 1
  num_workers: 4
  warmup_steps: 5000

  seed: 42
  # Machine-specific absolute path; adjust for the local environment.
  output_dir: "/cyb/LAVIS/output/BLIP2/Pretrain_stage1"

  amp: True
  # null: no checkpoint path is given to resume from.
  resume_ckpt_path: null

  evaluate: False 
  train_splits: ["train"]

  device: "cuda"
  # Matches --nproc_per_node=1 in the launch command shown earlier in this document.
  world_size: 1
  # presumably tells the process group to read its rendezvous settings from
  # environment variables set by the launcher — TODO confirm
  dist_url: "env://"
  distributed: True

opt

targets:
tensor([[ -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, 2, 627, 8146, 16, 4204, 50118, -100],
[ -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, 2, 397, 3931, 8492, 19, 20744, 50118]],
device='cuda:0')

目录

第一阶段

opt