Python 将模型配置从allennlp 0.9.0更新为1.3.0
我正在尝试使用Udify库对依赖项解析的预训练多语言BERT模型进行微调。这个库使用了allennlp==0.9.0,而我需要使用allennlp==1.3.0,并且我正在尝试为此更新代码。在对配置文件进行一些更改之后,我发现模型的forward()方法中的输入有问题。也就是说,
tokens
被传递,在我的例子中,它是一个如下所示的字典:
tokens = {
"bert": {
"bert": tensor(...),
"bert-offsets": tensor(...),
"bert-type-ids": tensor(...),
"mask": tensor(...)
},
"tokens": {
"tokens": tensor(...)
}
}
错误发生在模型的forward()方法处理上述 tokens 输入时。
适用于allennlp 0.9.0的相应配置如下:
{
"dataset_reader": {
"lazy": false,
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"bert": {
"type": "udify-bert-pretrained",
"pretrained_model": "config/archive/bert-base-multilingual-cased/vocab.txt",
"do_lowercase": false,
"use_starting_offsets": true
}
}
},
"train_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-train.conllu",
"validation_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-dev.conllu",
"test_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-test.conllu",
"vocabulary": {
"directory_path": "data/vocab/english_only_expmix4/vocabulary"
},
"model": {
"word_dropout": 0.2,
"mix_embedding": 12,
"layer_dropout": 0.1,
"tasks": ["deps"],
"text_field_embedder": {
"type": "udify_embedder",
"dropout": 0.5,
"allow_unmatched_keys": true,
"embedder_to_indexer_map": {
"bert": ["bert", "bert-offsets"]
},
"token_embedders": {
"bert": {
"type": "udify-bert-pretrained",
"pretrained_model": "bert-base-multilingual-cased",
"requires_grad": true,
"dropout": 0.15,
"layer_dropout": 0.1,
"combine_layers": "all"
}
}
},
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"decoders": {
"upos": {
"encoder": {
"type": "pass_through",
"input_dim": 768
}
},
"feats": {
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"adaptive": true
},
"lemmas": {
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"adaptive": true
},
"deps": {
"tag_representation_dim": 256,
"arc_representation_dim": 768,
"encoder": {
"type": "pass_through",
"input_dim": 768
}
}
}
},
"iterator": {
"batch_size": 16
},
"trainer": {
"num_epochs": 5,
"patience": 40,
"num_serialized_models_to_keep": 1,
"should_log_learning_rate": true,
"summary_interval": 100,
"optimizer": {
"type": "bert_adam",
"b1": 0.9,
"b2": 0.99,
"weight_decay": 0.01,
"lr": 1e-3,
"parameter_groups": [
[["^text_field_embedder.*.bert_model.embeddings",
"^text_field_embedder.*.bert_model.encoder"], {}],
[["^text_field_embedder.*._scalar_mix",
"^text_field_embedder.*.pooler",
"^scalar_mix",
"^decoders",
"^shared_encoder"], {}]
]
},
"learning_rate_scheduler": {
"type": "ulmfit_sqrt",
"model_size": 1,
"warmup_steps": 392,
"start_step": 392,
"factor": 5.0,
"gradual_unfreezing": true,
"discriminative_fine_tuning": true,
"decay_factor": 0.04
}
},
"udify_replace": [
"dataset_reader.token_indexers",
"model.text_field_embedder",
"model.encoder",
"model.decoders.xpos",
"model.decoders.deps.encoder",
"model.decoders.upos.encoder",
"model.decoders.feats.encoder",
"model.decoders.lemmas.encoder",
"trainer.learning_rate_scheduler",
"trainer.optimizer"
]
}
嗨,你找到解决办法了吗?更改配置后,我出现了此错误:“
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self
”
{
"dataset_reader": {
"lazy": false,
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"bert": {
"type": "udify-bert-pretrained",
"pretrained_model": "config/archive/bert-base-multilingual-cased/vocab.txt",
"do_lowercase": false,
"use_starting_offsets": true
}
}
},
"train_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-train.conllu",
"validation_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-dev.conllu",
"test_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-test.conllu",
"vocabulary": {
"type": "from_files",
"directory": "data/concat-exp-mix/vocab/concat-exp-mix/vocabulary/"
},
"model": {
"word_dropout": 0.2,
"mix_embedding": 12,
"layer_dropout": 0.1,
"tasks": ["deps"],
"pretrained_model": "bert-base-multilingual-cased",
"text_field_embedder": {
"type": "udify_embedder",
"dropout": 0.5,
"allow_unmatched_keys": true,
"embedder_to_indexer_map": {
"bert": ["bert", "bert-offsets"]
},
"token_embedders": {
"bert": {
"type": "udify-bert-pretrained",
"pretrained_model": "bert-base-multilingual-cased",
"requires_grad": true,
"dropout": 0.15,
"layer_dropout": 0.1,
"combine_layers": "all"
}
}
},
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"decoders": {
"upos": {
"encoder": {
"type": "pass_through",
"input_dim": 768
}
},
"feats": {
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"adaptive": true
},
"lemmas": {
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"adaptive": true
},
"deps": {
"tag_representation_dim": 256,
"arc_representation_dim": 768,
"encoder": {
"type": "pass_through",
"input_dim": 768
}
}
}
},
"data_loader": {
"batch_sampler":{
"batch_size": 16
}
},
"trainer": {
"num_epochs": 5,
"patience": 40,
"optimizer": {
"type": "adamw",
"betas": [0.9, 0.99],
"weight_decay": 0.01,
"lr": 1e-3,
"parameter_groups": [
[["^text_field_embedder.*.bert_model.embeddings",
"^text_field_embedder.*.bert_model.encoder"], {}],
[["^text_field_embedder.*._scalar_mix",
"^text_field_embedder.*.pooler",
"^scalar_mix",
"^decoders",
"^shared_encoder"], {}]
]
},
"learning_rate_scheduler": {
"type": "ulmfit_sqrt",
"model_size": 1,
"warmup_steps": 392,
"start_step": 392,
"factor": 5.0,
"gradual_unfreezing": true,
"discriminative_fine_tuning": true,
"decay_factor": 0.04
}
},
"udify_replace": [
"dataset_reader.token_indexers",
"model.text_field_embedder",
"model.encoder",
"model.decoders.xpos",
"model.decoders.deps.encoder",
"model.decoders.upos.encoder",
"model.decoders.feats.encoder",
"model.decoders.lemmas.encoder",
"trainer.learning_rate_scheduler",
"trainer.optimizer"
]
}
{
"dataset_reader": {
"lazy": false,
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"bert": {
"type": "udify-bert-pretrained",
"pretrained_model": "config/archive/bert-base-multilingual-cased/vocab.txt",
"do_lowercase": false,
"use_starting_offsets": true
}
}
},
"train_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-train.conllu",
"validation_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-dev.conllu",
"test_data_path": "data/ud-treebanks-v2.3/UD_English-EWT/en_ewt-ud-test.conllu",
"vocabulary": {
"directory_path": "data/vocab/english_only_expmix4/vocabulary"
},
"model": {
"word_dropout": 0.2,
"mix_embedding": 12,
"layer_dropout": 0.1,
"tasks": ["deps"],
"text_field_embedder": {
"type": "udify_embedder",
"dropout": 0.5,
"allow_unmatched_keys": true,
"embedder_to_indexer_map": {
"bert": ["bert", "bert-offsets"]
},
"token_embedders": {
"bert": {
"type": "udify-bert-pretrained",
"pretrained_model": "bert-base-multilingual-cased",
"requires_grad": true,
"dropout": 0.15,
"layer_dropout": 0.1,
"combine_layers": "all"
}
}
},
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"decoders": {
"upos": {
"encoder": {
"type": "pass_through",
"input_dim": 768
}
},
"feats": {
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"adaptive": true
},
"lemmas": {
"encoder": {
"type": "pass_through",
"input_dim": 768
},
"adaptive": true
},
"deps": {
"tag_representation_dim": 256,
"arc_representation_dim": 768,
"encoder": {
"type": "pass_through",
"input_dim": 768
}
}
}
},
"iterator": {
"batch_size": 16
},
"trainer": {
"num_epochs": 5,
"patience": 40,
"num_serialized_models_to_keep": 1,
"should_log_learning_rate": true,
"summary_interval": 100,
"optimizer": {
"type": "bert_adam",
"b1": 0.9,
"b2": 0.99,
"weight_decay": 0.01,
"lr": 1e-3,
"parameter_groups": [
[["^text_field_embedder.*.bert_model.embeddings",
"^text_field_embedder.*.bert_model.encoder"], {}],
[["^text_field_embedder.*._scalar_mix",
"^text_field_embedder.*.pooler",
"^scalar_mix",
"^decoders",
"^shared_encoder"], {}]
]
},
"learning_rate_scheduler": {
"type": "ulmfit_sqrt",
"model_size": 1,
"warmup_steps": 392,
"start_step": 392,
"factor": 5.0,
"gradual_unfreezing": true,
"discriminative_fine_tuning": true,
"decay_factor": 0.04
}
},
"udify_replace": [
"dataset_reader.token_indexers",
"model.text_field_embedder",
"model.encoder",
"model.decoders.xpos",
"model.decoders.deps.encoder",
"model.decoders.upos.encoder",
"model.decoders.feats.encoder",
"model.decoders.lemmas.encoder",
"trainer.learning_rate_scheduler",
"trainer.optimizer"
]
}