Accelerate tokenizer: convert attention_mask and position_ids to lists before returning

#98
by lugim - opened
Files changed (1) hide show
  1. tokenization_chatglm.py +3 -0
tokenization_chatglm.py CHANGED
@@ -439,5 +439,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
439
  encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
440
  pad_width=[(0, 0), (difference, 0)])
441
  encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
 
 
 
442
 
443
  return encoded_inputs
 
439
  encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
440
  pad_width=[(0, 0), (difference, 0)])
441
  encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
442
+ encoded_inputs["attention_mask"] = encoded_inputs["attention_mask"].tolist()
443
+ encoded_inputs["position_ids"] = encoded_inputs["position_ids"].tolist()
444
+
445
 
446
  return encoded_inputs