goldenteethCN committed
Commit 34baeaa
1 Parent(s): 1d240ba

fix: truncate finished output in stream_generate


[Fix] Fix the issue where, when the batch contains multiple inputs whose answers have different lengths, the model does not truncate the finished sequences.

Files changed (1)
  1. modeling_chatglm.py +1 -0
modeling_chatglm.py CHANGED
@@ -1404,6 +1404,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
                 next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
             else:
                 next_tokens = torch.argmax(probs, dim=-1)
+            next_tokens = torch.where(unfinished_sequences.bool(), next_tokens, eos_token_id[0])
 
             # update generated ids, model inputs, and length for next step
             input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
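
For context, a minimal sketch of what the added torch.where line does, using made-up tensor values (the variable names mirror stream_generate, but the numbers are illustrative, not the repository's actual state): once a sequence in the batch has emitted EOS, its freshly sampled token is overwritten with the EOS id, so shorter answers stay truncated while longer ones keep generating.

import torch

# Hypothetical state for a batch of three prompts mid-generation.
eos_token_id = torch.tensor([2])                 # assumed EOS token id
next_tokens = torch.tensor([512, 731, 85])       # tokens sampled this step
unfinished_sequences = torch.tensor([1, 0, 1])   # row 1 already hit EOS

# The added line: finished rows are forced back to EOS, so the second
# answer stays truncated while the other two continue.
next_tokens = torch.where(unfinished_sequences.bool(), next_tokens, eos_token_id[0])
print(next_tokens)  # tensor([512,   2,  85])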