chatlawv1 / json_convert.py
teachyourselfcoding's picture
Upload 245 files
fa6856c
import json
# 读取JSON文件
def read_json_file(file_path):
    """Parse the JSON document stored at ``file_path`` and return it.

    The file is opened in text mode, decoded as UTF-8, and its whole
    content is handed to the ``json`` parser.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return json.loads(handle.read())
# 写入JSON文件
def write_json_file(file_path, data):
    """Serialize ``data`` as JSON and store it at ``file_path``.

    The target is opened for writing (replacing any existing content)
    with UTF-8 encoding. Non-ASCII characters are written verbatim
    rather than escaped, and the output is indented by two spaces.
    """
    dump_options = {"ensure_ascii": False, "indent": 2}
    with open(file_path, mode="w", encoding="utf-8") as sink:
        json.dump(data, sink, **dump_options)
def join_turns(value):
    """Return ``value`` as a single space-separated string.

    A value that is already a string is returned unchanged; joining it
    would put a space between every character. Any other value is passed
    to ``" ".join`` and is presumably a list of strings (not verifiable
    from this file).
    """
    if isinstance(value, str):
        return value
    return " ".join(value)


def convert_jsonl(input_path, output_path):
    """Rewrite a JSONL file with flattened ``input`` and ``output`` fields.

    Each non-blank line of ``input_path`` is parsed as a JSON object, its
    ``"input"`` and ``"output"`` values are collapsed with ``join_turns``,
    and the object is written to ``output_path`` as one JSON line with
    non-ASCII characters left unescaped. Other keys are carried over
    untouched.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``"input"`` or ``"output"``.
    """
    with open(input_path, "r", encoding="utf-8") as source, \
            open(output_path, "w", encoding="utf-8") as target:
        for raw_line in source:
            # Blank lines (e.g. a trailing newline) are not records; skip
            # them instead of letting json.loads fail on an empty string.
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)
            record["input"] = join_turns(record["input"])
            record["output"] = join_turns(record["output"])
            target.write(json.dumps(record, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    # Source and destination JSONL paths are hard-coded for this one-off
    # conversion; the output is written next to the input file.
    input_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50k.jsonl"
    output_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50knew.jsonl"
    convert_jsonl(input_file_path, output_file_path)
# # 读取原始JSON文件
# with open(input_file_path, 'r', encoding='utf-8') as json_file:
# data = json.load(json_file)
# # 将数据写入JSONL文件
# with open(output_file_path, 'w', encoding='utf-8') as jsonl_file:
# for item in data:
# jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')
# # 读取JSON文件
# original_data = read_json_file(input_file_path)
# # # 进行转换,去掉多余的[]
# # corrected_data = [dialog[0] for dialog in original_data]
# processed_data = []
# for item in original_data:
# processed_item = {
# "input": item["instruction"],
# "output": item["output"]
# }
# processed_data.append(processed_item)
# # 保存转换后的数据到新的JSON文件
# write_json_file(output_file_path, processed_data)
# # print("数据转换完成,并保存到corrected_data.json文件中。")