# Summarize a long document by chunking and summarizing parts.  Uses
# asynchronous calls to the API.  Adapted from LangChain
# [Map-Reduce summary](https://langchain.readthedocs.io/en/stable/_modules/langchain/chains/mapreduce.html).

import trio

from minichain import TemplatePrompt, show_log, start_chain


# Prompt that asks the LLM to summarize a chunk of text
# (template lives in summary.pmpt.tpl).
class SummaryPrompt(TemplatePrompt):
    template_file = "summary.pmpt.tpl"


def chunk(f, width=4000, overlap=800):
    """Split the document at path `f` into overlapping chunks.

    Consecutive chunks start `width` characters apart and each extends
    `overlap` characters past the next chunk's start, so content at a
    boundary appears in both neighbors.  Returns a list of
    {"text": ...} dicts covering the entire document (empty list for an
    empty document).
    """
    with open(f) as handle:
        # Collapse paragraph breaks so chunk boundaries waste less space.
        text = handle.read().replace("\n\n", "\n")
    chunks = []
    start = 0
    # Walk the whole document rather than stopping after a fixed number
    # of chunks — a truly long document must be fully covered.
    while start < len(text):
        chunks.append({"text": text[start : start + width + overlap]})
        start += width
    return chunks


with start_chain("summary") as backend:
    prompt = SummaryPrompt(backend.OpenAI())
    list_prompt = prompt.map()

    # Map - Summarize each chunk in parallel
    out = trio.run(list_prompt.arun, chunk("../state_of_the_union.txt"))

    # Reduce - Summarize the summarized chunks
    print(prompt({"text": "\n".join(out)}))

# + tags=["hide_inp"]
SummaryPrompt().show(
    {"text": "One way to fight is to drive down wages and make Americans poorer."},
    "Make Americans poorer",
)
# -

show_log("summary.log")