# Author: lvwerra (HF staff)
# Commit message: fix cache
# Commit: 8880e25
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from transformers import pipeline
import torch
import json
@st.cache(allow_output_mutation=True)
def load_tokenizer(model_ckpt):
    """Load the tokenizer that belongs to the checkpoint ``model_ckpt``.

    The call is wrapped in ``st.cache`` so that the tokenizer object is
    reused instead of being rebuilt every time the script is executed.

    Args:
        model_ckpt: Checkpoint identifier handed to
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokenizer produced by ``AutoTokenizer.from_pretrained``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
    return tokenizer
@st.cache(allow_output_mutation=True)
def load_model(model_ckpt):
    """Load the causal language model for the checkpoint ``model_ckpt``.

    The call is wrapped in ``st.cache`` so that the model object is reused
    instead of being reloaded every time the script is executed.

    Args:
        model_ckpt: Checkpoint identifier handed to
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        The model produced by ``AutoModelForCausalLM.from_pretrained``,
        requested with ``low_cpu_mem_usage=True``.
    """
    return AutoModelForCausalLM.from_pretrained(
        model_ckpt,
        low_cpu_mem_usage=True,
    )
def load_examples(path="examples.json"):
    """Read the example prompts from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``"examples.json"``,
            which is resolved relative to the current working directory.

    Returns:
        The decoded JSON document. The rest of this script treats it as a
        sequence of mappings with ``"name"``, ``"value"`` and ``"length"``
        keys.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: relying on the platform default encoding
    # can garble or reject non-ASCII characters in the examples.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
# --- Page configuration and model setup ------------------------------------
st.set_page_config(page_icon=':parrot:', layout="wide")

model_ckpt = "codeparrot/codeparrot"
tokenizer = load_tokenizer(model_ckpt)
model = load_model(model_ckpt)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# --- Example prompts -------------------------------------------------------
examples = load_examples()
example_names = [example["name"] for example in examples]
# Map each example name to its position in `examples`.
name2id = {name: i for i, name in enumerate(example_names)}

# Seed the RNGs on every script run so that sampling is reproducible.
set_seed(42)
gen_kwargs = {}

# --- Main page header ------------------------------------------------------
st.title("CodeParrot 🦜")
st.markdown('##')

# --- Sidebar: example selection --------------------------------------------
st.sidebar.header("Examples:")
selected_example = st.sidebar.selectbox("Select one of the following examples:", example_names)
# Look the chosen example up once and read both fields from it.
selected_entry = examples[name2id[selected_example]]
example_text = selected_entry["value"]
default_length = selected_entry["length"]

# --- Sidebar: generation settings ------------------------------------------
st.sidebar.header("Generation settings:")
gen_kwargs["do_sample"] = st.sidebar.radio("Decoding strategy", ["Greedy", "Sample"]) == "Sample"
gen_kwargs["max_new_tokens"] = st.sidebar.slider(
    "Number of tokens to generate",
    value=default_length,
    min_value=8,
    step=8,
    max_value=256,
)
# The sampling knobs are only shown (and only passed on) when sampling is on.
if gen_kwargs["do_sample"]:
    # The lower bound is one step above zero: a temperature of 0.0 is not a
    # valid sampling temperature (logits are divided by it), so offering it
    # would let the user trigger a failure at generation time.
    gen_kwargs["temperature"] = st.sidebar.slider(
        "Temperature", value=0.2, min_value=0.05, max_value=2.0, step=0.05
    )
    gen_kwargs["top_k"] = st.sidebar.slider("Top-k", min_value=0, max_value=100, value=0)
    gen_kwargs["top_p"] = st.sidebar.slider(
        "Top-p", min_value=0.0, max_value=1.0, step=0.01, value=0.95
    )

# --- Prompt input and generation -------------------------------------------
gen_prompt = st.text_area("Generate code with prompt:", value=example_text, height=220,).strip()
if st.button("Generate code!"):
    with st.spinner("Generating code..."):
        # The pipeline returns a list of results; show the text of the first.
        generated_text = pipe(gen_prompt, **gen_kwargs)[0]['generated_text']
    st.code(generated_text)