---
library_name: peft
---
## Training procedure

The following `bitsandbytes` quantization config was used during training:
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: fp4
- bnb_4bit_use_double_quant: False
- bnb_4bit_compute_dtype: float32
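
For reference, these values map onto a `transformers` `BitsAndBytesConfig` roughly as in the sketch below (a reconstruction for illustration, not taken from the training notebook):

```python
# Approximate reconstruction of the quantization config listed above
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_8bit is left False
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float32,
)
```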

### Framework versions

- PEFT 0.4.0

Notebook (training and inference): https://colab.research.google.com/drive/1GxbUYZiLidteVX4qu5iSox6oxxEOHk5O?usp=sharing

Usage:
```python
import requests

# Get a random Wikipedia article summary using the Wikipedia REST API
def random_extract():
    URL = "https://en.wikipedia.org/api/rest_v1/page/random/summary"
    r = requests.get(URL)
    data = r.json()
    return data["extract"]

# Format the extract as a prompt that should lead the model to complete with a question
def random_prompt():
    e = random_extract()
    return f"""### CONTEXT: {e} ### QUESTION:"""

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

output_dir = "mcqgen_test"

# Load the base LLM with the trained adapter, plus the tokenizer
model = AutoPeftModelForCausalLM.from_pretrained(
    output_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Feed in a random context prompt and see what question the model comes up with
prompt = random_prompt()

input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
# Optionally wrap this call in `with torch.inference_mode():` to skip gradient tracking
outputs = model.generate(
    input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.9
)

print(f"Prompt:\n{prompt}\n")
print(f"Generated MCQ:\n### QUESTION:{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]}")

# Parse the generated text into its labelled sections; return None if the
# output does not have the expected structure.
def process_outputs(outputs):
    s = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
    split = s.split("### ")[1:][:7]
    if len(split) != 7:
        return None
    # Check that each section starts with the expected label
    expected_starts = ["CONTEXT", "QUESTION", "A", "B", "C", "D", "CORRECT"]
    for i, part in enumerate(split):
        if not part.startswith(expected_starts[i]):
            return None
    return {
        "context": split[0].replace("CONTEXT: ", ""),
        "question": split[1].replace("QUESTION: ", ""),
        "a": split[2].replace("A: ", ""),
        "b": split[3].replace("B: ", ""),
        "c": split[4].replace("C: ", ""),
        "d": split[5].replace("D: ", ""),
        "correct": split[6].replace("CORRECT: ", ""),
    }


process_outputs(outputs)  # A nice dictionary, hopefully
```
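
Since sampling does not always produce a well-formed completion, a simple retry loop can help. The `generate_mcq` helper below is a hypothetical addition (not part of the original notebook) that reuses the `random_prompt`, `tokenizer`, `model`, and `process_outputs` objects defined above:

```python
# Hypothetical convenience wrapper: regenerate until the output parses cleanly
def generate_mcq(max_attempts=5):
    for _ in range(max_attempts):
        prompt = random_prompt()
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
        outputs = model.generate(
            input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.9
        )
        mcq = process_outputs(outputs)
        if mcq is not None:
            return mcq
    return None  # every attempt produced a malformed completion

generate_mcq()
```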