{"paper":{"arxiv_id":"2306.11644","title":"Textbooks Are All You Need","abstract":"We introduce phi-1, a new large language model for code, with significantly smaller size than competing models: phi-1 is a Transformer-based model with 1.3B parameters, trained for 4 days on 8 A100s, using a selection of ``textbook quality'' data from the web (6B tokens) and synthetically generated textbooks and exercises with GPT-3.5 (1B tokens). Despite this small scale, phi-1 attains pass@1 accuracy 50.6% on HumanEval and 55.5% on MBPP. It also displays surprising emergent properties compared to phi-1-base, our model before our finetuning stage on a dataset of coding exercises, and phi-1-small, a smaller model with 350M parameters trained with the same pipeline as phi-1 that still achieves 45% on HumanEval.","primary_category":"cs.CL","venue":"arXiv 2023","published_at":null,"latest_version":1,"withdrawn":false},"latest_version":{"id":"a4a28dbd-8b0d-4303-a15e-335974634c77","version":1,"source_url":"https://arxiv.org/abs/2306.11644","rendered_html_url":null,"rendering_engine":null},"verdict":{"id":"d81a5ce8-a2cb-4d9c-8457-fa8d8c2e4b80","kind":"POST","status":"partial","score":null,"confidence":0.25,"agent_version":"phi1-redteam-rollback-v0.1","computed_at":"2026-05-13T22:26:03.055Z","is_current":true,"claim_citation":null,"protocol_match":null},"verdicts":{"post":{"id":"d81a5ce8-a2cb-4d9c-8457-fa8d8c2e4b80","kind":"POST","status":"partial","score":null,"confidence":0.25,"agent_version":"phi1-redteam-rollback-v0.1","computed_at":"2026-05-13T22:26:03.055Z","is_current":true,"claim_citation":null,"protocol_match":null},"pre":null}}