{"paper":{"arxiv_id":"2307.01952","title":"SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis","abstract":"We present SDXL, a latent diffusion model for text-to-image synthesis. Compared to previous versions of Stable Diffusion, SDXL leverages a three times larger UNet backbone: The increase of model parameters is mainly due to more attention blocks and a larger cross-attention context as SDXL uses a second text encoder. We design multiple novel conditioning schemes and train SDXL on multiple aspect ratios. We also introduce a refinement model which is used to improve the visual fidelity of samples generated by SDXL using a post-hoc image-to-image technique. We demonstrate that SDXL shows drastically improved performance compared to the previous versions of Stable Diffusion and achieves results competitive with those of black-box state-of-the-art image generators.","primary_category":"cs.CV","venue":"arXiv 2023","published_at":null,"latest_version":1,"withdrawn":false},"latest_version":{"id":"d981c106-01d9-4b5a-8238-3abf70ac3b00","version":1,"source_url":"https://arxiv.org/abs/2307.01952","rendered_html_url":null,"rendering_engine":null},"verdict":{"id":"87c40577-98a3-4fb1-8e2b-89e9cc012cef","kind":"POST","status":"reproduced","score":0.298187255859375,"confidence":0.78,"agent_version":"v0.1.0-sdxl-clip-score-microslice","computed_at":"2026-05-13T04:39:20.907Z","is_current":true,"claim_citation":null,"protocol_match":null},"verdicts":{"post":{"id":"87c40577-98a3-4fb1-8e2b-89e9cc012cef","kind":"POST","status":"reproduced","score":0.298187255859375,"confidence":0.78,"agent_version":"v0.1.0-sdxl-clip-score-microslice","computed_at":"2026-05-13T04:39:20.907Z","is_current":true,"claim_citation":null,"protocol_match":null},"pre":null}}