{"items":[{"arxiv_id":"2501.00656","title":"2 OLMo 2 Furious","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-olmo2-winogrande-microslice","computed_at":"2026-05-15T19:56:36.638Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"1907.11692","title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach","venue":"arXiv preprint","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-roberta-mnli-microslice","computed_at":"2026-05-15T19:19:53.709Z","confidence":0.85,"protocol_match":"proxy"}},{"arxiv_id":"2312.00752","title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces","venue":"COLM 2024","primary_category":"cs.LG","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-mamba-wikitext2-3slice8","computed_at":"2026-05-15T19:19:22.054Z","confidence":0.65,"protocol_match":"proxy"}},{"arxiv_id":"1910.01108","title":"DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter","venue":"NeurIPS 2019 EMC^2 Workshop","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-distilbert-sst2-microslice","computed_at":"2026-05-15T19:19:17.785Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"1810.04805","title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","venue":"NAACL 2019","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-bert-sst2-3slice100","computed_at":"2026-05-15T19:19:14.387Z","confidence":0.8,"protocol_match":"exact"}},{"arxiv_id":"2402.17834","title":"Stable LM 2 1.6B Technical Report","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-stablelm2-winogrande-microslice","computed_at":"2026-05-15T18:26:56.768Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"2409.02060","title":"OLMoE: Open Mixture-of-Experts Language Models","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-olmoe-winogrande-microslice","computed_at":"2026-05-15T17:51:44.730Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"2501.12948","title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","venue":"arXiv 2025","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-deepseek-r1-winogrande-microslice","computed_at":"2026-05-15T17:15:16.020Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"2502.02737","title":"SmolLM2: When Smol Goes Big — Data-Centric Training of a Small Language Model","venue":"arXiv 2025","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-smollm2-winogrande-microslice","computed_at":"2026-05-15T16:33:10.626Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"2412.15115","title":"Qwen2.5 Technical Report","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-qwen25-winogrande-microslice","computed_at":"2026-05-15T16:17:44.682Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"2403.04652","title":"Yi: Open Foundation Models by 01.AI","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-yi-lambada-microslice","computed_at":"2026-05-15T16:10:30.561Z","confidence":0.8,"protocol_match":"unknown"}},{"arxiv_id":"2404.14219","title":"Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-phi3-winogrande-microslice","computed_at":"2026-05-15T16:10:04.346Z","confidence":0.55,"protocol_match":"unknown"}},{"arxiv_id":"1905.02244","title":"Searching for MobileNetV3","venue":"ICCV 2019","primary_category":"cs.CV","current_verdict":{"status":"partial","agent_version":"v0.1.0-mobilenet-v3-large-microslice","computed_at":"2026-05-15T16:08:31.875Z","confidence":0.5,"protocol_match":"proxy"}},{"arxiv_id":"2310.06825","title":"Mistral 7B","venue":"arXiv 2023","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-mistral-hellaswag-microslice","computed_at":"2026-05-15T03:04:31.055Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2404.06395","title":"MiniCPM: Unveiling the Potential of Small Language Models with Scalable Training Strategies","venue":"COLM 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-minicpm-mmlu5shot-microslice","computed_at":"2026-05-15T00:04:42.753Z","confidence":0.55,"protocol_match":"proxy"}},{"arxiv_id":"2403.08295","title":"Gemma: Open Models Based on Gemini Research and Technology","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-gemma-hellaswag-microslice","computed_at":"2026-05-14T23:57:33.041Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2401.02385","title":"TinyLlama: An Open-Source Small Language Model","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-tinyllama-hellaswag-microslice","computed_at":"2026-05-14T23:56:41.156Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2111.09543","title":"DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing","venue":"ICLR 2023","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-deberta-mnli-microslice","computed_at":"2026-05-14T23:56:01.683Z","confidence":0.85,"protocol_match":"proxy"}},{"arxiv_id":"2211.05100","title":"BLOOM: A 176B-Parameter Open-Access Multilingual Language Model","venue":"arXiv 2022","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-bloom-lambada-microslice","computed_at":"2026-05-14T23:54:50.378Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2308.12950","title":"Code Llama: Open Foundation Models for Code","venue":"arXiv 2023","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-codellama-pythonppl-microslice","computed_at":"2026-05-14T23:53:26.429Z","confidence":0.6,"protocol_match":"unknown"}},{"arxiv_id":"2401.14196","title":"DeepSeek-Coder: When the Large Language Model Meets Programming -- The Rise of Code Intelligence","venue":"arXiv 2024","primary_category":"cs.SE","current_verdict":{"status":"partial","agent_version":"v0.1.0-deepseek-coder-pythonppl-microslice","computed_at":"2026-05-14T23:52:17.154Z","confidence":0.6,"protocol_match":"unknown"}},{"arxiv_id":"2304.01373","title":"Pythia: A Suite for Analyzing Large Language Models Across Training and Scaling","venue":"ICML 2023","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-pythia14-lambada-microslice","computed_at":"2026-05-14T23:52:16.424Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2205.01068","title":"OPT: Open Pre-trained Transformer Language Models","venue":"arXiv 2022","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-opt-lambada-microslice","computed_at":"2026-05-14T23:48:48.719Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2111.09883","title":"Swin Transformer V2: Scaling Up Capacity and Resolution","venue":"CVPR 2022","primary_category":"cs.CV","current_verdict":{"status":"partial","agent_version":"v0.1.0-swinv2-imagenet-microslice","computed_at":"2026-05-14T23:48:47.059Z","confidence":0.55,"protocol_match":"proxy"}},{"arxiv_id":"1906.08237","title":"XLNet: Generalized Autoregressive Pretraining for Language Understanding","venue":"NeurIPS 2019","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-xlnet-mnli-microslice","computed_at":"2026-05-14T23:48:37.291Z","confidence":0.85,"protocol_match":"proxy"}},{"arxiv_id":"1908.10084","title":"Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks","venue":"EMNLP 2019","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.2.0-sbert-stsb-test-3slice-table2","computed_at":"2026-05-14T23:48:16.102Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2305.06161","title":"StarCoder: may the source be with you!","venue":"arXiv 2023","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-starcoder-pythonppl-microslice","computed_at":"2026-05-14T23:48:07.124Z","confidence":0.6,"protocol_match":"unknown"}},{"arxiv_id":"2106.09685","title":"LoRA: Low-Rank Adaptation of Large Language Models","venue":"ICLR 2022","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-lora-mrpc-microslice","computed_at":"2026-05-14T23:40:53.397Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2102.05918","title":"Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision","venue":"ICML 2021","primary_category":"cs.CV","current_verdict":{"status":"partial","agent_version":"v0.1.0b-align-imagenette-3slice100","computed_at":"2026-05-14T23:39:29.603Z","confidence":0.55,"protocol_match":"proxy"}},{"arxiv_id":"2407.10671","title":"Qwen2 Technical Report","venue":"arXiv 2024","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-qwen2-lambada-microslice","computed_at":"2026-05-14T23:39:10.283Z","confidence":0.8,"protocol_match":"unknown"}},{"arxiv_id":"1905.11946","title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks","venue":"ICML 2019","primary_category":"cs.LG","current_verdict":{"status":"partial","agent_version":"v0.1.0-efficientnet-microslice","computed_at":"2026-05-14T23:38:34.290Z","confidence":0.5,"protocol_match":"proxy"}},{"arxiv_id":"2311.16867","title":"The Falcon Series of Open Language Models","venue":"arXiv 2023","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-falcon-hellaswag-microslice","computed_at":"2026-05-14T23:38:11.983Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2309.05463","title":"Textbooks Are All You Need II: phi-1.5 technical report","venue":"arXiv 2023","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-phi-winogrande-microslice","computed_at":"2026-05-14T23:31:48.994Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2007.14062","title":"Big Bird: Transformers for Longer Sequences","venue":"NeurIPS 2020","primary_category":"cs.LG","current_verdict":{"status":"partial","agent_version":"v0.1.0-bigbird-wikitext2-3slice6","computed_at":"2026-05-14T23:29:01.318Z","confidence":0.5,"protocol_match":"unknown"}},{"arxiv_id":"2210.11416","title":"Scaling Instruction-Finetuned Language Models","venue":"arXiv 2022","primary_category":"cs.LG","current_verdict":{"status":"partial","agent_version":"v0.1.0-flan-t5-mmlu-microslice","computed_at":"2026-05-14T23:28:30.248Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2301.12597","title":"BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models","venue":"ICML 2023","primary_category":"cs.CV","current_verdict":{"status":"partial","agent_version":"v0.2.0-blip2-flickr30k-beam5-n100","computed_at":"2026-05-14T23:27:31.653Z","confidence":0.6,"protocol_match":"proxy"}},{"arxiv_id":"2304.07193","title":"DINOv2: Learning Robust Visual Features without Supervision","venue":"TMLR 2024","primary_category":"cs.CV","current_verdict":{"status":"reproduced","agent_version":"v0.1.1-dinov2-imagenette-knn","computed_at":"2026-05-14T23:25:07.482Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2104.14294","title":"Emerging Properties in Self-Supervised Vision Transformers","venue":"ICCV 2021","primary_category":"cs.CV","current_verdict":{"status":"reproduced","agent_version":"v0.1.1-dino-imagenette-knn","computed_at":"2026-05-14T23:25:01.317Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2201.03545","title":"A ConvNet for the 2020s","venue":"CVPR 2022","primary_category":"cs.CV","current_verdict":{"status":"not_attempted","agent_version":"v0.1.0-convnext-imagenet-microslice","computed_at":"2026-05-14T23:24:31.282Z","confidence":null,"protocol_match":"proxy"}},{"arxiv_id":"1910.13461","title":"BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension","venue":"ACL 2020","primary_category":"cs.CL","current_verdict":{"status":"partial","agent_version":"v0.1.0-bart-cnndm-200slice","computed_at":"2026-05-14T23:22:17.517Z","confidence":0.6,"protocol_match":"exact"}},{"arxiv_id":"2006.03654","title":"DeBERTa: Decoding-enhanced BERT with Disentangled Attention","venue":"ICLR 2021","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-deberta-v2-mnli-microslice","computed_at":"2026-05-14T23:21:19.951Z","confidence":0.85,"protocol_match":"exact"}},{"arxiv_id":"2103.00020","title":"Learning Transferable Visual Models From Natural Language Supervision","venue":"ICML 2021","primary_category":"cs.CV","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-clip-cifar10-3slice100","computed_at":"2026-05-14T23:20:20.263Z","confidence":0.75,"protocol_match":"proxy"}},{"arxiv_id":"2212.04356","title":"Robust Speech Recognition via Large-Scale Weak Supervision","venue":"arXiv preprint (Whisper)","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-whisper-librispeech-3slice16","computed_at":"2026-05-14T23:20:14.367Z","confidence":0.75,"protocol_match":"proxy"}},{"arxiv_id":"2010.11929","title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","venue":"ICLR 2021","primary_category":"cs.CV","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-vit-cifar10-3slice100","computed_at":"2026-05-14T23:20:03.858Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"1512.03385","title":"Deep Residual Learning for Image Recognition","venue":"CVPR 2016","primary_category":"cs.CV","current_verdict":{"status":"partial","agent_version":"v0.1.0-resnet-microslice","computed_at":"2026-05-14T23:19:45.878Z","confidence":0.45,"protocol_match":"proxy"}},{"arxiv_id":"2003.10555","title":"ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators","venue":"ICLR 2020","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-electra-mnli-microslice","computed_at":"2026-05-14T23:17:46.124Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"1910.10683","title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","venue":"JMLR 2020","primary_category":"cs.LG","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-t5-mnli-microslice","computed_at":"2026-05-14T23:17:43.450Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"1909.11942","title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations","venue":"ICLR 2020","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-albert-mrpc-microslice","computed_at":"2026-05-14T23:17:42.537Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2004.02984","title":"MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices","venue":"ACL 2020","primary_category":"cs.CL","current_verdict":{"status":"reproduced","agent_version":"v0.1.0-mobilebert-mnli-microslice","computed_at":"2026-05-14T23:10:59.815Z","confidence":0.8,"protocol_match":"proxy"}},{"arxiv_id":"2307.09288","title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","venue":"arXiv preprint","primary_category":"cs.CL","current_verdict":{"status":"not_attempted","agent_version":"v0.1.0-llama2-hellaswag-microslice","computed_at":"2026-05-14T22:40:41.389Z","confidence":null,"protocol_match":"proxy"}}],"next_cursor":"MjAyNi0wNS0xNFQyMjo0MDo0MS4zODlafDIzMDcuMDkyODg","total":null}