[{"data":1,"prerenderedAt":83},["ShallowReactive",2],{"tool-helm":3},{"tool":4,"relatedTools":45},{"id":5,"name":6,"slug":7,"tagline":8,"description":9,"websiteUrl":10,"logoText":11,"pricing":12,"editorScore":13,"clickCount":14,"featured":15,"latest":15,"updatedAt":16,"category":17,"tags":26,"overview":28,"highlights":29,"useCases":38},"fc7f0324946b9775de8b4b28eb406f8a","HELM","helm","斯坦福大学推出的大模型评测体系","HELM是由斯坦福大学推出的大模型评测体系，该评测方法主要包括场景、适配、指标三个模块，每次评测的运行都需要指定一个场景，一个适配模型的提示，以及一个或多个指标。","https:\u002F\u002Fcrfm.stanford.edu\u002Fhelm\u002Flatest\u002F?utm_source=ai-bot.cn","HEL","官网可查看",98,8,false,"2026-04-15T04:26:34+08:00",{"id":18,"name":19,"slug":20,"description":21,"iconLabel":22,"accentColor":23,"toolCount":24,"sortOrder":25},"d6d303a9f41677e5f1a5f9b12c520225","智能体与自动化","agents","适合搭建业务工作流、插件编排和多步骤自动化，用来把 AI 接进真实流程。","AUTO","#6C7A2D",216,7,[27],"AI模型评测","HELM全称Holistic Evaluation of Language Models（语言模型整体评估）是斯坦福大学推出的大模型评测体系，评测方法主要包括场景、适配、指标三大模块，每次评测的运行都需要指定一个场景，一个适配模型的提示，以及一个或多个指标。它评测主要覆盖的是英语，通过准确率、不确定性\u002F校准、鲁棒性、公平性、偏差、毒性、推断效率综合评测模型表现，适用问答、信息检索、文本分类等任务，为语言模型提供更全面、系统的评估方法，帮助研究人员和开发者更好地理解和优化模型性能。 全面的评估能力 ：HELM支持多种语言模型任务（如问答、文本分类、信息检索、文本生成、摘要等），提供多种评估指标（包括准确率、鲁棒性、公平性、偏差、毒性、推断效率等），能够从多个维度全面评估语言模型的性能。",[30,31,32,33,34,35,36,37],"全面的评估能力","可复现性与透明性","多模态支持","自定义扩展","安装HELM","基于pip安装","从源代码安装","配置评估任务",[39,40,41,42,43,44],"场景","适配","指标三大模块","更全面","系统的评估方法","文本分类",[46,59,70],{"id":47,"name":48,"slug":49,"tagline":50,"description":51,"websiteUrl":52,"logoText":53,"pricing":12,"editorScore":13,"clickCount":54,"featured":55,"latest":55,"updatedAt":16,"category":56,"tags":57},"a638bfd4cc081d0294d7d12975a727dd","GPT-4","gpt-4","OpenAI旗下最新的GPT-4模型","GPT-4（Generative Pre-trained Transformer）是 OpenAI 开发的自然语言处理模型 GPT 家族中的第四个版本，该模型依靠强大的神经网络来理解和生成类似人类的语言。","https:\u002F\u002Fcn.bing.com\u002Fsearch?q=GPT-4&ensearch=1","GPT",2356,true,{"id":18,"name":19,"slug":20,"description":21,"iconLabel":22,"accentColor":23,"toolCount":24,"sortOrder":25},[58],"AI训练模型",{"id":60,"name":61,"slug":62,"tagline":63,"description":64,"websiteUrl":65,"logoText":66,"pricing":12,"editorScore":13,"clickCount":67,"featured":55,"latest":55,"updatedAt":16,"category":68,"tags":69},"a23e89d9e4203bd1c683c41e837f370e","LMArena","lmarena","AI模型评估平台","LMArena是加州大学伯克利分校推出的创新AI模型评估平台，基于让用户对不同AI模型的回答进行匿名投票，衡量模型的表现。用户输入问题后，平台提供两个模型的回答，用户根据偏好选择更优答案，投票结果直接塑造公共排行榜。","https:\u002F\u002Flmarena.ai\u002F?utm_source=ai-bot.cn","LMA",681,{"id":18,"name":19,"slug":20,"description":21,"iconLabel":22,"accentColor":23,"toolCount":24,"sortOrder":25},[27],{"id":71,"name":72,"slug":73,"tagline":74,"description":75,"websiteUrl":76,"logoText":77,"pricing":78,"editorScore":13,"clickCount":79,"featured":55,"latest":55,"updatedAt":16,"category":80,"tags":81},"32bb7d109995125694dc4a175a840117","Coze","coze","海量AI智能体免费用，一键复制同款","Coze是字节跳动推出的AI办公助手，能通过AI技术简化工作流程，提升工作效率。通过Agent Skills技能系统和Agent Plan长期计划功能，实现从单一工具到智能工作伙伴的转变。","https:\u002F\u002Fwww.coze.cn\u002Foverview?utm_medium=daohang&utm_source=aibot&utm_term=49156769m","COZ","免费",316,{"id":18,"name":19,"slug":20,"description":21,"iconLabel":22,"accentColor":23,"toolCount":24,"sortOrder":25},[82],"AI开发平台",1776262187732]