diff --git a/index.html b/index.html index 088fda7..8b4f79f 100644 --- a/index.html +++ b/index.html @@ -1 +1,21 @@ -
This study presents NewsBench, a novel benchmark framework developed to evaluate the capability of Large Language Models (LLMs) in Chinese Journalistic Writing Proficiency (JWP) and their Safety Adherence (SA), addressing the gap between journalistic ethics and the risks associated with AI utilization. Comprising 1,267 tasks across 5 editorial applications, 7 aspects (including safety and journalistic writing with 4 detailed facets), and spanning 24 news topics domains, NewsBench employs two GPT-4 based automatic evaluation protocols validated by human assessment. Our comprehensive analysis of 11 LLMs highlighted GPT-4 and ERNIE Bot as top performers, yet revealed a relative deficiency in journalistic ethic adherence during creative writing tasks. These findings underscore the need for enhanced ethical guidance in AI-generated journalistic content, marking a step forward in aligning AI capabilities with journalistic standards and safety considerations.
',3);function f(t,e,d,a,n,r){return p}var v={name:"MyHome"};const g=(0,l.A)(v,[["render",f]]);var k=g;const I=(0,n.Fv)('# | Model | #Parameters | Open Weights | JWP-Generation | JWP-Multiple | SA-Generation | SA-Multiple |
---|---|---|---|---|---|---|---|
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
1 | GPT-4-1106 | - | × | 2.4438 | 0.4560 | 0.9000 | 0.9068 |
This study presents NewsBench, a novel benchmark framework developed to evaluate the capability of Large Language Models (LLMs) in Chinese Journalistic Writing Proficiency (JWP) and their Safety Adherence (SA), addressing the gap between journalistic ethics and the risks associated with AI utilization. Comprising 1,267 tasks across 5 editorial applications, 7 aspects (including safety and journalistic writing with 4 detailed facets), and spanning 24 news topics domains, NewsBench employs two GPT-4 based automatic evaluation protocols validated by human assessment. Our comprehensive analysis of 11 LLMs highlighted GPT-4 and ERNIE Bot as top performers, yet revealed a relative deficiency in journalistic ethic adherence during creative writing tasks. These findings underscore the need for enhanced ethical guidance in AI-generated journalistic content, marking a step forward in aligning AI capabilities with journalistic standards and safety considerations.
\r\n\r\n \r\n\r\n\r\n\r\n\r\n\r\n\r\n","import { render } from \"./Home.vue?vue&type=template&id=09fed940\"\nimport script from \"./Home.vue?vue&type=script&lang=js\"\nexport * from \"./Home.vue?vue&type=script&lang=js\"\n\nimport \"./Home.vue?vue&type=style&index=0&id=09fed940&lang=css\"\n\nimport exportComponent from \"../../node_modules/vue-loader/dist/exportHelper.js\"\nconst __exports__ = /*#__PURE__*/exportComponent(script, [['render',render]])\n\nexport default __exports__","\r\n\r\n \r\n# | \r\nModel | \r\n#Parameters | \r\nOpen Weights | \r\nJWP-Generation | \r\nJWP-Multiple | \r\nSA-Generation | \r\nSA-Multiple | \r\n
---|---|---|---|---|---|---|---|
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
1 | \r\nGPT-4-1106 | \r\n- | \r\n× | \r\n2.4438 | \r\n0.4560 | \r\n0.9000 | \r\n0.9068 | \r\n
f?Q(e,i,s,!0,!1,p):P(t,n,r,i,s,c,l,u,p)},Z=(e,t,n,r,i,s,c,l,u)=>{let a=0;const f=t.length;let p=e.length-1,d=f-1;while(a<=p&&a<=d){const r=e[a],o=t[a]=u?fn(t[a]):an(t[a]);if(!Yt(r,o))break;m(r,o,n,null,i,s,c,l,u),a++}while(a<=p&&a<=d){const r=e[p],o=t[d]=u?fn(t[d]):an(t[d]);if(!Yt(r,o))break;m(r,o,n,null,i,s,c,l,u),p--,d--}if(a>p){if(a<=d){const e=d+1,o=e