mlphys101 - Exploring the performance of Large-Language Models in multilingual undergraduate physics education

Völschow, Marcel; Buczek, P.; Carreno-Mosquera, P.; Mousavias, C.; Reganova, S.; Roldan-Rodriguez, E.; Steinbach, Peter; Strube, A.

doi:10.14278/rodare.3137

September 9, 2024 Dataset Restricted Access

mlphys101 - Exploring the performance of Large-Language Models in multilingual undergraduate physics education

Völschow, Marcel; Buczek, P.; Carreno-Mosquera, P.; Mousavias, C.; Reganova, S.; Roldan-Rodriguez, E.; Steinbach, Peter; Strube, A.

DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:adms="http://www.w3.org/ns/adms#"
         xmlns:cnt="http://www.w3.org/2011/content#"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dctype="http://purl.org/dc/dcmitype/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:duv="http://www.w3.org/ns/duv#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:frapo="http://purl.org/cerif/frapo/"
         xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         xmlns:gsp="http://www.opengis.net/ont/geosparql#"
         xmlns:locn="http://www.w3.org/ns/locn#"
         xmlns:org="http://www.w3.org/ns/org#"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:prov="http://www.w3.org/ns/prov#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns:schema="http://schema.org/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
         xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <!-- DCAT description of one dataset record, identified by its version DOI. -->
  <rdf:Description rdf:about="https://doi.org/10.14278/rodare.3137">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.14278/rodare.3137</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.14278/rodare.3137"/>

    <!-- Creators, one dct:creator per person. Only the creator that carries
         an ORCID IRI is a named node; all others are blank nodes. -->
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Völschow, Marcel</foaf:name>
        <foaf:givenName>Marcel</foaf:givenName>
        <foaf:familyName>Völschow</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Buczek, P.</foaf:name>
        <foaf:givenName>P.</foaf:givenName>
        <foaf:familyName>Buczek</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Carreno-Mosquera, P.</foaf:name>
        <foaf:givenName>P.</foaf:givenName>
        <foaf:familyName>Carreno-Mosquera</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Mousavias, C.</foaf:name>
        <foaf:givenName>C.</foaf:givenName>
        <foaf:familyName>Mousavias</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Reganova, S.</foaf:name>
        <foaf:givenName>S.</foaf:givenName>
        <foaf:familyName>Reganova</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Roldan-Rodriguez, E.</foaf:name>
        <foaf:givenName>E.</foaf:givenName>
        <foaf:familyName>Roldan-Rodriguez</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-4974-230X">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Steinbach, Peter</foaf:name>
        <foaf:givenName>Peter</foaf:givenName>
        <foaf:familyName>Steinbach</foaf:familyName>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description>
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Strube, A.</foaf:name>
        <foaf:givenName>A.</foaf:givenName>
        <foaf:familyName>Strube</foaf:familyName>
      </rdf:Description>
    </dct:creator>

    <!-- Title, publisher, keywords and dates. dct:issued is given twice:
         once with year precision (gYear) and once as a full date. -->
    <dct:title>mlphys101 - Exploring the performance of Large-Language Models in multilingual undergraduate physics education</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Rodare</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2024</dct:issued>
    <dcat:keyword>machine learning</dcat:keyword>
    <dcat:keyword>deep learning</dcat:keyword>
    <dcat:keyword>large language models</dcat:keyword>
    <dcat:keyword>chatgpt</dcat:keyword>
    <dcat:keyword>blablador</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-09-09</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>

    <!-- Alternate identifiers and relations: repository record page,
         institutional publication entry, the DOI this record is a version
         of, and the community it belongs to. -->
    <owl:sameAs rdf:resource="https://rodare.hzdr.de/record/3137"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://rodare.hzdr.de/record/3137</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <owl:sameAs rdf:resource="https://www.hzdr.de/publications/Publ-39561"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.14278/rodare.3136"/>
    <dct:isPartOf rdf:resource="https://rodare.hzdr.de/communities/rodare"/>

    <!-- Abstract as entity-escaped HTML (two paragraphs), followed by the
         availability note repeated as a plain-text description. Do not
         pretty-print these elements: whitespace inside them is data. -->
    <dct:description>&lt;p&gt;Large-Language Models such as ChatGPT have the potential to revolutionize academic teaching in physics in a similar way the electronic calculator, the home computer or the internet did. AI models are patient, produce answers tailored to a student’s needs and are accessible whenever needed. Those involved in academic teaching are facing a number of questions: Just how reliable are publicly accessible models in answering, how does the question’s language affect the models’ performance and how well do the models perform with more difficult tasks beyond retrieval? To address these questions, we benchmark a number of publicly available models on the mlphys101 dataset, a new set of 823 university level MC5 questions and answers released alongside this work. While the original questions are in English, we employ GPT-4 to translate them into various other languages, followed by revision and refinement by native speakers. Our findings indicate that state-of-the-art models perform well on questions involving the replication of facts, definitions, and basic concepts, but struggle with multi-step quantitative reasoning. This aligns with existing literature that highlights the challenges LLMs face in mathematical and logical reasoning tasks. We conclude that the most advanced current LLMs are a valuable addition to the academic curriculum and LLM powered translations are a viable method to increase the accessibility of materials, but their utility for more difficult quantitative tasks remains limited.&lt;/p&gt; &lt;p&gt;The dataset is available in English here only and will be removed, once the mlphys101 publication was accepted and released to the public.&lt;/p&gt;</dct:description>
    <dct:description>The dataset is available in English here only and will be removed, once the mlphys101 publication was accepted and released to the public.</dct:description>

    <!-- Access rights, stated with two vocabularies (EU Publications Office
         authority IRI and an info:eu-repo term), plus the single
         distribution whose access URL is the DOI landing page. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/RESTRICTED"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/restrictedAccess">
        <rdfs:label>Restricted Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.14278/rodare.3137"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>

595

views

downloads

See more details...

	All versions	This version
Views	595	595
Downloads	2	2
Data volume	660.5 kB	660.5 kB
Unique views	539	539
Unique downloads	2	2

More info on how stats are collected.

Publication date:

September 9, 2024

DOI: 10.14278/rodare.3137

Keyword(s):

machine learning deep learning large language models chatgpt blablador

Related identifiers:

Identical to:
https://www.hzdr.de/publications/Publ-39561

Communities:

RODARE

Versions

Version 1 10.14278/rodare.3137

Sep 9, 2024

Cite all versions? You can cite all versions by using the DOI 10.14278/rodare.3136. This DOI represents all versions, and will always resolve to the latest one. Read more.

mlphys101 - Exploring the performance of Large-Language Models in multilingual undergraduate physics education

DCAT Export

Versions

Share

Cite as

Export

About

Help

Contribute

Follow us

Registered in

mlphys101 - Exploring the performance of Large-Language Models in multilingual undergraduate physics education

DCAT Export

RODARE DOI Badge

DOI

10.14278/rodare.3137

Markdown

[![DOI](https://rodare.hzdr.de/badge/DOI/10.14278/rodare.3137.svg)](https://doi.org/10.14278/rodare.3137)

reStructuredText

.. image:: https://rodare.hzdr.de/badge/DOI/10.14278/rodare.3137.svg
   :target: https://doi.org/10.14278/rodare.3137

HTML

<a href="https://doi.org/10.14278/rodare.3137"><img src="https://rodare.hzdr.de/badge/DOI/10.14278/rodare.3137.svg" alt="DOI"></a>

Image URL

https://rodare.hzdr.de/badge/DOI/10.14278/rodare.3137.svg

Target URL

https://doi.org/10.14278/rodare.3137

Versions

Share

Cite as

Export