Dataset Open Access

proteinNet3D

Li, Rui; Yushkevich, Artsemi; Kudryashev, Misha; Yakimovich, Artur


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<!-- DCAT metadata (RDF/XML) describing the dataset record "proteinNet3D". -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:adms="http://www.w3.org/ns/adms#"
         xmlns:cnt="http://www.w3.org/2011/content#"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:dct="http://purl.org/dc/terms/"
         xmlns:dctype="http://purl.org/dc/dcmitype/"
         xmlns:dcat="http://www.w3.org/ns/dcat#"
         xmlns:duv="http://www.w3.org/ns/duv#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:frapo="http://purl.org/cerif/frapo/"
         xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         xmlns:gsp="http://www.opengis.net/ont/geosparql#"
         xmlns:locn="http://www.w3.org/ns/locn#"
         xmlns:org="http://www.w3.org/ns/org#"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:prov="http://www.w3.org/ns/prov#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns:schema="http://schema.org/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
         xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <!-- The dataset resource; its subject URI is the DOI. -->
  <rdf:Description rdf:about="https://doi.org/10.14278/rodare.4516">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.14278/rodare.4516</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.14278/rodare.4516"/>

    <!-- Creators: one foaf:Agent per author, identified by an ORCID URI,
         each with a nested (blank-node) organisation as affiliation. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-3085-5267">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Li, Rui</foaf:name>
        <foaf:givenName>Rui</foaf:givenName>
        <foaf:familyName>Li</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Center for Advanced Systems Understanding</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-8729-9281">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Yushkevich, Artsemi</foaf:name>
        <foaf:givenName>Artsemi</foaf:givenName>
        <foaf:familyName>Yushkevich</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Max Delbrück Center for Molecular Medicine</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-3550-6274">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Kudryashev, Misha</foaf:name>
        <foaf:givenName>Misha</foaf:givenName>
        <foaf:familyName>Kudryashev</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Max Delbrück Center for Molecular Medicine</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-2458-4904">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <foaf:name>Yakimovich, Artur</foaf:name>
        <foaf:givenName>Artur</foaf:givenName>
        <foaf:familyName>Yakimovich</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Center for Advanced Systems Understanding</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>

    <dct:title>proteinNet3D</dct:title>
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Rodare</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Issue date is stated at two granularities: year here (xsd:gYear),
         full calendar date after the keywords (xsd:date). -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2026</dct:issued>
    <dcat:keyword>cryo-EM</dcat:keyword>
    <dcat:keyword>deep learning</dcat:keyword>
    <dcat:keyword>proteins</dcat:keyword>
    <dcat:keyword>EMDB</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2026-02-18</dct:issued>

    <!-- Alternate identifiers and related resources: equivalent URLs for this
         record, the resource it is a version of, and the Rodare communities
         it is part of. -->
    <owl:sameAs rdf:resource="https://rodare.hzdr.de/record/4516"/>
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://rodare.hzdr.de/record/4516</skos:notation>
      </adms:Identifier>
    </adms:identifier>
    <owl:sameAs rdf:resource="https://www.hzdr.de/publications/Publ-43018"/>
    <dct:isVersionOf rdf:resource="https://doi.org/10.14278/rodare.4515"/>
    <dct:isPartOf rdf:resource="https://rodare.hzdr.de/communities/health"/>
    <dct:isPartOf rdf:resource="https://rodare.hzdr.de/communities/rodare"/>

    <!-- The description is an HTML fragment carried as escaped text. The en dash
         and superscript three are written as literal UTF-8 characters instead of
         HTML named entities, so the value reads correctly whether a consumer
         treats it as HTML or as plain text. -->
    <dct:description>&lt;p&gt;ProteinNet3D is a curated large-scale dataset of 3D macromolecular density volumes designed to support representation learning and benchmarking in structural biology. The dataset is derived from the publicly available Electron Microscopy Data Bank (EMDB), a comprehensive repository of experimentally determined cryo-electron microscopy (cryo-EM) maps spanning diverse macromolecules, molecular assemblies, and subcellular structures.&lt;/p&gt; &lt;p&gt;ProteinNet3D focuses specifically on individual macromolecules resolved by single-particle analysis (SPA) or subtomogram averaging (STA), ensuring methodological consistency across samples. To emphasize biologically meaningful structures while avoiding extreme cases, entries were restricted to a molecular weight range of 100–1500 kDa. This criterion excludes small domains and excessively large complexes, resulting in a dataset well-suited for learning size-robust structural representations.&lt;/p&gt; &lt;p&gt;All volumes are standardized through isotropic resampling, spatial normalization to a fixed grid (64³ voxels), and intensity normalization to zero mean and unit variance. Background regions are masked using annotated contour levels to reduce noise contributions. To enhance diversity and rotational invariance, each structure is augmented with multiple random 3D rotations.&lt;/p&gt; &lt;p&gt;Overall, ProteinNet3D comprises 26,110 processed samples and captures substantial structural heterogeneity, experimental variability, and realistic noise characteristics, making it a rigorous benchmark for 3D deep learning in cryo-EM.&lt;/p&gt;</dct:description>

    <!-- Access rights are given twice: as an EU Publications Office authority
         URI and as an info:eu-repo/semantics term with a readable label. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>

    <!-- Single distribution: CC BY 4.0 rights statement, reachable through the DOI. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:rights>
          <dct:RightsStatement rdf:about="https://creativecommons.org/licenses/by/4.0/legalcode">
            <rdfs:label>Creative Commons Attribution 4.0 International</rdfs:label>
          </dct:RightsStatement>
        </dct:rights>
        <dcat:accessURL rdf:resource="https://doi.org/10.14278/rodare.4516"/>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
40
8
views
downloads
All versions This version
Views 40 40
Downloads 8 8
Data volume 73.0 GB 73.0 GB
Unique views 35 35
Unique downloads 7 7

Share

Cite as