{"@context":{"@vocab":"https://cir.nii.ac.jp/schema/1.0/","rdfs":"http://www.w3.org/2000/01/rdf-schema#","dc":"http://purl.org/dc/elements/1.1/","dcterms":"http://purl.org/dc/terms/","foaf":"http://xmlns.com/foaf/0.1/","prism":"http://prismstandard.org/namespaces/basic/2.0/","cinii":"http://ci.nii.ac.jp/ns/1.0/","datacite":"https://schema.datacite.org/meta/kernel-4/","ndl":"http://ndl.go.jp/dcndl/terms/","jpcoar":"https://github.com/JPCOAR/schema/blob/master/2.0/"},"@id":"https://cir.nii.ac.jp/crid/1363670320706445312.json","@type":"Article","productIdentifier":[{"identifier":{"@type":"DOI","@value":"10.1145/2929908.2929911"}},{"identifier":{"@type":"URI","@value":"https://dl.acm.org/doi/10.1145/2929908.2929911"}},{"identifier":{"@type":"URI","@value":"https://dl.acm.org/doi/pdf/10.1145/2929908.2929911"}}],"dc:title":[{"@value":"Performance Analysis and Optimization of Nonhydrostatic ICosahedral Atmospheric Model (NICAM) on the K Computer and TSUBAME2.5"}],"description":[{"notation":[{"@value":"We summarize the optimization and performance evaluation of the Nonhydrostatic ICosahedral Atmospheric Model (NICAM) on two different types of supercomputers: the K computer and TSUBAME2.5. First, we evaluated and improved several kernels extracted from the model code on the K computer. We did not significantly change the loop and data ordering for sufficient usage of the features of the K computer, such as the hardware-aided thread barrier mechanism and the relatively high bandwidth of the memory, i.e., a 0.5 Byte/FLOP ratio. Loop optimizations and code cleaning for a reduction in memory transfer contributed to a speed-up of the model execution time. The sustained performance ratio of the main loop of the NICAM reached 0.87 PFLOPS with 81,920 nodes on the K computer. For GPU-based calculations, we applied OpenACC to the dynamical core of NICAM. The performance and scalability were evaluated using the TSUBAME2.5 supercomputer. We achieved good performance results, which showed efficient use of the memory throughput performance of the GPU as well as good weak scalability. A dry dynamical core experiment was carried out using 2560 GPUs, which achieved 60 TFLOPS of sustained performance."}]}],"creator":[{"@id":"https://cir.nii.ac.jp/crid/1383670320706445314","@type":"Researcher","foaf:name":[{"@value":"Hisashi Yashiro"}],"jpcoar:affiliationName":[{"@value":"RIKEN Advanced Institute for Computational Science, Kobe, Japan"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320706445317","@type":"Researcher","foaf:name":[{"@value":"Masaaki Terai"}],"jpcoar:affiliationName":[{"@value":"RIKEN Advanced Institute for Computational Science, Kobe, Japan"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320706445312","@type":"Researcher","foaf:name":[{"@value":"Ryuji Yoshida"}],"jpcoar:affiliationName":[{"@value":"RIKEN Advanced Institute for Computational Science, Kobe, Japan"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320706445315","@type":"Researcher","foaf:name":[{"@value":"Shin-ichi Iga"}],"jpcoar:affiliationName":[{"@value":"RIKEN Advanced Institute for Computational Science, Kobe, Japan"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320706445313","@type":"Researcher","foaf:name":[{"@value":"Kazuo Minami"}],"jpcoar:affiliationName":[{"@value":"RIKEN Advanced Institute for Computational Science, Kobe, Japan"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320706445316","@type":"Researcher","foaf:name":[{"@value":"Hirofumi Tomita"}],"jpcoar:affiliationName":[{"@value":"RIKEN Advanced Institute for Computational Science, Kobe, Japan"}]}],"publication":{"prism:publicationName":[{"@value":"Proceedings of the Platform for Advanced Scientific Computing Conference"}],"dc:publisher":[{"@value":"ACM"}],"prism:publicationDate":"2016-06-08","prism:startingPage":"1","prism:endingPage":"8"},"reviewed":"false","dcterms:accessRights":"http://purl.org/coar/access_right/c_abf2","dc:rights":["https://www.acm.org/publications/policies/copyright_policy#Background"],"url":[{"@id":"https://dl.acm.org/doi/10.1145/2929908.2929911"},{"@id":"https://dl.acm.org/doi/pdf/10.1145/2929908.2929911"}],"createdAt":"2016-06-02","modifiedAt":"2025-06-18","relatedProduct":[{"@id":"https://cir.nii.ac.jp/crid/1050012570393522816","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@language":"en","@value":"Outcomes and challenges of global high-resolution non-hydrostatic atmospheric simulations using the K computer"}]},{"@id":"https://cir.nii.ac.jp/crid/1360009142459319936","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"The Nonhydrostatic ICosahedral Atmospheric Model for CMIP6 HighResMIP simulations (NICAM16-S): experimental design, model description, and impacts of model updates"}]}],"dataSourceIdentifier":[{"@type":"CROSSREF","@value":"10.1145/2929908.2929911"},{"@type":"OPENAIRE","@value":"doi_dedup___::b8c507cbf37a2ef594435f04de914f75"},{"@type":"CROSSREF","@value":"10.5194/gmd-14-795-2021_references_DOI_JgKkMxLvsk57Ul4TlnOUtkQV3dt"},{"@type":"CROSSREF","@value":"10.1186/s40645-017-0127-8_references_DOI_JgKkMxLvsk57Ul4TlnOUtkQV3dt"}]}