{"@context":{"@vocab":"https://cir.nii.ac.jp/schema/1.0/","rdfs":"http://www.w3.org/2000/01/rdf-schema#","dc":"http://purl.org/dc/elements/1.1/","dcterms":"http://purl.org/dc/terms/","foaf":"http://xmlns.com/foaf/0.1/","prism":"http://prismstandard.org/namespaces/basic/2.0/","cinii":"http://ci.nii.ac.jp/ns/1.0/","datacite":"https://schema.datacite.org/meta/kernel-4/","ndl":"http://ndl.go.jp/dcndl/terms/","jpcoar":"https://github.com/JPCOAR/schema/blob/master/2.0/"},"@id":"https://cir.nii.ac.jp/crid/1360021391855437824.json","@type":"Article","productIdentifier":[{"identifier":{"@type":"DOI","@value":"10.1007/978-3-031-32041-5_14"}},{"identifier":{"@type":"URI","@value":"https://link.springer.com/content/pdf/10.1007/978-3-031-32041-5_14"}},{"identifier":{"@type":"DOI","@value":"10.48550/arxiv.2303.08989"}}],"resourceType":"学術雑誌論文(journal article)","dc:title":[{"@value":"Quantum Circuit Simulation by SGEMM Emulation on Tensor Cores and Automatic Precision Selection"}],"description":[{"notation":[{"@value":"Quantum circuit simulation provides the foundation for the development of quantum algorithms and the verification of quantum supremacy. Among the various methods for quantum circuit simulation, tensor network contraction has been increasing in popularity due to its ability to simulate a larger number of qubits. During tensor contraction, the input tensors are reshaped to matrices and computed by a GEMM operation, where these GEMM operations could reach up to 90\\% of the total calculation time. GEMM throughput can be improved by utilizing mixed-precision hardware such as Tensor Cores, but straightforward implementation results in insufficient fidelity for deep and large quantum circuits. Prior work has demonstrated that compensated summation with special care of the rounding mode can fully recover the FP32 precision of SGEMM even when using TF32 or FP16 Tensor Cores. The exponent range is a critical issue when applying such techniques to quantum circuit simulation. While TF32 supports almost the same exponent range as FP32, FP16 supports a much smaller exponent range. In this work, we use the exponent range statistics of input tensor elements to select which Tensor Cores we use for the GEMM. We evaluate our method on Random Circuit Sampling (RCS), including Sycamore's quantum circuit, and show that the throughput is 1.86 times higher at maximum while maintaining accuracy."}]},{"notation":[{"@value":"This paper has been accepted to ISC'23"}]}],"creator":[{"@id":"https://cir.nii.ac.jp/crid/1380021391855437825","@type":"Researcher","foaf:name":[{"@value":"Hidetaka Manabe"}]},{"@id":"https://cir.nii.ac.jp/crid/1420564276178201216","@type":"Researcher","personIdentifier":[{"@type":"KAKEN_RESEARCHERS","@value":"80303882"},{"@type":"NRID","@value":"1000080303882"},{"@type":"ORCID","@value":"0000-0003-0231-7880"},{"@type":"NRID","@value":"9000003195585"},{"@type":"NRID","@value":"9000024224884"},{"@type":"NRID","@value":"9000297476993"},{"@type":"NRID","@value":"9000411073166"},{"@type":"NRID","@value":"9000024286264"},{"@type":"NRID","@value":"9000404335597"},{"@type":"NRID","@value":"9000309580026"},{"@type":"NRID","@value":"9000309580033"},{"@type":"NRID","@value":"9000309580042"},{"@type":"NRID","@value":"9000401878622"},{"@type":"NRID","@value":"9000024211441"},{"@type":"NRID","@value":"9000401879560"},{"@type":"NRID","@value":"9000414949203"},{"@type":"NRID","@value":"9000409596105"},{"@type":"NRID","@value":"9000346924789"},{"@type":"NRID","@value":"9000046156640"},{"@type":"NRID","@value":"9000309580029"},{"@type":"NRID","@value":"9000401901174"},{"@type":"NRID","@value":"9000309580049"},{"@type":"NRID","@value":"9000401946349"},{"@type":"NRID","@value":"9000309580045"},{"@type":"NRID","@value":"9000411542818"},{"@type":"NRID","@value":"9000297499687"},{"@type":"NRID","@value":"9000309580034"},{"@type":"NRID","@value":"9000309580059"},{"@type":"NRID","@value":"9000309580068"},{"@type":"NRID","@value":"9000415062141"},{"@type":"NRID","@value":"9000258191789"},{"@type":"NRID","@value":"9000364774796"},{"@type":"NRID","@value":"9000309580053"},{"@type":"NRID","@value":"9000309580051"},{"@type":"NRID","@value":"9000401946858"},{"@type":"NRID","@value":"9000401885729"},{"@type":"NRID","@value":"9000401892103"},{"@type":"NRID","@value":"9000045667510"},{"@type":"NRID","@value":"9000401898691"},{"@type":"NRID","@value":"9000309580065"},{"@type":"NRID","@value":"9000309580056"},{"@type":"NRID","@value":"9000345325241"},{"@type":"NRID","@value":"9000258191231"},{"@type":"NRID","@value":"9000309580073"},{"@type":"NRID","@value":"9000390431325"},{"@type":"RESEARCHMAP","@value":"https://researchmap.jp/Kenji_Harada"}],"foaf:name":[{"@value":"Kenji Harada"}]},{"@id":"https://cir.nii.ac.jp/crid/1380021391855437827","@type":"Researcher","foaf:name":[{"@value":"Rio Yokota"}]},{"@id":"https://cir.nii.ac.jp/crid/1380021391855437824","@type":"Researcher","foaf:name":[{"@value":"Hiryuki Ootomo"}]}],"publication":{"publicationIdentifier":[{"@type":"PISSN","@value":"03029743"},{"@type":"EISSN","@value":"16113349"},{"@type":"ISBN","@value":"9783031320408"},{"@type":"ISBN","@value":"9783031320415"}],"prism:publicationName":[{"@value":"Lecture Notes in Computer Science"}],"dc:publisher":[{"@value":"Springer Nature Switzerland"}],"prism:publicationDate":"2023","prism:startingPage":"259","prism:endingPage":"276"},"reviewed":"false","dc:rights":["https://www.springernature.com/gp/researchers/text-and-data-mining","https://www.springernature.com/gp/researchers/text-and-data-mining"],"url":[{"@id":"https://link.springer.com/content/pdf/10.1007/978-3-031-32041-5_14"}],"createdAt":"2023-05-10","modifiedAt":"2023-05-10","foaf:topic":[{"@id":"https://cir.nii.ac.jp/all?q=FOS:%20Computer%20and%20information%20sciences","dc:title":"FOS: Computer and information sciences"},{"@id":"https://cir.nii.ac.jp/all?q=Quantum%20Physics","dc:title":"Quantum Physics"},{"@id":"https://cir.nii.ac.jp/all?q=Computer%20Science%20-%20Distributed,%20Parallel,%20and%20Cluster%20Computing","dc:title":"Computer Science - Distributed, Parallel, and Cluster Computing"},{"@id":"https://cir.nii.ac.jp/all?q=FOS:%20Physical%20sciences","dc:title":"FOS: Physical sciences"},{"@id":"https://cir.nii.ac.jp/all?q=Distributed,%20Parallel,%20and%20Cluster%20Computing%20(cs.DC)","dc:title":"Distributed, Parallel, and Cluster Computing (cs.DC)"},{"@id":"https://cir.nii.ac.jp/all?q=Quantum%20Physics%20(quant-ph)","dc:title":"Quantum Physics (quant-ph)"}],"project":[{"@id":"https://cir.nii.ac.jp/crid/1040566775676123264","@type":"Project","projectIdentifier":[{"@type":"KAKEN","@value":"20K03766"},{"@type":"JGN","@value":"JP20K03766"},{"@type":"URI","@value":"https://kaken.nii.ac.jp/grant/KAKENHI-PROJECT-20K03766/"}],"notation":[{"@language":"ja","@value":"多体問題におけるエンタングルメント構造の最適化とその応用"},{"@language":"en","@value":"Optimization of entanglement structure in many-body problems and its applications"}]}],"relatedProduct":[{"@id":"https://cir.nii.ac.jp/crid/1360017290239657216","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"quimb: A python package for quantum information and many-body calculations"}]},{"@id":"https://cir.nii.ac.jp/crid/1360021393305086592","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"opt\\_einsum - A Python package for optimizing contraction order for einsum-like expressions"}]},{"@id":"https://cir.nii.ac.jp/crid/1360021396524311936","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Solving the Sampling Problem of the Sycamore Quantum Circuits"}]},{"@id":"https://cir.nii.ac.jp/crid/1360021396524317184","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Fast Search of the Optimal Contraction Sequence in Tensor Networks"}]},{"@id":"https://cir.nii.ac.jp/crid/1360021396524466176","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"High-Quality Hypergraph Partitioning"}]},{"@id":"https://cir.nii.ac.jp/crid/1360292620490978816","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Characterizing quantum supremacy in near-term devices"}]},{"@id":"https://cir.nii.ac.jp/crid/1360302867629293568","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"On Optimizing a Class of Multi-Dimensional Loops with Reduction for  Parallel Execution"}]},{"@id":"https://cir.nii.ac.jp/crid/1360302867629302528","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Tensor Network Quantum Virtual Machine for Simulating Quantum Circuits at Exascale"}]},{"@id":"https://cir.nii.ac.jp/crid/1360302871320585600","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Efficient parallelization of tensor network contraction for simulating quantum computation"}]},{"@id":"https://cir.nii.ac.jp/crid/1360302871320770304","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Simulation of Quantum Circuits Using the Big-Batch Tensor Network Method"}]},{"@id":"https://cir.nii.ac.jp/crid/1360576122734011136","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Qulacs: a fast and versatile quantum circuit simulator for research purpose"}]},{"@id":"https://cir.nii.ac.jp/crid/1360576216237266688","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Closing the \"quantum supremacy\" gap"}]},{"@id":"https://cir.nii.ac.jp/crid/1360580235950895360","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Hyper-optimized tensor network contraction"}]},{"@id":"https://cir.nii.ac.jp/crid/1360584345434635904","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Reducing shared memory footprint to leverage high throughput on Tensor Cores and its flexible API extension library"}]},{"@id":"https://cir.nii.ac.jp/crid/1360861291444151296","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"QuEST and High Performance Simulation of Quantum Computers"}]},{"@id":"https://cir.nii.ac.jp/crid/1360861293104791936","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Intel Quantum Simulator: a cloud-ready high-performance simulator of quantum circuits"}]},{"@id":"https://cir.nii.ac.jp/crid/1360861705599165312","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Recovering single precision accuracy from Tensor Cores while surpassing the FP32 theoretical peak performance"}]},{"@id":"https://cir.nii.ac.jp/crid/1361418521202607232","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Establishing the quantum supremacy frontier with a 281 Pflop/s simulation"}]},{"@id":"https://cir.nii.ac.jp/crid/1361699994807128320","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"NVIDIA Tensor Core Programmability, Performance & Precision"}]},{"@id":"https://cir.nii.ac.jp/crid/1362544420218650240","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Simulating Quantum Computation by Contracting Tensor Networks"}]},{"@id":"https://cir.nii.ac.jp/crid/1362825893553835136","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Computational Complexity of Projected Entangled Pair States"}]},{"@id":"https://cir.nii.ac.jp/crid/1363107371033589376","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Quantum supremacy using a programmable superconducting processor"}]},{"@id":"https://cir.nii.ac.jp/crid/1363670319134515712","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"64-qubit quantum circuit simulation"}]}],"dataSourceIdentifier":[{"@type":"CROSSREF","@value":"10.1007/978-3-031-32041-5_14"},{"@type":"KAKEN","@value":"PRODUCT-24997770"},{"@type":"OPENAIRE","@value":"doi_dedup___::e5e109e20c3c6a007fbc4332cfcfff50"}]}