{"@context":{"@vocab":"https://cir.nii.ac.jp/schema/1.0/","rdfs":"http://www.w3.org/2000/01/rdf-schema#","dc":"http://purl.org/dc/elements/1.1/","dcterms":"http://purl.org/dc/terms/","foaf":"http://xmlns.com/foaf/0.1/","prism":"http://prismstandard.org/namespaces/basic/2.0/","cinii":"http://ci.nii.ac.jp/ns/1.0/","datacite":"https://schema.datacite.org/meta/kernel-4/","ndl":"http://ndl.go.jp/dcndl/terms/","jpcoar":"https://github.com/JPCOAR/schema/blob/master/2.0/"},"@id":"https://cir.nii.ac.jp/crid/1360004236279750144.json","@type":"Article","productIdentifier":[{"identifier":{"@type":"DOI","@value":"10.1145/3095770.3095773"}},{"identifier":{"@type":"URI","@value":"https://dl.acm.org/doi/10.1145/3095770.3095773"}},{"identifier":{"@type":"URI","@value":"https://dl.acm.org/doi/pdf/10.1145/3095770.3095773"}}],"resourceType":"学術雑誌論文(journal article)","dc:title":[{"@value":"Quantitative Evaluation of Intel PEBS Overhead for Online System-Noise Analysis"}],"description":[{"notation":[{"@value":"Analyzing system-noise incurred to high-throughput systems (e.g., Spark, RDBMS) from the underlying machines must be in the granularity of the message- or request-level to find the root causes of performance anomalies, because messages are passed through many components in very short periods. To this end, we consider using Precise Event Based Sampling (PEBS) equipped in Intel CPUs at higher sampling rates than used normally is promising. It saves context information (e.g., the general purpose registers) at occurrences of various hardware events such as cache misses. The information can be used to associate performance anomalies caused by system noise with specific messages. One challenge is that quantitative analysis of PEBS overhead with high sampling rates has not yet been studied. This is critical because high sampling rates can cause severe overhead but performance problems are often reproducible only in real environments. In this paper, we evaluate the overhead of PEBS and show: (1) every time PEBS saves context information, the target workload slows down by 200-300 ns due to the CPU overhead of PEBS, (2) the CPU overhead can be used to predict actual overhead incurred with complex workloads including multi-threaded ones with high accuracy, and (3) PEBS incurs cache pollution and extra memory IO since PEBS writes data into the CPU cache, and the severity of cache pollution is affected both by the sampling rate and the buffer size allocated for PEBS. To the best of our knowledge, we are the first to quantitatively analyze the overhead of PEBS."}]}],"creator":[{"@id":"https://cir.nii.ac.jp/crid/1380004236279750144","@type":"Researcher","foaf:name":[{"@value":"Soramichi Akiyama"}],"jpcoar:affiliationName":[{"@value":"Artificial Intelligence Research Center, National Institute of Advanced Industrial Science and Technology (AIST), Japan"}]},{"@id":"https://cir.nii.ac.jp/crid/1420845751137518720","@type":"Researcher","personIdentifier":[{"@type":"KAKEN_RESEARCHERS","@value":"20462864"},{"@type":"NRID","@value":"1000020462864"},{"@type":"NRID","@value":"9000006723646"},{"@type":"NRID","@value":"9000024981802"},{"@type":"NRID","@value":"9000408916452"},{"@type":"NRID","@value":"9000004519580"},{"@type":"NRID","@value":"9000405642960"},{"@type":"NRID","@value":"9000020674007"},{"@type":"NRID","@value":"9000287169764"},{"@type":"NRID","@value":"9000404294126"},{"@type":"NRID","@value":"9000019956036"},{"@type":"NRID","@value":"9000242085692"},{"@type":"NRID","@value":"9000414516958"},{"@type":"NRID","@value":"9000287155762"},{"@type":"NRID","@value":"9000004380054"},{"@type":"NRID","@value":"9000021609842"},{"@type":"NRID","@value":"9000345196588"},{"@type":"NRID","@value":"9000404294170"},{"@type":"NRID","@value":"9000242085695"},{"@type":"NRID","@value":"9000311504919"},{"@type":"RESEARCHMAP","@value":"https://researchmap.jp/t.hirofuchi"}],"foaf:name":[{"@value":"Takahiro Hirofuchi"}],"jpcoar:affiliationName":[{"@value":"Information Technology Research Institute, National Institute of Advanced Industrial Science and Technology (AIST), Japan"}]}],"publication":{"prism:publicationName":[{"@value":"Proceedings of the 7th International Workshop on Runtime and Operating Systems for Supercomputers ROSS 2017"}],"dc:publisher":[{"@value":"ACM"}],"prism:publicationDate":"2017-06-27","prism:startingPage":"1","prism:endingPage":"8"},"reviewed":"false","dc:rights":["https://www.acm.org/publications/policies/copyright_policy#Background"],"url":[{"@id":"https://dl.acm.org/doi/10.1145/3095770.3095773"},{"@id":"https://dl.acm.org/doi/pdf/10.1145/3095770.3095773"}],"createdAt":"2017-06-26","modifiedAt":"2025-06-18","project":[{"@id":"https://cir.nii.ac.jp/crid/1040282256882890240","@type":"Project","projectIdentifier":[{"@type":"KAKEN","@value":"16K00115"},{"@type":"JGN","@value":"JP16K00115"},{"@type":"URI","@value":"https://kaken.nii.ac.jp/grant/KAKENHI-PROJECT-16K00115/"}],"notation":[{"@language":"ja","@value":"次世代メモリのソフトウェア・エミュレーション技術の研究"},{"@language":"en","@value":"A study on software-based emulation of next-generation memory devices"}]}],"relatedProduct":[{"@id":"https://cir.nii.ac.jp/crid/1361981469407357312","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"DEMU: A DPDK-based network latency emulator"}]},{"@id":"https://cir.nii.ac.jp/crid/1362262943961745664","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"A proper performance evaluation system that summarizes code placement effects"}]},{"@id":"https://cir.nii.ac.jp/crid/1362544420260281088","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Cuanta"}]},{"@id":"https://cir.nii.ac.jp/crid/1362825895881286784","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Predicting cache needs and cache sensitivity for applications in cloud computing on CMP servers with configurable caches"}]},{"@id":"https://cir.nii.ac.jp/crid/1363670318679886336","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Non-determinism and overcount on modern hardware performance counter implementations"}]},{"@id":"https://cir.nii.ac.jp/crid/1363951796093119104","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Low-Overhead Detection of Memory Access Patterns and Their Time Evolution"}]},{"@id":"https://cir.nii.ac.jp/crid/1364233270461778688","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Accuracy of performance counter measurements"}]}],"dataSourceIdentifier":[{"@type":"CROSSREF","@value":"10.1145/3095770.3095773"},{"@type":"KAKEN","@value":"PRODUCT-21392327"},{"@type":"OPENAIRE","@value":"doi_dedup___::90716515815b43bc38d4f4a5b8bfbfc2"}]}