{"@context":{"@vocab":"https://cir.nii.ac.jp/schema/1.0/","rdfs":"http://www.w3.org/2000/01/rdf-schema#","dc":"http://purl.org/dc/elements/1.1/","dcterms":"http://purl.org/dc/terms/","foaf":"http://xmlns.com/foaf/0.1/","prism":"http://prismstandard.org/namespaces/basic/2.0/","cinii":"http://ci.nii.ac.jp/ns/1.0/","datacite":"https://schema.datacite.org/meta/kernel-4/","ndl":"http://ndl.go.jp/dcndl/terms/","jpcoar":"https://github.com/JPCOAR/schema/blob/master/2.0/"},"@id":"https://cir.nii.ac.jp/crid/1360580230600084480.json","@type":"Article","productIdentifier":[{"identifier":{"@type":"DOI","@value":"10.1016/j.rico.2022.100192"}},{"identifier":{"@type":"URI","@value":"https://api.elsevier.com/content/article/PII:S2666720722000649?httpAccept=text/xml"}},{"identifier":{"@type":"URI","@value":"https://api.elsevier.com/content/article/PII:S2666720722000649?httpAccept=text/plain"}}],"resourceType":"学術雑誌論文(journal article)","dc:title":[{"@value":"Proximal policy optimization with adaptive threshold for symmetric relative density ratio"}],"creator":[{"@id":"https://cir.nii.ac.jp/crid/1420001326228879872","@type":"Researcher","personIdentifier":[{"@type":"KAKEN_RESEARCHERS","@value":"10796452"},{"@type":"NRID","@value":"1000010796452"},{"@type":"NRID","@value":"9000299561684"},{"@type":"NRID","@value":"9000412372852"},{"@type":"NRID","@value":"9000331421946"},{"@type":"NRID","@value":"9000414369522"},{"@type":"NRID","@value":"9000405878919"},{"@type":"NRID","@value":"9000291596511"},{"@type":"NRID","@value":"9000308050815"},{"@type":"NRID","@value":"9000331421965"},{"@type":"NRID","@value":"9000248231751"},{"@type":"NRID","@value":"9000375892659"},{"@type":"NRID","@value":"9000405879592"},{"@type":"NRID","@value":"9000392525900"},{"@type":"NRID","@value":"9000309159272"},{"@type":"NRID","@value":"9000404786379"},{"@type":"NRID","@value":"9000399363389"},{"@type":"NRID","@value":"9000390922546"},{"@type":"NRID","@value":"9000411116288"},{"@type":"NRID","@value":"9000399364722"},{"@type":"RESEARCHMAP","@value":"https://researchmap.jp/kbys-t"}],"foaf:name":[{"@value":"Taisuke Kobayashi"}]}],"publication":{"publicationIdentifier":[{"@type":"PISSN","@value":"26667207"}],"prism:publicationName":[{"@value":"Results in Control and Optimization"}],"dc:publisher":[{"@value":"Elsevier BV"}],"prism:publicationDate":"2023-03","prism:volume":"10","prism:startingPage":"100192"},"reviewed":"false","dc:rights":["https://www.elsevier.com/tdm/userlicense/1.0/","https://www.elsevier.com/legal/tdmrep-license","http://creativecommons.org/licenses/by/4.0/"],"url":[{"@id":"https://api.elsevier.com/content/article/PII:S2666720722000649?httpAccept=text/xml"},{"@id":"https://api.elsevier.com/content/article/PII:S2666720722000649?httpAccept=text/plain"}],"createdAt":"2022-12-13","modifiedAt":"2025-11-13","project":[{"@id":"https://cir.nii.ac.jp/crid/1040285300696560512","@type":"Project","projectIdentifier":[{"@type":"KAKEN","@value":"20H04265"},{"@type":"JGN","@value":"JP20H04265"},{"@type":"URI","@value":"https://kaken.nii.ac.jp/grant/KAKENHI-PROJECT-20H04265/"}],"notation":[{"@language":"ja","@value":"ヒトとの物理的接触モデルを紐解く深層学習の開発と安全なロボット制御への応用"},{"@language":"en","@value":"Development of deep learning to reveal physical human-robot interaction and its application to safe robot control"}]}],"relatedProduct":[{"@id":"https://cir.nii.ac.jp/crid/1050282677439916928","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@language":"en","@value":"Sigmoid-weighted linear units for neural network function approximation in reinforcement learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1360002218735883904","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Relative Density-Ratio Estimation for Robust Distribution Comparison"}]},{"@id":"https://cir.nii.ac.jp/crid/1360004240290554496","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Direct Divergence Approximation between Probability Distributions and Its Applications in Machine Learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1360009142928678016","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"t-soft update of target network for deep reinforcement learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1360017288460037760","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Soft Policy Gradient Method for Maximum Entropy Deep Reinforcement Learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1360292618847870720","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"On Divergences and Informations in Statistics and Information Theory"}]},{"@id":"https://cir.nii.ac.jp/crid/1360298761841474688","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"A Functional Clipping Approach for Policy Optimization Algorithms"}]},{"@id":"https://cir.nii.ac.jp/crid/1360572092372443904","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Student-t policy in reinforcement learning to acquire global optimum of robot control"}]},{"@id":"https://cir.nii.ac.jp/crid/1360580232396109184","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Adaptive and multiple time-scale eligibility traces for online deep reinforcement learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1362262945048936960","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Self-Improving Reactive Agents Based on Reinforcement Learning, Planning and Teaching"}]},{"@id":"https://cir.nii.ac.jp/crid/1362262945808266752","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Mastering the game of Go with deep neural networks and tree search"}]},{"@id":"https://cir.nii.ac.jp/crid/1363388843215756416","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Human-level control through deep reinforcement learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1363951794115240960","@type":"Article","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Deep learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084483","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Revisiting fundamentals of experience replay"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084485","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Guided exploration with proximal policy optimization using a single demonstration"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084486","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084487","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Robust stochastic gradient descent with student-t distribution based first-order momentum"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084490","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Towards deep robot learning with optimizer applicable to non-stationary problems"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084492","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Pybullet, a python module for physics simulation for games, robotics and machine learning"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084493","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Proximal policy optimization with relative pearson divergence"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084497","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"TD-regularized actor-critic methods"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084498","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"PPO-CMA: Proximal policy optimization with covariance matrix adaptation"}]},{"@id":"https://cir.nii.ac.jp/crid/1370580230600084499","@type":"Product","relationType":["references"],"jpcoar:relatedTitle":[{"@value":"Truly proximal policy optimization"}]}],"dataSourceIdentifier":[{"@type":"CROSSREF","@value":"10.1016/j.rico.2022.100192"},{"@type":"KAKEN","@value":"PRODUCT-24435993"}]}