{"@context":{"@vocab":"https://cir.nii.ac.jp/schema/1.0/","rdfs":"http://www.w3.org/2000/01/rdf-schema#","dc":"http://purl.org/dc/elements/1.1/","dcterms":"http://purl.org/dc/terms/","foaf":"http://xmlns.com/foaf/0.1/","prism":"http://prismstandard.org/namespaces/basic/2.0/","cinii":"http://ci.nii.ac.jp/ns/1.0/","datacite":"https://schema.datacite.org/meta/kernel-4/","ndl":"http://ndl.go.jp/dcndl/terms/","jpcoar":"https://github.com/JPCOAR/schema/blob/master/2.0/"},"@id":"https://cir.nii.ac.jp/crid/1363670320394938240.json","@type":"Article","productIdentifier":[{"identifier":{"@type":"DOI","@value":"10.1109/tasl.2010.2045183"}},{"identifier":{"@type":"URI","@value":"http://xplorestaging.ieee.org/ielx5/10376/5594819/05428853.pdf?arnumber=5428853"}}],"dc:title":[{"@value":"Blind Separation and Dereverberation of Speech Mixtures by Joint Optimization"}],"description":[{"notation":[{"@value":"This paper proposes a method for performing blind source separation (BSS) and blind dereverberation (BD) at the same time for speech mixtures. In most previous studies, BSS and BD have been investigated separately. The separation performance of conventional BSS methods deteriorates as the reverberation time increases while many existing BD methods rely on the assumption that there is only one sound source in a room. Therefore, it has been difficult to perform both BSS and BD when the reverberation time is long. The proposed method uses a network, in which dereverberation and separation networks are connected in tandem, to estimate source signals. The parameters for the dereverberation network (prediction matrices) and those for the separation network (separation matrices) are jointly optimized. This enables a BD process to take a BSS process into account. The prediction and separation matrices are alternately optimized with each depending on the other; hence, we call the proposed method the conditional separation and dereverberation (CSD) method. Comprehensive evaluation results are reported, where all the speech materials contained in the complete test set of the TIMIT corpus are used. The CSD method improves the signal-to-interference ratio by an average of about 4 dB over the conventional frequency-domain BSS approach for reverberation times of 0.3 and 0.5 s. The direct-to-reverberation ratio is also improved by about 10 dB."}]}],"creator":[{"@id":"https://cir.nii.ac.jp/crid/1383670320394938243","@type":"Researcher","foaf:name":[{"@value":"Takuya Yoshioka"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320394938240","@type":"Researcher","foaf:name":[{"@value":"Tomohiro Nakatani"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320394938241","@type":"Researcher","foaf:name":[{"@value":"Masato Miyoshi"}]},{"@id":"https://cir.nii.ac.jp/crid/1383670320394938242","@type":"Researcher","foaf:name":[{"@value":"Hiroshi G. Okuno"}]}],"publication":{"publicationIdentifier":[{"@type":"PISSN","@value":"15587916"},{"@type":"EISSN","@value":"15587924"}],"prism:publicationName":[{"@value":"IEEE Transactions on Audio, Speech, and Language Processing"}],"dc:publisher":[{"@value":"Institute of Electrical and Electronics Engineers (IEEE)"}],"prism:publicationDate":"2011-01","prism:volume":"19","prism:number":"1","prism:startingPage":"69","prism:endingPage":"84"},"reviewed":"false","dc:rights":["https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html"],"url":[{"@id":"http://xplorestaging.ieee.org/ielx5/10376/5594819/05428853.pdf?arnumber=5428853"}],"createdAt":"2010-03-12","modifiedAt":"2021-10-11","relatedProduct":[{"@id":"https://cir.nii.ac.jp/crid/1050282810784930304","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@language":"en","@value":"Nonparametric Bayesian Dereverberation of Power Spectrograms Based on Infinite-Order Autoregressive Processes"}]},{"@id":"https://cir.nii.ac.jp/crid/1360285710368551040","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"Bayesian Nonparametrics for Microphone Array Processing"}]},{"@id":"https://cir.nii.ac.jp/crid/1360285710368567552","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"Multichannel Sound Source Dereverberation and Separation for Arbitrary Number of Sources Based on Bayesian Nonparametrics"}]},{"@id":"https://cir.nii.ac.jp/crid/1360285711768712064","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"Single-channel dereverberation by feature mapping using cascade neural networks for robust distant speaker identification and speech recognition"}]},{"@id":"https://cir.nii.ac.jp/crid/1360298757172601984","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"Autoregressive Moving Average Jointly-Diagonalizable Spatial Covariance Analysis for Joint Source Separation and Dereverberation"}]},{"@id":"https://cir.nii.ac.jp/crid/1360567185157850240","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"Reverberation-robust underdetermined source separation with non-negative tensor double deconvolution"}]},{"@id":"https://cir.nii.ac.jp/crid/1361975846308410496","@type":"Article","resourceType":"学術雑誌論文(journal article)","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@value":"Supervised Determined Source Separation with Multichannel Variational Autoencoder"}]},{"@id":"https://cir.nii.ac.jp/crid/1390282681287497600","@type":"Article","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@language":"en","@value":"Bayesian Nonparametric Approach to Blind Separation of Infinitely Many Sparse Sources"}]},{"@id":"https://cir.nii.ac.jp/crid/1390295658299596928","@type":"Article","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@language":"en","@value":"Fundamentals and Trends on Sound Source Separation : Overview of Approaches with Probabilistic Model and Deep Learning"},{"@language":"ja","@value":"音源分離技術の基礎と動向"}]},{"@id":"https://cir.nii.ac.jp/crid/1390866647407670016","@type":"Article","relationType":["isReferencedBy"],"jpcoar:relatedTitle":[{"@language":"ja","@value":"混ざった声を聞き分ける最新技術：音源分離と目的音声抽出"},{"@language":"en","@value":"Listening to Speech in Mixture: Advances in Source Separation and Target Speech Extraction"}]}],"dataSourceIdentifier":[{"@type":"CROSSREF","@value":"10.1109/tasl.2010.2045183"},{"@type":"OPENAIRE","@value":"doi_dedup___::42818eeca0d1c78aebc144319f3c09ef"},{"@type":"CROSSREF","@value":"10.1109/taslp.2014.2363790_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1109/taslp.2014.2355772_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1186/1687-4722-2014-13_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1587/essfr.16.4_257_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1109/taslp.2022.3190734_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1109/taslp.2013.2294582_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1109/eusipco.2016.7760528_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1587/essfr.18.4_267_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1587/transfun.e96.a.1928_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"},{"@type":"CROSSREF","@value":"10.1162/neco_a_01217_references_DOI_8qw1vG5m0NCOSlDu2oWX0jllmo4"}]}