{"created":"2023-05-15T16:37:25.192636+00:00","id":10791,"links":{},"metadata":{"_buckets":{"deposit":"259a482e-70c3-4ae6-8547-3748b744e8eb"},"_deposit":{"created_by":2,"id":"10791","owners":[2],"pid":{"revision_id":0,"type":"depid","value":"10791"},"status":"published"},"_oai":{"id":"oai:nagasaki-u.repo.nii.ac.jp:00010791","sets":["14:21"]},"author_link":["42454","42455","42453","42452"],"item_2_biblio_info_6":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2012-05-20","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"34","bibliographicPageStart":"19","bibliographicVolumeNumber":"102","bibliographic_titles":[{"bibliographic_title":"Lecture Notes in Business Information Processing"}]}]},"item_2_description_4":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"This paper provides experimental results showing that we can use maximal substrings as elementary building blocks of documents in place of the words extracted by a current state-of-the-art supervised word extraction. Maximal substrings are defined as the substrings each giving a smaller number of occurrences even by appending only one character to its head or tail. The main feature of maximal substrings is that they can be extracted quite efficiently in an unsupervised manner. We extract maximal substrings from a document set and represent each document as a bag of maximal substrings. We also obtain a bag of words representation by using a state-of-the-art supervised word extraction over the same document set. We then apply the same document clustering method to both representations and obtain two clustering results for a comparison of their quality. We adopt a Bayesian document clustering based on Dirichlet compound multinomials for avoiding overfitting. Our experiment shows that the clustering quality achieved with maximal substrings is acceptable enough to use them in place of the words extracted by a supervised word extraction.","subitem_description_type":"Abstract"}]},"item_2_description_63":{"attribute_name":"引用","attribute_value_mlt":[{"subitem_description":"Lecture Notes in Business Information Processing, 102, pp.19-34; 2012","subitem_description_type":"Other"}]},"item_2_publisher_33":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Springer Verlag"}]},"item_2_relation_12":{"attribute_name":"DOI","attribute_value_mlt":[{"subitem_relation_type":"isVersionOf","subitem_relation_type_id":{"subitem_relation_type_id_text":"10.1007/978-3-642-29958-2_2","subitem_relation_type_select":"DOI"}}]},"item_2_rights_13":{"attribute_name":"権利","attribute_value_mlt":[{"subitem_rights":"© 2012 Springer-Verlag."},{"subitem_rights":"The original publication is available at www.springerlink.com."}]},"item_2_source_id_7":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"18651348","subitem_source_identifier_type":"ISSN"}]},"item_2_source_id_8":{"attribute_name":"EISSN","attribute_value_mlt":[{"subitem_source_identifier":"18651356","subitem_source_identifier_type":"ISSN"}]},"item_2_version_type_16":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_ab4af688f83e57aa","subitem_version_type":"AM"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Masada, Tomonari"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Takasu, Atsuhiro"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Shibata, Yuichiro"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Oguri, Kiyoshi"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2020-12-22"}],"displaytype":"detail","filename":"LNBIP102_19.pdf","filesize":[{"value":"267.1 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"LNBIP102_19.pdf","url":"https://nagasaki-u.repo.nii.ac.jp/record/10791/files/LNBIP102_19.pdf"},"version_id":"fa245889-67aa-4dd7-93af-871dc92a8e07"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Bayesian modeling","subitem_subject_scheme":"Other"},{"subitem_subject":"Document clustering","subitem_subject_scheme":"Other"},{"subitem_subject":"Maximal substring","subitem_subject_scheme":"Other"},{"subitem_subject":"Suffix array","subitem_subject_scheme":"Other"},{"subitem_subject":"Unsupervised method","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Clustering Documents with Maximal Substrings","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Clustering Documents with Maximal Substrings"}]},"item_type_id":"2","owner":"2","path":["21"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-10-02"},"publish_date":"2012-10-02","publish_status":"0","recid":"10791","relation_version_is_last":true,"title":["Clustering Documents with Maximal Substrings"],"weko_creator_id":"2","weko_shared_id":-1},"updated":"2023-05-16T01:23:59.476385+00:00"}