% Conference paper in the ICONIP 2018 proceedings (Lecture Notes in Computer Science).
% The citation key looks export-generated; it is kept unchanged so existing \cite
% commands continue to resolve.
% The event and copyright details are stored in dedicated fields (eventtitle,
% eventdate, copyright) rather than in `note`, so they are not printed verbatim
% in the bibliography.
% NOTE(review): address = Cham is inferred from the "Springer Nature Switzerland AG"
% copyright line; confirm against the volume's title page. The LNCS volume number
% is not recorded here and could be added once verified.
@inproceedings{4f01ae9d5c2b4294877ed2751772b391,
  author     = {Yu, Hosang and Jang, {Gil Jin} and Lee, Minho},
  title      = {Hybridized Character-Word Embedding for {Korean} Traditional Document Translation},
  booktitle  = {Neural Information Processing - 25th International Conference, {ICONIP} 2018, Proceedings},
  editor     = {Cheng, Long and Ozawa, Seiichi and Leung, {Andrew Chi Sing}},
  series     = {Lecture Notes in Computer Science},
  pages      = {82--89},
  publisher  = {Springer},
  address    = {Cham},
  year       = {2018},
  doi        = {10.1007/978-3-030-04182-3_8},
  isbn       = {9783030041816},
  language   = {English},
  eventtitle = {25th International Conference on Neural Information Processing, {ICONIP} 2018},
  eventdate  = {2018-12-13/2018-12-16},
  copyright  = {{\textcopyright} Springer Nature Switzerland AG 2018},
  keywords   = {Character-word embedding, Deep learning, Natural language processing, Neural machine translation, Seq2seq},
  abstract   = {Translating traditional documents is quite laborious and time consuming for human translators owing to the voluminous nature and a complexity of grammatical patterns. In recent times, a neural network-based machine translation architecture such as sequence-to-sequence (seq2seq) model showed superior performance in translation. However, it suffers out-of-vocabulary (OOV) issue when dealing with very complex and vocabulary languages such as Chinese characters, resulting in performance degradation. To cope with the OOV issue, we propose a new method by combining word embedding and character embedding to supplement loss from unknown words with character embedding. Experimental results show that the proposed method is efficient to translate old Korean archives (Hanja) to modern Korean documents (Hangul).},
}