@inproceedings{f7b1865f6e714371b6699c234fc9c51d,
  author    = {Lee, Jungi and Jang, {Jong Won} and Lee, Jangwon and Jang, {Gil Jin} and Lee, {Min Ho}},
  title     = {{Korean} Traditional Document Translation Using Transformer In Bidirectional-{CRF}},
  booktitle = {ICTC 2021 - 12th International Conference on ICT Convergence},
  series    = {International Conference on ICT Convergence},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2021},
  pages     = {1738--1742},
  doi       = {10.1109/ICTC52510.2021.9621001},
  language  = {English},
  keywords  = {CRF, deep learning, Hangul, neural machine translation, seq2seq, transformer},
  abstract  = {This paper proposes a solution to solve the Out of Vocabulary (OOV) problem in a framework built with a transformer-based machine translation algorithm. The translation input is a traditional Korean document written in Chinese characters, and the output is a decoding of modern Korean paragraphs written in Korean alphabet. We used the word2vec algorithm to represent symbolic characters as numeric vectors and used them as input to the converter. Also, to solve the OOV problem, Bi-Directional LSTM + CRF has been used. To show the validity of the data set, the Annals of the Joseon Dynasty were presented as translations prepared by experts. Another source was collected at Kyungpook National University (Diary dataset), which is much smaller than the Annals of the Joseon Dynasty. According to the BLEU score, after learning the Annals of the Joseon Dynasty, fine-tune with data collected at Kyungpook National University showed a lower BLEU score than general machine translation in the results of applying CRF. When learning only with the dataset collected at Kyungpook National University, it can be seen that a slightly high BLEU score was obtained.},
  note      = {Publisher Copyright: {\textcopyright} 2021 IEEE.; 12th International Conference on Information and Communication Technology Convergence, ICTC 2021 ; Conference date: 20-10-2021 Through 22-10-2021},
}