% Conference short paper (ACL 2017, Volume 2) on data augmentation for
% low-resource neural machine translation. The month is given as the bare
% predefined macro so that the bibliography style controls its rendering.
% NOTE(review): address appears to hold the conference location rather than
% the publisher's city -- confirm against the target style before changing.
@InProceedings{fadaee-bisazza-monz:2017:Short2,
  author    = {Fadaee, Marzieh and Bisazza, Arianna and Monz, Christof},
  title     = {Data Augmentation for Low-Resource Neural Machine Translation},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {567--573},
  doi       = {10.18653/v1/P17-2090},
  url       = {http://aclweb.org/anthology/P17-2090},
  abstract  = {The quality of a Neural Machine Translation system depends substantially on the availability of sizable parallel corpora. For low-resource language pairs this is not the case, resulting in poor translation quality. Inspired by work in computer vision, we propose a novel data augmentation approach that targets low-frequency words by generating new sentence pairs containing rare words in new, synthetically created contexts. Experimental results on simulated low-resource settings show that our method improves translation quality by up to 2.9 BLEU points over the baseline and up to 3.2 BLEU over back-translation.},
}