% Conference paper from the ICFHR 2012 proceedings (see booktitle / note).
% The citation key looks like an exporter-generated hash; it is kept unchanged
% so that existing citations of this entry continue to resolve.
% The percent sign in the abstract is escaped so LaTeX does not read it as a
% comment start if a style prints the abstract.
@inproceedings{d9b0d4067dde4008841484b3383c7f7e,
  author    = {Bukhari, Syed Saqib and Breuel, Thomas M. and Asi, Abedelkadir and El-Sana, Jihad},
  title     = {Layout analysis for {Arabic} historical document images using machine learning},
  booktitle = {Proceedings - 13th International Conference on Frontiers in Handwriting Recognition, ICFHR 2012},
  series    = {Proceedings - International Workshop on Frontiers in Handwriting Recognition, IWFHR},
  pages     = {639--644},
  year      = {2012},
  month     = dec,
  day       = {1},
  doi       = {10.1109/ICFHR.2012.227},
  isbn      = {9780769547749},
  language  = {American English},
  note      = {13th International Conference on Frontiers in Handwriting Recognition, ICFHR 2012 ; Conference date: 18-09-2012 Through 20-09-2012},
  abstract  = {Page layout analysis is a fundamental step of any document image understanding system. We introduce an approach that segments text appearing in page margins (a.k.a side-notes text) from manuscripts with complex layout format. Simple and discriminative features are extracted in a connected-component level and subsequently robust feature vectors are generated. Multilayer perception classifier is exploited to classify connected components to the relevant class of text. A voting scheme is then applied to refine the resulting segmentation and produce the final classification. In contrast to state-of-the-art segmentation approaches, this method is independent of block segmentation, as well as pixel level analysis. The proposed method has been trained and tested on a dataset that contains a variety of complex side-notes layout formats, achieving a segmentation accuracy of about 95\%.},
}