% Conference paper published in the CAiSE 2018 proceedings
% (Lecture Notes in Computer Science, volume 10816).
% Entry cleaned up from an automatic repository export; the citation key is unchanged.
@inproceedings{f622cc473beb4f089194a7585bc7bc09,
  author    = {Bauer, Martin and Senderovich, Arik and Gal, Avigdor and Grunske, Lars and Weidlich, Matthias},
  title     = {How Much Event Data Is Enough? {A} Statistical Framework for Process Discovery},
  editor    = {Krogstie, John and Reijers, Hajo A.},
  booktitle = {Advanced Information Systems Engineering, {CAiSE} 2018},
  series    = {Lecture Notes in Computer Science},
  volume    = {10816},
  pages     = {239--256},
  publisher = {Springer International Publishing},
  year      = {2018},
  isbn      = {978-3-319-91562-3},
  doi       = {10.1007/978-3-319-91563-0_15},
  language  = {English},
  keywords  = {Log pre-processing, Log sampling, Process discovery},
  abstract  = {With the increasing availability of business process related event logs, the scalability of techniques that discover a process model from such logs becomes a performance bottleneck. In particular, exploratory analysis that investigates manifold parameter settings of discovery algorithms, potentially using a software-as-a-service tool, relies on fast response times. However, common approaches for process model discovery always parse and analyse all available event data, whereas a small fraction of a log could have already led to a high-quality model. In this paper, we therefore present a framework for process discovery that relies on statistical pre-processing of an event log and significantly reduce its size by means of sampling. It thereby reduces the runtime and memory footprint of process discovery algorithms, while providing guarantees on the introduced sampling error. Experiments with two public real-world event logs reveal that our approach speeds up state-of-the-art discovery algorithms by a factor of up to 20.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG, part of Springer Nature 2018. 30th International Conference on Advanced Information Systems Engineering, {CAiSE} 2018. Conference date: 11-06-2018 through 15-06-2018},
}