% Journal article: multicore/NUMA-aware optimization of the JOpera process execution engine.
% NOTE(review): the citation key says 2012 while the year field is 2014 (presumably the
% online-first date; confirm). The key is kept unchanged so existing citations keep resolving.
@article{peternier2012CPE,
  author    = {Peternier, Achille and Binder, Walter and Pautasso, Cesare and Bonetta, Daniele},
  title     = {High Performance Execution of Service Compositions: a Multicore-aware Engine Design},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {26},
  pages     = {71--97},
  year      = {2014},
  month     = jan,
  publisher = {Wiley},
  doi       = {10.1002/cpe.2948},
  keywords  = {JOpera, multicores, nonuniform memory access architecture, performance optimization, service composition and execution},
  abstract  = {Although modern computer hardware offers an increasing number of processing elements organized in nonuniform memory access (NUMA) architectures, prevailing middleware engines for executing business processes, workflows, and Web service compositions have not been optimized for properly exploiting the abundant processing resources of such machines. Amongst others, factors limiting performance are inefficient thread scheduling by the operating system, which can result in suboptimal use of system memory and CPU caches, and sequential code sections that cannot take advantage of multiple available cores. In this article, we study the performance of the JOpera process execution engine on recent multicore machines. We first evaluate its performance without any dedicated optimization for multicore hardware, showing that additional cores do not significantly improve performance, although the engine has a multithreaded design. Therefore, we apply optimizations on the basis of replication together with an improved, hardware-aware usage of the underlying resources such as NUMA nodes and CPU caches. Thanks to our optimizations, we achieve speedups from a factor of 2 up to a factor of 20 (depending on the target machine) when compared with a baseline execution {\textquoteleft}as is{\textquoteright}.},
}

% Conference paper (SOCA 2010): binding JOpera engine threads to cores that share a cache.
% NOTE(review): address holds the conference venue as exported; confirm whether the target
% style expects the publisher's city there instead.
@inproceedings{jopera:2010:soca,
  author    = {Peternier, Achille and Bonetta, Daniele and Pautasso, Cesare and Binder, Walter},
  title     = {Exploiting multicores to optimize business process execution},
  booktitle = {International Conference on Service-Oriented Computing and Applications (SOCA 2010)},
  pages     = {1--8},
  year      = {2010},
  month     = dec,
  publisher = {IEEE},
  address   = {Perth, Australia},
  doi       = {10.1109/SOCA.2010.5707156},
  keywords  = {business data processing, business process execution engines, business process execution optimization, hardware performance counters, Java, JOpera, multicores, performance optimization, thread-CPU bindings, Web service composition, Web services, workflow},
  abstract  = {While modern CPUs offer an increasing number of cores with shared caches, prevailing execution engines for business processes, workflows, or Web service compositions have not been optimized for properly exploiting the abundant processing resources of such CPUs. One factor limiting performance is the inefficient thread scheduling by the operating system, which can result in suboptimal use of shared caches. In this paper we study performance of the JOpera business process execution engine on a recent multicore machine. By analyzing the engine{\textquoteright}s architecture and by binding threads that are likely to access shared data to cores with a common cache, we achieve speedups up to 13\% for a variety of workloads, without modifying the engine{\textquoteright}s architecture and implementation, apart from binding threads to CPUs. As the engine is implemented in Java, we provide a new Java library to manage thread bindings and hardware performance counters. We also leverage hardware performance counters to explain the observed speedup in our performance analysis.},
}

% Proceedings paper: a service composition engine architecture designed for multicore machines.
% NOTE(review): inproceedings entries require a booktitle, which this record lacks. The DOI
% prefix suggests a Springer proceedings volume (possibly the OTM 2010 Workshops, LNCS);
% confirm against the publisher record and add booktitle (plus series/volume) accordingly.
@inproceedings{1948635,
  author    = {Bonetta, Daniele and Peternier, Achille and Pautasso, Cesare and Binder, Walter},
  title     = {Towards scalable service composition on multicores},
  pages     = {655--664},
  year      = {2010},
  month     = oct,
  publisher = {Springer},
  address   = {Crete},
  doi       = {10.1007/978-3-642-16961-8_90},
  isbn      = {3-642-16960-0},
  keywords  = {JOpera, multicore, Web service composition},
  abstract  = {The advent of modern multicore machines, comprising several chip multi-processors each offering multiple cores and often featuring a large shared cache, offers the opportunity to redesign the architecture of service composition engines in order to take full advantage of the underlying hardware resources. In this paper we introduce an innovative service composition engine architecture, which takes into account specific features of multicore machines while not being constrained to run on any particular processor architecture. Our preliminary performance evaluation results show that the system can scale to run thousands of concurrent business process instances per second.},
}