% Bibliography database: ten publications, all tagged with the keyword
% "scientific workflow management".
%
% Conventions used in this file:
%   - one field per line, values delimited by braces;
%   - names in "Last, First" form, separated by " and ";
%   - page ranges written with a double hyphen;
%   - months given as the standard three-letter macros (jan .. dec), unquoted;
%   - words in titles that must keep their case are protected with braces;
%   - citation keys are kept exactly as they were, so existing \cite commands
%     continue to resolve.
%
% Text outside of entries (such as these notes) is ignored by BibTeX.

@article{DBLP:journals/bioinformatics/QuandtHMHMPAL08,
  author   = {Quandt, Andreas and
              Hernandez, Patricia and
              Masselot, Alexandre and
              Hernandez, C{\'e}line and
              Maffioletti, Sergio and
              Pautasso, Cesare and
              Appel, Ron D. and
              Lisacek, Fr{\'e}d{\'e}rique},
  title    = {{swissPIT}: a novel approach for pipelined analysis of mass spectrometry data},
  journal  = {Bioinformatics},
  volume   = {24},
  number   = {11},
  year     = {2008},
  pages    = {1416--1417},
  doi      = {10.1093/bioinformatics/btn139},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/18436540},
  keywords = {bioinformatics, scientific workflow management},
  abstract = {The identification and characterization of peptides from tandem mass spectrometry (MS/MS) data represents a critical aspect of proteomics. Today, tandem MS analysis is often performed by only using a single identification program achieving identification rates between 10-50\% (Elias and Gygi, 2007). Beside the development of new analysis tools, recent publications describe also the pipelining of different search programs to increase the identification rate (Hartler et al., 2007; Keller et al., 2005). The Swiss Protein Identification Toolbox (swissPIT) follows this approach, but goes a step further by providing the user an expandable multi-tool platform capable of executing workflows to analyze tandem MS-based data. One of the major problems in proteomics is the absent of standardized workflows to analyze the produced data. This includes the pre-processing part as well as the final identification of peptides and proteins. The main idea of swissPIT is not only the usage of different identification tool in parallel, but also the meaningful concatenation of different identification strategies at the same time. The swissPIT is open source software but we also provide a user-friendly web platform, which demonstrates the capabilities of our software and which is available at http://swisspit.cscs.ch upon request for account.},
}

% NOTE(review): the surname "Kunzst" below is kept as found; it may be a
% misspelling of "Kunszt" -- confirm against the published paper.
@inproceedings{jophealthgrid07,
  author    = {Quandt, Andreas and
               Hernandez, Patricia and
               Kunzst, Peter and
               Pautasso, Cesare and
               Tuloup, Marc and
               Appel, Ron D.},
  title     = {Grid-based Analysis of Tandem Mass Spectrometry Data in Clinical Proteomics},
  booktitle = {Health Grid 2007},
  year      = {2007},
  address   = {Geneva, Switzerland},
  url       = {http://www.ncbi.nlm.nih.gov/pubmed/17476043},
  keywords  = {grid computing, JOpera, scientific workflow management},
  abstract  = {Biomarker detection is one of the greatest challenges in Clinical Proteomics. Today, great hopes are placed into tandem mass spectrometry (MS/MS) to discover potential biomarkers. MS/MS is a technique that allows large scale data analysis, including the identification, characterization, and quantification of molecules. Especially the identification process, that implies to compare experimental spectra with theoretical amino acid sequences stored in specialized databases, has been subject for extensive research in bioinformatics since many years. Dozens of identification programs have been developed addressing different aspects of the identification process but in general, clinicians are only using a single tools for their data analysis along with a single set of specific parameters. Hence, a significant proportion of the experimental spectra do not lead to a confident identification score due to inappropriate parameters or scoring schemes of the applied analysis software. The swissPIT (Swiss Protein Identification Toolbox) project was initiated to provide the scientific community with an expandable multi-tool platform for automated and in-depth analysis of mass spectrometry data. The swissPIT uses multiple identification tools to automatic analyze mass spectra. The tools are concatenated as analysis workflows. In order to realize these calculation-intensive workflows we are using the Swiss Bio Grid infrastructure. A first version of the web-based front-end is available (http://www.swisspit.cscs.ch) and can be freely accessed after requesting an account. The source code of the project will be also made available in near future.},
}

% A chapter with its own authors inside an edited book, hence "incollection".
% NOTE(review): the series name was not present in the original record; it is
% assumed from the publisher and the volume number 4028 -- confirm.
@incollection{116,
  author    = {Pautasso, Cesare and
               Bausch, Win and
               Alonso, Gustavo},
  editor    = {Kohlas, J{\"u}rg and
               Meyer, Bertrand and
               Schiper, Andr{\'e}},
  title     = {Autonomic Computing for Virtual Laboratories},
  booktitle = {Dependable Systems: Software, Computing, Networks},
  series    = {Lecture Notes in Computer Science},
  number    = {4028},
  publisher = {Springer},
  year      = {2006},
  pages     = {211--230},
  isbn      = {3-540-36821-3},
  doi       = {10.1007/11808107_10},
  keywords  = {autonomic computing, JOpera, scientific workflow management},
  abstract  = {Virtual laboratories can be characterized by their long-lasting, large-scale computations, where a collection of heterogeneous tools is integrated into data processing pipelines. Such virtual experiments are typically modeled as scientific workflows in order to guarantee their reproduceability. In this chapter we present JOpera, one of the first autonomic infrastructures for managing virtual laboratories. JOpera provides a sophisticated Eclipse-based graphical environment to design, monitor and debug distributed computations at a high level of abstraction. The chapter describes the architecture of the workflow execution environment, emphasizing its support for the integration of heterogeneous tools and evaluating its autonomic capabilities, both in terms of reliable execution (self-healing) and automatic performance optimization (self-tuning).},
}

@article{119,
  author   = {Tsalgatidou, Aphrodite and
              Athanasopoulos, Georgios and
              Pantazoglou, Michael and
              Pautasso, Cesare and
              Heinis, Thomas and
              Gr{\o}nmo, Roy and
              Hoff, Hj{\o}rdis and
              Berre, Arne-J{\o}rgen and
              Glittum, Magne and
              Topouzidou, Simela},
  title    = {Developing scientific workflows from heterogeneous services},
  journal  = {{SIGMOD} Record},
  volume   = {35},
  year     = {2006},
  month    = jun,
  pages    = {22--28},
  issn     = {0163-5808},
  doi      = {10.1145/1147376.1147380},
  keywords = {JOpera, scientific workflow management},
  abstract = {Scientific Workflows (SWFs) need to utilize components and applications in order to satisfy the requirements of specific workflow tasks. Technology trends in software development signify a move from component-based to service-oriented approach, therefore SWF will inevitably need appropriate tools to discover and integrate heterogeneous services. In this paper we present the SODIUM platform consisting of a set of languages and tools as well as related middleware, for the development and execution of scientific workflows composed of heterogeneous services.},
}

@inproceedings{jopera:2006:ccgrid,
  author    = {Heinis, Thomas and
               Pautasso, Cesare and
               Alonso, Gustavo},
  title     = {Mirroring Resources or Mapping Requests: implementing {WS-RF} for Grid workflows},
  booktitle = {6th {IEEE} International Symposium on Cluster Computing and the Grid ({CCGrid2006})},
  year      = {2006},
  month     = may,
  address   = {Singapore},
  doi       = {10.1109/CCGRID.2006.69},
  keywords  = {grid computing, JOpera, scientific workflow management},
  abstract  = {The Web Services Resource Framework (WS-RF) and the Web Services Notification (WS-N) specifications are a crucial component of Grid infrastructures. They provide a standardized interface to stateful services so that they can be managed remotely. There are already several implementations of these specifications and initial performance studies have compared them in terms of the overhead observed by a single client. In this paper we address the problem of implementing the WS-RF and WS-N specifications for large scale systems. In particular, we discuss how to implement WS-RF and WSN as the management interfaces to a Grid workflow engine. In the paper we describe and compare two different architectures for mapping resources to processes. The first one mirrors the state of the process as a resource. The second one maps the client requests to access the state of a resource embedded into the Grid workflow engine. We include an extensive performance evaluation, comparing the resulting systems in terms of scalability when servicing a large number of concurrent clients.},
}

% NOTE(review): the original record had no booktitle (a required field for
% this entry type). The value below is inferred from the "works" citation key
% together with the Paris / June 2006 venue -- confirm against the proceedings.
@inproceedings{jopera:2006:works,
  author    = {Pautasso, Cesare and
               Alonso, Gustavo},
  title     = {Parallel Computing Patterns for Grid Workflows},
  booktitle = {{HPDC} 2006 Workshop on Workflows in Support of Large-Scale Science ({WORKS06})},
  year      = {2006},
  month     = jun,
  address   = {Paris, France},
  keywords  = {grid computing, scientific workflow management, survey, workflow patterns},
  abstract  = {Whereas a consensus has been reached on defining the set of workflow patterns for business process modeling languages, no such patterns exists for workflows applied to scientific computing on the Grid. By looking at different kinds of parallelism, in this paper we identify a set of workflow patterns related to parallel and pipelined execution. The paper presents how these patterns can be represented in different Grid workflow languages and discusses their implications for the design of the underlying workflow management and execution infrastructure. A preliminary classification of these patterns is introduced by surveying how they are supported by several existing advanced scientific and Grid workflow languages.},
}

@inproceedings{jopera:2005:escience,
  author    = {Heinis, Thomas and
               Pautasso, Cesare and
               Deak, Oliver and
               Alonso, Gustavo},
  title     = {Publishing Persistent Grid Computations as {WS} Resources},
  booktitle = {1st {IEEE} International Conference on e-Science and Grid Computing (e-Science 2005)},
  publisher = {IEEE},
  year      = {2005},
  month     = dec,
  address   = {Melbourne, Australia},
  doi       = {10.1109/E-SCIENCE.2005.67},
  keywords  = {grid computing, JOpera, scientific workflow management},
  abstract  = {Grid services can be composed into processes, providing a high level definition of the computations involved in terms of their data exchanges and control flow dependencies. We show how processes themselves can be efficiently published as Grid services by mapping the persistent state of the process executions to standard compliant interfaces as defined by the Web Services Resource Framework (WS-RF). Mapping processes to resources is a fundamental step to enable recursive Grid service composition, where composite Grid services are themselves published as services. This gives processes a standardized and wellunderstood interface that enables their management, monitoring, steering and adaptation. Additionally it eases their reusability and simplifies integration into existing Grid applications and portals. In order to determine the mapping{\textquoteright}s overhead, we include the results of a comprehensive performance evaluation.},
}

% The journal name and issue number were missing from the original record;
% both are taken from the entry's own URL path (Ercim_News/enw59).
@article{98,
  author   = {Pautasso, Cesare},
  title    = {{JOpera}: Visual Composition of Grid Services},
  journal  = {{ERCIM} News},
  number   = {59},
  year     = {2004},
  month    = oct,
  pages    = {46--47},
  url      = {http://www.ercim.eu/publication/Ercim_News/enw59/pautasso.html},
  keywords = {grid computing, JOpera, scientific workflow management},
  abstract = {The recent shift to service-based Grids enables the use of service composition tools for rapidly building and efficiently running distributed computations. At ETH Zurich, we have developed JOpera, a visual composition language and a set of integrated software development tools for composing Grid services. The system can be freely downloaded and has been successfully tested with data-intensive bioinformatics applications as well as large-scale, Monte-Carlo network simulations.},
}

@inproceedings{92,
  author    = {Bausch, Win and
               Pautasso, Cesare and
               Schaeppi, Reto and
               Alonso, Gustavo},
  title     = {{BioOpera}: cluster-aware computing},
  booktitle = {{IEEE} International Conference on Cluster Computing ({CLUSTER} 2002)},
  publisher = {IEEE},
  year      = {2002},
  month     = sep,
  pages     = {99--106},
  address   = {Chicago, IL, USA},
  doi       = {10.1109/CLUSTR.2002.1137734},
  url       = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1137734},
  keywords  = {BioOpera, cluster computing, scientific workflow management},
  abstract  = {In this paper we present BioOpera, an extensible process support system for cluster-aware computing. It features an intuitive way to specify computations, as well as improved support for running them over a cluster providing monitoring, persistence, fault tolerance and interaction capabilities without sacrificing efficiency and scalability.},
}

@inproceedings{91,
  author    = {Alonso, Gustavo and
               Bausch, Win and
               Pautasso, Cesare and
               Kahn, Ari},
  title     = {Dependable computing in virtual laboratories},
  booktitle = {17th {IEEE} International Conference on Data Engineering ({ICDE} 2001)},
  publisher = {IEEE},
  year      = {2001},
  month     = apr,
  pages     = {235--242},
  address   = {Heidelberg, Germany},
  doi       = {10.1109/ICDE.2001.914834},
  url       = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=914834},
  keywords  = {bioinformatics, BioOpera, scientific workflow management, virtual laboratories},
  abstract  = {Many scientific disciplines are shifting from in vitro to in silico research as more physical processes and natural phenomena are examined in a computer (in silico) instead of being observed (in vitro). In many of these virtual laboratories, the computations involved are very complex and long lived. Currently, users are required to manually handle almost all aspects of such computations, including their dependability. Not surprisingly, this is a major bottleneck and a significant source of inefficiencies. To address this issue, we have developed BioOpera, an extensible process support management system for virtual laboratories. The authors briefly discuss the architecture and functionality of BioOpera and show how it can be used to efficiently manage long lived computations},
}