challenge06.bib


% Overview article of the First Provenance Challenge: presents the challenge
% workflow and queries and summarises the participants' contributions.
% NOTE(review): EUPUB and the empty OPT* fields are not standard BibTeX fields
% and are ignored by the standard styles; EUPUB is presumably a local flag.
@ARTICLE{Editorial:Challenge06_old,
  AUTHOR = {Luc Moreau and
Bertram Lud{\"a}scher and
Ilkay Altintas and
Roger S. Barga and
Shawn Bowers and
Steven Callahan and
Chin, Jr., George and
Ben Clifford and
Shirley Cohen and
Sarah Cohen-Boulakia and
Susan Davidson and
Ewa Deelman and
Luciano Digiampietri and
Ian Foster and
Juliana Freire and
James Frew and
Joe Futrelle and
Tara Gibson and
Yolanda Gil and
Carole Goble and
Jennifer Golbeck and
Paul Groth and
David A. Holland and
Sheng Jiang and
Jihie Kim and
David Koop and
Ales Krenek and
Timothy McPhillips and
Gaurang Mehta and
Simon Miles and
Dominic Metzger and
Steve Munroe and
Jim Myers and
Beth Plale and
Norbert Podhorszki and
Varun Ratnakar and
Emanuele Santos and
Carlos Scheidegger and
Karen Schuchardt and
Margo Seltzer and
Yogesh L. Simmhan and
Claudio Silva and
Peter Slaughter and
Eric Stephan and
Robert Stevens and
Daniele Turi and
Huy Vo and
Mike Wilde and
Jun Zhao and
Yong Zhao
},
  TITLE = {The First {Provenance Challenge}},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  EUPUB = {yes},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {The first Provenance Challenge was set up in order to provide a forum for the community to help understand the capabilities of different provenance systems
and the expressiveness of their provenance representations.
  To this end, a Functional
            Magnetic Resonance Imaging workflow was defined, which participants
            had to either simulate or run in order to produce some provenance
            representation, from which a set of identified queries had to be
            implemented and executed.  Sixteen teams responded to the
            challenge, and submitted their inputs. In this paper, we present
            the challenge workflow and queries, and summarise the participants'
            contributions.}
}


% Provenance Challenge paper on using a scientific content management system,
% RDF over HTTP and a DAV-based query language for provenance capture.
@ARTICLE{SCM:Challenge06,
  AUTHOR = {Karen Schuchardt and Tara Gibson and Eric Stephan and
                  Chin, Jr., George},
  TITLE = {Applying Content Management to Automated Provenance Capture},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Systems science research uses a mixture of experiments, theoretical
            computations, and simulations, often augmented by further analyses,
            to solve complex problems. In many cases, these processes are
            conducted manually and often by multiple people and computing
            systems resulting in an incomplete record of results. Scientific
            workflow tools are beginning to be employed as a means of executing
            and repeating processing. Adding automatic provenance capture to
            workflow tools can result in complete, accurate records of data
            history as well as enable more efficient, robust workflow
            environments. Our goal in addressing the provenance challenge was
            to combine, and as necessary, extend a variety of standard
            technologies, protocols, and schemas to implement workflow
            provenance and data capture, answer the challenge queries, and
            explore a general architecture for scientific provenance
            capture. Our implementation applies a scientific content management
            system for provenance and data persistence, RDF over HTTP for a
            provenance API, and our own semantic query language based on the
            DAV Searching and Locating protocol. Our implementation offers
            several unique capabilities, and through the use of standards, is
            able to accommodate a variety of widely available client tools
            against the provenance record.}
}


% Provenance Challenge paper on a layered model for workflow execution
% provenance, captured by a workflow engine and stored relationally.
@ARTICLE{REDUX:Challenge06,
  AUTHOR = {Roger S. Barga and Luciano A. Digiampietri},
  TITLE = {Automatic Capture and Efficient Storage of {eScience} Experiment Provenance},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Workflow is playing an increasingly important role in conducting
                  e-Science experiments, but most commercial systems lack the
                  necessary support for the collection and management of
                  provenance data.  We argue that eScience provenance data
                  should be automatically generated by the workflow enactment
                  engine and managed over time by an underlying storage
                  service.  In this paper, we introduce a layered model for
                  workflow execution provenance, which allows navigation from
                  an abstract model of the experiment to instance data
                  collected during a specific experiment run.  We outline
                  modest extensions to a commercial workflow engine so it will
                  automatically capture this provenance data at runtime.  We
                  then present an approach to store this provenance data in a
                  relational database engine. Finally, we identify important
                  properties of provenance data captured by our model that can
                  significantly reduce the amount of storage required, and
                  demonstrate we can reduce the size of provenance data
                  captured from an actual experiment to 0.4\% of the original
                  size, with modest performance overhead.}
}


% Provenance Challenge paper on a provenance model for collection-oriented
% workflows, with an implementation in Kepler.
@ARTICLE{COMAD:Challenge06,
  AUTHOR = {Shawn Bowers and Timothy M. McPhillips and Bertram Lud{\"a}scher},
  TITLE = {Provenance in Collection-Oriented Scientific Workflows},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Scientific workflows frequently operate over nested collections of
            data.  These collections are often produced by workflow steps,
            e.g., where one actor outputs a collection of data items (such as a
            list of transcription factors), which is read by another actor that
            produces a nested collection for each item (such as a list of
            functions associated with each transcription factor).  As a result,
            data flow becomes increasingly nested, requiring workflows to
            implement complex data management tasks. In previous work, we have
            proposed a framework for transparently supporting nested data
            collections in scientific workflows. Our framework provides a
            number of advantages, including simpler workflow designs (compared
            to conventional approaches), the ability to concurrently execute
            actors over collection contents, on-the-fly customization of actor
            behavior, and improved handling of workflow exceptions.

            In this paper, we describe a provenance model tailored to
            collection-oriented workflows, in which only a minimal number of
            provenance events are required to recreate data dependencies and
            process details. We also describe an implementation in Kepler for
            (semi-) automatically capturing this provenance information.  Our
            implementation embeds provenance events as tokens directly within a
            data stream, and produces self-contained trace files for workflow
            runs. Finally, we describe a prototype provenance reasoning and
            query engine for collection-oriented traces, and demonstrate our
            approach using the workflow and queries of the Provenance
            Challenge.}
}


% Provenance Challenge paper on the Read, Write, State-reset (RWS) provenance
% model and its implementation within Kepler.
@ARTICLE{RWS:Challenge06,
  AUTHOR = {Bertram Lud{\"a}scher and Norbert Podhorszki and Ilkay Altintas and
                  Shawn Bowers and Timothy M. McPhillips},
  TITLE = {From Computation Models to Models of Provenance: the {RWS} Approach},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Scientific workflows often either require or can take benefit from
            the use of complex modeling constructs such as sub-workflow
            nesting, cycles for executing loops, and pipelined
            execution. However, for such workflows, it is not obvious how to
            capture, represent, and query associated provenance information.
            The Kepler Provenance Recorder provides an extensible framework for
            capturing provenance information in actor-oriented scientific
            workflows. The primary application of the Provenance Recorder so
            far, however, has been for implementing ``smart'' re-run, where
            previous workflow runs are reused to optimize future runs with
            different input data or parameter settings. Alternatively, the
            Read, Write, State-reset (RWS) provenance model is designed to
            capture, and subsequently query, detailed data and invocation
            dependencies in scientific workflows. The RWS model is designed to
            explicitly support scientific workflows using pipeline parallelism
            over streaming data as well as cycles. This paper describes an
            implementation of the RWS model within Kepler, including the
            required extensions to the Kepler Provenance Recorder.  We also
            describe additional extensions to the RWS model for capturing
            nested workflows, in which workflow components (actors) represent
            sub-workflows. Finally, we present examples from the provenance
            challenge that highlight the capabilities of our approach.}
}


% Provenance Challenge paper on ZOOM, a provenance model for scientific
% workflows built around user views over composite step-classes.
@ARTICLE{Zoom:Challenge06,
  AUTHOR = {Sarah Cohen-Boulakia and Olivier Biton and Shirley Cohen and Susan Davidson},
  TITLE = {Addressing the {Provenance Challenge} Using {ZOOM}},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {ZOOM*UserViews presents a formal model of provenance for scientific workflows that is simple, generic, and yet sufficiently expressive to answer questions of data and step provenance
that have been encountered in a large variety of scientific case studies.
In addition, ZOOM builds on the concept of composite step-classes -- or
sub-workflows -- which is present in many scientific workflow systems
to develop a notion of user views. This paper discusses the design and
implementation of ZOOM in the context of the queries posed by the
provenance challenge, and shows how user views affect the level of
granularity at which provenance information can be seen and reasoned about.}
}


% Provenance Challenge paper on provenance querying in the Karma Provenance
% Framework used in the LEAD Project.
@ARTICLE{Karma:Challenge06,
  AUTHOR = {Yogesh L. Simmhan and Beth Plale and Dennis Gannon},
  TITLE = {Querying Capabilities of the {Karma} Provenance Framework},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  PAGES = {441--451},
  DOI = {10.1002/cpe.1229},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Provenance in e-Science is a form of metadata capturing the
            derivation history of data products and scientific
            workflows. Provenance forms a glue linking workflow executions with
            associated data products, and finds use in determining the quality
            of derived data, tracking resource usage, verifying and validating
            scientific experiments, and for different forms information
            discovery through querying and mining. In this article, we discuss
            the scope of provenance collected in the Karma Provenance Framework
            used in the LEAD Project, distinguishing provenance metadata from
            generic annotations. We further describe our approaches to querying
            for different kinds of provenance in Karma while addressing the
            queries proposed in the Provenance Challenge Workshop. We use a
            building-block method to construct provenance queries, with the
            Karma service providing fundamental querying capabilities centered
            on the provenance metadata model and client-side libraries using
            those to iteratively perform complex queries. This has the
            advantage of keeping the Karma service generic and simple, and yet
            supports a wide range of queries. We conclude with opportunities
            that we see for optimizing the Karma query interface to tackle
            potentially costly deep provenance queries.}
}


% Provenance Challenge paper on a Semantic Web approach: web services emit RDF
% provenance metadata that is queried with SPARQL.
@ARTICLE{MINDSWAP:Challenge06,
  AUTHOR = {Jennifer Golbeck and James Hendler},
  TITLE = {A {Semantic Web} Approach to Tracking Provenance in Scientific Workflows},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Provenance is a critical concept in scientific workflows, since it
            allows scientists to understand the origin of their results, to
            repeat their experiments, and to validate the processes that were
            used to derive data products. When working in a online environment,
            such as the Semantic Web is a natural fit for executing workflows
            and producing provenance information for the process and files. In
            this paper, we present our Semantic Web-based approach to the
            provenance challenge. Web services execute each step of the
            workflow and output files onto the web where they are represented
            uniquely by URIs. The services also output RDF files that represent
            metadata about their execution as well as the provenance of the
            output files. When these files are aggregated, simple SPARQL can
            answer all the queries in the challenge. We will also discuss how
            this distributed approach contrasts with systems.}
}


% Provenance Challenge paper on Job Provenance (JP), a job-centric Grid
% service that is part of the gLite middleware.
@ARTICLE{JP:Challenge06,
  AUTHOR = {Ales Krenek and Jiri Sitera and Ludek Matyska and Frantisek
Dvorak and Milos Mulac and Miroslav Ruda and Zdenek Salvet},
  TITLE = {{gLite} {Job Provenance} -- a Job-Centric View},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Job Provenance (JP) is a Grid service that keeps long-term track on completed
computations for further reference. It is a job-centric service, keeping
records about job life cycle, environment, inputs/outputs, user parameters
etc. The data collected from the Grid middleware where the job has run can
be complemented with user annotations that add a personalized view.

JP is a part of the gLite Grid middleware developed within the EU EGEE project.
During the first provenance challenge, we explored the relation between a
specific job-centric Grid oriented provenance and a more general data
provenance approach. We demonstrated how the job-centric view of computations
can be connected with data-centric user queries. We present important design
decisions and user-level procedures used in the challenge to implement
individual prescribed scenarios. We also show how JP can store data about
complex workflows and how these data can be used to answer user queries.

The implementation of the first provenance challenge workflow in a real
production level Grid system (gLite based EGEE Grid) provides an insight
how the workflow tasks can be implemented and run on a Grid.

We conclude with ``lessons learnt'' -- the challenge represents a usecase with
emphasis in fields that were not priorities in the original JP design,
namely dealing with structured computations (workflows), and types of
annotations which are logically related to data rather than jobs.
However, we proved that the design is sufficiently general to cope with
this usage approach. We also identified several areas where it is feasible
to extend the current implementation.}
}


% Provenance Challenge paper on the open provenance architecture (OPA), which
% distinguishes process documentation from the provenance of a data item.
% NOTE(review): EUPUB is not a standard BibTeX field; presumably a local flag.
@ARTICLE{OPA:Challenge06,
  AUTHOR = {Simon Miles and Paul Groth and Steve Munroe and Sheng Jiang
                  and Thibaut Assandri and Luc Moreau},
  TITLE = {Extracting Causal Graphs from an Open Provenance Data Model},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  EUPUB = {yes},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {The open provenance architecture (OPA) approach to the challenge
            was distinct in several regards.  In particular, it is based on an
            open, well-defined data model and architecture, allowing different
            components of the challenge workflow to independently record
            documentation, and for the workflow to be executed in any
            environment.  Another noticeable feature is that we distinguish
            between the data recorded about what has occurred, \emph{process
            documentation}, and the \emph{provenance} of a data item, which is
            all that caused the data item to be as it is and is obtained as the
            result of a query over process documentation.  This distinction
            allows us to tailor the system to separately best address the
            requirements of recording and querying documentation.  Other
            notable features include the explicit recording of causal
            relationships between both events and data items, an
            interaction-based world model, intensional definition of data items
            in queries rather than relying on explicit naming mechanisms, and
            \emph{styling} of documentation to support non-functional
            application requirements such as reducing storage costs or ensuring
            privacy of data.  In this paper we describe how each of these
            features aid us in answering the challenge provenance queries.}
}


% Provenance Challenge paper on the Earth System Science Server (ES3), which
% extracts provenance by monitoring applications' interactions with their
% execution environment.
@ARTICLE{ES3:Challenge06,
  AUTHOR = {James Frew and Dominic Metzger and Peter Slaughter},
  TITLE = {Automatic Capture and Reconstruction of Computational Provenance},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {The Earth System Science Server (ES3) project is developing a local infrastructure for managing Earth science data products derived from satellite remote sensing. By ``local,'' we mean the infrastructure that a scientist uses to manage the creation and dissemination of her own data products, particularly those that are constantly incorporating corrections or improvements based on the scientist's own research. Therefore, in addition to being robust and capacious enough to support public access, ES3 is intended to be flexible enough to manage the idiosyncratic computing ensembles that typify scientific research.

Instead of specifying provenance explicitly with a workflow model, ES3 extracts provenance information automatically from arbitrary applications by monitoring their interactions with their execution environment. These interactions (arguments, file I/O, system calls, etc.) are logged to the ES3 database, which assembles them into provenance graphs. These graphs resemble workflow specifications, but are really reports -- they describe what actually happened, as opposed to what was requested. The ES3 database supports forward and backward navigation through provenance graphs (i.e. ancestor/descendant queries), as well as graph retrieval.}
}


% Provenance Challenge paper on the virtual data model and its implementation
% by the Virtual Data Language (VDL) and its runtime environment.
@ARTICLE{VDL:Challenge06,
  AUTHOR = {Ben Clifford and Ian Foster and Mihael Hategan and Tiberiu
                  Stef-Praun and Michael Wilde and Yong Zhao},
  TITLE = {Tracking Provenance in a Virtual Data Grid},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {The virtual data model allows data sets to be described prior to,
            and separate from, their physical materialization. Virtual data
            products are described by the three dimensions of the workflow that
            must be performed to materialize data sets, the runtime logs
            produced by the execution of these workflows, and the metadata
            annotations that permit application semantics to be associated with
            the data. This model is implemented by a Virtual Data Language
            (VDL) and its supporting processing tools and runtime
            environment. The VDL environment enables the work of deriving data
            products to be spread over a global Grid of storage and processing
            services, and uses both XML and relational data models to capture
            and query annotation, workflow and provenance in this widely
            distributed environment. This paper describes the implementation
            and data modeling aspects of these mechanisms in the context of a
            standardized data provenance challenge exercise.}
}


% Provenance Challenge paper on integrating RDF descriptions produced by the
% D2K and CyberIntegrator environments in a Kowari RDF database.
@article{NCSA:Challenge06,
  author    = {Joe Futrelle and Jim Myers},
  title     = {Tracking Provenance Semantics in Heterogeneous Execution Systems},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {20},
  number    = {5},
  year      = {2008},
  optkey    = {},
  optpages  = {},
  optmonth  = {},
  optnote   = {},
  optannote = {},
  abstract  = {Digital artifacts result from complex, heterogeneous work processes involving content management, process execution, and curation. Accordingly, systems for tracking provenance of digital artifacts need to be able to integrate heterogeneous descriptions produced by loosely-coupled or independent software components and work processes. In the approach described in this paper, two independently-developed execution environments, D2K and CyberIntegrator, were instrumented by their developers to produce process and content descriptions in the form of Resource Description Framework (RDF) statements. Using the open-source Kowari RDF database, these heterogeneous semantic descriptions were integrated to demonstrate the general applicability of RDF databases to answering provenance-related queries. The results suggest that the ``open-world'' semantic model provided by RDF, and the powerful query languages provided by RDF databases, can be extended to integrate a wide variety of heterogeneous provenance-related information with minimal investment in new standard API's, metadata formats, and execution environments.}
}


% Provenance Challenge paper on Provenance Aware Storage Systems (PASS) and
% the query model developed for the challenge.
@ARTICLE{PASS:Challenge06,
  AUTHOR = {Margo Seltzer and David A. Holland and Uri Braun and Kiran-Kumar Muniswamy-Reddy},
  TITLE = {{PASS-ing} the {Provenance Challenge}},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Provenance Aware Storage Systems (PASS) are a new class of storage
            system treating provenance as a first-class object, providing
            automatic collection, storage, and management of provenance as well
            as query capabilities.  We developed the first PASS prototype
            between 2005 and 2006, targeting scientific end-users. Prior to
            undertaking the Provenance Challenge, we had focused on provenance
            collection and storage, without much emphasis on a query model or
            language.  The challenge forced us to (quickly) develop a query
            model and infrastructure implementing this model.  We present a
            brief overview of the PASS prototype and a discussion of the
            evolution of the query model that we developed for the challenge.}
}


% Provenance Challenge paper on VisTrails and its change-based provenance
% mechanism, with provenance data organised in layers.
@ARTICLE{Vistrail:Challenge06,
  AUTHOR = {Carlos Scheidegger and David Koop and Emanuele Santos and
                  Huy Vo and Steven Callahan and Juliana Freire and Claudio
                  Silva},
  TITLE = {Tackling the {Provenance Challenge} One Layer at a Time},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {VisTrails is a new workflow management system that provides support
for scientific data exploration and visualization. Whereas workflows
have been traditionally used to automate repetitive tasks, for
applications that are exploratory in nature, very little is
repeated---change is the norm.  VisTrails uses a new change-based
provenance mechanism which was designed to manage rapidly-evolving
workflows. It uniformly and automatically captures provenance
information for data products and for the evolution of the workflows
used to generate these products. In this paper, we describe how the
provenance data is organized in layers and present a first
approach to querying these data that we developed to tackle the
Provenance Challenge queries.}
}


% Provenance Challenge paper on provenance generated by Wings during workflow
% instantiation and by the Pegasus mapping system during refinement.
@ARTICLE{Wings+Pegasus:Challenge06,
  AUTHOR = {Jihie Kim and Ewa Deelman and Yolanda Gil and Gaurang Mehta
                  and Varun Ratnakar},
  TITLE = {Provenance Trails in the {Wings/Pegasus} System},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Creation of valid scientific workflows involves keeping track of
            various workflow constraints, including data independent
            constraints on workflow components, data driven constraints, and
            resource management constraints.  We describe an approach to
            workflow instantiation and refinement that uses semantic
            representations of workflow constraints to 1) describe complex
            scientific applications in a data-independent manner, then 2)
            automatically generates workflows of computations for given data
            sets, and 3) finally maps them to available computing resources. We
            illustrate the provenance data generated by Wings during workflow
            instantiation and the refinement provenance by the Pegasus mapping
            system for execution over grid computing environments. We show how
            the results are mapped to the queries of the Provenance
            Challenge. }
}


% Provenance Challenge paper on Taverna's RDF-based provenance model and the
% ProQA (Provenance Query and Answer) framework for mining it.
@ARTICLE{myGrid:Challenge06,
  AUTHOR = {Jun Zhao and Carole Goble and Robert Stevens and Daniele
                  Turi},
  TITLE = {Mining {Taverna's} {Semantic Web} of Provenance},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2008},
  OPTKEY = {},
  VOLUME = {20},
  NUMBER = {5},
  OPTPAGES = {},
  OPTMONTH = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {Taverna is a workflow workbench and execution environment developed as part of the UK's myGrid project. Taverna's provenance model captures: information about the origin of experimental data results collected during workflow runs; derivation paths that present a datum's lineage; an audit trail of the experiment execution leading to the data; the context of the workflow; and the evidence of the knowledge outcomes as a result of its execution. Flexible and open models are required to cater for an accumulative body of knowledge as workflows are multiply re-run, and as the same data are gathered from external repositories by different workflows. Hence we adopt the RDF graph-based data model formalism. The provenance graphs generated by workflow runs are semantically enriched with descriptions about the workflows and their products, captured during workflow design, execution, interpretation and publication. This enables context-based analysis combining origin and domain knowledge about these experimental entities. Previous work has shown how Taverna's provenance is represented using Semantic Web technologies, combining external third-party metadata with semantic annotations capturing the signatures of workflow components, leading to a ``Semantic Web of Provenance''. This paper shows how this Semantic Web of Provenance can be mined by a 5-tiered provenance usage framework, ProQA (Provenance Query and Answer). ProQA supports a wide range of provenance operations, from fine-grained provenance retrieval, to high-level provenance analysis and reasoning for supporting a collection of user scenarios. This framework is implemented as the ProQA query API, and a set of system provenance workflows that analyze experiment results using the provenance records. 
These provenance workflows are consistent with the experiment practice of Taverna's users, and enable the provenance of the data analysis and interpretation process to be automatically collected during the runs of these workflows. We show how these features of Taverna's provenance support us in answering the questions from the provenance challenge workshop and a set of additional provenance queries.}
}


This file has been generated by bibtex2html 1.52