diff --git a/.github/workflows/cd-docs.yml b/.github/workflows/cd-docs.yml new file mode 100644 index 0000000000..93536f52bb --- /dev/null +++ b/.github/workflows/cd-docs.yml @@ -0,0 +1,45 @@ +name: deploy-docs +on: + workflow_dispatch: + push: + branches: + - master +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Configure Git Credentials + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: '3.9' + cache: 'pip' + cache-dependency-path: | + setup.py + tfx/dependencies.py + + - name: Save time for cache for mkdocs + run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + + - name: Caching + uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + + - name: Install Dependencies + run: pip install mkdocs mkdocs-material mkdocstrings[python] griffe-inherited-docstrings mkdocs-autorefs mkdocs-jupyter mkdocs-caption + + - name: Deploy to GitHub Pages + run: mkdocs gh-deploy --force diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml index 377f6420d4..c68f87848f 100644 --- a/.github/workflows/ci-test.yml +++ b/.github/workflows/ci-test.yml @@ -52,7 +52,7 @@ jobs: python -m pip install --upgrade pip wheel # TODO(b/232490018): Cython need to be installed separately to build pycocotools. 
python -m pip install Cython -c ./test_constraints.txt - pip install -c ./test_constraints.txt --extra-index-url https://pypi-nightly.tensorflow.org/simple --pre --editable .[all] + pip install -c ./test_constraints.txt --extra-index-url https://pypi-nightly.tensorflow.org/simple --pre .[all] env: TFX_DEPENDENCY_SELECTOR: ${{ matrix.dependency-selector }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a669857afc..613ccf4452 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: exclude: '\.svg$' - id: check-json - id: check-yaml - args: [--allow-multiple-documents] + args: [--allow-multiple-documents, --unsafe] - id: check-toml - repo: https://github.com/astral-sh/ruff-pre-commit diff --git a/docs/api/v1/components.md b/docs/api/v1/components.md new file mode 100644 index 0000000000..7fbf4391be --- /dev/null +++ b/docs/api/v1/components.md @@ -0,0 +1,3 @@ +# Components + +::: tfx.v1.components diff --git a/docs/api/v1/dsl.md b/docs/api/v1/dsl.md new file mode 100644 index 0000000000..d31a9551c3 --- /dev/null +++ b/docs/api/v1/dsl.md @@ -0,0 +1,3 @@ +# DSL + +::: tfx.v1.dsl diff --git a/docs/api/v1/extensions.md b/docs/api/v1/extensions.md new file mode 100644 index 0000000000..2679aae75d --- /dev/null +++ b/docs/api/v1/extensions.md @@ -0,0 +1,3 @@ +# Extension + +::: tfx.v1.extensions diff --git a/docs/api/v1/index.md b/docs/api/v1/index.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/api/v1/orchestration.md b/docs/api/v1/orchestration.md new file mode 100644 index 0000000000..26250ca1d9 --- /dev/null +++ b/docs/api/v1/orchestration.md @@ -0,0 +1,3 @@ +# Orchestration + +::: tfx.v1.orchestration diff --git a/docs/api/v1/proto.md b/docs/api/v1/proto.md new file mode 100644 index 0000000000..350264eaf4 --- /dev/null +++ b/docs/api/v1/proto.md @@ -0,0 +1,5 @@ +# Proto + +::: tfx.v1.proto + options: + show_if_no_docstring: true diff --git a/docs/api/v1/root.md b/docs/api/v1/root.md new 
file mode 100644 index 0000000000..b06cb920bf --- /dev/null +++ b/docs/api/v1/root.md @@ -0,0 +1,17 @@ +# Modules + +[components][tfx.v1.components] module: TFX components module. + +[dsl][tfx.v1.dsl] module: TFX DSL module. + +[extensions][tfx.v1.extensions] module: TFX extensions module. + +[orchestration][tfx.v1.orchestration] module: TFX orchestration module. + +[proto][tfx.v1.proto] module: TFX proto module. + +[testing][tfx.v1.testing] module: Public testing modules for TFX. + +[types][tfx.v1.types] module: TFX types module. + +[utils][tfx.v1.utils] module: TFX utils module. diff --git a/docs/api/v1/testing.md b/docs/api/v1/testing.md new file mode 100644 index 0000000000..1369879c3a --- /dev/null +++ b/docs/api/v1/testing.md @@ -0,0 +1,3 @@ +# Testing + +::: tfx.v1.testing diff --git a/docs/api/v1/types.md b/docs/api/v1/types.md new file mode 100644 index 0000000000..4b30de7ab2 --- /dev/null +++ b/docs/api/v1/types.md @@ -0,0 +1,3 @@ +# Types + +::: tfx.v1.types diff --git a/docs/api/v1/utils.md b/docs/api/v1/utils.md new file mode 100644 index 0000000000..349a42c01b --- /dev/null +++ b/docs/api/v1/utils.md @@ -0,0 +1,3 @@ +# Utils + +::: tfx.v1.utils diff --git a/docs/assets/tf_full_color_primary_icon.svg b/docs/assets/tf_full_color_primary_icon.svg new file mode 100644 index 0000000000..3e7247778d --- /dev/null +++ b/docs/assets/tf_full_color_primary_icon.svg @@ -0,0 +1 @@ +FullColorPrimary Icon \ No newline at end of file diff --git a/docs/guide/beam.md b/docs/guide/beam.md index 59410ac8af..165e03551c 100644 --- a/docs/guide/beam.md +++ b/docs/guide/beam.md @@ -56,9 +56,9 @@ Please follow one of the paths in [Managing Python Pipeline Dependencies](https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/) to provide this using one of the following beam_pipeline_args: -* --setup_file -* --extra_package -* --requirements_file +* `--setup_file` +* `--extra_package` +* `--requirements_file` Notice: In any of above cases, please make sure that 
the same version of `tfx` is listed as a dependency. diff --git a/docs/guide/build_local_pipeline.md b/docs/guide/build_local_pipeline.md index ca725d001d..c5a4e3a998 100644 --- a/docs/guide/build_local_pipeline.md +++ b/docs/guide/build_local_pipeline.md @@ -35,7 +35,7 @@ pip install tfx ``` If you are new to TFX pipelines, -[learn more about the core concepts for TFX pipelines](understanding_tfx_pipelines) +[learn more about the core concepts for TFX pipelines](understanding_tfx_pipelines.md) before continuing. ## Build a pipeline using a template @@ -51,24 +51,24 @@ it to meet your needs. 1. See list of the available TFX pipeline templates: -
+    ```bash
     tfx template list
-    
+ ``` 1. Select a template from the list -
-    tfx template copy --model=template --pipeline_name=pipeline-name \
-    --destination_path=destination-path
-    
+ ```bash + tfx template copy --model=template --pipeline_name=pipeline-name \ + --destination_path=destination-path + ``` Replace the following: - * template: The name of the template you want to copy. - * pipeline-name: The name of the pipeline to create. - * destination-path: The path to copy the template into. + * `template`: The name of the template you want to copy. + * `pipeline-name`: The name of the pipeline to create. + * `destination-path`: The path to copy the template into. - Learn more about the [`tfx template copy` command](cli#copy). + Learn more about the [`tfx template copy` command](cli.md#copy). 1. A copy of the pipeline template has been created at the path you specified. @@ -99,13 +99,13 @@ This section provides an overview of the scaffolding created by a template. 1. Run the following commands in your pipeline directory: -
+    ```bash
     tfx pipeline create --pipeline_path local_runner.py
-    
+ ``` -
+    ```bash
     tfx run create --pipeline_name pipeline_name
-    
+ ``` The command creates a pipeline run using `LocalDagRunner`, which adds the following directories to your pipeline: @@ -157,8 +157,8 @@ template. implement a pipeline for tabular data using the TFX standard components. If you are moving an existing ML workflow into a pipeline, you may need to revise your code to make full use of - [TFX standard components](index#tfx_standard_components). You may also need - to create [custom components](understanding_custom_components) that + [TFX standard components](index.md#tfx_standard_components). You may also need + to create [custom components](understanding_custom_components.md) that implement features which are unique to your workflow or that are not yet supported by TFX standard components. @@ -194,17 +194,17 @@ without using a template. functionality to help you implement a complete ML workflow. If you are moving an existing ML workflow into a pipeline, you may need to revise your code to make full use of TFX standard components. You may also need to - create [custom components](understanding_custom_components) that implement + create [custom components](understanding_custom_components.md) that implement features such as data augmentation. * Learn more about - [standard TFX components](index#tfx_standard_components). - * Learn more about [custom components](understanding_custom_components). + [standard TFX components](index.md#tfx_standard_components). + * Learn more about [custom components](understanding_custom_components.md). 1. Create a script file to define your pipeline using the following example. This guide refers to this file as `my_pipeline.py`. -
+    ```python
     import os
     from typing import Optional, Text, List
     from absl import logging
@@ -248,7 +248,7 @@ without using a template.
     if __name__ == '__main__':
       logging.set_verbosity(logging.INFO)
       run_pipeline()
-    
+ ``` In the coming steps, you define your pipeline in `create_pipeline` and run your pipeline locally using the local runner. @@ -277,7 +277,7 @@ without using a template. pipeline uses the `ExampleGen` standard component to load a CSV from a directory at `./data`. -
+    ```python
     from tfx.components import CsvExampleGen
 
     DATA_PATH = os.path.join('.', 'data')
@@ -315,7 +315,7 @@ without using a template.
         )
 
       tfx.orchestration.LocalDagRunner().run(my_pipeline)
-    
+ ``` `CsvExampleGen` creates serialized example records using the data in the CSV at the specified data path. By setting the `CsvExampleGen` component's @@ -326,13 +326,13 @@ without using a template. 1. Use the following command to run your `my_pipeline.py` script. -
+    ```bash
     python my_pipeline.py
-    
+ ``` The result should be something like the following: -
+    ```
     INFO:absl:Component CsvExampleGen depends on [].
     INFO:absl:Component CsvExampleGen is scheduled.
     INFO:absl:Component CsvExampleGen is running.
@@ -347,6 +347,6 @@ without using a template.
     INFO:absl:Running publisher for CsvExampleGen
     INFO:absl:MetadataStore with DB connection initialized
     INFO:absl:Component CsvExampleGen is finished.
-    
+ ``` 1. Continue to iteratively add components to your pipeline. diff --git a/docs/guide/build_tfx_pipeline.md b/docs/guide/build_tfx_pipeline.md index 5cfbe0f85b..f03a5f4648 100644 --- a/docs/guide/build_tfx_pipeline.md +++ b/docs/guide/build_tfx_pipeline.md @@ -1,11 +1,11 @@ # Building TFX pipelines Note: For a conceptual view of TFX Pipelines, see -[Understanding TFX Pipelines](understanding_tfx_pipelines). +[Understanding TFX Pipelines](understanding_tfx_pipelines.md). Note: Want to build your first pipeline before you dive into the details? Get started -[building a pipeline using a template](https://www.tensorflow.org/tfx/guide/build_local_pipeline#build_a_pipeline_using_a_template). +[building a pipeline using a template](build_local_pipeline.md#build-a-pipeline-using-a-template). ## Using the `Pipeline` class @@ -13,37 +13,37 @@ TFX pipelines are defined using the [`Pipeline` class](https://github.com/tensorflow/tfx/blob/master/tfx/orchestration/pipeline.py){: .external }. The following example demonstrates how to use the `Pipeline` class. -
+```python
 pipeline.Pipeline(
-    pipeline_name=pipeline-name,
-    pipeline_root=pipeline-root,
-    components=components,
-    enable_cache=enable-cache,
-    metadata_connection_config=metadata-connection-config,
+    pipeline_name=pipeline-name,
+    pipeline_root=pipeline-root,
+    components=components,
+    enable_cache=enable-cache,
+    metadata_connection_config=metadata-connection-config,
 )
-
+``` Replace the following: -* pipeline-name: The name of this pipeline. The pipeline name must +* `pipeline-name`: The name of this pipeline. The pipeline name must be unique. TFX uses the pipeline name when querying ML Metadata for component input artifacts. Reusing a pipeline name may result in unexpected behaviors. -* pipeline-root: The root path of this pipeline's outputs. The root +* `pipeline-root`: The root path of this pipeline's outputs. The root path must be the full path to a directory that your orchestrator has read and write access to. At runtime, TFX uses the pipeline root to generate output paths for component artifacts. This directory can be local, or on a supported distributed file system, such as Google Cloud Storage or HDFS. -* components: A list of component instances that make up this +* `components`: A list of component instances that make up this pipeline's workflow. -* enable-cache: (Optional.) A boolean value that indicates if this +* `enable-cache`: (Optional.) A boolean value that indicates if this pipeline uses caching to speed up pipeline execution. -* metadata-connection-config: (Optional.) A connection +* `metadata-connection-config`: (Optional.) A connection configuration for ML Metadata. ## Defining the component execution graph diff --git a/docs/guide/bulkinferrer.md b/docs/guide/bulkinferrer.md index e96735d014..9b5e364d55 100644 --- a/docs/guide/bulkinferrer.md +++ b/docs/guide/bulkinferrer.md @@ -2,7 +2,7 @@ The BulkInferrer TFX component performs batch inference on unlabeled data. The generated -InferenceResult([tensorflow_serving.apis.prediction_log_pb2.PredictionLog](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_log.proto)) +InferenceResult([`tensorflow_serving.apis.prediction_log_pb2.PredictionLog`](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_log.proto)) contains the original features and the prediction results. 
BulkInferrer consumes: @@ -11,7 +11,7 @@ BulkInferrer consumes: [SavedModel](https://www.tensorflow.org/guide/saved_model.md) format. * Unlabelled tf.Examples that contain features. * (Optional) Validation result from - [Evaluator](https://www.tensorflow.org/tfx/guide/evaluator.md) component. + [Evaluator](evaluator.md) component. BulkInferrer emits: @@ -21,9 +21,9 @@ BulkInferrer emits: A BulkInferrer TFX component is used to perform batch inference on unlabeled tf.Examples. It is typically deployed after an -[Evaluator](https://www.tensorflow.org/tfx/guide/evaluator.md) component to +[Evaluator](evaluator.md) component to perform inference with a validated model, or after a -[Trainer](https://www.tensorflow.org/tfx/guide/trainer.md) component to directly +[Trainer](trainer.md) component to directly perform inference on exported model. It currently performs in-memory model inference and remote inference. @@ -42,4 +42,4 @@ bulk_inferrer = BulkInferrer( ``` More details are available in the -[BulkInferrer API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/BulkInferrer). +[BulkInferrer API reference][tfx.v1.components.BulkInferrer]. diff --git a/docs/guide/cli.md b/docs/guide/cli.md index 46fa26a138..855f5d2bdd 100644 --- a/docs/guide/cli.md +++ b/docs/guide/cli.md @@ -18,19 +18,19 @@ interface might change as new versions are released. The TFX CLI is installed as a part of the TFX package. All CLI commands follow the structure below: -
-tfx command-group command flags
-
+```bash +tfx <command-group> <command> <flags> +``` -The following command-group options are currently supported: +The following command-group options are currently supported: -* [tfx pipeline](#tfx-pipeline) - Create and manage TFX pipelines. -* [tfx run](#tfx-run) - Create and manage runs of TFX pipelines on various +* [`tfx pipeline`](#tfx-pipeline) - Create and manage TFX pipelines. +* [`tfx run`](#tfx-run) - Create and manage runs of TFX pipelines on various orchestration platforms. -* [tfx template](#tfx-template-experimental) - Experimental commands for +* [`tfx template`](#tfx-template-experimental) - Experimental commands for listing and copying TFX pipeline templates. -Each command group provides a set of commands. Follow the +Each command group provides a set of commands. Follow the instructions in the [pipeline commands](#tfx-pipeline), [run commands](#tfx-run), and [template commands](#tfx-template-experimental) sections to learn more about using these commands. @@ -42,15 +42,15 @@ Flags let you pass arguments into CLI commands. Words in flags are separated with either a hyphen (`-`) or an underscore (`_`). For example, the pipeline name flag can be specified as either `--pipeline-name` or `--pipeline_name`. This document specifies flags with underscores for brevity. Learn more about -[flags used in the TFX CLI](#understanding-tfx-cli-flags). +[flags used in the TFX CLI](#understanding-tfx-cli-flags). ## tfx pipeline The structure for commands in the `tfx pipeline` command group is as follows: -
-tfx pipeline command required-flags [optional-flags]
-
+```bash +tfx pipeline command required-flags [optional-flags] +``` Use the following sections to learn more about the commands in the `tfx pipeline` command group. @@ -61,11 +61,11 @@ Creates a new pipeline in the given orchestrator. Usage: -
+```bash
 tfx pipeline create --pipeline_path=pipeline-path [--endpoint=endpoint --engine=engine \
 --iap_client_id=iap-client-id --namespace=namespace \
 --build_image --build_base_image=build-base-image]
-
+```
--pipeline_path=pipeline-path
@@ -154,35 +154,35 @@ tfx pipeline create --pipeline_path=pipeline-path [--endpoint=en
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx pipeline create --engine=kubeflow --pipeline_path=pipeline-path \
 --iap_client_id=iap-client-id --namespace=namespace --endpoint=endpoint \
 --build_image
-
+``` Local: -
+```bash
 tfx pipeline create --engine=local --pipeline_path=pipeline-path
-
+``` Vertex: -
+```bash
 tfx pipeline create --engine=vertex --pipeline_path=pipeline-path \
 --build_image
-
+``` To autodetect engine from user environment, simply avoid using the engine flag like the example below. For more details, check the flags section. -
+```bash
 tfx pipeline create --pipeline_path=pipeline-path
-
+``` ### update @@ -190,10 +190,10 @@ Updates an existing pipeline in the given orchestrator. Usage: -
+```bash
 tfx pipeline update --pipeline_path=pipeline-path [--endpoint=endpoint --engine=engine \
 --iap_client_id=iap-client-id --namespace=namespace --build_image]
-
+```
--pipeline_path=pipeline-path
@@ -271,28 +271,28 @@ tfx pipeline update --pipeline_path=pipeline-path [--endpoint=en
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx pipeline update --engine=kubeflow --pipeline_path=pipeline-path \
 --iap_client_id=iap-client-id --namespace=namespace --endpoint=endpoint \
 --build_image
-
+``` Local: -
+```bash
 tfx pipeline update --engine=local --pipeline_path=pipeline-path
-
+``` Vertex: -
+```bash
 tfx pipeline update --engine=vertex --pipeline_path=pipeline-path \
 --build_image
-
+``` ### compile @@ -310,9 +310,9 @@ Recommended to use before creating or updating a pipeline. Usage: -
+```bash
 tfx pipeline compile --pipeline_path=pipeline-path [--engine=engine]
-
+```
--pipeline_path=pipeline-path
@@ -344,25 +344,25 @@ tfx pipeline compile --pipeline_path=pipeline-path [--engine=eng
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx pipeline compile --engine=kubeflow --pipeline_path=pipeline-path
-
+``` Local: -
+```bash
 tfx pipeline compile --engine=local --pipeline_path=pipeline-path
-
+``` Vertex: -
+```bash
 tfx pipeline compile --engine=vertex --pipeline_path=pipeline-path
-
+``` ### delete @@ -370,10 +370,10 @@ Deletes a pipeline from the given orchestrator. Usage: -
+```bash
 tfx pipeline delete --pipeline_path=pipeline-path [--endpoint=endpoint --engine=engine \
 --iap_client_id=iap-client-id --namespace=namespace]
-
+```
--pipeline_path=pipeline-path
@@ -439,26 +439,26 @@ tfx pipeline delete --pipeline_path=pipeline-path [--endpoint=en
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx pipeline delete --engine=kubeflow --pipeline_name=pipeline-name \
 --iap_client_id=iap-client-id --namespace=namespace --endpoint=endpoint
-
+``` Local: -
+```bash
 tfx pipeline delete --engine=local --pipeline_name=pipeline-name
-
+``` Vertex: -
+```bash
 tfx pipeline delete --engine=vertex --pipeline_name=pipeline-name
-
+``` ### list @@ -466,10 +466,10 @@ Lists all the pipelines in the given orchestrator. Usage: -
+```bash
 tfx pipeline list [--endpoint=endpoint --engine=engine \
 --iap_client_id=iap-client-id --namespace=namespace]
-
+```
--endpoint=endpoint
@@ -533,34 +533,34 @@ tfx pipeline list [--endpoint=endpoint --engine=engine \
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx pipeline list --engine=kubeflow --iap_client_id=iap-client-id \
 --namespace=namespace --endpoint=endpoint
-
+``` Local: -
+```bash
 tfx pipeline list --engine=local
-
+``` Vertex: -
+```bash
 tfx pipeline list --engine=vertex
-
+``` ## tfx run The structure for commands in the `tfx run` command group is as follows: -
+```bash
 tfx run command required-flags [optional-flags]
-
+``` Use the following sections to learn more about the commands in the `tfx run` command group. @@ -572,10 +572,10 @@ most recent pipeline version of the pipeline in the cluster is used. Usage: -
+```bash
 tfx run create --pipeline_name=pipeline-name [--endpoint=endpoint \
 --engine=engine --iap_client_id=iap-client-id --namespace=namespace]
-
+```
--pipeline_name=pipeline-name
@@ -660,28 +660,28 @@ tfx run create --pipeline_name=pipeline-name [--endpoint=endpoin
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx run create --engine=kubeflow --pipeline_name=pipeline-name --iap_client_id=iap-client-id \
 --namespace=namespace --endpoint=endpoint
-
+``` Local: -
+```bash
 tfx run create --engine=local --pipeline_name=pipeline-name
-
+``` Vertex: -
+```bash
 tfx run create --engine=vertex --pipeline_name=pipeline-name \
   --runtime_parameter=var_name=var_value \
   --project=gcp-project-id --region=gcp-region
-
+``` ### terminate @@ -691,10 +691,10 @@ Stops a run of a given pipeline. Usage: -
+```bash
 tfx run terminate --run_id=run-id [--endpoint=endpoint --engine=engine \
 --iap_client_id=iap-client-id --namespace=namespace]
-
+```
--run_id=run-id
@@ -756,14 +756,14 @@ tfx run terminate --run_id=run-id [--endpoint=endpoint --e
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx run delete --engine=kubeflow --run_id=run-id --iap_client_id=iap-client-id \
 --namespace=namespace --endpoint=endpoint
-
+``` ### list @@ -773,10 +773,10 @@ Lists all runs of a pipeline. Usage: -
+```bash
 tfx run list --pipeline_name=pipeline-name [--endpoint=endpoint \
 --engine=engine --iap_client_id=iap-client-id --namespace=namespace]
-
+```
--pipeline_name=pipeline-name
@@ -839,14 +839,14 @@ tfx run list --pipeline_name=pipeline-name [--endpoint=endpoint<
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx run list --engine=kubeflow --pipeline_name=pipeline-name --iap_client_id=iap-client-id \
 --namespace=namespace --endpoint=endpoint
-
+``` ### status @@ -856,10 +856,10 @@ Returns the current status of a run. Usage: -
+```bash
 tfx run status --pipeline_name=pipeline-name --run_id=run-id [--endpoint=endpoint \
 --engine=engine --iap_client_id=iap-client-id --namespace=namespace]
-
+```
--pipeline_name=pipeline-name
@@ -924,14 +924,14 @@ tfx run status --pipeline_name=pipeline-name --run_id=run-id
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx run status --engine=kubeflow --run_id=run-id --pipeline_name=pipeline-name \
 --iap_client_id=iap-client-id --namespace=namespace --endpoint=endpoint
-
+``` ### delete @@ -941,10 +941,10 @@ Deletes a run of a given pipeline. Usage: -
+```bash
 tfx run delete --run_id=run-id [--engine=engine --iap_client_id=iap-client-id \
 --namespace=namespace --endpoint=endpoint]
-
+```
--run_id=run-id
@@ -1006,22 +1006,22 @@ tfx run delete --run_id=run-id [--engine=engine --iap_clie
-#### Examples: +#### Examples Kubeflow: -
+```bash
 tfx run delete --engine=kubeflow --run_id=run-id --iap_client_id=iap-client-id \
 --namespace=namespace --endpoint=endpoint
-
+``` ## tfx template [Experimental] The structure for commands in the `tfx template` command group is as follows: -
+```bash
 tfx template command required-flags [optional-flags]
-
+``` Use the following sections to learn more about the commands in the `tfx template` command group. Template is an experimental feature and subject to @@ -1033,9 +1033,9 @@ List available TFX pipeline templates. Usage: -
+```bash
 tfx template list
-
+``` ### copy @@ -1043,10 +1043,10 @@ Copy a template to the destination directory. Usage: -
+```bash
 tfx template copy --model=model --pipeline_name=pipeline-name \
 --destination_path=destination-path
-
+```
--model=model
diff --git a/docs/guide/container_component.md b/docs/guide/container_component.md index 4deb61e786..67449cc7b9 100644 --- a/docs/guide/container_component.md +++ b/docs/guide/container_component.md @@ -5,7 +5,7 @@ any language into your pipeline, so long as you can execute that code in a Docker container. If you are new to TFX pipelines, -[learn more about the core concepts of TFX pipelines](understanding_tfx_pipelines). +[learn more about the core concepts of TFX pipelines](understanding_tfx_pipelines.md). ## Creating a Container-based Component diff --git a/docs/guide/custom_component.md b/docs/guide/custom_component.md index f9c12ca41f..9527f3bbe2 100644 --- a/docs/guide/custom_component.md +++ b/docs/guide/custom_component.md @@ -6,7 +6,7 @@ specification, executor, and component interface classes. This approach lets you reuse and extend a standard component to fit your needs. If you are new to TFX pipelines, -[learn more about the core concepts of TFX pipelines](understanding_tfx_pipelines). +[learn more about the core concepts of TFX pipelines](understanding_tfx_pipelines.md). ## Custom executor or custom component diff --git a/docs/guide/custom_function_component.md b/docs/guide/custom_function_component.md index 432ad28215..8aca8be9aa 100644 --- a/docs/guide/custom_function_component.md +++ b/docs/guide/custom_function_component.md @@ -64,7 +64,7 @@ def MyDataProcessor( ``` If you are new to TFX pipelines, -[learn more about the core concepts of TFX pipelines](understanding_tfx_pipelines). +[learn more about the core concepts of TFX pipelines](understanding_tfx_pipelines.md). ## Inputs, outputs, and parameters diff --git a/docs/guide/evaluator.md b/docs/guide/evaluator.md index ed99871521..a1a72ab15e 100644 --- a/docs/guide/evaluator.md +++ b/docs/guide/evaluator.md @@ -15,7 +15,7 @@ the [Pusher](pusher.md) that it is ok to push the model to production. 
* Consumes: * An eval split from - [Examples](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/types/standard_artifacts/Examples) + [Examples][tfx.v1.types.standard_artifacts.Examples] * A trained model from [Trainer](trainer.md) * A previously blessed model (if validation to be performed) * Emits: @@ -142,4 +142,4 @@ if not validation_result.validation_ok: ``` More details are available in the -[Evaluator API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/Evaluator). +[Evaluator API reference][tfx.v1.components.Evaluator]. diff --git a/docs/guide/examplegen.md b/docs/guide/examplegen.md index 9f4712fdb8..aff3284de2 100644 --- a/docs/guide/examplegen.md +++ b/docs/guide/examplegen.md @@ -34,7 +34,7 @@ components for these data sources and formats: * [Parquet](https://github.com/tensorflow/tfx/blob/master/tfx/components/example_gen/custom_executors/parquet_executor.py) See the usage examples in the source code and -[this discussion](/tfx/guide/examplegen#custom_examplegen) for more information on +[this discussion](examplegen.md#custom_examplegen) for more information on how to use and develop custom executors. Note: In most case it's better to inherit from `base_example_gen_executor` @@ -42,7 +42,7 @@ instead of `base_executor`. So following the Avro or Parquet example in the Executor source code may be advisable. 
In addition, these data sources and formats are available as -[custom component](/tfx/guide/understanding_custom_components) examples: +[custom component](understanding_custom_components.md) examples: * [Presto](https://github.com/tensorflow/tfx/tree/master/tfx/examples/custom_components/presto_example_gen) @@ -629,7 +629,7 @@ evaluator = Evaluator( ``` More details are available in the -[CsvExampleGen API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/CsvExampleGen), -[FileBasedExampleGen API implementation](https://github.com/tensorflow/tfx/blob/master/tfx/components/example_gen/component.py) +[CsvExampleGen API reference][tfx.v1.components.CsvExampleGen], +[FileBasedExampleGen API implementation][tfx.v1.components.example_gen.component], and -[ImportExampleGen API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/ImportExampleGen). +[ImportExampleGen API reference][tfx.v1.components.ImportExampleGen]. diff --git a/docs/guide/exampleval.md b/docs/guide/exampleval.md index 3f9c6ef949..e41823373e 100644 --- a/docs/guide/exampleval.md +++ b/docs/guide/exampleval.md @@ -38,4 +38,4 @@ validate_stats = ExampleValidator( ``` More details are available in the -[ExampleValidator API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/ExampleValidator). +[ExampleValidator API reference][tfx.v1.components.ExampleValidator]. diff --git a/docs/guide/fairness_indicators.md b/docs/guide/fairness_indicators.md index 785faab5f9..88192873ae 100644 --- a/docs/guide/fairness_indicators.md +++ b/docs/guide/fairness_indicators.md @@ -51,7 +51,7 @@ model, please see the “Model-Agnostic TFMA” section below. After your Estimator is trained, you will need to export a saved model for evaluation purposes. To learn more, see the -[TFMA guide](/tfx/model_analysis/get_started). +[TFMA guide](https://www.tensorflow.org/tfx/model_analysis/get_started). 
### Configuring Slices diff --git a/docs/guide/index.md b/docs/guide/index.md index 4af4795144..dd1001ca38 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -62,19 +62,19 @@ environment. TFX provides the following: ML workflow on several platforms, such as: Apache Airflow, Apache Beam, and Kubeflow Pipelines. - [Learn more about TFX pipelines](https://www.tensorflow.org/tfx/guide/understanding_tfx_pipelines). + [Learn more about TFX pipelines](understanding_tfx_pipelines.md). * A set of standard components that you can use as a part of a pipeline, or as a part of your ML training script. TFX standard components provide proven functionality to help you get started building an ML process easily. - [Learn more about TFX standard components](#tfx_standard_components). + [Learn more about TFX standard components](#tfx-standard-components). * Libraries which provide the base functionality for many of the standard components. You can use the TFX libraries to add this functionality to your own custom components, or use them separately. - [Learn more about the TFX libraries](#tfx_libraries). + [Learn more about the TFX libraries](#tfx-libraries). TFX is a Google-production-scale machine learning toolkit based on TensorFlow. It provides a configuration framework and shared libraries to integrate common @@ -412,7 +412,7 @@ A typical TFX pipeline will include a [Transform](transform.md) component, which will perform feature engineering by leveraging the capabilities of the [TensorFlow Transform (TFT)](tft.md) library. A Transform component consumes the schema created by a SchemaGen component, and applies -[data transformations](https://www.tensorflow.org/tfx/tutorials/transform/simple) +[data transformations](../tutorials/transform/simple) to create, combine, and transform the features that will be used to train your model. 
Cleanup of missing values and conversion of types should also be done in the Transform component if there is ever a possibility that these will also be @@ -568,7 +568,7 @@ on using TensorFlow JS. ## Creating a TFX Pipeline With Airflow Check -[airflow workshop](https://www.tensorflow.org/tfx/tutorials/tfx/airflow_workshop/) +[airflow workshop](../tutorials/tfx/airflow_workshop/) for details ## Creating a TFX Pipeline With Kubeflow @@ -582,7 +582,7 @@ Kubeflow deployment guideline that guide through the options for ### Configure and run TFX pipeline Please follow the -[TFX on Cloud AI Platform Pipeline tutorial](https://www.tensorflow.org/tfx/tutorials/tfx/cloud-ai-platform-pipelines) +[TFX on Cloud AI Platform Pipeline tutorial](../tutorials/tfx/cloud-ai-platform-pipelines/) to run the TFX example pipeline on Kubeflow. TFX components have been containerized to compose the Kubeflow pipeline and the sample illustrates the ability to configure the pipeline to read large public dataset and execute diff --git a/docs/guide/infra_validator.md b/docs/guide/infra_validator.md index 021026997c..1daeea2856 100644 --- a/docs/guide/infra_validator.md +++ b/docs/guide/infra_validator.md @@ -54,7 +54,7 @@ modes: Usually InfraValidator is defined next to an Evaluator component, and its output is fed to a Pusher. If InfraValidator fails, the model will not be pushed. -```python {highlight="lines:8-11 context:infra_blessing,1"} +```python hl_lines="8-11" evaluator = Evaluator( model=trainer.outputs['model'], examples=example_gen.outputs['examples'], @@ -108,7 +108,7 @@ block of the `ServingSpec`. For example to use TensorFlow Serving binary running on the Kubernetes cluster, `tensorflow_serving` and `kubernetes` field should be set. 
-```python {highlight="lines:4:9-4:26,7:9-7:18"} +```python hl_lines="4 7" infra_validator=InfraValidator( model=trainer.outputs['model'], serving_spec=tfx.proto.ServingSpec( @@ -127,7 +127,7 @@ To further configure `ServingSpec`, please check out the Optional configuration to adjust the infra validation criteria or workflow. -```python {highlight="lines:4-10"} +```python hl_lines="4-10" infra_validator=InfraValidator( model=trainer.outputs['model'], serving_spec=tfx.proto.ServingSpec(...), @@ -151,7 +151,7 @@ infra validation in `LOAD_AND_QUERY` mode. In order to use `LOAD_AND_QUERY` mode, it is required to specify both `request_spec` execution properties as well as `examples` input channel in the component definition. -```python {highlight="lines:7:9-7:62 lines:10-16"} +```python hl_lines="8 11-17" infra_validator = InfraValidator( model=trainer.outputs['model'], # This is the source for the data that will be used to build a request. @@ -198,7 +198,7 @@ and can also be pushed by the [Pusher](pusher.md), just like `Model` artifact. Current InfraValidator is not complete yet, and has some limitations. -- Only TensorFlow [SavedModel](/guide/saved_model) model format can be +- Only TensorFlow [SavedModel](https://www.tensorflow.org/guide/saved_model) model format can be validated. - When running TFX on Kubernetes, the pipeline should be executed by `KubeflowDagRunner` inside Kubeflow Pipelines. The model server will be @@ -206,13 +206,13 @@ Current InfraValidator is not complete yet, and has some limitations. using. - InfraValidator is primarily focused on deployments to [TensorFlow Serving](serving.md), and while still useful it is less accurate - for deployments to [TensorFlow Lite](/lite) and [TensorFlow.js](/js), or + for deployments to [TensorFlow Lite](https://www.tensorflow.org/lite) and [TensorFlow.js](https://www.tensorflow.org/js), or other inference frameworks. 
- There's a limited support on `LOAD_AND_QUERY` mode for the [Predict](/versions/r1.15/api_docs/python/tf/saved_model/predict_signature_def) method signature (which is the only exportable method in TensorFlow 2). InfraValidator requires the Predict signature to consume a serialized - [`tf.Example`](/tutorials/load_data/tfrecord#tfexample) as the only input. + [`tf.Example`](https://www.tensorflow.org/tutorials/load_data/tfrecord#tfexample) as the only input. ```python @tf.function diff --git a/docs/guide/keras.md b/docs/guide/keras.md index 275a3bd61c..dd1454db9a 100644 --- a/docs/guide/keras.md +++ b/docs/guide/keras.md @@ -106,7 +106,7 @@ Here are several examples with native Keras: end-to-end example with advanced Transform usage. We also have a per-component -[Keras Colab](https://www.tensorflow.org/tfx/tutorials/tfx/components_keras). +[Keras Colab](../../tutorials/tfx/components_keras). ### TFX Components diff --git a/docs/guide/kubeflow.md b/docs/guide/kubeflow.md index ad94a26c64..e29b531851 100644 --- a/docs/guide/kubeflow.md +++ b/docs/guide/kubeflow.md @@ -15,5 +15,5 @@ Pipelines SDK allows for creation and sharing of components and composition and of pipelines programmatically. See the -[TFX example on Kubeflow Pipelines](https://www.tensorflow.org/tfx/tutorials/tfx/cloud-ai-platform-pipelines) +[TFX example on Kubeflow Pipelines](../../tutorials/tfx/cloud-ai-platform-pipelines) for details on running TFX at scale on Google cloud. diff --git a/docs/guide/local_orchestrator.md b/docs/guide/local_orchestrator.md index 74bd5c6fb3..049a2e2421 100644 --- a/docs/guide/local_orchestrator.md +++ b/docs/guide/local_orchestrator.md @@ -5,8 +5,8 @@ Local orchestrator is a simple orchestrator that is included in the TFX Python package. It runs pipelines in the local environment in a single process. It provides fast iterations for development and debugging, but it is not suitable for -large production workloads. 
Please use [Vertex Pipelines](/tfx/guide/vertex) or -[Kubeflow Pipelines](/tfx/guide/kubeflow) for production use cases. +large production workloads. Please use [Vertex Pipelines](vertex.md) or +[Kubeflow Pipelines](kubeflow.md) for production use cases. -Try the [TFX tutorials](/tfx/tutorials/tfx/penguin_simple) running in Colab to +Try the [TFX tutorials](../../tutorials/tfx/penguin_simple) running in Colab to learn how to use the local orchestrator. diff --git a/docs/guide/mlmd.md b/docs/guide/mlmd.md index a283e1f7a3..b2cdb58973 100644 --- a/docs/guide/mlmd.md +++ b/docs/guide/mlmd.md @@ -191,7 +191,7 @@ following list provides a non-exhaustive overview of some of the major benefits. within a range; find previous executions in a context with the same inputs. See the -[MLMD tutorial](https://www.tensorflow.org/tfx/tutorials/mlmd/mlmd_tutorial) for +[MLMD tutorial](../../tutorials/mlmd/mlmd_tutorial) for an example that shows you how to use the MLMD API and the metadata store to retrieve lineage information. @@ -439,7 +439,7 @@ to learn how to use MLMD declarative nodes filtering capabilities on properties and 1-hop neighborhood nodes. Also check out the -[MLMD tutorial](https://www.tensorflow.org/tfx/tutorials/mlmd/mlmd_tutorial) to +[MLMD tutorial](../../tutorials/mlmd/mlmd_tutorial) to learn how to use MLMD to trace the lineage of your pipeline components. MLMD provides utilities to handle schema and data migrations across releases. diff --git a/docs/guide/non_tf.md b/docs/guide/non_tf.md index 1727bb4c7f..0bfde25fc3 100644 --- a/docs/guide/non_tf.md +++ b/docs/guide/non_tf.md @@ -32,7 +32,7 @@ using the standard TFX components with other frameworks include: instead of raw features, and users can run transform as a preprocessing step before calling the model prediction when serving. 
* **Trainer** supports - [GenericTraining](https://www.tensorflow.org/tfx/guide/trainer#generic_trainer) + [GenericTraining](trainer.md#generic-trainer) so users can train their models using any ML framework. * **Evaluator** by default only supports `saved_model`, but users can provide a UDF that generates predictions for model evaluation. @@ -49,7 +49,7 @@ high-performance machine learning research. is a neural network library and ecosystem for JAX, designed for flexibility. With [jax2tf](https://github.com/google/jax/tree/main/jax/experimental/jax2tf), -we are able to convert trained JAX/Flax models into `saved_model` format, +we are able to convert trained JAX/Flax models into `saved_model` format, which can be used seamlessly in TFX with generic training and model evaluation. For details, check this [example](https://github.com/tensorflow/tfx/blob/master/tfx/examples/penguin/penguin_utils_flax_experimental.py). diff --git a/docs/guide/pusher.md b/docs/guide/pusher.md index 1b3b386f7c..8b68f73727 100644 --- a/docs/guide/pusher.md +++ b/docs/guide/pusher.md @@ -1,16 +1,16 @@ # The Pusher TFX Pipeline Component The Pusher component is used to push a validated model to a -[deployment target](index.md#deployment_targets) during model training or +[deployment target](index.md#deployment-targets) during model training or re-training. Before the deployment, Pusher relies on one or more blessings from other validation components to decide whether to push the model or not. -- [Evaluator](evaluator) blesses the model if the new trained model is "good +- [Evaluator](evaluator.md) blesses the model if the new trained model is "good enough" to be pushed to production. -- (Optional but recommended) [InfraValidator](infra_validator) blesses the +- (Optional but recommended) [InfraValidator](infra_validator.md) blesses the model if the model is mechanically servable in a production environment. 
-A Pusher component consumes a trained model in [SavedModel](/guide/saved_model) +A Pusher component consumes a trained model in [SavedModel](https://www.tensorflow.org/guide/saved_model) format, and produces the same SavedModel, along with versioning metadata. ## Using the Pusher Component @@ -36,7 +36,7 @@ pusher = Pusher( (From version 0.30.0) InfraValidator can also produce `InfraBlessing` artifact containing a -[model with warmup](infra_validator#producing_a_savedmodel_with_warmup), and +[model with warmup](infra_validator.md#producing-a-savedmodel-with-warmup), and Pusher can push it just like a `Model` artifact. ```python @@ -55,4 +55,4 @@ pusher = Pusher( ``` More details are available in the -[Pusher API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/Pusher). +[Pusher API reference][tfx.v1.components.Pusher]. diff --git a/docs/guide/schemagen.md b/docs/guide/schemagen.md index d1fd36230d..2bbd50b0fe 100644 --- a/docs/guide/schemagen.md +++ b/docs/guide/schemagen.md @@ -58,7 +58,7 @@ The modified schema can be brought back into the pipeline using ImportSchemaGen component. The SchemaGen component for the initial schema generation can be removed and all downstream components can use the output of ImportSchemaGen. It is also recommended to add -[ExampleValidator](https://www.tensorflow.org/tfx/guide/exampleval) using the +[ExampleValidator](exampleval.md) using the imported schema to examine the training data continuously. ## SchemaGen and TensorFlow Data Validation @@ -78,7 +78,7 @@ schema_gen = tfx.components.SchemaGen( ``` More details are available in the -[SchemaGen API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/SchemaGen). +[SchemaGen API reference][tfx.v1.components.SchemaGen]. ### For the reviewed schema import @@ -93,4 +93,4 @@ schema_gen = tfx.components.ImportSchemaGen( The `schema_file` should be a full path to the text protobuf file. 
More details are available in the -[ImportSchemaGen API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/ImportSchemaGen). +[ImportSchemaGen API reference][tfx.v1.components.ImportSchemaGen]. diff --git a/docs/guide/solutions.md b/docs/guide/solutions.md index 0f8f9e9da1..f14b6fb47f 100644 --- a/docs/guide/solutions.md +++ b/docs/guide/solutions.md @@ -18,8 +18,7 @@ understand what items your customers consider to be similar, which enables you to offer real-time "similar item" suggestions in your application. This solution shows you how to identify similar songs in a dataset, and then use this information to make song recommendations. -Read -more +[Read more](https://cloud.google.com/solutions/real-time-item-matching) ## Data preprocessing for machine learning: options and recommendations @@ -31,10 +30,8 @@ article focuses on using TensorFlow and the open source TensorFlow Transform prediction. This part highlights the challenges of preprocessing data for machine learning, and illustrates the options and scenarios for performing data transformation on Google Cloud effectively. -Part -1 -Part -2 +[Part 1](https://cloud.google.com/solutions/machine-learning/data-preprocessing-for-ml-with-tf-transform-pt1) +[Part 2](https://cloud.google.com/solutions/machine-learning/data-preprocessing-for-ml-with-tf-transform-pt2) ## Architecture for MLOps using TFX, Kubeflow Pipelines, and Cloud Build @@ -42,8 +39,7 @@ This document describes the overall architecture of a machine learning (ML) system using TensorFlow Extended (TFX) libraries. It also discusses how to set up a continuous integration (CI), continuous delivery (CD), and continuous training (CT) for the ML system using Cloud Build and Kubeflow Pipelines. 
-Read -more +[Read more](https://cloud.google.com/solutions/machine-learning/architecture-for-mlops-using-tfx-kubeflow-pipelines-and-cloud-build) ## MLOps: Continuous delivery and automation pipelines in machine learning @@ -52,8 +48,7 @@ integration (CI), continuous delivery (CD), and continuous training (CT) for machine learning (ML) systems. Data science and ML are becoming core capabilities for solving complex real-world problems, transforming industries, and delivering value in all domains. -Read -more +[Read more](https://cloud.google.com/solutions/machine-learning/mlops-continuous-delivery-and-automation-pipelines-in-machine-learning) ## Setting up an MLOps environment on Google Cloud @@ -64,8 +59,7 @@ environment described here. Virtually all industries are adopting machine learning (ML) at a rapidly accelerating pace. A key challenge for getting value from ML is to create ways to deploy and operate ML systems effectively. This guide is intended for machine learning (ML) and DevOps engineers. -Read -more +[Read more](https://cloud.google.com/solutions/machine-learning/setting-up-an-mlops-environment) ## Key requirements for an MLOps foundation @@ -78,8 +72,7 @@ McKinsey Global Institute. But it’s not easy right now. Machine learning (ML) systems have a special capacity for creating technical debt if not managed well. -Read -more +[Read more](https://cloud.google.com/blog/products/ai-machine-learning/key-requirements-for-an-mlops-foundation) ## How to create and deploy a model card in the cloud with Scikit-Learn @@ -88,8 +81,7 @@ With their vast potential, ML models also raise questions about their usage, construction, and limitations. Documenting the answers to these questions helps to bring clarity and shared understanding. To help advance these goals, Google has introduced model cards. 
-Read -more +[Read more](https://cloud.google.com/blog/products/ai-machine-learning/create-a-model-card-with-scikit-learn) ## Analyzing and validating data at scale for machine learning with TensorFlow Data Validation @@ -99,5 +91,4 @@ scientists and machine learning (ML) engineers can use TFDV in a production ML system to validate data that's used in a continuous training (CT) pipeline, and to detect skews and outliers in data received for prediction serving. It includes **hands-on labs**. -Read -more +[Read more](https://cloud.google.com/solutions/machine-learning/analyzing-and-validating-data-at-scale-for-ml-using-tfx) diff --git a/docs/guide/statsgen.md b/docs/guide/statsgen.md index 7d734fa4f6..04ad7a4fa5 100644 --- a/docs/guide/statsgen.md +++ b/docs/guide/statsgen.md @@ -64,8 +64,8 @@ Where `` represents a unique ID for this version of the schema in MLMD. This schema proto can then be modified to communicate information about the dataset which cannot be reliably inferred, which will make the output of `StatisticsGen` more useful and the validation performed in the -[`ExampleValidator`](https://www.tensorflow.org/tfx/guide/exampleval) component +[`ExampleValidator`](exampleval.md) component more stringent. More details are available in the -[StatisticsGen API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/StatisticsGen). +[StatisticsGen API reference][tfx.v1.components.StatisticsGen]. diff --git a/docs/guide/tfdv.md b/docs/guide/tfdv.md index 938ef2e261..b496170d86 100644 --- a/docs/guide/tfdv.md +++ b/docs/guide/tfdv.md @@ -24,9 +24,9 @@ TFX tools can both help find data bugs, and help with feature engineering. 
## TensorFlow Data Validation * [Overview](#overview) -* [Schema Based Example Validation](#schema_based_example_validation) +* [Schema Based Example Validation](#schema-based-example-validation) * [Training-Serving Skew Detection](#skewdetect) -* [Drift Detection](#drift_detection) +* [Drift Detection](#drift-detection) ### Overview @@ -42,9 +42,9 @@ be configured to detect different classes of anomalies in the data. It can We document each of these functionalities independently: -* [Schema Based Example Validation](#schema_based_example_validation) +* [Schema Based Example Validation](#schema-based-example-validation) * [Training-Serving Skew Detection](#skewdetect) -* [Drift Detection](#drift_detection) +* [Drift Detection](#drift-detection) ### Schema Based Example Validation diff --git a/docs/guide/tfma.md b/docs/guide/tfma.md index be7380ff7a..6facaa1e06 100644 --- a/docs/guide/tfma.md +++ b/docs/guide/tfma.md @@ -15,25 +15,25 @@ evaluation in TFX. TensorFlow Model Analysis allows you to perform model evaluations in the TFX pipeline, and view resultant metrics and plots in a Jupyter notebook. Specifically, it can provide: -* [Metrics](../model_analysis/metrics) computed on entire training and holdout +* [Metrics](https://www.tensorflow.org/tfx/model_analysis/metrics) computed on entire training and holdout dataset, as well as next-day evaluations * Tracking metrics over time * Model quality performance on different feature slices -* [Model validation](../model_analysis/model_validations) for ensuring that +* [Model validation](https://www.tensorflow.org/tfx/model_analysis/model_validations) for ensuring that model's maintain consistent performance ## Next Steps -Try our [TFMA tutorial](../tutorials/model_analysis/tfma_basic). +Try our [TFMA tutorial](https://www.tensorflow.org/tfx/tutorials/model_analysis/tfma_basic).
Check out our [github](https://github.com/tensorflow/model-analysis) page for details on the supported -[metrics and plots](../model_analysis/metrics) and associated notebook -[visualizations](../model_analysis/visualizations). +[metrics and plots](https://www.tensorflow.org/tfx/model_analysis/metrics) and associated notebook +[visualizations](https://www.tensorflow.org/tfx/model_analysis/visualizations). -See the [installation](../model_analysis/install) and -[getting started](../model_analysis/get_started) guides for information and -examples on how to get [set up](../model_analysis/setup) in a standalone +See the [installation](https://www.tensorflow.org/tfx/model_analysis/install) and +[getting started](https://www.tensorflow.org/tfx/model_analysis/get_started) guides for information and +examples on how to get [set up](https://www.tensorflow.org/tfx/model_analysis/setup) in a standalone pipeline. Recall that TFMA is also used within the [Evaluator](evaluator.md) component in TFX, so these resources will be useful for getting started in TFX as well. diff --git a/docs/guide/tft_bestpractices.md b/docs/guide/tft_bestpractices.md index 4beb024b59..11bd10ad52 100644 --- a/docs/guide/tft_bestpractices.md +++ b/docs/guide/tft_bestpractices.md @@ -22,7 +22,7 @@ and the TensorFlow [Keras](https://www.tensorflow.org/guide/keras/overview) API. The second document, -[Data preprocessing for ML with Google Cloud](../tutorials/transform/data_preprocessing_with_cloud), +[Data preprocessing for ML with Google Cloud](../../tutorials/transform/data_preprocessing_with_cloud), provides a step-by-step tutorial for how to implement a `tf.Transform` pipeline. ## Introduction @@ -100,7 +100,7 @@ meanings: features that are created by performing certain ML-specific operations on the columns in the prepared dataset, and creating new features for your model during training and prediction, as described later in - [Preprocessing operations](#preprocessing_operations). 
+ [Preprocessing operations](#preprocessing-operations). Examples of these operations include scaling numerical columns to a value between 0 and 1, clipping values, and [one-hot-encoding](https://developers.google.com/machine-learning/glossary/#one-hot_encoding){: .external } @@ -109,12 +109,17 @@ meanings: The following diagram, figure 1, shows the steps that are involved in preparing preprocessed data: -
+ +Figure: The flow of data from raw data to prepared data to engineered features to machine learning. {#data-flow-raw-prepared-engineered-features} + +![Flow diagram showing raw data moving to prepared data moving to engineered features.](images/data-preprocessing-for-ml-with-tf-transform-data-preprocessing-flow.svg) + + In practice, data from the same source is often at different stages of readiness. For example, a field from a table in your data warehouse might be @@ -216,7 +221,7 @@ on operation granularity: then the model behaves poorly because it is presented with data that has a distribution of values that it wasn't trained with. For more information, see the discussion of training-serving skew in the - [Preprocessing challenges](#preprocessing_challenges) + [Preprocessing challenges](#preprocessing-challenges) section. - **Full-pass transformations during training, but instance-level transformations during prediction**. In this scenario, transformations are @@ -233,7 +238,7 @@ on operation granularity: values that are computed during training are used to adjust the feature value, which is the following simple *instance-level* operation: -
$$ value_{scaled} = (value_{raw} - \mu) \div \sigma $$
+ \[ value_{scaled} = (value_{raw} - \mu) \div \sigma \] Full-pass transformations include the following: @@ -308,7 +313,7 @@ train and serve TensorFlow ML models on Google Cloud using managed services. It also discusses where you can implement different categories of the data preprocessing operations, and common challenges that you might face when you implement such transformations. The -[How tf.Transform works](#how_tftransform_works) +[How tf.Transform works](#how-tftransform-works) section shows how the TensorFlow Transform library helps to address these challenges. @@ -320,12 +325,16 @@ labels A, B, and C in the diagram refer to the different places in the pipeline where data preprocessing can take place. Details about these steps are provided in the following section. -
+Figure: High-level architecture for ML training and serving on Google Cloud. {#high-level-architecture-for-training-and-serving} + +![Architecture diagram showing stages for processing data.](images/data-preprocessing-for-ml-with-tf-transform-ml-training-serving-architecture.svg) + + The pipeline consists of the following steps: @@ -369,7 +378,7 @@ take place in BigQuery, Dataflow, or TensorFlow. The following sections describe how each of these options work. -#### Option A: BigQuery{: id="option_a_bigquery"} +#### Option A: BigQuery Typically, logic is implemented in BigQuery for the following operations: @@ -402,7 +411,7 @@ prediction. For example, if your client app is written in Java, you need to reimplement the logic in Java. This can introduce errors due to implementation discrepancies, as described in the training-serving skew section of -[Preprocessing challenges](#preprocessing_challenges) +[Preprocessing challenges](#preprocessing-challenges) later in this document. It's also extra overhead to maintain two different implementations. Whenever you change the logic in SQL to preprocess the training data, you need to change the Java implementation accordingly to preprocess data @@ -424,7 +433,7 @@ features. Further, implementation of full-pass transformations using SQL on BigQuery creates increased complexity in the SQL scripts, and creates intricate dependency between training and the scoring SQL scripts. -#### Option B: Dataflow{: id="option_b_cloud_dataflow"} +#### Option B: Dataflow As shown in figure 2, you can implement computationally expensive preprocessing operations in Apache Beam, and run them at scale using Dataflow. @@ -441,19 +450,23 @@ Apache Beam can compute these features based on aggregating the values of time windows of real-time (streaming) events data (for example, click events). 
In the earlier discussion of -[granularity of transformations](#preprocessing_granularity), +[granularity of transformations](#preprocessing-granularity), this was referred to as "Historical aggregations during training, but real-time aggregations during prediction." The following diagram, figure 3, shows the role of Dataflow in processing stream data for near real-time predictions. -
+Figure: High-level architecture using stream data for prediction in Dataflow. {#high-level-architecture-for-stream-data} + +![Architecture for using stream data for prediction.](images/data-preprocessing-for-ml-with-tf-transform-streaming-data-with-dataflow-architecture.svg) + + As shown in figure 3, during processing, events called *data points* are ingested into [Pub/Sub](https://cloud.google.com/pubsub/docs){: .external }. @@ -485,9 +498,9 @@ stored somewhere to be used during prediction to transform prediction data points. By using the TensorFlow Transform (`tf.Transform`) library, you can directly embed these statistics in the model instead of storing them elsewhere. This approach is explained later in -[How tf.Transform works](#how_tftransform_works). +[How tf.Transform works](#how-tftransform-works). -#### Option C: TensorFlow{: id="option_c_tensorflow"} +#### Option C: TensorFlow As shown in figure 2, you can implement data preprocessing and transformation operations in the TensorFlow model itself. As shown in the @@ -538,7 +551,7 @@ The following are the primary challenges of implementing data preprocessing: If the transformations become part of the model itself, it can be straightforward to handle instance-level transformations, as described earlier in - [Option C: TensorFlow](#option_c_tensorflow). + [Option C: TensorFlow](#option-c-tensorflow). In that case, the model serving interface (the [`serving_fn`](https://www.tensorflow.org/guide/saved_model#savedmodels_from_estimators) function) expects raw data, while the model internally transforms this data @@ -550,14 +563,14 @@ The following are the primary challenges of implementing data preprocessing: TensorFlow model. In full-pass transformations, some statistics (for example, `max` and `min` values to scale numeric features) must be computed on the training data beforehand, as described in - [Option B: Dataflow](#option_b_dataflow). + [Option B: Dataflow](#option-b-dataflow). 
The values then have to be stored somewhere to be used during model serving for prediction to transform the new raw data points as instance-level transformations, which avoids training-serving skew. You can use the TensorFlow Transform (`tf.Transform`) library to directly embed the statistics in your TensorFlow model. This approach is explained later in - [How tf.Transform works](#how_tftransform_works). + [How tf.Transform works](#how-tftransform-works). - **Preparing the data up front for better training efficiency**. Implementing instance-level transformations as part of the model can degrade the efficiency of the training process. This degradation occurs @@ -573,7 +586,7 @@ The following are the primary challenges of implementing data preprocessing: Ideally, the training data is transformed before training, using the technique described under - [Option B: Dataflow](#option_b_dataflow), + [Option B: Dataflow](#option-b-dataflow), where the 10,000 transformation operations are applied only once on each training instance. The transformed training data is then presented to the model. No further transformations are applied, and the accelerators are @@ -583,9 +596,9 @@ The following are the primary challenges of implementing data preprocessing: Preparing the training data up front can improve training efficiency. However, implementing the transformation logic outside of the model (the approaches described in - [Option A: BigQuery](#option_a_bigquery) + [Option A: BigQuery](#option-a-bigquery) or - [Option B: Dataflow](#option_b_dataflow)) + [Option B: Dataflow](#option-b-dataflow)) doesn't resolve the issue of training-serving skew. 
Unless you store the engineered feature in the feature store to be used for both training and prediction, the transformation logic must be implemented somewhere to be @@ -594,7 +607,7 @@ The following are the primary challenges of implementing data preprocessing: (`tf.Transform`) library can help you to address this issue, as described in the following section. -## How tf.Transform works{:#how_tftransform_works} +## How tf.Transform works The `tf.Transform` library is useful for transformations that require a full pass. The output of the `tf.Transform` library is exported as a @@ -610,12 +623,16 @@ The following diagram, figure 4, shows how the `tf.Transform` library preprocesses and transforms data for training and prediction. The process is described in the following sections. -
+Figure: Behavior of `tf.Transform` for preprocessing and transforming data. + +![Diagram showing flow from raw data through tf.Transform to predictions.](images/data-preprocessing-for-ml-with-tf-transform-tf-transform-behavior-flow.svg) + + ### Transform training and evaluation data @@ -637,7 +654,7 @@ Dataflow. The preprocessing occurs in the following phases: columns) in an instance-level fashion. A two-phase approach like this addresses the -[preprocessing challenge](#preprocessing_challenges) +[preprocessing challenge](#preprocessing-challenges) of performing full-pass transformations. When the evaluation data is preprocessed, only instance-level operations are @@ -651,7 +668,7 @@ an instance-level fashion. The transformed training and evaluation data are prepared at scale using Dataflow, before they are used to train the model. This batch data-preparation process addresses the -[preprocessing challenge](#preprocessing_challenges) +[preprocessing challenge](#preprocessing-challenges) of preparing the data up front to improve training efficiency. As shown in figure 4, the model internal interface expects transformed features. @@ -678,7 +695,7 @@ the model internal interface in order to produce prediction, as shown in figure 4. This mechanism resolves the -[preprocessing challenge](#preprocessing_challenges) +[preprocessing challenge](#preprocessing-challenges) of the training-serving skew, because the same logic (implementation) that is used to transform the training and evaluation data is applied to transform the new data points during prediction serving. diff --git a/docs/guide/train.md b/docs/guide/train.md index ad5a2dd214..395db2814f 100644 --- a/docs/guide/train.md +++ b/docs/guide/train.md @@ -7,29 +7,15 @@ aware of, including the choice of a modeling API. 
[ExampleGen](examplegen.md) * Emits: Trained model in SavedModel format - + To keep up to date on TFX releases, see the [TFX OSS Roadmap](https://github.com/tensorflow/tfx/blob/master/ROADMAP.md), read [the TFX blog](https://blog.tensorflow.org/search?label=TFX&max-results=20) and subscribe to the [TensorFlow newsletter](https://services.google.com/fb/forms/tensorflow/). Your model's input layer should consume from the SavedModel that was created by a [Transform](transform.md) component, and the layers of the Transform model should diff --git a/docs/guide/trainer.md b/docs/guide/trainer.md index 91a64a59d3..0b94a62c09 100644 --- a/docs/guide/trainer.md +++ b/docs/guide/trainer.md @@ -91,4 +91,4 @@ trainer = Trainer( ``` More details are available in the -[Trainer API reference](https://www.tensorflow.org/tfx/api_docs/python/tfx/v1/components/Trainer). +[Trainer API reference][tfx.v1.components.Trainer]. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000..a881f163a4 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,57 @@ +# TFX + +TFX is an end-to-end platform for deploying production ML pipelines. + +When you're ready to move your models from research to production, use TFX to +create and manage a production pipeline. + +[![Python](https://img.shields.io/pypi/pyversions/tfx.svg?style=plastic)]( +https://github.com/tensorflow/tfx) +[![PyPI](https://badge.fury.io/py/tfx.svg)](https://badge.fury.io/py/tfx) + +## How it works + +A TFX pipeline is a sequence of components that implement an ML pipeline which +is specifically designed for scalable, high-performance machine learning tasks. +Components are built using TFX libraries which can also be used individually. + +
+ +- :material-download:{ .lg .middle } __Install TFX__ + + --- + + Install [`tfx`](#) with [`pip`](#): + + ```shell + pip install tfx + ``` + + [:octicons-arrow-right-24: Getting started](guide/index.md#installation) + +- :material-book-open-blank-variant-outline:{ .lg .middle } __User Guide__ + + --- + + Learn more about how to get started with TFX in the user guide. + + [:octicons-arrow-right-24: User Guide](guide/index.md) + +- :material-school:{ .lg .middle } __View The Tutorials__ + + --- + + Learn from real world examples that use TFX. + + [:octicons-arrow-right-24: Tutorials](tutorials/index.md) + +- :material-text-search:{ .lg .middle } __API Reference__ + + --- + + The API reference contains details about functions, classes, and modules + that are part of TFX. + + [:octicons-arrow-right-24: API Reference](api/v1/index.md) + +
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000000..e734efefd6 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,15 @@ +:root { + --md-primary-fg-color: #FFA800; + --md-primary-fg-color--light: #CCCCCC; + --md-primary-fg-color--dark: #425066; +} + +.video-wrapper { + max-width: 240px; + display: flex; + flex-direction: row; +} +.video-wrapper > iframe { + width: 100%; + aspect-ratio: 16 / 9; +} diff --git a/docs/tutorials/_index.yaml b/docs/tutorials/_index.yaml deleted file mode 100644 index 20d870d80e..0000000000 --- a/docs/tutorials/_index.yaml +++ /dev/null @@ -1,152 +0,0 @@ -book_path: /tfx/_book.yaml -project_path: /tfx/_project.yaml -title: TFX tutorials -landing_page: - nav: left - custom_css_path: /site-assets/css/style.css - meta_tags: - - name: description - content: > - Learn how to move models to production with TFX. Follow end-to-end examples for beginners and - users. Create and manage machine learning pipelines with TensorFlow. - rows: - - classname: - devsite-landing-row-100 - heading: "TensorFlow in Production Tutorials" - items: - - description: > -

These tutorials will get you started, and help you learn a few different ways of - working with TFX for production workflows and deployments. In particular, you'll - learn the two main styles of developing a TFX pipeline:

-
    -
  • Using the InteractiveContext to develop a pipeline in a notebook, - working with one component at a time. This style makes development easier - and more Pythonic.
  • -
  • Defining an entire pipeline and executing it with a runner. This is what - your pipelines will look like when you deploy them.
  • -
- - heading: "Getting started tutorials" - classname: devsite-landing-row-100 - items: - - classname: tfo-landing-page-card - description: > - - Probably the simplest pipeline you can build, to help you get started. - Click the Run in Google Colab button. - path: /tfx/tutorials/tfx/penguin_simple - - classname: tfo-landing-page-card - description: > - - Building on the simple pipeline to add data validation components. - path: /tfx/tutorials/tfx/penguin_tfdv - - classname: tfo-landing-page-card - description: > - - Building on the data validation pipeline to add a feature engineering component. - path: /tfx/tutorials/tfx/penguin_tft - - classname: tfo-landing-page-card - description: > - - Building on the simple pipeline to add a model analysis component. - path: /tfx/tutorials/tfx/penguin_tfma - - - heading: "TFX on Google Cloud" - classname: devsite-landing-row-100 - description: > - Google Cloud provides various products like BigQuery, Vertex AI to make your ML workflow - cost-effective and scalable. You will learn how to use those products in your TFX pipeline. - items: - - classname: tfo-landing-page-card - description: > - - Running pipelines on a managed pipeline service, Vertex Pipelines. - path: /tfx/tutorials/tfx/gcp/vertex_pipelines_simple - - classname: tfo-landing-page-card - description: > - - Using BigQuery as a data source of ML pipelines. - path: /tfx/tutorials/tfx/gcp/vertex_pipelines_bq - - classname: tfo-landing-page-card - description: > - - Using cloud resources for ML training and serving with Vertex AI. - path: /tfx/tutorials/tfx/gcp/vertex_pipelines_vertex_training - - classname: tfo-landing-page-card - description: > - - An introduction to using TFX and Cloud AI Platform Pipelines. - path: /tfx/tutorials/tfx/cloud-ai-platform-pipelines - - - - heading: "Next steps" - - classname: devsite-landing-row-100 - items: - - description: > - Once you have a basic understanding of TFX, check these additional tutorials and guides. 
- And don't forget to read the TFX User Guide. - - - classname: devsite-landing-row-100 - items: - - classname: tfo-landing-page-card - description: > - - A component-by-component introduction to TFX, including the interactive context, a - very useful development tool. Click the Run in Google Colab button. - path: /tfx/tutorials/tfx/components_keras - - classname: tfo-landing-page-card - description: > - - A tutorial showing how to develop your own custom TFX components. - path: /tfx/tutorials/tfx/python_function_component - - - classname: devsite-landing-row-100 - items: - - classname: tfo-landing-page-card - description: > - - This Google Colab notebook demonstrates how TensorFlow Data Validation (TFDV) can be used to - investigate and visualize a dataset, including generating descriptive statistics, inferring - a schema, and finding anomalies. - path: /tfx/tutorials/data_validation/tfdv_basic - - classname: tfo-landing-page-card - description: > - - This Google Colab notebook demonstrates how TensorFlow Model Analysis (TFMA) can be used to - investigate and visualize the characteristics of a dataset and evaluate the performance of a - model along several axes of accuracy. - path: /tfx/tutorials/model_analysis/tfma_basic - - classname: tfo-landing-page-card - description: > - - This tutorial demonstrates how TensorFlow Serving can be used to serve a model using a - simple REST API. - path: /tfx/tutorials/serving/rest_simple - - - heading: "Videos and updates" - description: > -

- Subscribe to the - TFX YouTube Playlist - and blog for the latest videos and updates. -

- items: - - heading: "TFX: Production ML with TensorFlow in 2020" - description: "TF Dev Summit 2020" - youtube_id: I3MjuFGmJrg - buttons: - - label: Watch the video - path: https://youtu.be/I3MjuFGmJrg - - heading: "TFX: Production ML pipelines with TensorFlow" - description: "TF World 2019" - youtube_id: TA5kbFgeUlk - buttons: - - label: Watch the video - path: https://youtu.be/TA5kbFgeUlk - - heading: "Taking Machine Learning from Research to Production" - description: "GOTO Copenhagen 2019" - youtube_id: rly7DqCbtKw - buttons: - - label: Watch the video - path: https://youtu.be/rly7DqCbtKw diff --git a/docs/tutorials/_toc.yaml b/docs/tutorials/_toc.yaml deleted file mode 100644 index 91df2347a7..0000000000 --- a/docs/tutorials/_toc.yaml +++ /dev/null @@ -1,71 +0,0 @@ -toc: -- title: "Get started with TFX" - path: /tfx/tutorials/ - -- heading: "TFX: Getting started tutorials" -- title: "1. Starter pipeline" - path: /tfx/tutorials/tfx/penguin_simple -- title: "2. Adding data validation" - path: /tfx/tutorials/tfx/penguin_tfdv -- title: "3. Adding feature engineering" - path: /tfx/tutorials/tfx/penguin_tft -- title: "4. 
Adding model analysis" - path: /tfx/tutorials/tfx/penguin_tfma - -- heading: "TFX: Interactive tutorials" -- title: "Interactive tutorial (TF2 Keras)" - path: /tfx/tutorials/tfx/components_keras -- title: "Interactive tutorial (Estimator)" - path: /tfx/tutorials/tfx/components - -- heading: "TFX on Google Cloud" -- title: "Running on Vertex Pipelines" - path: /tfx/tutorials/tfx/gcp/vertex_pipelines_simple -- title: "Read data from BigQuery" - path: /tfx/tutorials/tfx/gcp/vertex_pipelines_bq -- title: "Vertex AI Training and Serving" - path: /tfx/tutorials/tfx/gcp/vertex_pipelines_vertex_training -- title: "Cloud AI Platform Pipelines tutorial" - path: /tfx/tutorials/tfx/cloud-ai-platform-pipelines - -- heading: "TFX: Advanced tutorials" -- title: "LLM finetuning and conversion" - path: /tfx/tutorials/tfx/gpt2_finetuning_and_conversion -- title: "Custom component tutorial" - path: /tfx/tutorials/tfx/python_function_component -- title: "Recommenders with TFX" - path: /tfx/tutorials/tfx/recommenders -- title: "Ranking with TFX" - path: /recommenders/examples/ranking_tfx -- title: "Airflow tutorial" - path: /tfx/tutorials/tfx/airflow_workshop -- title: "Neural Structured Learning in TFX" - path: /tfx/tutorials/tfx/neural_structured_learning - -- heading: "Data Validation" -- title: "Get started with TFDV" - path: /tfx/tutorials/data_validation/tfdv_basic - -- heading: "Transform" -- title: "Preprocess data (beginner)" - path: /tfx/tutorials/transform/simple -- title: "Preprocess data (advanced)" - path: /tfx/tutorials/transform/census -- title: "Data preprocessing for ML with Google Cloud" - path: /tfx/tutorials/transform/data_preprocessing_with_cloud - -- heading: "Model Analysis" -- title: "Get started with TFMA" - path: /tfx/tutorials/model_analysis/tfma_basic -- title: "Fairness Indicators tutorial" - path: /responsible_ai/fairness_indicators/tutorials/Fairness_Indicators_Example_Colab - -- heading: "Deploy a trained model" -- title: "Servers: TFX for TensorFlow 
Serving" - path: /tfx/tutorials/serving/rest_simple -- title: "Mobile & IoT: TFX for TensorFlow Lite" - path: /tfx/tutorials/tfx/tfx_for_mobile - -- heading: "ML Metadata" -- title: "Get started with MLMD" - path: /tfx/tutorials/mlmd/mlmd_tutorial diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md new file mode 100644 index 0000000000..d4163ca297 --- /dev/null +++ b/docs/tutorials/index.md @@ -0,0 +1,171 @@ +# Tensorflow in Production Tutorials + +These tutorials will get you started, and help you learn a few different ways of +working with TFX for production workflows and deployments. In particular, +you'll learn the two main styles of developing a TFX pipeline: + +* Using the `InteractiveContext` to develop a pipeline in a notebook, working + with one component at a time. This style makes development easier and more + Pythonic. +* Defining an entire pipeline and executing it with a runner. This is what your + pipelines will look like when you deploy them. + +## Getting Started Tutorials + +
+ +- __1. Starter Pipeline__ + + --- + + Probably the simplest pipeline you can build, to help you get started. Click + the _Run in Google Colab_ button. + + [:octicons-arrow-right-24: Starter Pipeline](tutorials/tfx/penguin_simple.md) + +- __2. Adding Data Validation__ + + --- + + Building on the simple pipeline to add data validation components. + + [:octicons-arrow-right-24: Data Validation](tutorials/tfx/penguin_tfdv) + +- __3. Adding Feature Engineering__ + + --- + + Building on the data validation pipeline to add a feature engineering component. + + [:octicons-arrow-right-24: Feature Engineering](tutorials/tfx/penguin_tft) + +- __4. Adding Model Analysis__ + + --- + + Building on the simple pipeline to add a model analysis component. + + [:octicons-arrow-right-24: Model Analysis](tutorials/tfx/penguin_tfma) + +
+ + +## TFX on Google Cloud + +Google Cloud provides various products like BigQuery, Vertex AI to make your ML +workflow cost-effective and scalable. You will learn how to use those products +in your TFX pipeline. + +
+ +- __Running on Vertex Pipelines__ + + --- + + Running pipelines on a managed pipeline service, Vertex Pipelines. + + [:octicons-arrow-right-24: Vertex Pipelines](tutorials/tfx/gcp/vertex_pipelines_simple) + +- __Read data from BigQuery__ + + --- + + Using BigQuery as a data source of ML pipelines. + + [:octicons-arrow-right-24: BigQuery](tutorials/tfx/gcp/vertex_pipelines_bq) + +- __Vertex AI Training and Serving__ + + --- + + Using cloud resources for ML training and serving with Vertex AI. + + [:octicons-arrow-right-24: Vertex Training and Serving](tutorials/tfx/gcp/vertex_pipelines_vertex_training) + +- __TFX on Cloud AI Platform Pipelines__ + + --- + + An introduction to using TFX and Cloud AI Platform Pipelines. + + [:octicons-arrow-right-24: Cloud Pipelines](tutorials/tfx/cloud-ai-platform-pipelines) + +
+ +## Next Steps + +Once you have a basic understanding of TFX, check these additional tutorials and +guides. And don't forget to read the [TFX User Guide](guide/index.md). + +
+ +- __Complete Pipeline Tutorial__ + + --- + + A component-by-component introduction to TFX, including the _interactive + context_, a very useful development tool. Click the _Run in + Google Colab_ button. + + [:octicons-arrow-right-24: Keras](tutorials/tfx/components_keras) + +- __Custom Component Tutorial__ + + --- + + A tutorial showing how to develop your own custom TFX components. + + [:octicons-arrow-right-24: Custom Component](tutorials/tfx/python_function_component) + +- __Data Validation__ + + --- + + This Google Colab notebook demonstrates how TensorFlow Data Validation + (TFDV) can be used to investigate and visualize a dataset, including + generating descriptive statistics, inferring a schema, and finding + anomalies. + + [:octicons-arrow-right-24: Data Validation](tutorials/data_validation/tfdv_basic) + +- __Model Analysis__ + + --- + + This Google Colab notebook demonstrates how TensorFlow Model Analysis + (TFMA) can be used to investigate and visualize the characteristics of a + dataset and evaluate the performance of a model along several axes of + accuracy. + + [:octicons-arrow-right-24: Model Analysis](tutorials/model_analysis/tfma_basic) + +- __Serve a Model__ + + --- + + This tutorial demonstrates how TensorFlow Serving can be used to serve a + model using a simple REST API. + + [:octicons-arrow-right-24: Model Analysis](tutorials/serving/rest_simple) + +
+ +## Videos and Updates + +Subscribe to the [TFX YouTube +Playlist](https://www.youtube.com/playlist?list=PLQY2H8rRoyvxR15n04JiW0ezF5HQRs_8F) +and [blog](https://blog.tensorflow.org/search?label=TFX&max-results=20) for the +latest videos and updates. + + +- [TFX: Production ML with TensorFlow in 2020](https://youtu.be/I3MjuFGmJrg) + +
+ +- [TFX: Production ML pipelines with TensorFlow](https://youtu.be/TA5kbFgeUlk) + +
+ +- [Taking Machine Learning from Research to Production](https://youtu.be/rly7DqCbtKw) + +
diff --git a/docs/tutorials/tfx/tfx_for_mobile.md b/docs/tutorials/tfx/tfx_for_mobile.md index 004526fbb7..95fe2899a8 100644 --- a/docs/tutorials/tfx/tfx_for_mobile.md +++ b/docs/tutorials/tfx/tfx_for_mobile.md @@ -109,4 +109,3 @@ is analyzed, the output of the `Evaluator` will have exactly the same structure. However, please note that the Evaluator assumes that the TFLite model is saved in a file named `tflite` within trainer_lite.outputs['model']. - diff --git a/docs/tutorials/transform/data_preprocessing_with_cloud.md b/docs/tutorials/transform/data_preprocessing_with_cloud.md index 37843e2cc0..88d6ef9428 100644 --- a/docs/tutorials/transform/data_preprocessing_with_cloud.md +++ b/docs/tutorials/transform/data_preprocessing_with_cloud.md @@ -53,12 +53,12 @@ an entire day, use the preconfigured ## Before you begin 1. In the Google Cloud console, on the project selector page, select or - [create a Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects). + [create a Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects). Note: If you don't plan to keep the resources that you create in this procedure, create a project instead of selecting an existing project. After you finish these steps, you can delete the project, removing all - resources associated with the project. + resources associated with the project. [Go to project selector](https://console.cloud.google.com/projectselector2/home/dashboard){: class="button button-primary" target="console" track-type="solution" track-name="consoleLink" track-metadata-position="body" } @@ -1156,7 +1156,7 @@ resources used in this tutorial, delete the project that contains the resources. go to the **Manage resources** page. [Go to Manage resources](https://console.cloud.google.com/iam-admin/projects){: class="button button-primary" target="console" track-type="solution" track-name="consoleLink" track-metadata-position="body" } - + 1. 
In the project list, select the project that you want to delete, and then click **Delete**. 1. In the dialog, type the project ID, and then click **Shut down** to delete diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000000..4fa2d04b08 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,253 @@ +site_name: TFX +repo_name: "Tensorflow TFX" +repo_url: https://github.com/tensorflow/tfx + +theme: + name: material + palette: + # Palette toggle for automatic mode + - media: "(prefers-color-scheme)" + primary: custom + accent: custom + toggle: + icon: material/brightness-auto + name: Switch to light mode + + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + primary: custom + accent: custom + scheme: default + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + primary: custom + accent: custom + scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to system preference + logo: assets/tf_full_color_primary_icon.svg + favicon: assets/tf_full_color_primary_icon.svg + + features: + - content.code.copy + - content.code.select +plugins: + - search + - autorefs + - mkdocstrings: + default_handler: python + handlers: + python: + options: + show_source: true + show_root_heading: true + unwrap_annotated: true + show_symbol_type_toc: true + show_symbol_type_heading: true + merge_init_into_class: true + show_signature_annotations: true + separate_signature: true + signature_crossrefs: true + group_by_category: true + show_category_heading: true + inherited_members: true + show_submodules: true + show_object_full_path: false + show_root_full_path: true + docstring_section_style: "spacy" + summary: true + filters: + - "!^_" + - "^__init__$" + - "^__call__$" + - "!^logger" + extensions: + - griffe_inherited_docstrings + import: + - https://docs.python.org/3/objects.inv + - mkdocs-jupyter: + execute: false + execute_ignore: # There are issues 
with executing these notebooks + - tutorials/serving/rest_simple.ipynb + - tutorials/tfx/gcp/*.ipynb + - caption: + figure: + ignore_alt: true + +markdown_extensions: + - admonition + - attr_list + - toc: + permalink: true + - pymdownx.highlight: + anchor_linenums: true + linenums: false + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - pymdownx.arithmatex: + generic: true + - md_in_html + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + +extra_css: + - stylesheets/extra.css + +extra_javascript: + - javascripts/mathjax.js + - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js + +watch: + - tfx +nav: + - Overview: index.md + + - Tutorials: + - Get started with TFX: tutorials/index.md + - 'TFX: Getting started tutorials': + - 1. Starter pipeline: tutorials/tfx/penguin_simple + - 2. Adding data validation: tutorials/tfx/penguin_tfdv + - 3. Adding feature engineering: tutorials/tfx/penguin_tft + - 4. 
Adding model analysis: tutorials/tfx/penguin_tfma
+      - 'TFX: Interactive tutorials':
+          - Interactive tutorial (TF2 Keras): tutorials/tfx/components_keras
+          - Interactive tutorial (Estimator): tutorials/tfx/components
+      - TFX on Google Cloud:
+          - Running on Vertex Pipelines: tutorials/tfx/gcp/vertex_pipelines_simple
+          - Read data from BigQuery: tutorials/tfx/gcp/vertex_pipelines_bq
+          - Vertex AI Training and Serving: tutorials/tfx/gcp/vertex_pipelines_vertex_training
+          - Cloud AI Platform Pipelines tutorial: tutorials/tfx/cloud-ai-platform-pipelines
+      - 'TFX: Advanced tutorials':
+          - LLM finetuning and conversion: tutorials/tfx/gpt2_finetuning_and_conversion
+          - Custom component tutorial: tutorials/tfx/python_function_component
+          - Recommenders with TFX: tutorials/tfx/recommenders
+          - Ranking with TFX: https://www.tensorflow.org/recommenders/examples/ranking_tfx
+          - Airflow tutorial: tutorials/tfx/airflow_workshop
+          - Neural Structured Learning in TFX: tutorials/tfx/neural_structured_learning
+      - Data Validation:
+          - Get started with TFDV: tutorials/data_validation/tfdv_basic
+      - Transform:
+          - Preprocess data (beginner): tutorials/transform/simple
+          - Preprocess data (advanced): tutorials/transform/census
+          - Data preprocessing for ML with Google Cloud: tutorials/transform/data_preprocessing_with_cloud
+      - Model Analysis:
+          - Get started with TFMA: tutorials/model_analysis/tfma_basic
+          - Fairness Indicators tutorial: https://www.tensorflow.org/responsible_ai/fairness_indicators/tutorials/Fairness_Indicators_Example_Colab
+      - Deploy a trained model:
+          - 'Servers: TFX for TensorFlow Serving': tutorials/serving/rest_simple
+          - 'Mobile & IoT: TFX for TensorFlow Lite': tutorials/tfx/tfx_for_mobile
+      - ML Metadata:
+          - Get started with MLMD: tutorials/mlmd/mlmd_tutorial
+
+  - Guide:
+      - Guide: guide/index.md
+
+      - "What's New":
+          - "TFX-Addons": addons
+          - "TFX Cloud Solutions": guide/solutions.md
+          - "Using Keras with TFX": guide/keras
+          - "Using Non-TensorFlow Frameworks in TFX": guide/non_tf
+          - "Mobile & IoT: TFX for TensorFlow 
Lite": tutorials/tfx/tfx_for_mobile
+
+      - "TFX Pipelines":
+          - "Understanding TFX pipelines": guide/understanding_tfx_pipelines
+          - "Building a TFX pipeline": guide/build_tfx_pipeline
+          - "Local Pipelines": guide/build_local_pipeline
+
+      - "TFX Standard Components":
+          - "ExampleGen": guide/examplegen
+          - "StatisticsGen": guide/statsgen
+          - "SchemaGen": guide/schemagen
+          - "ExampleValidator": guide/exampleval
+          - "Transform": guide/transform
+          - "Trainer": guide/trainer
+          - "Tuner": guide/tuner
+          - "Evaluator": guide/evaluator
+          - "InfraValidator": guide/infra_validator
+          - "Pusher": guide/pusher
+          - "BulkInferrer": guide/bulkinferrer
+
+      - "TFX Custom Components":
+          - "Understanding custom components": guide/understanding_custom_components
+          - "Python function-based components": guide/custom_function_component
+          - "Container-based components": guide/container_component
+          - "Fully custom components": guide/custom_component
+
+      - "Orchestrators":
+          - "Local orchestrator": guide/local_orchestrator
+          - "Vertex AI Pipelines": guide/vertex
+          - "Apache Airflow": guide/airflow
+          - "Kubeflow Pipelines": guide/kubeflow
+
+      - "TFX CLI":
+          - "Using the TFX CLI": guide/cli
+
+      - "Libraries":
+          - "Data Validation":
+              - "Check and analyze data": guide/tfdv
+              - "Install": https://www.tensorflow.org/tfx/data_validation/install
+              - "Get started": https://www.tensorflow.org/tfx/data_validation/get_started
+
+          - "Transform":
+              - "Preprocess and transform data": guide/tft
+              - "Install": "https://www.tensorflow.org/tfx/transform/install"
+              - "Get started": "https://www.tensorflow.org/tfx/transform/get_started"
+              - "Using `tf.Transform` with TensorFlow 2.x": "https://www.tensorflow.org/tfx/transform/tf2_support"
+              - "Common transformations": "https://www.tensorflow.org/tfx/transform/common_transformations"
+              - "Data preprocessing best practices": guide/tft_bestpractices
+
+          - "Modeling":
+              - "Design modeling code": guide/train
+
+          - "Model Analysis":
+              - "Improving Model Quality": guide/tfma 
+ - "Install": https://www.tensorflow.org/tfx/model_analysis/install + - "Get started": https://www.tensorflow.org/tfx/model_analysis/get_started + - "Setup": https://www.tensorflow.org/tfx/model_analysis/setup + - "Metrics and Plots": https://www.tensorflow.org/tfx/model_analysis/metrics + - "Visualizations": https://www.tensorflow.org/tfx/model_analysis/visualizations + - "Model Validations": https://www.tensorflow.org/tfx/model_analysis/model_validations + - "Using Fairness Indicators": guide/fairness_indicators + - "Using Fairness Indicators with Pandas DataFrames": https://www.tensorflow.org/responsible_ai/fairness_indicators/tutorials/Fairness_Indicators_Pandas_Case_Study + - "Architecture": https://www.tensorflow.org/tfx/model_analysis/architecture + - "FAQ": https://www.tensorflow.org/tfx/model_analysis/faq + + - "Serving": + - "Serving models": guide/serving + - TensorFlow Serving with Docker: https://www.tensorflow.org/tfx/serving/docker + - Installation: https://www.tensorflow.org/tfx/serving/setup + - Serve a TensorFlow model: https://www.tensorflow.org/tfx/serving/serving_basic + - Architecture: https://www.tensorflow.org/tfx/serving/architecture + - Advanced model server configuration: https://www.tensorflow.org/tfx/serving/serving_config + - Build a TensorFlow ModelServer: https://www.tensorflow.org/tfx/serving/serving_advanced + - Use TensorFlow Serving with Kubernetes: https://www.tensorflow.org/tfx/serving/serving_kubernetes + - Create a new kind of servable: https://www.tensorflow.org/tfx/serving/custom_servable + - Create a module that discovers new servable paths: https://www.tensorflow.org/tfx/serving/custom_source + - Serving TensorFlow models with custom ops: https://www.tensorflow.org/tfx/serving/custom_op + - SignatureDefs in SavedModel for TensorFlow Serving: https://www.tensorflow.org/tfx/serving/signature_defs + + - "Related projects": + - "Apache Beam": "https://beam.apache.org/" + - "MLTransform": 
"https://cloud.google.com/dataflow/docs/machine-learning/ml-preprocess-data" + - "ML Metadata": guide/mlmd + - "TensorBoard": "https://www.tensorflow.org/tensorboard" + - API: + - v1: + - "Overview": api/v1/root + - "components": api/v1/components + - "dsl": api/v1/dsl + - "extensions": api/v1/extensions + - "orchestration": api/v1/orchestration + - "proto": api/v1/proto + - "testing": api/v1/testing + - "types": api/v1/types + - "utils": api/v1/utils diff --git a/setup.py b/setup.py index de4ec0163f..4b00875569 100644 --- a/setup.py +++ b/setup.py @@ -202,6 +202,7 @@ def run(self): 'tflite-support': dependencies.make_extra_packages_tflite_support(), 'examples': dependencies.make_extra_packages_examples(), 'test': dependencies.make_extra_packages_test(), + 'docs': dependencies.make_extra_packages_docs(), 'all': dependencies.make_extra_packages_all(), } diff --git a/tfx/components/__init__.py b/tfx/components/__init__.py index b8780ec23a..d5d586be25 100644 --- a/tfx/components/__init__.py +++ b/tfx/components/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. """Subpackage for TFX components.""" # For component user to direct use tfx.components.[...] as an alias. 
+ from tfx.components.bulk_inferrer.component import BulkInferrer from tfx.components.distribution_validator.component import DistributionValidator from tfx.components.evaluator.component import Evaluator @@ -29,3 +30,22 @@ from tfx.components.trainer.component import Trainer from tfx.components.transform.component import Transform from tfx.components.tuner.component import Tuner + +__all__ = [ + "BulkInferrer", + "DistributionValidator", + "Evaluator", + "ExampleDiff", + "FileBasedExampleGen", + "CsvExampleGen", + "ImportExampleGen", + "ExampleValidator", + "InfraValidator", + "ModelValidator", + "Pusher", + "SchemaGen", + "StatisticsGen", + "Trainer", + "Transform", + "Tuner", +] diff --git a/tfx/components/transform/executor_test.py b/tfx/components/transform/executor_test.py index 1829b54cb1..cf82909bc8 100644 --- a/tfx/components/transform/executor_test.py +++ b/tfx/components/transform/executor_test.py @@ -20,6 +20,8 @@ import tempfile from unittest import mock +import pytest + from absl.testing import parameterized import apache_beam as beam import tensorflow as tf @@ -45,6 +47,7 @@ class _TempPath(types.Artifact): # TODO(b/122478841): Add more detailed tests. +@pytest.mark.xfail(run=False, reason="Test is flaky.") class ExecutorTest(tft_unit.TransformTestCase): _TEMP_ARTIFACTS_DIR = tempfile.mkdtemp() diff --git a/tfx/dependencies.py b/tfx/dependencies.py index b80256fc08..8ed768835b 100644 --- a/tfx/dependencies.py +++ b/tfx/dependencies.py @@ -33,228 +33,254 @@ branch HEAD. - For the release, we use a range of version, which is also used as a default. 
""" +from __future__ import annotations + import os def select_constraint(default, nightly=None, git_master=None): - """Select dependency constraint based on TFX_DEPENDENCY_SELECTOR env var.""" - selector = os.environ.get('TFX_DEPENDENCY_SELECTOR') - if selector == 'UNCONSTRAINED': - return '' - elif selector == 'NIGHTLY' and nightly is not None: - return nightly - elif selector == 'GIT_MASTER' and git_master is not None: - return git_master - else: - return default + """Select dependency constraint based on TFX_DEPENDENCY_SELECTOR env var.""" + selector = os.environ.get("TFX_DEPENDENCY_SELECTOR") + if selector == "UNCONSTRAINED": + return "" + elif selector == "NIGHTLY" and nightly is not None: + return nightly + elif selector == "GIT_MASTER" and git_master is not None: + return git_master + else: + return default def make_pipeline_sdk_required_install_packages(): - return [ - 'absl-py>=0.9,<2.0.0', - 'ml-metadata' - + select_constraint( - # LINT.IfChange - default='>=1.15.0,<1.16.0', - # LINT.ThenChange(tfx/workspace.bzl) - nightly='>=1.16.0.dev', - git_master='@git+https://github.com/google/ml-metadata@master', - ), - 'packaging>=22', - 'portpicker>=1.3.1,<2', - 'protobuf>=3.20.3,<5', - 'docker>=7,<8', - 'google-apitools>=0.5,<1', - 'google-api-python-client>=1.8,<2', - # TODO(b/176812386): Deprecate usage of jinja2 for placeholders. - 'jinja2>=2.7.3,<4', - # typing-extensions allows consistent & future-proof interface for typing. - # Since kfp<2 uses typing-extensions<4, lower bound is the latest 3.x, and - # upper bound is <5 as the semver started from 4.0 according to their doc. 
- 'typing-extensions>=3.10.0.2,<5', - ] + return [ + "absl-py>=0.9,<2.0.0", + "ml-metadata" + + select_constraint( + # LINT.IfChange + default=">=1.15.0,<1.16.0", + # LINT.ThenChange(tfx/workspace.bzl) + nightly=">=1.16.0.dev", + git_master="@git+https://github.com/google/ml-metadata@master", + ), + "packaging>=22", + "portpicker>=1.3.1,<2", + "protobuf>=3.20.3,<5", + "docker>=7,<8", + "google-apitools>=0.5,<1", + "google-api-python-client>=1.8,<2", + # TODO(b/176812386): Deprecate usage of jinja2 for placeholders. + "jinja2>=2.7.3,<4", + # typing-extensions allows consistent & future-proof interface for typing. + # Since kfp<2 uses typing-extensions<4, lower bound is the latest 3.x, and + # upper bound is <5 as the semver started from 4.0 according to their doc. + "typing-extensions>=3.10.0.2,<5", + ] def make_required_install_packages(): - # Make sure to sync the versions of common dependencies (absl-py, numpy, - # and protobuf) with TF. - return make_pipeline_sdk_required_install_packages() + [ - 'apache-beam[gcp]>=2.47,<3', - 'attrs>=19.3.0,<24', - 'click>=7,<9', - 'google-api-core<3', - 'google-cloud-aiplatform>=1.6.2,<2', - 'google-cloud-bigquery>=3,<4', - 'grpcio>=1.28.1,<2', - 'keras-tuner>=1.0.4,<2,!=1.4.0,!=1.4.1', - 'kubernetes>=10.0.1,<13', - 'numpy>=1.16,<2', - 'pyarrow>=10,<11', - # TODO: b/358471141 - Orjson 3.10.7 breaks TFX OSS tests. - # Unpin once the issue with installation is resolved. - 'orjson!=3.10.7', - # TODO(b/332616741): Scipy version 1.13 breaks the TFX OSS test. - # Unpin once the issue is resolved. - 'scipy<1.13', - 'scikit-learn==1.5.1', - # TODO(b/291837844): Pinned pyyaml to 5.3.1. - # Unpin once the issue with installation is resolved. - 'pyyaml>=6,<7', - # Keep the TF version same as TFT to help Pip version resolution. - # Pip might stuck in a TF 1.15 dependency although there is a working - # dependency set with TF 2.x without the sync. 
- # pylint: disable=line-too-long - 'tensorflow' + select_constraint('>=2.15.0,<2.16'), - # pylint: enable=line-too-long - 'tensorflow-hub>=0.15.0,<0.16', - 'tensorflow-data-validation' - + select_constraint( - default='>=1.15.1,<1.16.0', - nightly='>=1.16.0.dev', - git_master=( - '@git+https://github.com/tensorflow/data-validation@master' - ), - ), - 'tensorflow-model-analysis' - + select_constraint( - default='>=0.46.0,<0.47.0', - nightly='>=0.47.0.dev', - git_master='@git+https://github.com/tensorflow/model-analysis@master', - ), - 'tensorflow-serving-api>=2.15,<2.16', - 'tensorflow-transform' - + select_constraint( - default='>=1.15.0,<1.16.0', - nightly='>=1.16.0.dev', - git_master='@git+https://github.com/tensorflow/transform@master', - ), - 'tfx-bsl' - + select_constraint( - default='>=1.15.1,<1.16.0', - nightly='>=1.16.0.dev', - git_master='@git+https://github.com/tensorflow/tfx-bsl@master', - ), - ] + # Make sure to sync the versions of common dependencies (absl-py, numpy, + # and protobuf) with TF. + return make_pipeline_sdk_required_install_packages() + [ + "apache-beam[gcp]>=2.47,<3", + "attrs>=19.3.0,<24", + "click>=7,<9", + "google-api-core<3", + "google-cloud-aiplatform>=1.6.2,<2", + "google-cloud-bigquery>=3,<4", + "grpcio>=1.28.1,<2", + "keras-tuner>=1.0.4,<2,!=1.4.0,!=1.4.1", + "kubernetes>=10.0.1,<13", + "numpy>=1.16,<2", + "pyarrow>=10,<11", + # TODO: b/358471141 - Orjson 3.10.7 breaks TFX OSS tests. + # Unpin once the issue with installation is resolved. + "orjson!=3.10.7", + # TODO(b/332616741): Scipy version 1.13 breaks the TFX OSS test. + # Unpin once the issue is resolved. + "scipy<1.13", + 'scikit-learn==1.5.1', + # TODO(b/291837844): Pinned pyyaml to 5.3.1. + # Unpin once the issue with installation is resolved. + "pyyaml>=6,<7", + # Keep the TF version same as TFT to help Pip version resolution. + # Pip might stuck in a TF 1.15 dependency although there is a working + # dependency set with TF 2.x without the sync. 
+ # pylint: disable=line-too-long + "tensorflow" + select_constraint(">=2.15.0,<2.16"), + # pylint: enable=line-too-long + "tensorflow-hub>=0.15.0,<0.16", + "tensorflow-data-validation" + + select_constraint( + default=">=1.15.1,<1.16.0", + nightly=">=1.16.0.dev", + git_master=("@git+https://github.com/tensorflow/data-validation@master"), + ), + "tensorflow-model-analysis" + + select_constraint( + default=">=0.46.0,<0.47.0", + nightly=">=0.47.0.dev", + git_master="@git+https://github.com/tensorflow/model-analysis@master", + ), + "tensorflow-serving-api>=2.15,<2.16", + "tensorflow-transform" + + select_constraint( + default=">=1.15.0,<1.16.0", + nightly=">=1.16.0.dev", + git_master="@git+https://github.com/tensorflow/transform@master", + ), + "tfx-bsl" + + select_constraint( + default=">=1.15.1,<1.16.0", + nightly=">=1.16.0.dev", + git_master="@git+https://github.com/tensorflow/tfx-bsl@master", + ), + ] def make_extra_packages_airflow(): - """Prepare extra packages needed for Apache Airflow orchestrator.""" - return [ - 'apache-airflow[mysql]>=1.10.14,<3', - ] + """Prepare extra packages needed for Apache Airflow orchestrator.""" + return [ + "apache-airflow[mysql]>=1.10.14,<3", + ] def make_extra_packages_kfp(): - """Prepare extra packages needed for Kubeflow Pipelines orchestrator.""" - return [ - # TODO(b/304892416): Migrate from KFP SDK v1 to v2. - 'kfp>=1.8.14,<2', - 'kfp-pipeline-spec>0.1.13,<0.2', - ] + """Prepare extra packages needed for Kubeflow Pipelines orchestrator.""" + return [ + # TODO(b/304892416): Migrate from KFP SDK v1 to v2. + "kfp>=1.8.14,<2", + "kfp-pipeline-spec>0.1.13,<0.2", + ] def make_extra_packages_test(): - """Prepare extra packages needed for running unit tests.""" - # Note: It is okay to pin packages to exact versions in this list to minimize - # conflicts. 
- return make_extra_packages_airflow() + make_extra_packages_kfp() + [ - 'pytest>=5,<=8', - 'pytest-subtests==0.13.1', - ] + """Prepare extra packages needed for running unit tests.""" + # Note: It is okay to pin packages to exact versions in this list to minimize + # conflicts. + return ( + make_extra_packages_airflow() + + make_extra_packages_kfp() + + [ + "pytest>=5,<=8", + "pytest-subtests==0.13.1", + ] + ) def make_extra_packages_docker_image(): - # Packages needed for tfx docker image. - return [ - # TODO(b/304892416): Migrate from KFP SDK v1 to v2. - 'kfp>=1.8.14,<2', - 'kfp-pipeline-spec>0.1.13,<0.2', - 'mmh>=2.2,<3', - 'python-snappy>=0.5,<0.6', - # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py - 'tensorflow-cloud>=0.1,<0.2', - 'tensorflow-io>=0.9.0, <=0.24.0', - ] + # Packages needed for tfx docker image. + return [ + # TODO(b/304892416): Migrate from KFP SDK v1 to v2. + "kfp>=1.8.14,<2", + "kfp-pipeline-spec>0.1.13,<0.2", + "mmh>=2.2,<3", + "python-snappy>=0.5,<0.6", + # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py + "tensorflow-cloud>=0.1,<0.2", + "tensorflow-io>=0.9.0, <=0.24.0", + ] def make_extra_packages_tfjs(): - # Packages needed for tfjs. - return [ - 'tensorflowjs>=4.5,<5', - ] + # Packages needed for tfjs. + return [ + "tensorflowjs>=4.5,<5", + ] def make_extra_packages_tflite_support(): - # Required for tfx/examples/cifar10 - return [ - 'flatbuffers>=1.12', - 'tflite-support>=0.4.3,<0.4.5', - ] + # Required for tfx/examples/cifar10 + return [ + "flatbuffers>=1.12", + "tflite-support>=0.4.3,<0.4.5", + ] def make_extra_packages_tf_ranking(): - # Packages needed for tf-ranking which is used in tfx/examples/ranking. - return [ - 'tensorflow-ranking>=0.5,<0.6', - 'struct2tensor' + select_constraint( - default='>=0.46.0,<0.47.0', - nightly='>=0.47.0.dev', - git_master='@git+https://github.com/google/struct2tensor@master'), - ] + # Packages needed for tf-ranking which is used in tfx/examples/ranking. 
+ return [ + "tensorflow-ranking>=0.5,<0.6", + "struct2tensor" + + select_constraint( + default=">=0.46.0,<0.47.0", + nightly=">=0.47.0.dev", + git_master="@git+https://github.com/google/struct2tensor@master", + ), + ] def make_extra_packages_tfdf(): - # Packages needed for tensorflow-decision-forests. - # Required for tfx/examples/penguin/penguin_utils_tfdf_experimental.py - return [ - # NOTE: TFDF 1.0.1 is only compatible with TF 2.10.x. - 'tensorflow-decision-forests>=1.0.1,<1.9', - ] + # Packages needed for tensorflow-decision-forests. + # Required for tfx/examples/penguin/penguin_utils_tfdf_experimental.py + return [ + # NOTE: TFDF 1.0.1 is only compatible with TF 2.10.x. + "tensorflow-decision-forests>=1.0.1,<1.9", + ] def make_extra_packages_flax(): - # Packages needed for the flax example. - # Required for the experimental tfx/examples using Flax, e.g., - # tfx/examples/penguin. - return [ - # TODO(b/324157691): Upgrade jax once we upgrade TF version. - 'jax<0.4.24', - 'jaxlib<0.4.24', - 'flax<1', - 'optax<1', - ] + # Packages needed for the flax example. + # Required for the experimental tfx/examples using Flax, e.g., + # tfx/examples/penguin. + return [ + # TODO(b/324157691): Upgrade jax once we upgrade TF version. + "jax<0.4.24", + "jaxlib<0.4.24", + "flax<1", + "optax<1", + ] def make_extra_packages_examples(): - # Extra dependencies required for tfx/examples. 
- return [ - # Required for presto ExampleGen custom component in - # tfx/examples/custom_components/presto_example_gen - 'presto-python-client>=0.7,<0.8', - # Required for slack custom component in - # tfx/examples/custom_components/slack - 'slackclient>=2.8.2,<3', - 'websocket-client>=0.57,<1', - # Required for bert examples in tfx/examples/bert - 'tensorflow-text>=1.15.1,<3', - # Required for tfx/examples/penguin/experimental - # LINT.IfChange - 'scikit-learn>=1.0,<2', - # LINT.ThenChange( - # examples/penguin/experimental/penguin_pipeline_sklearn_gcp.py) - # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py - 'tensorflow-cloud>=0.1,<0.2', - ] + # Extra dependencies required for tfx/examples. + return [ + # Required for presto ExampleGen custom component in + # tfx/examples/custom_components/presto_example_gen + "presto-python-client>=0.7,<0.8", + # Required for slack custom component in + # tfx/examples/custom_components/slack + "slackclient>=2.8.2,<3", + "websocket-client>=0.57,<1", + # Required for bert examples in tfx/examples/bert + "tensorflow-text>=1.15.1,<3", + # Required for tfx/examples/penguin/experimental + # LINT.IfChange + "scikit-learn>=1.0,<2", + # LINT.ThenChange( + # examples/penguin/experimental/penguin_pipeline_sklearn_gcp.py) + # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py + "tensorflow-cloud>=0.1,<0.2", + ] + + +def make_extra_packages_docs() -> list[str]: + """Get a list of packages required for building docs as HTML. + + Returns + ------- + list[str] + List of packages required for building docs + """ + return [ + "mkdocs", + "mkdocstrings[python]", + "mkdocs-material", + "griffe-inherited-docstrings", + "mkdocs-autorefs", + "mkdocs-jupyter", + "mkdocs-caption", + "pymdown-extensions", + ] def make_extra_packages_all(): - # All extra dependencies. 
- return [ - *make_extra_packages_test(), - *make_extra_packages_tfjs(), - *make_extra_packages_tflite_support(), - *make_extra_packages_tf_ranking(), - *make_extra_packages_tfdf(), - *make_extra_packages_flax(), - *make_extra_packages_examples(), - ] + # All extra dependencies, not including lint or docs dependencies + return [ + *make_extra_packages_test(), + *make_extra_packages_tfjs(), + *make_extra_packages_tflite_support(), + *make_extra_packages_tf_ranking(), + *make_extra_packages_tfdf(), + *make_extra_packages_flax(), + *make_extra_packages_examples(), + ] diff --git a/tfx/dsl/input_resolution/ops/latest_policy_model_op_test.py b/tfx/dsl/input_resolution/ops/latest_policy_model_op_test.py index cc984ff020..847b963ce7 100644 --- a/tfx/dsl/input_resolution/ops/latest_policy_model_op_test.py +++ b/tfx/dsl/input_resolution/ops/latest_policy_model_op_test.py @@ -111,9 +111,10 @@ def test_add_downstream_artifact_model(self): ) -@pytest.mark.xfail(run=False, +@pytest.mark.xfail( + run=False, reason="PR 6889 This class contains tests that fail and needs to be fixed. " - "If all tests pass, please remove this mark." + "If all tests pass, please remove this mark.", ) class LatestPolicyModelOpTest( test_utils.ResolverTestCase, @@ -272,7 +273,8 @@ def testLatestPolicyModelOpTest_DoesNotRaiseSkipSignal(self): policy=_LATEST_PUSHED, ) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " "If this test passes, please remove this mark.", strict=True, @@ -316,7 +318,8 @@ def testLatestPolicyModelOpTest_LatestTrainedModel(self): actual = self._latest_policy_model(_LATEST_EXPORTED) self.assertArtifactMapsEqual(actual, {"model": [self.model_3]}) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. 
" "If this test passes, please remove this mark.", strict=True, @@ -370,7 +373,8 @@ def testLatestPolicyModelOp_SeqeuntialExecutions_LatestModelChanges(self): actual, {"model": [self.model_3], "model_push": [model_push_3]} ) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " "If this test passes, please remove this mark.", strict=True, @@ -457,7 +461,8 @@ def testLatestPolicyModelOp_NonBlessedArtifacts(self): }, ) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " "If this test passes, please remove this mark.", strict=True, @@ -556,7 +561,8 @@ def testLatestPolicyModelOp_MultipleModelInputEventsSameExecutionId(self): {"model": [self.model_2], "model_blessing": [model_blessing_2_3]}, ) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " "If this test passes, please remove this mark.", strict=True, @@ -655,9 +661,10 @@ def testLatestPolicyModelOp_InputDictContainsAllKeys(self): (["m1", "m2", "m3"], ["m2", "m3"], ["m1"], _LATEST_PUSHED, "m1"), (["m2", "m1"], [], [], _LATEST_EVALUATOR_BLESSED, "m2"), ) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " - "If this test passes, please remove this mark." + "If this test passes, please remove this mark.", ) def testLatestPolicyModelOp_RealisticModelExecutions_ModelResolvedCorrectly( self, @@ -685,9 +692,10 @@ def testLatestPolicyModelOp_RealisticModelExecutions_ModelResolvedCorrectly( actual = self._latest_policy_model(policy)["model"][0] self.assertArtifactEqual(actual, str_to_model[expected]) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " - "If this test passes, please remove this mark." 
+ "If this test passes, please remove this mark.", ) def testLatestPolicyModelOp_ModelIsNotDirectParentOfModelBlessing(self): # Manually create a path: @@ -738,7 +746,8 @@ def testLatestPolicyModelOp_ModelIsNotDirectParentOfModelBlessing(self): }, ) - @pytest.mark.xfail(run=False, + @pytest.mark.xfail( + run=False, reason="PR 6889 This test fails and needs to be fixed. " "If this test passes, please remove this mark.", strict=True, diff --git a/tfx/types/__init__.py b/tfx/types/__init__.py index be69a64d38..46d1bf0cd5 100644 --- a/tfx/types/__init__.py +++ b/tfx/types/__init__.py @@ -24,10 +24,23 @@ """ from tfx.types.artifact import Artifact -from tfx.types.channel import BaseChannel -from tfx.types.channel import Channel -from tfx.types.channel import ExecPropertyTypes -from tfx.types.channel import OutputChannel -from tfx.types.channel import Property # Type alias. +from tfx.types.channel import ( + BaseChannel, + Channel, + ExecPropertyTypes, + OutputChannel, + Property, +) from tfx.types.component_spec import ComponentSpec from tfx.types.value_artifact import ValueArtifact + +__all__ = [ + "Artifact", + "BaseChannel", + "Channel", + "ComponentSpec", + "ExecPropertyTypes", + "OutputChannel", + "Property", + "ValueArtifact", +] diff --git a/tfx/types/artifact_utils.py b/tfx/types/artifact_utils.py index 5ebaf57ac7..358400cbc4 100644 --- a/tfx/types/artifact_utils.py +++ b/tfx/types/artifact_utils.py @@ -65,8 +65,8 @@ def parse_artifact_dict(json_str: str) -> Dict[str, List[Artifact]]: """Parse a dict from key to list of Artifact from its json format.""" tfx_artifacts = {} - for k, l in json.loads(json_str).items(): - tfx_artifacts[k] = [Artifact.from_json_dict(v) for v in l] + for k, j in json.loads(json_str).items(): + tfx_artifacts[k] = [Artifact.from_json_dict(v) for v in j] return tfx_artifacts @@ -74,8 +74,8 @@ def parse_artifact_dict(json_str: str) -> Dict[str, List[Artifact]]: def jsonify_artifact_dict(artifact_dict: Dict[str, List[Artifact]]) -> str: 
"""Serialize a dict from key to list of Artifact into json format.""" d = {} - for k, l in artifact_dict.items(): - d[k] = [v.to_json_dict() for v in l] + for k, j in artifact_dict.items(): + d[k] = [v.to_json_dict() for v in j] return json.dumps(d) @@ -143,7 +143,6 @@ def get_artifact_type_class( # definitions is imported. Modules containing custom artifact subclasses that # need to be deserialized should be imported by the entrypoint of the # application or container. - from tfx.types import standard_artifacts # pylint: disable=g-import-not-at-top,import-outside-toplevel,unused-import,unused-variable # Enumerate the Artifact type ontology, separated into auto-generated and # natively-defined classes. diff --git a/tfx/types/standard_artifacts.py b/tfx/types/standard_artifacts.py index 344e889a91..b67a5978b3 100644 --- a/tfx/types/standard_artifacts.py +++ b/tfx/types/standard_artifacts.py @@ -24,20 +24,13 @@ from typing import Sequence from absl import logging -from tfx.types import artifact + from tfx.types import standard_artifact_utils -from tfx.types import system_artifacts -from tfx.types import value_artifact -from tfx.utils import json_utils -from tfx.utils import pure_typing_utils - -Artifact = artifact.Artifact -Property = artifact.Property -PropertyType = artifact.PropertyType -Dataset = system_artifacts.Dataset -SystemModel = system_artifacts.Model -Statistics = system_artifacts.Statistics -ValueArtifact = value_artifact.ValueArtifact +from tfx.types.artifact import Artifact, Property, PropertyType +from tfx.types.system_artifacts import Dataset, Statistics +from tfx.types.system_artifacts import Model as SystemModel +from tfx.types.value_artifact import ValueArtifact +from tfx.utils import json_utils, pure_typing_utils SPAN_PROPERTY = Property(type=PropertyType.INT) VERSION_PROPERTY = Property(type=PropertyType.INT) @@ -47,421 +40,476 @@ class _TfxArtifact(Artifact): - """TFX first-party component artifact definition. 
- - Do not construct directly, used for creating Channel, e.g., - ``` - Channel(type=standard_artifacts.Model) - ``` - """ - - def __init__(self, *args, **kwargs): - """Construct TFX first-party component artifact.""" - # TODO(b/176795331): Refactor directory structure to make it clearer that - # TFX-specific artifacts require the full "tfx" package be installed. - # - # Do not allow usage of TFX-specific artifact if only the core pipeline - # SDK package is installed. - try: - import setuptools as _ # pytype: disable=import-error # pylint: disable=g-import-not-at-top - # Test import only when setuptools is available. - try: - # `extensions` is not included in ml_pipelines_sdk and doesn't have any - # transitive import. - import tfx.extensions as _ # type: ignore # pylint: disable=g-import-not-at-top - except ModuleNotFoundError as err: - # The following condition detects exactly whether only the DSL package - # is installed, and is bypassed when tests run in Bazel. - raise RuntimeError('The "tfx" and all dependent packages need to be ' - 'installed to use this functionality.') from err - except ModuleNotFoundError: - pass - - super().__init__(*args, **kwargs) + """TFX first-party component artifact definition. + + Do not construct directly, used for creating Channel, e.g., + ``` + Channel(type=standard_artifacts.Model) + ``` + """ + + def __init__(self, *args, **kwargs): + """Construct TFX first-party component artifact.""" + # TODO(b/176795331): Refactor directory structure to make it clearer that + # TFX-specific artifacts require the full "tfx" package be installed. + # + # Do not allow usage of TFX-specific artifact if only the core pipeline + # SDK package is installed. + try: + import setuptools # pytype: disable=import-error # noqa: F401 + + # Test import only when setuptools is available. + try: + # `extensions` is not included in ml_pipelines_sdk and doesn't have any + # transitive import. 
+ import tfx.extensions as _ # type: ignore # noqa: F401 # pylint: disable=g-import-not-at-top + except ModuleNotFoundError as err: + # The following condition detects exactly whether only the DSL package + # is installed, and is bypassed when tests run in Bazel. + raise RuntimeError( + 'The "tfx" and all dependent packages need to be ' + "installed to use this functionality." + ) from err + except ModuleNotFoundError: + pass + + super().__init__(*args, **kwargs) class Examples(_TfxArtifact): - """Artifact that contains the training data. - - Training data should be brought in to the TFX pipeline using components - like ExampleGen. Data in Examples artifact is split and stored separately. - The file and payload format must be specified as optional custom properties - if not using default formats. - Please see - https://www.tensorflow.org/tfx/guide/examplegen#span_version_and_split to - understand about span, version and splits. - - * Properties: - - `span`: Integer to distinguish group of Examples. - - `version`: Integer to represent updated data. - - `splits`: A list of split names. For example, ["train", "test"]. - - * File structure: - - `{uri}/` - - `Split-{split_name1}/`: Files for split - - All direct children files are recognized as the data. - - File format and payload format are determined by custom properties. - - `Split-{split_name2}/`: Another split... - - * Commonly used custom properties of the Examples artifact: - - `file_format`: a string that represents the file format. See - tfx/components/util/tfxio_utils.py:make_tfxio for - available values. - - `payload_format`: int (enum) value of the data payload format. - See tfx/proto/example_gen.proto:PayloadFormat for available formats. 
- """ - TYPE_NAME = 'Examples' - TYPE_ANNOTATION = Dataset - PROPERTIES = { - 'span': SPAN_PROPERTY, - 'version': VERSION_PROPERTY, - 'split_names': SPLIT_NAMES_PROPERTY, - } - - @property - def splits(self) -> Sequence[str]: - return standard_artifact_utils.decode_split_names(self.split_names) - - @splits.setter - def splits(self, splits: Sequence[str]) -> None: - if not pure_typing_utils.is_compatible(splits, Sequence[str]): - raise TypeError(f'splits should be Sequence[str] but got {splits}') - self.split_names = standard_artifact_utils.encode_split_names(list(splits)) - - def path(self, *, split: str) -> str: - """Path to the artifact URI's split subdirectory. - - This method DOES NOT create a directory path it returns; caller must make - a directory of the returned path value before writing. - - Args: - split: A name of the split, e.g. `"train"`, `"validation"`, `"test"`. - - Raises: - ValueError: if the `split` is not in the `self.splits`. - - Returns: - A path to `{self.uri}/Split-{split}`. + """Artifact that contains the training data. + + Training data should be brought in to the TFX pipeline using components + like ExampleGen. Data in Examples artifact is split and stored separately. + The file and payload format must be specified as optional custom properties + if not using default formats. + Please see + https://www.tensorflow.org/tfx/guide/examplegen#span_version_and_split to + understand about span, version and splits. + + * Properties: + - `span`: Integer to distinguish group of Examples. + - `version`: Integer to represent updated data. + - `splits`: A list of split names. For example, ["train", "test"]. + + * File structure: + - `{uri}/` + - `Split-{split_name1}/`: Files for split + - All direct children files are recognized as the data. + - File format and payload format are determined by custom properties. + - `Split-{split_name2}/`: Another split... 
+ + * Commonly used custom properties of the Examples artifact: + - `file_format`: a string that represents the file format. See + tfx/components/util/tfxio_utils.py:make_tfxio for + available values. + - `payload_format`: int (enum) value of the data payload format. + See tfx/proto/example_gen.proto:PayloadFormat for available formats. """ - if split not in self.splits: - raise ValueError( - f'Split {split} not found in {self.splits=}. Did you forget to update' - ' Examples.splits first?' - ) - return standard_artifact_utils.get_split_uris([self], split)[0] - - -class ExampleAnomalies(_TfxArtifact): # pylint: disable=missing-class-docstring - TYPE_NAME = 'ExampleAnomalies' - PROPERTIES = { - 'span': SPAN_PROPERTY, - 'split_names': SPLIT_NAMES_PROPERTY, - } - - @property - def splits(self) -> Sequence[str]: - return standard_artifact_utils.decode_split_names(self.split_names) - - @splits.setter - def splits(self, splits: Sequence[str]) -> None: - if not pure_typing_utils.is_compatible(splits, Sequence[str]): - raise TypeError(f'splits should be Sequence[str] but got {splits}') - self.split_names = standard_artifact_utils.encode_split_names(list(splits)) - - -class ExampleValidationMetrics(_TfxArtifact): # pylint: disable=missing-class-docstring - TYPE_NAME = 'ExampleValidationMetrics' - PROPERTIES = { - 'span': SPAN_PROPERTY, - 'split_names': SPLIT_NAMES_PROPERTY, - } - - @property - def splits(self) -> Sequence[str]: - return standard_artifact_utils.decode_split_names(self.split_names) - - @splits.setter - def splits(self, splits: Sequence[str]) -> None: - if not pure_typing_utils.is_compatible(splits, Sequence[str]): - raise TypeError(f'splits should be Sequence[str] but got {splits}') - self.split_names = standard_artifact_utils.encode_split_names(list(splits)) - - -class ExampleStatistics(_TfxArtifact): # pylint: disable=missing-class-docstring - TYPE_NAME = 'ExampleStatistics' - TYPE_ANNOTATION = Statistics - PROPERTIES = { - 'span': SPAN_PROPERTY, - 
'split_names': SPLIT_NAMES_PROPERTY, - } - - @property - def splits(self) -> Sequence[str]: - return standard_artifact_utils.decode_split_names(self.split_names) - - @splits.setter - def splits(self, splits: Sequence[str]) -> None: - if not pure_typing_utils.is_compatible(splits, Sequence[str]): - raise TypeError(f'splits should be Sequence[str] but got {splits}') - self.split_names = standard_artifact_utils.encode_split_names(list(splits)) + TYPE_NAME = "Examples" + TYPE_ANNOTATION = Dataset + PROPERTIES = { + "span": SPAN_PROPERTY, + "version": VERSION_PROPERTY, + "split_names": SPLIT_NAMES_PROPERTY, + } + + @property + def splits(self) -> Sequence[str]: + return standard_artifact_utils.decode_split_names(self.split_names) + + @splits.setter + def splits(self, splits: Sequence[str]) -> None: + if not pure_typing_utils.is_compatible(splits, Sequence[str]): + raise TypeError(f"splits should be Sequence[str] but got {splits}") + self.split_names = standard_artifact_utils.encode_split_names(list(splits)) + + def path(self, *, split: str) -> str: + """Path to the artifact URI's split subdirectory. + + This method DOES NOT create a directory path it returns; caller must make + a directory of the returned path value before writing. + + Args: + split: A name of the split, e.g. `"train"`, `"validation"`, `"test"`. + + Raises: + ValueError: if the `split` is not in the `self.splits`. + + Returns: + A path to `{self.uri}/Split-{split}`. + """ + if split not in self.splits: + raise ValueError( + f"Split {split} not found in {self.splits=}. Did you forget to update" + " Examples.splits first?" 
+ ) + return standard_artifact_utils.get_split_uris([self], split)[0] + + +class ExampleAnomalies(_TfxArtifact): + """TFX first-party component artifact definition.""" + TYPE_NAME = "ExampleAnomalies" + PROPERTIES = { + "span": SPAN_PROPERTY, + "split_names": SPLIT_NAMES_PROPERTY, + } + + @property + def splits(self) -> Sequence[str]: + return standard_artifact_utils.decode_split_names(self.split_names) + + @splits.setter + def splits(self, splits: Sequence[str]) -> None: + if not pure_typing_utils.is_compatible(splits, Sequence[str]): + raise TypeError(f"splits should be Sequence[str] but got {splits}") + self.split_names = standard_artifact_utils.encode_split_names(list(splits)) + + +class ExampleValidationMetrics(_TfxArtifact): + """TFX first-party component artifact definition.""" + TYPE_NAME = "ExampleValidationMetrics" + PROPERTIES = { + "span": SPAN_PROPERTY, + "split_names": SPLIT_NAMES_PROPERTY, + } + + @property + def splits(self) -> Sequence[str]: + return standard_artifact_utils.decode_split_names(self.split_names) + + @splits.setter + def splits(self, splits: Sequence[str]) -> None: + if not pure_typing_utils.is_compatible(splits, Sequence[str]): + raise TypeError(f"splits should be Sequence[str] but got {splits}") + self.split_names = standard_artifact_utils.encode_split_names(list(splits)) + + +class ExampleStatistics(_TfxArtifact): + """TFX first-party component artifact definition.""" + TYPE_NAME = "ExampleStatistics" + TYPE_ANNOTATION = Statistics + PROPERTIES = { + "span": SPAN_PROPERTY, + "split_names": SPLIT_NAMES_PROPERTY, + } + + @property + def splits(self) -> Sequence[str]: + return standard_artifact_utils.decode_split_names(self.split_names) + + @splits.setter + def splits(self, splits: Sequence[str]) -> None: + if not pure_typing_utils.is_compatible(splits, Sequence[str]): + raise TypeError(f"splits should be Sequence[str] but got {splits}") + self.split_names = standard_artifact_utils.encode_split_names(list(splits)) class 
ExamplesDiff(_TfxArtifact): - TYPE_NAME = 'ExamplesDiff' + """TFX first-party component artifact definition.""" + TYPE_NAME = "ExamplesDiff" # TODO(b/158334890): deprecate ExternalArtifact. class ExternalArtifact(_TfxArtifact): - TYPE_NAME = 'ExternalArtifact' + """TFX first-party component artifact definition.""" + TYPE_NAME = "ExternalArtifact" class InferenceResult(_TfxArtifact): - TYPE_NAME = 'InferenceResult' + """TFX first-party component artifact definition.""" + TYPE_NAME = "InferenceResult" class InfraBlessing(_TfxArtifact): - TYPE_NAME = 'InfraBlessing' + """TFX first-party component artifact definition.""" + TYPE_NAME = "InfraBlessing" class Model(_TfxArtifact): - """Artifact that contains the actual persisted model. - - Training components stores the trained model like a saved model in this - artifact. A `Model` artifact contains serialization of the trained model in - one or more formats, each suitable for different usage (e.g. serving, - evaluation), and serving environments. - - * File structure: - - `{uri}/` - - `Format-Serving/`: Model exported for serving. - - `saved_model.pb` - - Other actual model files. - - `Format-TFMA/`: Model exported for evaluation. - - `saved_model.pb` - - Other actual model files. - - * Commonly used custom properties of the Model artifact: - """ - TYPE_NAME = 'Model' - TYPE_ANNOTATION = SystemModel + """Artifact that contains the actual persisted model. + + Training components stores the trained model like a saved model in this + artifact. A `Model` artifact contains serialization of the trained model in + one or more formats, each suitable for different usage (e.g. serving, + evaluation), and serving environments. + + * File structure: + - `{uri}/` + - `Format-Serving/`: Model exported for serving. + - `saved_model.pb` + - Other actual model files. + - `Format-TFMA/`: Model exported for evaluation. + - `saved_model.pb` + - Other actual model files. 
+ + * Commonly used custom properties of the Model artifact: + """ + TYPE_NAME = "Model" + TYPE_ANNOTATION = SystemModel class ModelRun(_TfxArtifact): - TYPE_NAME = 'ModelRun' + """TFX first-party component artifact definition.""" + TYPE_NAME = "ModelRun" class ModelBlessing(_TfxArtifact): - """Artifact that contains the evaluation of a trained model. - - This artifact is usually used with - Conditional when determining - whether to push this model on service or not. - - ```python - # Run pusher if evaluator has blessed the model. - with tfx.dsl.Cond(evaluator.outputs['blessing'].future() - [0].custom_property('blessed') == 1): - pusher = Pusher(...) - ``` - - * File structure: - - `{uri}/` - - `BLESSED`: if the evaluator has blessed the model. - - `NOT_BLESSED`: if the evaluator has not blessed the model. - - See tfx/components/evaluator/executor.py for how to write - ModelBlessing. - - * Commonly used custom properties of the ModelBlessing artifact: - - `blessed`: int value that represents whether the evaluator has blessed its - model or not. - """ - TYPE_NAME = 'ModelBlessing' + """Artifact that contains the evaluation of a trained model. + + This artifact is usually used with + Conditional when determining + whether to push this model on service or not. + + ```python + # Run pusher if evaluator has blessed the model. + with tfx.dsl.Cond(evaluator.outputs['blessing'].future() + [0].custom_property('blessed') == 1): + pusher = Pusher(...) + ``` + + * File structure: + - `{uri}/` + - `BLESSED`: if the evaluator has blessed the model. + - `NOT_BLESSED`: if the evaluator has not blessed the model. + - See tfx/components/evaluator/executor.py for how to write + ModelBlessing. + + * Commonly used custom properties of the ModelBlessing artifact: + - `blessed`: int value that represents whether the evaluator has blessed its + model or not. 
+ """ + TYPE_NAME = "ModelBlessing" class ModelEvaluation(_TfxArtifact): - TYPE_NAME = 'ModelEvaluation' + """TFX first-party component artifact definition.""" + TYPE_NAME = "ModelEvaluation" class PushedModel(_TfxArtifact): - TYPE_NAME = 'PushedModel' - TYPE_ANNOTATION = SystemModel + """TFX first-party component artifact definition.""" + TYPE_NAME = "PushedModel" + TYPE_ANNOTATION = SystemModel class Schema(_TfxArtifact): - """Artifact that contains the schema of the data. - - Schema artifact is used to store the - schema of the data. The schema is a proto that describes the data, including - the type of each feature, the range of values for each feature, and other - properties. The schema is usually generated by the SchemaGen component, which - uses the statistics of the data to infer the schema. The schema can be used by - other components in the pipeline to validate the data and to generate models. - - * File structure: - - `{uri}/` - - `schema.pbtxt`: Text-proto format serialization of - [tensorflow_metadata.proto.v0.schema.Schema](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/schema.proto) - proto message. - """ - - TYPE_NAME = 'Schema' + """Artifact that contains the schema of the data. + + Schema artifact is used to store the + schema of the data. The schema is a proto that describes the data, including + the type of each feature, the range of values for each feature, and other + properties. The schema is usually generated by the SchemaGen component, which + uses the statistics of the data to infer the schema. The schema can be used by + other components in the pipeline to validate the data and to generate models. + + * File structure: + - `{uri}/` + - `schema.pbtxt`: Text-proto format serialization of + [tensorflow_metadata.proto.v0.schema.Schema](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/schema.proto) + proto message. 
+ """ + TYPE_NAME = "Schema" class TransformCache(_TfxArtifact): - TYPE_NAME = 'TransformCache' + """TFX first-party component artifact definition.""" + TYPE_NAME = "TransformCache" class JsonValue(ValueArtifact): - """Artifacts representing a Jsonable value.""" - TYPE_NAME = 'JsonValue' + """Artifacts representing a Jsonable value.""" + TYPE_NAME = "JsonValue" - def encode(self, value: json_utils.JsonableType) -> str: - return json_utils.dumps(value) + def encode(self, value: json_utils.JsonableType) -> str: + return json_utils.dumps(value) - def decode(self, serialized_value: str) -> json_utils.JsonableType: - return json_utils.loads(serialized_value) + def decode(self, serialized_value: str) -> json_utils.JsonableType: + return json_utils.loads(serialized_value) class Bytes(ValueArtifact): - """Artifacts representing raw bytes.""" - TYPE_NAME = 'Bytes' + """Artifacts representing raw bytes.""" + TYPE_NAME = "Bytes" - def encode(self, value: bytes): - if not isinstance(value, bytes): - raise TypeError('Expecting bytes but got value %s of type %s' % - (str(value), type(value))) - return value + def encode(self, value: bytes): + if not isinstance(value, bytes): + raise TypeError( + "Expecting bytes but got value %s of type %s" + % (str(value), type(value)) + ) + return value - def decode(self, serialized_value: bytes): - return serialized_value + def decode(self, serialized_value: bytes): + return serialized_value class String(ValueArtifact): - """String-typed artifact. + """String-typed artifact. - String value artifacts are encoded using UTF-8. - """ - TYPE_NAME = 'String' + String value artifacts are encoded using UTF-8. + """ + TYPE_NAME = "String" - # Note, currently we enforce unicode-encoded string. - def encode(self, value: str) -> bytes: - if not isinstance(value, str): - raise TypeError('Expecting Text but got value %s of type %s' % - (str(value), type(value))) - return value.encode('utf-8') + # Note, currently we enforce unicode-encoded string. 
+ def encode(self, value: str) -> bytes: + if not isinstance(value, str): + raise TypeError( + "Expecting Text but got value %s of type %s" % (str(value), type(value)) + ) + return value.encode("utf-8") - def decode(self, serialized_value: bytes) -> str: - return serialized_value.decode('utf-8') + def decode(self, serialized_value: bytes) -> str: + return serialized_value.decode("utf-8") class Boolean(ValueArtifact): - """Artifacts representing a boolean. + """Artifacts representing a boolean. - Boolean value artifacts are encoded as "1" for True and "0" for False. - """ - TYPE_NAME = 'Boolean' + Boolean value artifacts are encoded as "1" for True and "0" for False. + """ + TYPE_NAME = "Boolean" - def encode(self, value: bool): - if not isinstance(value, bool): - raise TypeError( - f'Expecting bytes but got value {value} of type {type(value)}' - ) - return b'1' if value else b'0' + def encode(self, value: bool): + if not isinstance(value, bool): + raise TypeError( + f"Expecting bytes but got value {value} of type {type(value)}" + ) + return b"1" if value else b"0" - def decode(self, serialized_value: bytes): - return int(serialized_value) != 0 + def decode(self, serialized_value: bytes): + return int(serialized_value) != 0 class Integer(ValueArtifact): - """Integer-typed artifact. + """Integer-typed artifact. - Integer value artifacts are encoded as a decimal string. - """ - TYPE_NAME = 'Integer' + Integer value artifacts are encoded as a decimal string. 
+ """ + TYPE_NAME = "Integer" - def encode(self, value: int) -> bytes: - if not isinstance(value, int): - raise TypeError( - f'Expecting int but got value {value} of type {type(value)}' - ) - return str(value).encode('utf-8') + def encode(self, value: int) -> bytes: + if not isinstance(value, int): + raise TypeError( + f"Expecting int but got value {value} of type {type(value)}" + ) + return str(value).encode("utf-8") - def decode(self, serialized_value: bytes) -> int: - return int(serialized_value) + def decode(self, serialized_value: bytes) -> int: + return int(serialized_value) class Float(ValueArtifact): - """Float-typed artifact. - - Float value artifacts are encoded using Python str() class. However, - Nan and Infinity are handled separately. See string constants in the - class. - """ - TYPE_NAME = 'Float' - - _POSITIVE_INFINITY = float('Inf') - _NEGATIVE_INFINITY = float('-Inf') - - _ENCODED_POSITIVE_INFINITY = 'Infinity' - _ENCODED_NEGATIVE_INFINITY = '-Infinity' - _ENCODED_NAN = 'NaN' - - def encode(self, value: float) -> bytes: - if not isinstance(value, float): - raise TypeError( - f'Expecting float but got value {value} of type {type(value)}' - ) - if math.isinf(value) or math.isnan(value): - logging.warning( - '! The number "%s" may be unsupported by non-python components.', - value) - str_value = str(value) - # Special encoding for infinities and NaN to increase comatibility with - # other languages. - # Decoding works automatically. - if math.isinf(value): - if value >= 0: - str_value = Float._ENCODED_POSITIVE_INFINITY - else: - str_value = Float._ENCODED_NEGATIVE_INFINITY - if math.isnan(value): - str_value = Float._ENCODED_NAN - - return str_value.encode('utf-8') - - def decode(self, serialized_value: bytes) -> float: - result = float(serialized_value) - - # Check that the decoded value exactly matches the encoded string. - # Note that float() can handle bytes, but Decimal() cannot. 
- serialized_string = serialized_value.decode('utf-8') - reserialized_string = str(result) - is_exact = (decimal.Decimal(serialized_string) == - decimal.Decimal(reserialized_string)) - if not is_exact: - logging.warning( - 'The number "%s" has lost precision when converted to float "%s"', - serialized_value, reserialized_string) - - return result + """Float-typed artifact. + + Float value artifacts are encoded using Python str() class. However, + NaN and Infinity are handled separately. See string constants in the + class. + """ + TYPE_NAME = "Float" + + _POSITIVE_INFINITY = float("Inf") + _NEGATIVE_INFINITY = float("-Inf") + + _ENCODED_POSITIVE_INFINITY = "Infinity" + _ENCODED_NEGATIVE_INFINITY = "-Infinity" + _ENCODED_NAN = "NaN" + + def encode(self, value: float) -> bytes: + if not isinstance(value, float): + raise TypeError( + f"Expecting float but got value {value} of type {type(value)}" + ) + if math.isinf(value) or math.isnan(value): + logging.warning( + '! The number "%s" may be unsupported by non-python components.', value + ) + str_value = str(value) + # Special encoding for infinities and NaN to increase compatibility with + # other languages. + # Decoding works automatically. + if math.isinf(value): + if value >= 0: + str_value = Float._ENCODED_POSITIVE_INFINITY + else: + str_value = Float._ENCODED_NEGATIVE_INFINITY + if math.isnan(value): + str_value = Float._ENCODED_NAN + + return str_value.encode("utf-8") + + def decode(self, serialized_value: bytes) -> float: + result = float(serialized_value) + + # Check that the decoded value exactly matches the encoded string. + # Note that float() can handle bytes, but Decimal() cannot. 
+ serialized_string = serialized_value.decode("utf-8") + reserialized_string = str(result) + is_exact = decimal.Decimal(serialized_string) == decimal.Decimal( + reserialized_string + ) + if not is_exact: + logging.warning( + 'The number "%s" has lost precision when converted to float "%s"', + serialized_value, + reserialized_string, + ) + + return result class TransformGraph(_TfxArtifact): - TYPE_NAME = 'TransformGraph' + """TFX first-party component artifact definition.""" + TYPE_NAME = "TransformGraph" class HyperParameters(_TfxArtifact): - TYPE_NAME = 'HyperParameters' + """TFX first-party component artifact definition.""" + TYPE_NAME = "HyperParameters" class TunerResults(_TfxArtifact): - TYPE_NAME = 'TunerResults' + """TFX first-party component artifact definition.""" + TYPE_NAME = "TunerResults" # WIP and subject to change. class DataView(_TfxArtifact): - TYPE_NAME = 'DataView' + """TFX first-party component artifact definition.""" + TYPE_NAME = "DataView" class Config(_TfxArtifact): - TYPE_NAME = 'Config' + """TFX first-party component artifact definition.""" + TYPE_NAME = "Config" + + +__all__ = [ + "Boolean", + "Bytes", + "Config", + "DataView", + "ExampleAnomalies", + "ExampleStatistics", + "ExampleValidationMetrics", + "Examples", + "ExamplesDiff", + "ExternalArtifact", + "Float", + "HyperParameters", + "InferenceResult", + "InfraBlessing", + "Integer", + "JsonValue", + "Model", + "ModelBlessing", + "ModelEvaluation", + "ModelRun", + "PushedModel", + "Schema", + "String", + "TransformCache", + "TransformGraph", + "TunerResults", +] diff --git a/tfx/v1/components/__init__.py b/tfx/v1/components/__init__.py index 48f5acda7a..e7dd355aea 100644 --- a/tfx/v1/components/__init__.py +++ b/tfx/v1/components/__init__.py @@ -34,4 +34,24 @@ from tfx.components.trainer.fn_args_utils import DataAccessor from tfx.components.trainer.fn_args_utils import FnArgs from tfx.components.tuner.component import TunerFnResult + # pylint: enable=g-bad-import-order 
+__all__ = [ + "BulkInferrer", + "CsvExampleGen", + "DataAccessor", + "Evaluator", + "ExampleDiff", + "ExampleValidator", + "FnArgs", + "ImportExampleGen", + "ImportSchemaGen", + "InfraValidator", + "Pusher", + "SchemaGen", + "StatisticsGen", + "Trainer", + "Transform", + "Tuner", + "TunerFnResult", +] diff --git a/tfx/v1/dsl/__init__.py b/tfx/v1/dsl/__init__.py index b205e4a41b..2c3c45b92b 100644 --- a/tfx/v1/dsl/__init__.py +++ b/tfx/v1/dsl/__init__.py @@ -16,8 +16,10 @@ from tfx.dsl.components.common.importer import Importer from tfx.dsl.components.common.resolver import Resolver + # TODO(b/273382055): Conditional should graduate experimental. from tfx.dsl.experimental.conditionals.conditional import Cond + # TODO(b/184980265): move Pipeline implementation to tfx/dsl. from tfx.orchestration.pipeline import ExecutionMode from tfx.orchestration.pipeline import Pipeline @@ -27,3 +29,17 @@ from tfx.v1.dsl import experimental from tfx.v1.dsl import io from tfx.v1.dsl import placeholders + +__all__ = [ + "Artifact", + "Channel", + "Cond", + "ExecutionMode", + "Importer", + "Pipeline", + "Resolver", + "components", + "experimental", + "io", + "placeholders", +] diff --git a/tfx/v1/dsl/components/__init__.py b/tfx/v1/dsl/components/__init__.py index 8984754a95..de50577583 100644 --- a/tfx/v1/dsl/components/__init__.py +++ b/tfx/v1/dsl/components/__init__.py @@ -21,3 +21,13 @@ from tfx.dsl.component.experimental.annotations import OutputDict from tfx.dsl.component.experimental.annotations import Parameter from tfx.dsl.component.experimental.decorators import component + +__all__ = [ + "AsyncOutputArtifact", + "BeamComponentParameter", + "InputArtifact", + "OutputArtifact", + "OutputDict", + "Parameter", + "component", +] diff --git a/tfx/v1/dsl/experimental/__init__.py b/tfx/v1/dsl/experimental/__init__.py index 799755b461..436171ef13 100644 --- a/tfx/v1/dsl/experimental/__init__.py +++ b/tfx/v1/dsl/experimental/__init__.py @@ -14,11 +14,26 @@ """TFX dsl.experimental 
module.""" # pylint: disable=unused-import -from tfx.dsl.component.experimental.container_component import create_container_component +from tfx.dsl.component.experimental.container_component import ( + create_container_component, +) from tfx.dsl.components.common.resolver import ResolverStrategy -from tfx.dsl.input_resolution.strategies.latest_artifact_strategy import LatestArtifactStrategy -from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy +from tfx.dsl.input_resolution.strategies.latest_artifact_strategy import ( + LatestArtifactStrategy, +) +from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import ( + LatestBlessedModelStrategy, +) from tfx.dsl.input_resolution.strategies.span_range_strategy import SpanRangeStrategy # TODO(b/185911128): move RuntimeParameter implementation to tfx/dsl. from tfx.orchestration.data_types import RuntimeParameter + +__all__ = [ + "LatestArtifactStrategy", + "LatestBlessedModelStrategy", + "ResolverStrategy", + "RuntimeParameter", + "SpanRangeStrategy", + "create_container_component", +] diff --git a/tfx/v1/dsl/io/__init__.py b/tfx/v1/dsl/io/__init__.py index 263de250a4..a8ba1257b5 100644 --- a/tfx/v1/dsl/io/__init__.py +++ b/tfx/v1/dsl/io/__init__.py @@ -14,3 +14,5 @@ """TFX DSL I/O module.""" from tfx.v1.dsl.io import fileio + +__all__ = ["fileio"] diff --git a/tfx/v1/dsl/io/fileio.py b/tfx/v1/dsl/io/fileio.py index 034a1b4ae7..6cb1e2f894 100644 --- a/tfx/v1/dsl/io/fileio.py +++ b/tfx/v1/dsl/io/fileio.py @@ -29,3 +29,21 @@ from tfx.dsl.io.fileio import rmtree from tfx.dsl.io.fileio import stat from tfx.dsl.io.fileio import walk + +__all__ = [ + "NotFoundError", + "copy", + "exists", + "glob", + "isdir", + "listdir", + "makedirs", + "mkdir", + "open", + "remove", + "rename", + "rmtree", + "stat", + "walk", + "PathType", +] diff --git a/tfx/v1/dsl/placeholders/__init__.py b/tfx/v1/dsl/placeholders/__init__.py index 8a27c59848..e78707d137 100644 --- 
a/tfx/v1/dsl/placeholders/__init__.py +++ b/tfx/v1/dsl/placeholders/__init__.py @@ -18,3 +18,10 @@ from tfx.dsl.placeholder.placeholder import execution_invocation from tfx.dsl.placeholder.placeholder import input # pylint: disable=redefined-builtin from tfx.dsl.placeholder.placeholder import output + +__all__ = [ + "exec_property", + "execution_invocation", + "input", + "output", +] diff --git a/tfx/v1/extensions/__init__.py b/tfx/v1/extensions/__init__.py index a755a5512f..3cfa2aa31e 100644 --- a/tfx/v1/extensions/__init__.py +++ b/tfx/v1/extensions/__init__.py @@ -15,3 +15,5 @@ from tfx.v1.extensions import google_cloud_ai_platform from tfx.v1.extensions import google_cloud_big_query + +__all__ = ["google_cloud_ai_platform", "google_cloud_big_query"] diff --git a/tfx/v1/extensions/google_cloud_ai_platform/__init__.py b/tfx/v1/extensions/google_cloud_ai_platform/__init__.py index 55f03be40f..1d28a399b3 100644 --- a/tfx/v1/extensions/google_cloud_ai_platform/__init__.py +++ b/tfx/v1/extensions/google_cloud_ai_platform/__init__.py @@ -13,19 +13,41 @@ # limitations under the License. 
"""Google cloud AI platform module.""" -from tfx.extensions.google_cloud_ai_platform.bulk_inferrer.component import CloudAIBulkInferrerComponent as BulkInferrer +from tfx.extensions.google_cloud_ai_platform.bulk_inferrer.component import ( + CloudAIBulkInferrerComponent as BulkInferrer, +) from tfx.extensions.google_cloud_ai_platform.constants import ENABLE_VERTEX_KEY from tfx.extensions.google_cloud_ai_platform.constants import SERVING_ARGS_KEY -from tfx.extensions.google_cloud_ai_platform.constants import VERTEX_CONTAINER_IMAGE_URI_KEY +from tfx.extensions.google_cloud_ai_platform.constants import ( + VERTEX_CONTAINER_IMAGE_URI_KEY, +) from tfx.extensions.google_cloud_ai_platform.constants import VERTEX_REGION_KEY from tfx.extensions.google_cloud_ai_platform.pusher.component import Pusher from tfx.extensions.google_cloud_ai_platform.trainer.component import Trainer + # ENABLE_UCAIP_KEY is deprecated, please use ENABLE_VERTEX_KEY instead from tfx.extensions.google_cloud_ai_platform.trainer.executor import ENABLE_UCAIP_KEY from tfx.extensions.google_cloud_ai_platform.trainer.executor import JOB_ID_KEY from tfx.extensions.google_cloud_ai_platform.trainer.executor import LABELS_KEY from tfx.extensions.google_cloud_ai_platform.trainer.executor import TRAINING_ARGS_KEY + # UCAIP_REGION_KEY is deprecated, please use VERTEX_REGION_KEY instead from tfx.extensions.google_cloud_ai_platform.trainer.executor import UCAIP_REGION_KEY from tfx.extensions.google_cloud_ai_platform.tuner.component import Tuner -from tfx.v1.extensions.google_cloud_ai_platform import experimental +from tfx.v1.extensions.google_cloud_ai_platform import experimental # noqa: F401 + +__all__ = [ + "BulkInferrer", + "Pusher", + "Trainer", + "Tuner", + "ENABLE_UCAIP_KEY", + "ENABLE_VERTEX_KEY", + "JOB_ID_KEY", + "LABELS_KEY", + "SERVING_ARGS_KEY", + "TRAINING_ARGS_KEY", + "UCAIP_REGION_KEY", + "VERTEX_CONTAINER_IMAGE_URI_KEY", + "VERTEX_REGION_KEY", +] diff --git 
a/tfx/v1/extensions/google_cloud_ai_platform/experimental/__init__.py b/tfx/v1/extensions/google_cloud_ai_platform/experimental/__init__.py index 94cb123e5b..40ab1b62b3 100644 --- a/tfx/v1/extensions/google_cloud_ai_platform/experimental/__init__.py +++ b/tfx/v1/extensions/google_cloud_ai_platform/experimental/__init__.py @@ -13,10 +13,25 @@ # limitations under the License. """Types used in Google Cloud AI Platform under experimental stage.""" -from tfx.extensions.google_cloud_ai_platform.bulk_inferrer.executor import SERVING_ARGS_KEY as BULK_INFERRER_SERVING_ARGS_KEY +from tfx.extensions.google_cloud_ai_platform.bulk_inferrer.executor import ( + SERVING_ARGS_KEY as BULK_INFERRER_SERVING_ARGS_KEY, +) from tfx.extensions.google_cloud_ai_platform.constants import ENDPOINT_ARGS_KEY + # PUSHER_SERVING_ARGS_KEY is deprecated. # Please use tfx.extensions.google_cloud_ai_platform.SERVING_ARGS_KEY instead. -from tfx.extensions.google_cloud_ai_platform.constants import SERVING_ARGS_KEY as PUSHER_SERVING_ARGS_KEY -from tfx.extensions.google_cloud_ai_platform.tuner.executor import REMOTE_TRIALS_WORKING_DIR_KEY +from tfx.extensions.google_cloud_ai_platform.constants import ( + SERVING_ARGS_KEY as PUSHER_SERVING_ARGS_KEY, +) +from tfx.extensions.google_cloud_ai_platform.tuner.executor import ( + REMOTE_TRIALS_WORKING_DIR_KEY, +) from tfx.extensions.google_cloud_ai_platform.tuner.executor import TUNING_ARGS_KEY + +__all__ = [ + "BULK_INFERRER_SERVING_ARGS_KEY", + "ENDPOINT_ARGS_KEY", + "PUSHER_SERVING_ARGS_KEY", + "REMOTE_TRIALS_WORKING_DIR_KEY", + "TUNING_ARGS_KEY", +] diff --git a/tfx/v1/extensions/google_cloud_big_query/__init__.py b/tfx/v1/extensions/google_cloud_big_query/__init__.py index af24f885dc..4776abdb62 100644 --- a/tfx/v1/extensions/google_cloud_big_query/__init__.py +++ b/tfx/v1/extensions/google_cloud_big_query/__init__.py @@ -13,6 +13,16 @@ # limitations under the License. 
"""Google Cloud Big Query module.""" -from tfx.extensions.google_cloud_big_query.example_gen.component import BigQueryExampleGen +from tfx.extensions.google_cloud_big_query.example_gen.component import ( + BigQueryExampleGen, +) from tfx.extensions.google_cloud_big_query.pusher.component import Pusher -from tfx.extensions.google_cloud_big_query.pusher.executor import SERVING_ARGS_KEY as PUSHER_SERVING_ARGS_KEY +from tfx.extensions.google_cloud_big_query.pusher.executor import ( + SERVING_ARGS_KEY as PUSHER_SERVING_ARGS_KEY, +) + +__all__ = [ + "BigQueryExampleGen", + "Pusher", + "PUSHER_SERVING_ARGS_KEY", +] diff --git a/tfx/v1/orchestration/__init__.py b/tfx/v1/orchestration/__init__.py index 07d66d54ef..b897747ccd 100644 --- a/tfx/v1/orchestration/__init__.py +++ b/tfx/v1/orchestration/__init__.py @@ -16,3 +16,5 @@ from tfx.orchestration.local.local_dag_runner import LocalDagRunner from tfx.v1.orchestration import experimental from tfx.v1.orchestration import metadata + +__all__ = ["LocalDagRunner", "experimental", "metadata"] diff --git a/tfx/v1/orchestration/experimental/__init__.py b/tfx/v1/orchestration/experimental/__init__.py index 7963c45a1f..4f222b8371 100644 --- a/tfx/v1/orchestration/experimental/__init__.py +++ b/tfx/v1/orchestration/experimental/__init__.py @@ -13,27 +13,49 @@ # limitations under the License. 
"""TFX orchestration.experimental module.""" -try: # pylint: disable=g-statement-before-imports - from tfx.orchestration.kubeflow import kubeflow_dag_runner # pylint: disable=g-import-not-at-top - from tfx.orchestration.kubeflow.decorators import exit_handler # pylint: disable=g-import-not-at-top - from tfx.orchestration.kubeflow.decorators import FinalStatusStr # pylint: disable=g-import-not-at-top - from tfx.utils import telemetry_utils # pylint: disable=g-import-not-at-top +try: + from tfx.orchestration.kubeflow import ( + kubeflow_dag_runner, + ) + from tfx.orchestration.kubeflow.decorators import ( + exit_handler, + ) + from tfx.orchestration.kubeflow.decorators import ( + FinalStatusStr, + ) + from tfx.utils import telemetry_utils - KubeflowDagRunner = kubeflow_dag_runner.KubeflowDagRunner - KubeflowDagRunnerConfig = kubeflow_dag_runner.KubeflowDagRunnerConfig - get_default_kubeflow_metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config - LABEL_KFP_SDK_ENV = telemetry_utils.LABEL_KFP_SDK_ENV + KubeflowDagRunner = kubeflow_dag_runner.KubeflowDagRunner + KubeflowDagRunnerConfig = kubeflow_dag_runner.KubeflowDagRunnerConfig + get_default_kubeflow_metadata_config = ( + kubeflow_dag_runner.get_default_kubeflow_metadata_config + ) + LABEL_KFP_SDK_ENV = telemetry_utils.LABEL_KFP_SDK_ENV - del telemetry_utils - del kubeflow_dag_runner + del telemetry_utils + del kubeflow_dag_runner except ImportError: # Import will fail without kfp package. 
- pass + pass try: - from tfx.orchestration.kubeflow.v2 import kubeflow_v2_dag_runner # pylint: disable=g-import-not-at-top + from tfx.orchestration.kubeflow.v2 import ( + kubeflow_v2_dag_runner, + ) - KubeflowV2DagRunner = kubeflow_v2_dag_runner.KubeflowV2DagRunner - KubeflowV2DagRunnerConfig = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig - del kubeflow_v2_dag_runner + KubeflowV2DagRunner = kubeflow_v2_dag_runner.KubeflowV2DagRunner + KubeflowV2DagRunnerConfig = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig + del kubeflow_v2_dag_runner except ImportError: # Import will fail without kfp package. - pass + pass + + +__all__ = [ + "FinalStatusStr", + "KubeflowDagRunner", + "KubeflowDagRunnerConfig", + "KubeflowV2DagRunner", + "KubeflowV2DagRunnerConfig", + "LABEL_KFP_SDK_ENV", + "exit_handler", + "get_default_kubeflow_metadata_config", +] diff --git a/tfx/v1/orchestration/metadata.py b/tfx/v1/orchestration/metadata.py index c7eb057f94..2eaaa2f6d8 100644 --- a/tfx/v1/orchestration/metadata.py +++ b/tfx/v1/orchestration/metadata.py @@ -18,3 +18,9 @@ ConnectionConfigType = metadata.ConnectionConfigType mysql_metadata_connection_config = metadata.mysql_metadata_connection_config sqlite_metadata_connection_config = metadata.sqlite_metadata_connection_config + +__all__ = [ + "mysql_metadata_connection_config", + "sqlite_metadata_connection_config", + "ConnectionConfigType", +] diff --git a/tfx/v1/proto/__init__.py b/tfx/v1/proto/__init__.py index eb6bdb30a7..47eebef596 100644 --- a/tfx/v1/proto/__init__.py +++ b/tfx/v1/proto/__init__.py @@ -13,30 +13,52 @@ # limitations under the License. 
"""TFX proto module.""" -from tfx.proto import bulk_inferrer_pb2 -from tfx.proto import distribution_validator_pb2 -from tfx.proto import evaluator_pb2 -from tfx.proto import example_diff_pb2 -from tfx.proto import example_gen_pb2 -from tfx.proto import infra_validator_pb2 -from tfx.proto import pusher_pb2 -from tfx.proto import range_config_pb2 -from tfx.proto import trainer_pb2 -from tfx.proto import transform_pb2 -from tfx.proto import tuner_pb2 - +from tfx.proto.bulk_inferrer_pb2 import ( + ClassifyOutput, + DataSpec, + ModelSpec, + OutputColumnsSpec, + OutputExampleSpec, + PredictOutput, + PredictOutputCol, + RegressOutput, +) +from tfx.proto.distribution_validator_pb2 import ( + DistributionValidatorConfig, + FeatureComparator, +) +from tfx.proto.evaluator_pb2 import FeatureSlicingSpec, SingleSlicingSpec +from tfx.proto.example_diff_pb2 import ( + ExampleDiffConfig, + PairedExampleSkew, +) +from tfx.proto.example_gen_pb2 import ( + CustomConfig, + Input, + Output, + PayloadFormat, + SplitConfig, +) +from tfx.proto.infra_validator_pb2 import ( + EnvVar, + EnvVarSource, + KubernetesConfig, + LocalDockerConfig, + PodOverrides, + RequestSpec, + SecretKeySelector, + ServingSpec, + TensorFlowServing, + TensorFlowServingRequestSpec, + ValidationSpec, +) +from tfx.proto.pusher_pb2 import PushDestination, Versioning +from tfx.proto.range_config_pb2 import RangeConfig, RollingRange, StaticRange +from tfx.proto.trainer_pb2 import EvalArgs, TrainArgs +from tfx.proto.transform_pb2 import SplitsConfig +from tfx.proto.tuner_pb2 import TuneArgs from tfx.v1.proto import orchestration -ModelSpec = bulk_inferrer_pb2.ModelSpec -DataSpec = bulk_inferrer_pb2.DataSpec -OutputExampleSpec = bulk_inferrer_pb2.OutputExampleSpec -OutputColumnsSpec = bulk_inferrer_pb2.OutputColumnsSpec -ClassifyOutput = bulk_inferrer_pb2.ClassifyOutput -RegressOutput = bulk_inferrer_pb2.RegressOutput -PredictOutput = bulk_inferrer_pb2.PredictOutput -PredictOutputCol = bulk_inferrer_pb2.PredictOutputCol 
-del bulk_inferrer_pb2 - ModelSpec.__doc__ = """ Specifies the signature name to run the inference in `components.BulkInferrer`. """ @@ -71,10 +93,6 @@ Proto type of output_columns under `proto.PredictOutput`. """ -FeatureSlicingSpec = evaluator_pb2.FeatureSlicingSpec -SingleSlicingSpec = evaluator_pb2.SingleSlicingSpec -del evaluator_pb2 - FeatureSlicingSpec.__doc__ = """ Slices corresponding to data set in `components.Evaluator`. """ @@ -84,13 +102,6 @@ An empty proto means we do not slice on features (i.e. use the entire data set). """ -CustomConfig = example_gen_pb2.CustomConfig -Input = example_gen_pb2.Input -Output = example_gen_pb2.Output -SplitConfig = example_gen_pb2.SplitConfig -PayloadFormat = example_gen_pb2.PayloadFormat -del example_gen_pb2 - CustomConfig.__doc__ = """ Optional specified configuration for ExampleGen components. """ @@ -111,19 +122,6 @@ Enum to indicate payload format ExampleGen produces. """ -ServingSpec = infra_validator_pb2.ServingSpec -ValidationSpec = infra_validator_pb2.ValidationSpec -TensorFlowServing = infra_validator_pb2.TensorFlowServing -LocalDockerConfig = infra_validator_pb2.LocalDockerConfig -KubernetesConfig = infra_validator_pb2.KubernetesConfig -PodOverrides = infra_validator_pb2.PodOverrides -EnvVar = infra_validator_pb2.EnvVar -EnvVarSource = infra_validator_pb2.EnvVarSource -SecretKeySelector = infra_validator_pb2.SecretKeySelector -RequestSpec = infra_validator_pb2.RequestSpec -TensorFlowServingRequestSpec = infra_validator_pb2.TensorFlowServingRequestSpec -del infra_validator_pb2 - ServingSpec.__doc__ = """ Defines an environment of the validating infrastructure in `components.InfraValidator`. """ @@ -171,11 +169,6 @@ Request spec for building TF Serving requests. """ -PushDestination = pusher_pb2.PushDestination -Versioning = pusher_pb2.Versioning -Filesystem = pusher_pb2.PushDestination.Filesystem -del pusher_pb2 - PushDestination.__doc__ = """ Defines the destination of pusher in `components.Pusher`. 
""" @@ -185,15 +178,10 @@ For example TF Serving only accepts an integer version that is monotonically increasing. """ -Filesystem.__doc__ = """ +PushDestination.Filesystem.__doc__ = """ File system based destination definition. """ -RangeConfig = range_config_pb2.RangeConfig -RollingRange = range_config_pb2.RollingRange -StaticRange = range_config_pb2.StaticRange -del range_config_pb2 - RangeConfig.__doc__ = """ RangeConfig is an abstract proto which can be used to describe ranges for different entities in TFX Pipeline. """ @@ -214,10 +202,6 @@ Note that both numbers should be specified for `proto.StaticRange`. """ -TrainArgs = trainer_pb2.TrainArgs -EvalArgs = trainer_pb2.EvalArgs -del trainer_pb2 - TrainArgs.__doc__ = """ Args specific to training in `components.Trainer`. """ @@ -226,40 +210,68 @@ Args specific to eval in `components.Trainer`. """ -SplitsConfig = transform_pb2.SplitsConfig -del transform_pb2 - SplitsConfig.__doc__ = """ Defines the splits config in `components.Transform`. """ -TuneArgs = tuner_pb2.TuneArgs -del tuner_pb2 - TuneArgs.__doc__ = """ Args specific to tuning in `components.Tuner`. """ -ExampleDiffConfig = example_diff_pb2.ExampleDiffConfig - ExampleDiffConfig.__doc__ = """ Configurations related to Example Diff. """ -FeatureComparator = distribution_validator_pb2.FeatureComparator - FeatureComparator.__doc__ = """ Per feature configuration in Distribution Validator. """ -DistributionValidatorConfig = distribution_validator_pb2.DistributionValidatorConfig - DistributionValidatorConfig.__doc__ = """ Configurations related to Distribution Validator. """ -PairedExampleSkew = example_diff_pb2.PairedExampleSkew - PairedExampleSkew.__doc__ = """ Configurations related to Example Diff on feature pairing level. 
-""" \ No newline at end of file +""" + +Filesystem = PushDestination.Filesystem + +__all__ = [ + "orchestration", + "ClassifyOutput", + "CustomConfig", + "DataSpec", + "DistributionValidatorConfig", + "EnvVar", + "EnvVarSource", + "EvalArgs", + "ExampleDiffConfig", + "FeatureComparator", + "FeatureSlicingSpec", + "Filesystem", + "Input", + "KubernetesConfig", + "LocalDockerConfig", + "ModelSpec", + "Output", + "OutputColumnsSpec", + "OutputExampleSpec", + "PairedExampleSkew", + "PodOverrides", + "PredictOutput", + "PredictOutputCol", + "PushDestination", + "RangeConfig", + "RegressOutput", + "RequestSpec", + "RollingRange", + "SecretKeySelector", + "ServingSpec", + "SingleSlicingSpec", + "SplitConfig", + "SplitsConfig", + "StaticRange", + "TensorFlowServing", + "TensorFlowServingRequestSpec", + "TrainArgs", + "TuneArgs", + "ValidationSpec", +] diff --git a/tfx/v1/proto/orchestration/__init__.py b/tfx/v1/proto/orchestration/__init__.py index bbb3bec9de..10aec6594d 100644 --- a/tfx/v1/proto/orchestration/__init__.py +++ b/tfx/v1/proto/orchestration/__init__.py @@ -16,3 +16,5 @@ from tfx.proto.orchestration import run_state_pb2 RunState = run_state_pb2.RunState + +__all__ = ["RunState"] diff --git a/tfx/v1/testing/__init__.py b/tfx/v1/testing/__init__.py index 1c268295fa..672f68335e 100644 --- a/tfx/v1/testing/__init__.py +++ b/tfx/v1/testing/__init__.py @@ -13,8 +13,6 @@ # limitations under the License. 
"""Public testing modules for TFX.""" -from tfx.types import channel_utils +from tfx.types.channel_utils import ChannelForTesting as Channel -Channel = channel_utils.ChannelForTesting - -del channel_utils +__all__ = ["Channel"] diff --git a/tfx/v1/types/__init__.py b/tfx/v1/types/__init__.py index 526c9dac7f..29e15fa8d2 100644 --- a/tfx/v1/types/__init__.py +++ b/tfx/v1/types/__init__.py @@ -23,3 +23,13 @@ from tfx.dsl.components.base.base_node import BaseNode from tfx.types.channel import BaseChannel from tfx.v1.types import standard_artifacts + +__all__ = [ + "standard_artifacts", + "BaseBeamComponent", + "BaseChannel", + "BaseComponent", + "BaseFunctionalComponent", + "BaseFunctionalComponentFactory", + "BaseNode", +] diff --git a/tfx/v1/types/standard_artifacts.py b/tfx/v1/types/standard_artifacts.py index 1cb8716342..155ce36ac6 100644 --- a/tfx/v1/types/standard_artifacts.py +++ b/tfx/v1/types/standard_artifacts.py @@ -13,27 +13,52 @@ # limitations under the License. """Public API for standard_artifacts.""" -from tfx.types import standard_artifacts - -Examples = standard_artifacts.Examples -ExampleAnomalies = standard_artifacts.ExampleAnomalies -ExampleStatistics = standard_artifacts.ExampleStatistics -InferenceResult = standard_artifacts.InferenceResult -InfraBlessing = standard_artifacts.InfraBlessing -Model = standard_artifacts.Model -ModelRun = standard_artifacts.ModelRun -ModelBlessing = standard_artifacts.ModelBlessing -ModelEvaluation = standard_artifacts.ModelEvaluation -PushedModel = standard_artifacts.PushedModel -Schema = standard_artifacts.Schema -TransformCache = standard_artifacts.TransformCache -TransformGraph = standard_artifacts.TransformGraph -HyperParameters = standard_artifacts.HyperParameters +from tfx.types.standard_artifacts import ( + Examples, + ExampleAnomalies, + ExampleStatistics, + InferenceResult, + InfraBlessing, + Model, + ModelRun, + ModelBlessing, + ModelEvaluation, + PushedModel, + Schema, + TransformCache, + TransformGraph, 
+ HyperParameters, +) # Artifacts of small scalar-values. -Bytes = standard_artifacts.Bytes -Float = standard_artifacts.Float -Integer = standard_artifacts.Integer -String = standard_artifacts.String -Boolean = standard_artifacts.Boolean -JsonValue = standard_artifacts.JsonValue +from tfx.types.standard_artifacts import ( + Bytes, + Float, + Integer, + String, + Boolean, + JsonValue, +) + +__all__ = [ + "Boolean", + "Bytes", + "ExampleAnomalies", + "ExampleStatistics", + "Examples", + "Float", + "HyperParameters", + "InferenceResult", + "InfraBlessing", + "Integer", + "JsonValue", + "Model", + "ModelBlessing", + "ModelEvaluation", + "ModelRun", + "PushedModel", + "Schema", + "String", + "TransformCache", + "TransformGraph", +] diff --git a/tfx/v1/utils/__init__.py b/tfx/v1/utils/__init__.py index 3c09143c28..d6d86e49df 100644 --- a/tfx/v1/utils/__init__.py +++ b/tfx/v1/utils/__init__.py @@ -15,3 +15,5 @@ from tfx.utils.io_utils import parse_pbtxt_file from tfx.utils.json_utils import JsonableType + +__all__ = ["JsonableType", "parse_pbtxt_file"]