-
Notifications
You must be signed in to change notification settings - Fork 55
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2f425d5
commit 2ff35cc
Showing
17 changed files
with
449 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
.databricks/ | ||
build/ | ||
dist/ | ||
__pycache__/ | ||
*.egg-info | ||
.venv/ | ||
scratch/** | ||
!scratch/README.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Typings for Pylance in Visual Studio Code | ||
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md | ||
from databricks.sdk.runtime import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# demo_for_dbas_python | ||
|
||
The 'demo_for_dbas_python' project was generated by using the default-python template. | ||
|
||
## Getting started | ||
|
||
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html | ||
|
||
2. Authenticate to your Databricks workspace, if you have not done so already: | ||
``` | ||
$ databricks configure | ||
``` | ||
|
||
3. To deploy a development copy of this project, type: | ||
``` | ||
$ databricks bundle deploy --target dev | ||
``` | ||
(Note that "dev" is the default target, so the `--target` parameter | ||
is optional here.) | ||
|
||
This deploys everything that's defined for this project. | ||
For example, the default template would deploy a job called | ||
`[dev yourname] demo_for_dbas_python_job` to your workspace. | ||
You can find that job by opening your workspace and clicking on **Workflows**. | ||
|
||
4. Similarly, to deploy a production copy, type: | ||
``` | ||
$ databricks bundle deploy --target prod | ||
``` | ||
|
||
Note that the default job from the template has a schedule that runs every day | ||
(defined in resources/demo_for_dbas_python_job.yml). The schedule | ||
is paused when deploying in development mode (see | ||
https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). | ||
|
||
5. To run a job or pipeline, use the "run" command: | ||
``` | ||
$ databricks bundle run | ||
``` | ||
|
||
6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from | ||
https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for | ||
**Databricks Connect** for instructions on running the included Python code from a different IDE. | ||
|
||
7. For documentation on the Databricks asset bundles format used | ||
for this project, and for CI/CD configuration, see | ||
https://docs.databricks.com/dev-tools/bundles/index.html. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# This is a Databricks asset bundle definition for demo_for_dbas_python. | ||
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. | ||
bundle: | ||
name: demo_for_dbas_python | ||
|
||
include: | ||
- resources/*.yml | ||
|
||
targets: | ||
# The 'dev' target, for development purposes. This target is the default. | ||
dev: | ||
# We use 'mode: development' to indicate this is a personal development copy: | ||
# - Deployed resources get prefixed with '[dev my_user_name]' | ||
# - Any job schedules and triggers are paused by default | ||
# - The 'development' mode is used for Delta Live Tables pipelines | ||
mode: development | ||
default: true | ||
workspace: | ||
host: https://adb-2541733722036151.11.azuredatabricks.net | ||
|
||
## Optionally, there could be a 'staging' target here. | ||
## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) | ||
# | ||
# staging: | ||
# workspace: | ||
# host: https://adb-2541733722036151.11.azuredatabricks.net | ||
|
||
# The 'prod' target, used for production deployment. | ||
prod: | ||
# We use 'mode: production' to indicate this is a production deployment. | ||
# Doing so enables strict verification of the settings below. | ||
mode: production | ||
workspace: | ||
host: https://adb-2541733722036151.11.azuredatabricks.net | ||
# We always use /Users/satish.muralikrishnan@databricks.com for all resources to make sure we only have a single copy. | ||
# If this path results in an error, please make sure you have a recent version of the CLI installed. | ||
root_path: /Users/satish.muralikrishnan@databricks.com/.bundle/${bundle.name}/${bundle.target} | ||
run_as: | ||
# This runs as satish.muralikrishnan@databricks.com in production. We could also use a service principal here, | ||
# see https://docs.databricks.com/dev-tools/bundles/permissions.html. | ||
user_name: satish.muralikrishnan@databricks.com |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Fixtures | ||
|
||
This folder is reserved for fixtures, such as CSV files. | ||
|
||
Below is an example of how to load fixtures as a data frame: | ||
|
||
``` | ||
import pandas as pd | ||
import os | ||
|
||
def get_absolute_path(*relative_parts): | ||
if 'dbutils' in globals(): | ||
base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore | ||
path = os.path.normpath(os.path.join(base_dir, *relative_parts)) | ||
return path if path.startswith("/Workspace") else "/Workspace" + path | ||
else: | ||
return os.path.join(*relative_parts) | ||
|
||
csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") | ||
df = pd.read_csv(csv_file) | ||
display(df) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[pytest] | ||
testpaths = tests | ||
pythonpath = src |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
## requirements-dev.txt: dependencies for local development. | ||
## | ||
## For defining dependencies used by jobs in Databricks Workflows, see | ||
## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html | ||
|
||
## Add code completion support for DLT | ||
databricks-dlt | ||
|
||
## pytest is the default package used for testing | ||
pytest | ||
|
||
## Dependencies for building wheel files | ||
setuptools | ||
wheel | ||
|
||
## databricks-connect can be used to run parts of this project locally. | ||
## See https://docs.databricks.com/dev-tools/databricks-connect.html. | ||
## | ||
## databricks-connect is automatically installed if you're using the Databricks | ||
## extension for Visual Studio Code | ||
## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). | ||
## | ||
## To manually install databricks-connect, either follow the instructions | ||
## at https://docs.databricks.com/dev-tools/databricks-connect.html | ||
## to install the package system-wide. Or uncomment the line below to install a | ||
## version of db-connect that corresponds to the Databricks Runtime version used | ||
## for this project. | ||
# | ||
# databricks-connect>=13.3,<13.4 |
48 changes: 48 additions & 0 deletions
48
demo_for_dbas_python/resources/demo_for_dbas_python_job.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# The main job for demo_for_dbas_python. | ||
resources: | ||
jobs: | ||
demo_for_dbas_python_job: | ||
name: demo_for_dbas_python_job | ||
|
||
schedule: | ||
# Run every day at 8:37 AM | ||
quartz_cron_expression: '44 37 8 * * ?' | ||
timezone_id: Europe/Amsterdam | ||
|
||
email_notifications: | ||
on_failure: | ||
- satish.muralikrishnan@databricks.com | ||
|
||
tasks: | ||
- task_key: notebook_task | ||
job_cluster_key: job_cluster | ||
notebook_task: | ||
notebook_path: ../src/notebook.ipynb | ||
|
||
- task_key: refresh_pipeline | ||
depends_on: | ||
- task_key: notebook_task | ||
pipeline_task: | ||
pipeline_id: ${resources.pipelines.demo_for_dbas_python_pipeline.id} | ||
|
||
- task_key: main_task | ||
depends_on: | ||
- task_key: refresh_pipeline | ||
job_cluster_key: job_cluster | ||
python_wheel_task: | ||
package_name: demo_for_dbas_python | ||
entry_point: main | ||
libraries: | ||
# By default we just include the .whl file generated for the demo_for_dbas_python package. | ||
# See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html | ||
# for more information on how to add other libraries. | ||
- whl: ../dist/*.whl | ||
|
||
job_clusters: | ||
- job_cluster_key: job_cluster | ||
new_cluster: | ||
spark_version: 13.3.x-scala2.12 | ||
node_type_id: Standard_D3_v2 | ||
autoscale: | ||
min_workers: 1 | ||
max_workers: 4 |
12 changes: 12 additions & 0 deletions
12
demo_for_dbas_python/resources/demo_for_dbas_python_pipeline.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# The main pipeline for demo_for_dbas_python | ||
resources:
  pipelines:
    demo_for_dbas_python_pipeline:
      name: demo_for_dbas_python_pipeline
      # ${bundle.target} is the current name of the deployment-target substitution;
      # ${bundle.environment} is its deprecated alias.
      target: demo_for_dbas_python_${bundle.target}
      libraries:
        - notebook:
            path: ../src/dlt_pipeline.ipynb

      configuration:
        bundle.sourcePath: /Workspace/${workspace.file_path}/src
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# scratch | ||
|
||
This folder is reserved for personal, exploratory notebooks. | ||
By default these are not committed to Git, as 'scratch' is listed in .gitignore. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
""" | ||
setup.py configuration script describing how to build and package this project. | ||
This file is primarily used by the setuptools library and typically should not | ||
be executed directly. See README.md for how to deploy, test, and run | ||
the demo_for_dbas_python project. | ||
""" | ||
from setuptools import setup, find_packages | ||
|
||
import sys | ||
sys.path.append('./src') | ||
|
||
import datetime | ||
import demo_for_dbas_python | ||
|
||
setup(
    name="demo_for_dbas_python",
    # We use a UTC build timestamp as the local version identifier
    # (https://peps.python.org/pep-0440/#local-version-identifiers)
    # to ensure that changes to the wheel package are picked up when used on
    # all-purpose clusters.
    # datetime.now(timezone.utc) is used instead of datetime.utcnow(), which is
    # deprecated since Python 3.12; the formatted timestamp is the same.
    version=(
        demo_for_dbas_python.__version__
        + "+"
        + datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d.%H%M%S")
    ),
    url="https://databricks.com",
    author="satish.muralikrishnan@databricks.com",
    description="wheel file based on demo_for_dbas_python/src",
    packages=find_packages(where='./src'),
    package_dir={'': 'src'},
    entry_points={
        # Exposes demo_for_dbas_python.main:main under the entry point name "main".
        "packages": [
            "main=demo_for_dbas_python.main:main"
        ]
    },
    install_requires=[
        # Dependencies in case the output wheel file is used as a library dependency.
        # For defining dependencies, when this package is used in Databricks, see:
        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
        "setuptools"
    ],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__version__ = "0.0.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from pyspark.sql import SparkSession, DataFrame | ||
|
||
def get_taxis(spark: SparkSession) -> DataFrame:
    """Read the ``samples.nyctaxi.trips`` table through the given session.

    Args:
        spark: Session whose reader is used to load the table.

    Returns:
        The result of ``spark.read.table("samples.nyctaxi.trips")``.
    """
    reader = spark.read
    trips = reader.table("samples.nyctaxi.trips")
    return trips
|
||
|
||
# Create a new Databricks Connect session. If this fails, | ||
# check that you have configured Databricks Connect correctly. | ||
# See https://docs.databricks.com/dev-tools/databricks-connect.html. | ||
def get_spark() -> SparkSession:
    """Return a Spark session, preferring Databricks Connect when importable.

    The Databricks Connect session is requested first; if an ``ImportError``
    is raised while doing so, the plain ``SparkSession`` builder is used
    instead.

    Returns:
        The session produced by whichever builder succeeded.
    """
    try:
        from databricks.connect import DatabricksSession
        session = DatabricksSession.builder.getOrCreate()
    except ImportError:
        # Databricks Connect is not importable here; use the regular builder.
        session = SparkSession.builder.getOrCreate()
    return session
|
||
def main():
    """Fetch the taxi trips table and call ``show(5)`` on it."""
    session = get_spark()
    taxis = get_taxis(session)
    taxis.show(5)


if __name__ == '__main__':
    main()
Oops, something went wrong.