From ef081feb1e8077cb1238fdce87646d3aaf5b12db Mon Sep 17 00:00:00 2001 From: Jessica <12jessicasmith34@gmail.com> Date: Tue, 19 Dec 2023 14:55:59 -0600 Subject: [PATCH 1/4] avoid duplicating https:// schema if host already contains it, add test for this Signed-off-by: Jessica <12jessicasmith34@gmail.com> --- src/databricks/sql/thrift_backend.py | 28 +++++++++++++--------------- tests/unit/test_thrift_backend.py | 6 ++++++ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 288c3e10..18cc14ab 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -1,40 +1,36 @@ -from decimal import Decimal import errno import logging import math +import threading import time import uuid -import threading +from decimal import Decimal from ssl import CERT_NONE, CERT_REQUIRED, create_default_context from typing import List, Union import pyarrow -import thrift.transport.THttpClient import thrift.protocol.TBinaryProtocol +import thrift.transport.THttpClient import thrift.transport.TSocket import thrift.transport.TTransport - import urllib3.exceptions import databricks.sql.auth.thrift_http_client -from databricks.sql.auth.thrift_http_client import CommandType -from databricks.sql.auth.authenticators import AuthProvider -from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes from databricks.sql import * +from databricks.sql.auth.authenticators import AuthProvider +from databricks.sql.auth.thrift_http_client import CommandType from databricks.sql.exc import MaxRetryDurationError -from databricks.sql.thrift_api.TCLIService.TCLIService import ( - Client as TCLIServiceClient, -) - +from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes +from databricks.sql.thrift_api.TCLIService.TCLIService import Client as TCLIServiceClient from databricks.sql.utils import ( ExecuteResponse, - _bound, - RequestErrorInfo, NoRetryReason, + RequestErrorInfo, ResultSetQueueFactory, + _bound, convert_arrow_based_set_to_arrow_table, - convert_decimals_in_arrow_table, convert_column_based_set_to_arrow_table, + convert_decimals_in_arrow_table, ) logger = logging.getLogger(__name__) @@ -141,9 +137,11 @@ def __init__( if kwargs.get("_connection_uri"): uri = kwargs.get("_connection_uri") elif server_hostname and http_path: - uri = "https://{host}:{port}/{path}".format( + uri = "{host}:{port}/{path}".format( host=server_hostname, port=port, path=http_path.lstrip("/") ) + if not uri.startswith("https://"): + uri = "https://" + uri else: raise ValueError("No valid connection settings.") diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index e641ed21..1cef15ba 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -212,6 +212,12 @@ def test_port_and_host_are_respected(self, t_http_client_class): self.assertEqual(t_http_client_class.call_args[1]["uri_or_host"], "https://hostname:123/path_value") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") + def test_host_with_https_does_not_duplicate(self, t_http_client_class): + ThriftBackend("https://hostname", 123, "path_value", [], auth_provider=AuthProvider()) + self.assertEqual(t_http_client_class.call_args[1]["uri_or_host"], + "https://hostname:123/path_value") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") def test_socket_timeout_is_propagated(self, t_http_client_class): ThriftBackend("hostname", 123, "path_value", [], auth_provider=AuthProvider(), _socket_timeout=129) From 4646cfb8ed4c608ea88ee3bc63180275a7a51853 Mon Sep 17 00:00:00 2001 From: Jessica <12jessicasmith34@gmail.com> Date: Tue, 19 Dec 2023 15:01:14 -0600 Subject: [PATCH 2/4] handle trailing backslash in server hostname + test Signed-off-by: Jessica <12jessicasmith34@gmail.com> --- src/databricks/sql/thrift_backend.py | 2 +- tests/unit/test_thrift_backend.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 18cc14ab..c4193307 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -138,7 +138,7 @@ def __init__( uri = kwargs.get("_connection_uri") elif server_hostname and http_path: uri = "{host}:{port}/{path}".format( - host=server_hostname, port=port, path=http_path.lstrip("/") + host=server_hostname.rstrip("/"), port=port, path=http_path.lstrip("/") ) if not uri.startswith("https://"): uri = "https://" + uri diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 1cef15ba..92c664a0 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -217,6 +217,12 @@ def test_host_with_https_does_not_duplicate(self, t_http_client_class): ThriftBackend("https://hostname", 123, "path_value", [], auth_provider=AuthProvider()) self.assertEqual(t_http_client_class.call_args[1]["uri_or_host"], "https://hostname:123/path_value") + + @patch("databricks.sql.auth.thrift_http_client.THttpClient") + def test_host_with_trailing_backslash_does_not_duplicate(self, t_http_client_class): + ThriftBackend("https://hostname/", 123, "path_value", [], auth_provider=AuthProvider()) + self.assertEqual(t_http_client_class.call_args[1]["uri_or_host"], + "https://hostname:123/path_value") @patch("databricks.sql.auth.thrift_http_client.THttpClient") def test_socket_timeout_is_propagated(self, t_http_client_class): From 4c3df5035e1cf9fbbe1a3977545b2d9a68319b19 Mon Sep 17 00:00:00 2001 From: Jessica <12jessicasmith34@gmail.com> Date: Tue, 19 Dec 2023 15:13:16 -0600 Subject: [PATCH 3/4] revert formatting changes Signed-off-by: Jessica <12jessicasmith34@gmail.com> --- src/databricks/sql/thrift_backend.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index c4193307..69ac760a 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -1,36 +1,40 @@ +from decimal import Decimal import errno import logging import math -import threading import time import uuid -from decimal import Decimal +import threading from ssl import CERT_NONE, CERT_REQUIRED, create_default_context from typing import List, Union import pyarrow -import thrift.protocol.TBinaryProtocol import thrift.transport.THttpClient +import thrift.protocol.TBinaryProtocol import thrift.transport.TSocket import thrift.transport.TTransport + import urllib3.exceptions import databricks.sql.auth.thrift_http_client -from databricks.sql import * -from databricks.sql.auth.authenticators import AuthProvider from databricks.sql.auth.thrift_http_client import CommandType -from databricks.sql.exc import MaxRetryDurationError +from databricks.sql.auth.authenticators import AuthProvider from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes -from databricks.sql.thrift_api.TCLIService.TCLIService import Client as TCLIServiceClient +from databricks.sql import * +from databricks.sql.exc import MaxRetryDurationError +from databricks.sql.thrift_api.TCLIService.TCLIService import ( + Client as TCLIServiceClient, +) + from databricks.sql.utils import ( ExecuteResponse, - NoRetryReason, + _bound, RequestErrorInfo, + NoRetryReason, ResultSetQueueFactory, - _bound, convert_arrow_based_set_to_arrow_table, - convert_column_based_set_to_arrow_table, convert_decimals_in_arrow_table, + convert_column_based_set_to_arrow_table, ) logger = logging.getLogger(__name__) From 2993aa2468a73c4c3bc2f3a86cbb346de69f28c7 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 23 Jan 2024 13:37:25 -0500 Subject: [PATCH 4/4] Update the changelog Signed-off-by: Jesse Whitehouse --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c15f2f9..18b2208c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Release History +# 3.1.0 (TBD) + +- Fix: `server_hostname` URIs that included `https://` would raise an exception + ## 3.0.1 (2023-12-01) - Other: updated docstring comment about default parameterization approach (#287)