+lakehouse_engine.dq_processors.custom_expectations.expect_column_pair_a_to_be_not_equal_to_b
+
+ Expectation to check if column 'a' is not equal to column 'b'.
"""Expectation to check if column 'a' is not equal to column 'b'."""

from typing import Any, Dict, Optional

from great_expectations.core import ExpectationConfiguration
from great_expectations.execution_engine import ExecutionEngine, SparkDFExecutionEngine
from great_expectations.expectations.expectation import ColumnPairMapExpectation
from great_expectations.expectations.metrics.map_metric_provider import (
    ColumnPairMapMetricProvider,
    column_pair_condition_partial,
)

from lakehouse_engine.utils.expectations_utils import validate_result


class ColumnPairCustom(ColumnPairMapMetricProvider):
    """Metric provider backing the "column A is not equal to column B" check.

    A row meets the condition when at least one of the two values is not
    null and the two values differ.
    """

    # Same string as `ExpectColumnPairAToBeNotEqualToB.map_metric`.
    condition_metric_name = "column_pair_values.a_not_equal_to_b"
    condition_domain_keys = (
        "batch_id",
        "table",
        "column_A",
        "column_B",
        "ignore_row_if",
    )
    # The condition takes no extra value parameters.
    condition_value_keys = ()

    @column_pair_condition_partial(engine=SparkDFExecutionEngine)
    def _spark(
        self: ColumnPairMapMetricProvider,
        column_A: Any,
        column_B: Any,
        **kwargs: dict,
    ) -> Any:
        """Build the row-level condition for the Spark execution engine.

        Args:
            column_A: First column of the pair (presumably a pyspark Column;
                only `isNotNull()` and `!=` are used on it).
            column_B: Second column of the pair (same assumption).
            kwargs: dict with additional parameters (unused here).

        Returns:
            The combined condition: at least one side is not null AND the
            two sides differ.
        """
        # NOTE(review): under Spark's null semantics `!=` likely yields NULL
        # when exactly one side is null - confirm rows with a single null
        # value are classified as intended.
        at_least_one_present = column_A.isNotNull() | column_B.isNotNull()
        values_differ = column_A != column_B
        return at_least_one_present & values_differ


class ExpectColumnPairAToBeNotEqualToB(ColumnPairMapExpectation):
    """Expect values in column A to be not equal to column B.

    Args:
        column_A: The first column name.
        column_B: The second column name.

    Keyword Args:
        - allow_cross_type_comparisons: If True, allow
            comparisons between types (e.g. integer and string).
            Otherwise, attempting such comparisons will raise an exception.
        - ignore_row_if: "both_values_are_missing",
            "either_value_is_missing", "neither" (default).
        - mostly: Defaults to 1.0.
        - result_format: Which output mode to use:
            `BOOLEAN_ONLY`, `BASIC` (default), `COMPLETE`, or `SUMMARY`.
        - include_config: If True (default), then include the expectation config
            as part of the result object.
        - catch_exceptions: If True, then catch exceptions and
            include them as part of the result object. Default: False.
        - meta: A JSON-serializable dictionary (nesting allowed)
            that will be included in the output without modification.

    Returns:
        An ExpectationSuiteValidationResult.
    """

    # Example dataset plus one failing case (a vs b, identical values) and
    # one passing case (a vs c, all values differ).
    examples = [
        {
            "dataset_name": "Test Dataset",
            "data": [
                {
                    "data": {
                        "a": ["IE4019", "IM6092", "IE1405"],
                        "b": ["IE4019", "IM6092", "IE1405"],
                        "c": ["IE1404", "IN6192", "842075"],
                    },
                    "schemas": {
                        "spark": {
                            "a": "StringType",
                            "b": "StringType",
                            "c": "StringType",
                        }
                    },
                }
            ],
            "tests": [
                {
                    "title": "negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column_A": "a",
                        "column_B": "b",
                        "result_format": {
                            "result_format": "COMPLETE",
                            "unexpected_index_column_names": ["b"],
                        },
                    },
                    "out": {
                        "success": False,
                        "unexpected_index_list": [
                            {"b": "IE4019", "a": "IE4019"},
                            {"b": "IM6092", "a": "IM6092"},
                            {"b": "IE1405", "a": "IE1405"},
                        ],
                    },
                },
                {
                    "title": "positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column_A": "a",
                        "column_B": "c",
                        "result_format": {
                            "result_format": "COMPLETE",
                            "unexpected_index_column_names": ["a"],
                        },
                    },
                    "out": {"success": True, "unexpected_index_list": []},
                },
            ],
        },
    ]

    # Same string as `ColumnPairCustom.condition_metric_name`.
    map_metric = "column_pair_values.a_not_equal_to_b"
    success_keys = ("column_A", "column_B", "ignore_row_if", "mostly")
    default_kwarg_values = {
        "mostly": 1.0,
        "ignore_row_if": "neither",
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
    }

    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: Optional[dict] = None,
        execution_engine: Optional[ExecutionEngine] = None,
    ) -> Any:
        """Custom implementation of the GE _validate method.

        Used by the tests to check both the test outcome and that the
        unexpected index list is generated correctly, which the GE test
        logic does not check by itself. All work is delegated to
        `validate_result`.

        Args:
            configuration: Configuration used in the test.
            metrics: Test result metrics.
            runtime_configuration: Configuration used when running the expectation.
            execution_engine: Execution Engine where the expectation was run.

        Returns:
            Dictionary with the result of the validation.
        """
        outcome = validate_result(
            self,
            configuration,
            metrics,
            runtime_configuration,
            execution_engine,
            ColumnPairMapExpectation,
        )
        return outcome


"""Mandatory block of code. If it is removed the expectation will not be available."""
if __name__ == "__main__":
    # test the custom expectation with the function `print_diagnostic_checklist()`
    ExpectColumnPairAToBeNotEqualToB().print_diagnostic_checklist()
+
+ class
+ ColumnPairCustom(great_expectations.expectations.metrics.map_metric_provider.column_pair_map_metric_provider.ColumnPairMapMetricProvider):
+
+
+
+
+
class ColumnPairCustom(ColumnPairMapMetricProvider):
    """Metric provider backing the "column A is not equal to column B" check.

    A row meets the condition when at least one of the two values is not
    null and the two values differ.
    """

    # Same string as `ExpectColumnPairAToBeNotEqualToB.map_metric`.
    condition_metric_name = "column_pair_values.a_not_equal_to_b"
    condition_domain_keys = (
        "batch_id",
        "table",
        "column_A",
        "column_B",
        "ignore_row_if",
    )
    # The condition takes no extra value parameters.
    condition_value_keys = ()

    @column_pair_condition_partial(engine=SparkDFExecutionEngine)
    def _spark(
        self: ColumnPairMapMetricProvider,
        column_A: Any,
        column_B: Any,
        **kwargs: dict,
    ) -> Any:
        """Build the row-level condition for the Spark execution engine.

        Args:
            column_A: First column of the pair (presumably a pyspark Column;
                only `isNotNull()` and `!=` are used on it).
            column_B: Second column of the pair (same assumption).
            kwargs: dict with additional parameters (unused here).

        Returns:
            The combined condition: at least one side is not null AND the
            two sides differ.
        """
        # NOTE(review): under Spark's null semantics `!=` likely yields NULL
        # when exactly one side is null - confirm rows with a single null
        # value are classified as intended.
        at_least_one_present = column_A.isNotNull() | column_B.isNotNull()
        values_differ = column_A != column_B
        return at_least_one_present & values_differ
Asserts that column 'A' is not equal to column 'B'.
+ +Additionally, it compares null values as well.
+
+
+
+
+ condition_domain_keys =
+('batch_id', 'table', 'column_A', 'column_B', 'ignore_row_if')
+
+
+
+
+
+
+
+
+
+ Inherited Members
+-
+
- great_expectations.expectations.metrics.map_metric_provider.column_pair_map_metric_provider.ColumnPairMapMetricProvider +
- function_domain_keys +
- function_value_keys + +
- great_expectations.expectations.metrics.map_metric_provider.map_metric_provider.MapMetricProvider +
- filter_column_isnull +
- is_sqlalchemy_metric_selectable + +
- great_expectations.expectations.metrics.metric_provider.MetricProvider +
- domain_keys +
- value_keys +
- default_kwarg_values +
- get_evaluation_dependencies + +
+
+ class
+ ExpectColumnPairAToBeNotEqualToB(great_expectations.expectations.expectation.ColumnPairMapExpectation):
+
+
+
+
+
class ExpectColumnPairAToBeNotEqualToB(ColumnPairMapExpectation):
    """Expect values in column A to be not equal to column B.

    Args:
        column_A: The first column name.
        column_B: The second column name.

    Keyword Args:
        - allow_cross_type_comparisons: If True, allow
            comparisons between types (e.g. integer and string).
            Otherwise, attempting such comparisons will raise an exception.
        - ignore_row_if: "both_values_are_missing",
            "either_value_is_missing", "neither" (default).
        - mostly: Defaults to 1.0.
        - result_format: Which output mode to use:
            `BOOLEAN_ONLY`, `BASIC` (default), `COMPLETE`, or `SUMMARY`.
        - include_config: If True (default), then include the expectation config
            as part of the result object.
        - catch_exceptions: If True, then catch exceptions and
            include them as part of the result object. Default: False.
        - meta: A JSON-serializable dictionary (nesting allowed)
            that will be included in the output without modification.

    Returns:
        An ExpectationSuiteValidationResult.
    """

    # Example dataset plus one failing case (a vs b, identical values) and
    # one passing case (a vs c, all values differ).
    examples = [
        {
            "dataset_name": "Test Dataset",
            "data": [
                {
                    "data": {
                        "a": ["IE4019", "IM6092", "IE1405"],
                        "b": ["IE4019", "IM6092", "IE1405"],
                        "c": ["IE1404", "IN6192", "842075"],
                    },
                    "schemas": {
                        "spark": {
                            "a": "StringType",
                            "b": "StringType",
                            "c": "StringType",
                        }
                    },
                }
            ],
            "tests": [
                {
                    "title": "negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column_A": "a",
                        "column_B": "b",
                        "result_format": {
                            "result_format": "COMPLETE",
                            "unexpected_index_column_names": ["b"],
                        },
                    },
                    "out": {
                        "success": False,
                        "unexpected_index_list": [
                            {"b": "IE4019", "a": "IE4019"},
                            {"b": "IM6092", "a": "IM6092"},
                            {"b": "IE1405", "a": "IE1405"},
                        ],
                    },
                },
                {
                    "title": "positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column_A": "a",
                        "column_B": "c",
                        "result_format": {
                            "result_format": "COMPLETE",
                            "unexpected_index_column_names": ["a"],
                        },
                    },
                    "out": {"success": True, "unexpected_index_list": []},
                },
            ],
        },
    ]

    # Same string as `ColumnPairCustom.condition_metric_name`.
    map_metric = "column_pair_values.a_not_equal_to_b"
    success_keys = ("column_A", "column_B", "ignore_row_if", "mostly")
    default_kwarg_values = {
        "mostly": 1.0,
        "ignore_row_if": "neither",
        "result_format": "BASIC",
        "include_config": True,
        "catch_exceptions": False,
    }

    def _validate(
        self,
        configuration: ExpectationConfiguration,
        metrics: Dict,
        runtime_configuration: Optional[dict] = None,
        execution_engine: Optional[ExecutionEngine] = None,
    ) -> Any:
        """Custom implementation of the GE _validate method.

        Used by the tests to check both the test outcome and that the
        unexpected index list is generated correctly, which the GE test
        logic does not check by itself. All work is delegated to
        `validate_result`.

        Args:
            configuration: Configuration used in the test.
            metrics: Test result metrics.
            runtime_configuration: Configuration used when running the expectation.
            execution_engine: Execution Engine where the expectation was run.

        Returns:
            Dictionary with the result of the validation.
        """
        outcome = validate_result(
            self,
            configuration,
            metrics,
            runtime_configuration,
            execution_engine,
            ColumnPairMapExpectation,
        )
        return outcome
Expect values in column A to be not equal to column B.
+ +Arguments:
+ +-
+
- column_A: The first column name. +
- column_B: The second column name. +
Keyword Args:
+ +++ ++
+- allow_cross_type_comparisons: If True, allow + comparisons between types (e.g. integer and string). + Otherwise, attempting such comparisons will raise an exception.
+- ignore_row_if: "both_values_are_missing", + "either_value_is_missing", "neither" (default).
+- result_format: Which output mode to use: `BOOLEAN_ONLY`, `BASIC` (default), `COMPLETE`, or `SUMMARY`.
+- include_config: If True (default), then include the expectation config as part of the result object.
+- catch_exceptions: If True, then catch exceptions and + include them as part of the result object. Default: False.
+- meta: A JSON-serializable dictionary (nesting allowed) + that will be included in the output without modification.
+
Returns:
+ +++An ExpectationSuiteValidationResult.
+
+
+
+
+
+ examples =
+
+ [{'dataset_name': 'Test Dataset', 'data': [{'data': {'a': ['IE4019', 'IM6092', 'IE1405'], 'b': ['IE4019', 'IM6092', 'IE1405'], 'c': ['IE1404', 'IN6192', '842075']}, 'schemas': {'spark': {'a': 'StringType', 'b': 'StringType', 'c': 'StringType'}}}], 'tests': [{'title': 'negative_test', 'exact_match_out': False, 'include_in_gallery': True, 'in': {'column_A': 'a', 'column_B': 'b', 'result_format': {'result_format': 'COMPLETE', 'unexpected_index_column_names': ['b']}}, 'out': {'success': False, 'unexpected_index_list': [{'b': 'IE4019', 'a': 'IE4019'}, {'b': 'IM6092', 'a': 'IM6092'}, {'b': 'IE1405', 'a': 'IE1405'}]}}, {'title': 'positive_test', 'exact_match_out': False, 'include_in_gallery': True, 'in': {'column_A': 'a', 'column_B': 'c', 'result_format': {'result_format': 'COMPLETE', 'unexpected_index_column_names': ['a']}}, 'out': {'success': True, 'unexpected_index_list': []}}]}]
+
+
+
+
+
+
+
+
+
+
+
+ default_kwarg_values =
+
+ {'include_config': True, 'catch_exceptions': False, 'result_format': 'BASIC', 'row_condition': None, 'condition_parser': None, 'mostly': 1.0, 'ignore_row_if': 'neither'}
+
+
+
+
+
+
+
+
+
+ Inherited Members
+-
+
- great_expectations.expectations.expectation.Expectation +
- Expectation +
- version +
- runtime_keys +
- get_allowed_config_keys +
- metrics_validate +
- get_domain_kwargs +
- get_success_kwargs +
- get_runtime_kwargs +
- get_result_format +
- validate +
- configuration +
- run_diagnostics +
- print_diagnostic_checklist +
- is_expectation_self_initializing +
- is_expectation_auto_initializing + +
- great_expectations.expectations.expectation.ColumnPairMapExpectation +
- domain_keys +
- domain_type +
- is_abstract +
- validate_configuration +
- get_validation_dependencies + +
- great_expectations.expectations.expectation.BatchExpectation +
- metric_dependencies +
- args_keys +
- validate_metric_value_between_configuration + +