unicode fix for legacy

capitalone · Mar 25, 2024 · 959b320
1 parent 2381110
commit 959b320
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 19 deletions.
diff --git a/datacompy/legacy.py b/datacompy/legacy.py
@@ -531,7 +531,7 @@ def _print_num_of_rows_with_column_equality(self, myfile: TextIO) -> None:
 
         where_cond = " AND ".join(
             [
-                "A." + name + "=" + str(MatchType.MATCH.value)
+                "A.`" + name + "`=" + str(MatchType.MATCH.value)
                 for name in self.columns_compared
             ]
         )

diff --git a/pyproject.toml b/pyproject.toml
@@ -11,16 +11,7 @@ maintainers = [
   { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }
 ]
 license = {text = "Apache Software License"}
-<<<<<<< HEAD
 dependencies = ["pandas<=2.2.1,>=0.25.0", "numpy<=1.26.4,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "fugue<=0.8.7,>=0.8.7"]
-=======
-dependencies = [
-    "pandas<=2.2.1,>=0.25.0",
-    "numpy<=1.26.4,>=1.22.0",
-    "ordered-set<=4.1.0,>=4.0.2",
-    "fugue<=0.8.7,>=0.8.7",
-]
->>>>>>> 8cdb17d (tweaking SparkCompare and adding back Legacy)
 requires-python = ">=3.8.0"
 classifiers = [
     "Intended Audience :: Developers",
@@ -65,14 +56,7 @@ python-tag = "py3"
 [project.optional-dependencies]
 duckdb = ["fugue[duckdb]"]
 polars = ["polars"]
-<<<<<<< HEAD
 spark = ["pyspark>=3.1.1; python_version < \"3.11\"", "pyspark>=3.4; python_version >= \"3.11\""]
-=======
-spark = [
-    "pyspark>=3.2.4; python_version < '3.11'",
-    "pyspark>=3.4; python_version >= '3.11'",
-]
->>>>>>> 8cdb17d (tweaking SparkCompare and adding back Legacy)
 dask = ["fugue[dask]"]
 ray = ["fugue[ray]"]
 docs = ["sphinx", "furo", "myst-parser"]

diff --git a/tests/test_legacy_spark.py b/tests/test_legacy_spark.py
@@ -2090,8 +2090,20 @@ def text_alignment_validator(
 
 
 def test_unicode_columns(spark_session):
-    df1 = spark_session.createDataFrame([{"a": 1, "例": 2}, {"a": 1, "例": 3}])
-    df2 = spark_session.createDataFrame([{"a": 1, "例": 2}, {"a": 1, "例": 3}])
+    df1 = spark_session.createDataFrame(
+        [
+            (1, "foo", "test"),
+            (2, "bar", "test"),
+        ],
+        ["id", "例", "予測対象日"],
+    )
+    df2 = spark_session.createDataFrame(
+        [
+            (1, "foo", "test"),
+            (2, "baz", "test"),
+        ],
+        ["id", "例", "予測対象日"],
+    )
     compare = LegacySparkCompare(spark_session, df1, df2, join_columns=["例"])
     # Just render the report to make sure it renders.
     compare.report()