Fix lib_test_utils, add OS env vars for PySpark, and add the new libraries to the README
This commit is contained in:
parent 1f9509c574
commit b22ebc40fe

4 changed files with 19 additions and 8 deletions
README:

@@ -3,6 +3,8 @@
 ## Installation
 - [Python 3.10](https://www.python.org/downloads/)
 - pyspark=3.1.1
+- findspark
+- pyspark-test
 - [Spark 3.1.1](https://spark.apache.org/downloads.html)
 - [Hadoop 3.3.6](https://hadoop.apache.org/releases.html)
 - [Java JDK 11](https://www.oracle.com/java/technologies/downloads/#java11)
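Both new entries are test-side dependencies: findspark locates a local Spark install and puts pyspark on the Python path, and pyspark-test provides the DataFrame equality assertion used by the test helper below. A minimal smoke test of the pair, assuming SPARK_HOME points at the Spark 3.1.1 install listed above:

```python
import findspark

findspark.init()  # resolves SPARK_HOME and adds pyspark to sys.path

from pyspark.sql import SparkSession
from pyspark_test import assert_pyspark_df_equal

spark = SparkSession.builder.master("local[*]").getOrCreate()
df = spark.createDataFrame([(1, 'a')], ['id', 'name'])
assert_pyspark_df_equal(df, df)  # a DataFrame equals itself; install works
spark.stop()
```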
Pytest fixture module:

@@ -1,4 +1,5 @@
 import os
+import sys
 import findspark
 import logging
 import pytest
@@ -7,9 +8,17 @@ from pyspark.sql import SparkSession
 @pytest.fixture
 def spark_session(request):
+    """
+    Return a Spark Session
+
+    :param request:
+    :return: Spark session
+    """
+    os.environ['PYSPARK_PYTHON'] = sys.executable
+    os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
     os.environ["JAVA_HOME"] = "C:\\Program Files\\Java\\jdk-11"
     os.environ["SPARK_HOME"] = "C:\\SPARK\\spark-3.1.1-bin-hadoop3.2"
     os.environ["HADOOP_HOME"] = "C:\\SPARK\\hadoop"

     findspark.init()

     spark = SparkSession.builder.master("local[*]").getOrCreate()
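Read as a whole, the fixture now pins the worker and driver Python to the interpreter running pytest before findspark and Spark start, which prevents driver/worker Python version mismatches. A sketch of the full fixture after this commit; the hunk ends at getOrCreate(), so the teardown shown here is an assumption (a fixture that takes request typically registers a finalizer):

```python
import os
import sys
import findspark
import logging
import pytest
from pyspark.sql import SparkSession


@pytest.fixture
def spark_session(request):
    """
    Return a Spark Session

    :param request:
    :return: Spark session
    """
    # Pin worker and driver Python to the interpreter running pytest, so
    # PySpark cannot pick up a different (incompatible) Python from PATH.
    os.environ['PYSPARK_PYTHON'] = sys.executable
    os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
    os.environ["JAVA_HOME"] = "C:\\Program Files\\Java\\jdk-11"
    os.environ["SPARK_HOME"] = "C:\\SPARK\\spark-3.1.1-bin-hadoop3.2"
    os.environ["HADOOP_HOME"] = "C:\\SPARK\\hadoop"

    findspark.init()

    spark = SparkSession.builder.master("local[*]").getOrCreate()

    # Assumed teardown, not shown in the hunk:
    request.addfinalizer(spark.stop)
    return spark
```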
lib_test_utils:

@@ -1,15 +1,16 @@
+from pyspark.sql import DataFrame
 from pyspark_test import assert_pyspark_df_equal


-def assert_df_equal(df1, df2):
+def assert_df_equal(actual_df: DataFrame, expected_df: DataFrame) -> None:

     try:
-        assert df1.schema == df2.schema
+        assert actual_df.schema == expected_df.schema
     except AssertionError:
         print('Error Schema')
-        print('df1\n')
-        df1.printSchema()
-        print('df2\n')
-        df2.printSchema()
+        print('Actual :\n')
+        actual_df.printSchema()
+        print('Expected :\n')
+        expected_df.printSchema()

-    assert_pyspark_df_equal(df1, df2)
+    assert_pyspark_df_equal(actual_df, expected_df)
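The rename is purely cosmetic but makes call sites self-documenting: the DataFrame under test comes first, the reference second. A hedged usage sketch; the import path assumes the helper lives in lib_test_utils, per the commit title:

```python
import pyspark.sql.types as T
from lib_test_utils import assert_df_equal  # assumed module path


def test_names_match(spark_session):
    schema = T.StructType([T.StructField('name', T.StringType(), False)])
    actual_df = spark_session.createDataFrame([('alice',)], schema)
    expected_df = spark_session.createDataFrame([('alice',)], schema)

    # Argument order now carries meaning: actual first, expected second.
    assert_df_equal(actual_df, expected_df)
```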
Test module:

@@ -19,7 +19,6 @@ def test_remove_extra_spaces(spark_session):
     ]
     input_df = spark_session.createDataFrame(input_data, input_schema)

-
     expected_schema = T.StructType(
         [
             T.StructField('name', T.StringType(), False),
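This hunk only drops a stray blank line, but the test is where the pieces above meet. A sketch of how such a test reads end to end; remove_extra_spaces itself is not in this diff, so the stand-in implementation below is an assumption for illustration only:

```python
import pyspark.sql.functions as F
import pyspark.sql.types as T
from lib_test_utils import assert_df_equal  # assumed module path


def remove_extra_spaces(df, column):
    # Stand-in for the function under test (not shown in this commit):
    # collapse runs of whitespace to single spaces and trim the ends.
    return df.withColumn(column, F.trim(F.regexp_replace(F.col(column), r'\s+', ' ')))


def test_remove_extra_spaces(spark_session):
    input_schema = T.StructType([T.StructField('name', T.StringType(), False)])
    input_df = spark_session.createDataFrame([('hello     world',)], input_schema)

    expected_schema = T.StructType([T.StructField('name', T.StringType(), False)])
    expected_df = spark_session.createDataFrame([('hello world',)], expected_schema)

    assert_df_equal(remove_extra_spaces(input_df, 'name'), expected_df)
```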