Fix lib_test_utils: add OS environment variables for PySpark and list the required libraries in the README
This commit is contained in:
parent
1f9509c574
commit
b22ebc40fe
4 changed files with 19 additions and 8 deletions
|
|
@ -3,6 +3,8 @@
|
|||
## Installation
|
||||
- [Python 3.10](https://www.python.org/downloads/)
|
||||
- pyspark==3.1.1
|
||||
- findspark
|
||||
- pyspark-test
|
||||
- [Spark 3.1.1](https://spark.apache.org/downloads.html)
|
||||
- [Hadoop 3.3.6](https://hadoop.apache.org/releases.html)
|
||||
- [Java JDK 11](https://www.oracle.com/java/technologies/downloads/#java11)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import sys
|
||||
import findspark
|
||||
import logging
|
||||
import pytest
|
||||
|
|
@ -7,9 +8,17 @@ from pyspark.sql import SparkSession
|
|||
|
||||
@pytest.fixture
|
||||
def spark_session(request):
|
||||
"""
|
||||
Return a Spark Session
|
||||
:param request:
|
||||
:return: Spark session
|
||||
"""
|
||||
os.environ['PYSPARK_PYTHON'] = sys.executable
|
||||
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
|
||||
os.environ["JAVA_HOME"] = "C:\\Program Files\\Java\\jdk-11"
|
||||
os.environ["SPARK_HOME"] = "C:\\SPARK\\spark-3.1.1-bin-hadoop3.2"
|
||||
os.environ["HADOOP_HOME"] = "C:\\SPARK\\hadoop"
|
||||
|
||||
findspark.init()
|
||||
|
||||
spark = SparkSession.builder.master("local[*]").getOrCreate()
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
from pyspark.sql import DataFrame
|
||||
from pyspark_test import assert_pyspark_df_equal
|
||||
|
||||
|
||||
def assert_df_equal(actual_df: DataFrame, expected_df: DataFrame) -> None:
    """
    Assert that two Spark DataFrames have identical schemas and data.

    On a schema mismatch, print both schemas so the test log shows the
    exact difference, then fail immediately. Otherwise delegate the full
    comparison (schema and rows) to pyspark_test.assert_pyspark_df_equal.

    :param actual_df: DataFrame produced by the code under test
    :param expected_df: DataFrame holding the expected contents
    :raises AssertionError: if the schemas or the data differ
    """
    try:
        assert actual_df.schema == expected_df.schema
    except AssertionError:
        print('Actual :\n')
        actual_df.printSchema()
        print('Expected :\n')
        expected_df.printSchema()
        # Re-raise so the schema mismatch itself fails the test, instead
        # of silently falling through and relying on the data comparison
        # below to fail with a less specific message.
        raise

    assert_pyspark_df_equal(actual_df, expected_df)
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ def test_remove_extra_spaces(spark_session):
|
|||
]
|
||||
input_df = spark_session.createDataFrame(input_data, input_schema)
|
||||
|
||||
|
||||
expected_schema = T.StructType(
|
||||
[
|
||||
T.StructField('name', T.StringType(), False),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue