Init
This commit is contained in:
commit
c4fdb2860c
14 changed files with 62 additions and 0 deletions
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
0
src/pyspark_training/__init__.py
Normal file
0
src/pyspark_training/__init__.py
Normal file
0
src/pyspark_training/output_dataset_1/__init__.py
Normal file
0
src/pyspark_training/output_dataset_1/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
def remove_extra_spaces(df, column_name):
    """Return *df* with whitespace runs in one column collapsed to a single space.

    Every match of the regular expression ``\\s+`` in ``column_name`` is
    replaced by one space character; the result overwrites the column of the
    same name via ``withColumn``. Leading and trailing whitespace is reduced
    to a single space, not stripped.

    NOTE(review): relies on a module-level name ``F`` -- presumably
    ``pyspark.sql.functions`` -- which is not imported in the visible lines;
    confirm it is in scope.

    Args:
        df: DataFrame-like object exposing ``withColumn``.
        column_name: Name of the column to rewrite.

    Returns:
        The object returned by ``df.withColumn``.
    """
    collapsed = F.regexp_replace(F.col(column_name), "\\s+", " ")
    return df.withColumn(column_name, collapsed)
|
||||
0
src/pyspark_training/utils.py
Normal file
0
src/pyspark_training/utils.py
Normal file
0
src/test_pyspark_training/__init__.py
Normal file
0
src/test_pyspark_training/__init__.py
Normal file
3
src/test_pyspark_training/example_test.py
Normal file
3
src/test_pyspark_training/example_test.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
|
||||
def test_example_test():
    """Placeholder test that keeps this module importable and collectable."""
    # TODO: replace with real assertions; the original definition had no
    # body, which is a syntax error when the file is parsed.
    return None
|
||||
16
src/test_pyspark_training/lib_test_utils.py
Normal file
16
src/test_pyspark_training/lib_test_utils.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
|
||||
def assert_df_equal(df1, df2):
    """Fail with ``AssertionError`` unless two DataFrames match.

    The schemas are compared first, then the collected rows. On a mismatch
    the differing pieces are printed for diagnosis and an ``AssertionError``
    is raised so that the calling test actually fails (the failure is not
    swallowed).

    NOTE(review): the arguments are presumably PySpark DataFrames (the
    helper uses ``schema``, ``collect`` and ``show``) -- confirm with callers.
    The row comparison is order-sensitive: both frames must return their
    rows in the same order.

    Args:
        df1: First DataFrame (typically the actual result).
        df2: Second DataFrame (typically the expected result).

    Raises:
        AssertionError: If the schemas differ or the collected rows differ.
    """
    # `schema` is read as an attribute: on Spark DataFrames it is a property,
    # so calling it would raise TypeError instead of comparing anything.
    left_schema = df1.schema
    right_schema = df2.schema
    if left_schema != right_schema:
        print('Error Schema')
        print(left_schema)
        print(right_schema)
        # Explicit raise rather than `assert`, which is stripped under -O.
        raise AssertionError('DataFrame schemas differ')

    # Spark DataFrames have no `.equals`; compare the materialised rows.
    if df1.collect() != df2.collect():
        print('Error Data')
        df1.show()
        df2.show()
        raise AssertionError('DataFrame contents differ')
|
||||
0
src/test_pyspark_training/utils/__init__.py
Normal file
0
src/test_pyspark_training/utils/__init__.py
Normal file
Loading…
Add table
Add a link
Reference in a new issue