Add env python for Spark
This commit is contained in:
parent
1e42b00ce7
commit
e28c446569
8 changed files with 79 additions and 32 deletions
19
init.py
19
init.py
|
|
@ -1,24 +1,21 @@
|
|||
import os
|
||||
import findspark
|
||||
import sys
|
||||
from dotenv import load_dotenv
|
||||
from pyspark.sql import SparkSession
|
||||
from src.pyspark_training.output_dataset_1.compute_output_dataset_1 import compute_output_dataset_1
|
||||
|
||||
|
||||
def init_env(
    java_home="C:\\Program Files\\Java\\jdk-11",
    spark_home="C:\\SPARK\\spark-3.1.1-bin-hadoop3.2",
    hadoop_home="C:\\SPARK\\hadoop",
):
    """Point the process environment at local Java/Spark/Hadoop installs.

    The paths were previously hard-coded; they are now keyword parameters
    whose defaults reproduce the original values exactly, so a zero-argument
    call behaves as before while other installations can be targeted.

    NOTE: values are assigned unconditionally, so they overwrite anything
    already present in the environment (original behavior, preserved).

    Args:
        java_home: Path assigned to JAVA_HOME.
        spark_home: Path assigned to SPARK_HOME.
        hadoop_home: Path assigned to HADOOP_HOME (winutils location on Windows).
    """
    os.environ["JAVA_HOME"] = java_home
    os.environ["SPARK_HOME"] = spark_home
    os.environ["HADOOP_HOME"] = hadoop_home

    # Make pyspark importable from SPARK_HOME by patching sys.path.
    findspark.init()
|
||||
|
||||
|
||||
def init_spark():
    """Create (or reuse) a SparkSession running locally on all available cores."""
    builder = SparkSession.builder
    local_builder = builder.master("local[*]")
    return local_builder.getOrCreate()
|
||||
|
||||
|
||||
def main():
    """Script entry point: configure the environment, start a local Spark
    session and run the output_dataset_1 computation.

    The session is always stopped afterwards (try/finally) so the backing
    JVM is released even if the job raises.
    """
    print("hey there")
    init_env()
    # load_dotenv() by default does NOT override variables that already
    # exist, so values set by init_env() above take precedence over .env.
    load_dotenv()

    # Diagnostic dump of the effective runtime configuration.
    # NOTE(review): init_env() sets spark-3.1.1 / jdk-11, but earlier
    # comments here claimed spark-3.5.0 / java 8 (zulu) — confirm which
    # installation is actually intended.
    print(os.environ["SPARK_HOME"])
    print(os.environ["HADOOP_HOME"])
    print(os.environ["JAVA_HOME"])
    print("EXEC:")
    print(sys.executable)

    spark_session = init_spark()
    try:
        compute_output_dataset_1(spark_session)
    finally:
        # Release the JVM-backed session even when the computation fails;
        # the original leaked it on any exception.
        spark_session.stop()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue