Add Python env for Spark

Yûki VACHOT 2024-01-11 14:01:57 +01:00
parent 1e42b00ce7
commit e28c446569
8 changed files with 79 additions and 32 deletions

init.py

@@ -1,24 +1,21 @@
import os
import findspark
import sys
from dotenv import load_dotenv
from pyspark.sql import SparkSession
from src.pyspark_training.output_dataset_1.compute_output_dataset_1 import compute_output_dataset_1

def init_env():
    os.environ["JAVA_HOME"] = "C:\\Program Files\\Java\\jdk-11"
    os.environ["SPARK_HOME"] = "C:\\SPARK\\spark-3.1.1-bin-hadoop3.2"
    os.environ["HADOOP_HOME"] = "C:\\SPARK\\hadoop"
    findspark.init()

def init_spark():
    return SparkSession.builder.master("local[*]").getOrCreate()

def main():
    print("hey there")
    init_env()
    load_dotenv()
    print(os.environ["SPARK_HOME"])   # spark-3.5.0-bin-hadoop3
    print(os.environ["HADOOP_HOME"])  # spark-3.5.0-bin-hadoop3, plus winutils and the Hadoop 3.0 DLLs
    print(os.environ["JAVA_HOME"])    # local Java 8 (Zulu)
    print("EXEC:")
    print(sys.executable)
    spark_session = init_spark()
    compute_output_dataset_1(spark_session)
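
With load_dotenv() in main(), the three variables are presumably read from a .env file at the project root rather than from the hardcoded paths in init_env(). A minimal sketch of such a file, matching the versions named in the inline comments (the file location and all paths are illustrative assumptions, not taken from the commit):

# .env at the project root (assumed)
SPARK_HOME=C:\SPARK\spark-3.5.0-bin-hadoop3
# HADOOP_HOME\bin is assumed to hold winutils.exe and the Hadoop 3.0 DLLs
HADOOP_HOME=C:\SPARK\hadoop
JAVA_HOME=C:\Program Files\Zulu\zulu-8

Note that python-dotenv's load_dotenv() does not override variables that are already set (pass override=True to change that), so as long as main() calls init_env() before load_dotenv(), the hardcoded values win over the .env entries.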