from os.path import abspath
from pyspark.sql import SparkSession
from pyspark.sql import Row
# warehouse_location points to the default location for managed databases and tables
warehouse_location = abspath("spark-warehouse")

# Build (or reuse) a SparkSession with Hive support enabled so that Hive
# DDL/DML statements (CREATE TABLE ... USING hive, LOAD DATA, ...) work.
spark = (
    SparkSession.builder.appName("Python Spark SQL Hive integration example")
    .config("spark.sql.warehouse.dir", warehouse_location)
    .enableHiveSupport()
    .getOrCreate()
)

# Create the Hive-backed table if it does not already exist.
spark.sql("CREATE TABLE IF NOT EXISTS src (value STRING, key STRING) USING hive")

# BUG FIX: the original loaded into 'src2', a table that is never created above,
# so the LOAD DATA statement would fail. Load into the 'src' table instead.
# NOTE(review): path '../../data/pl.csv' is relative to the driver's working
# directory — confirm it resolves where this script is launched from.
spark.sql("LOAD DATA LOCAL INPATH '../../data/pl.csv' INTO TABLE src")

# Switch to the default database and list its tables to verify the setup.
spark.sql("use default")
spark.sql("show tables").show()