-
Notifications
You must be signed in to change notification settings - Fork 3
/
mongo_hive.py
29 lines (18 loc) · 3.41 KB
/
mongo_hive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Mongo-HIVE: read a MongoDB collection and write it into a Hive table.
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, SparkSession, HiveContext
from pyspark.sql.functions import col, explode

# Build the session. The Mongo Spark connector must be registered via
# spark.jars.packages ON THE BUILDER — the original script set it on a
# SparkConf that was never passed to the session, so it had no effect.
spark = SparkSession.builder \
    .appName("covid19") \
    .config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.11:2.3.2") \
    .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/bigdatadb.covid19") \
    .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/bigdatadb.covid19") \
    .config("spark.sql.warehouse.dir", "/root/spark-warehouse") \
    .enableHiveSupport() \
    .getOrCreate()

# Load the Mongo collection. spark.read is the supported reader API;
# SQLContext is deprecated and a second context is unnecessary.
# Host normalized to 127.0.0.1 to match the session config above.
df = spark.read.format("com.mongodb.spark.sql.DefaultSource") \
    .option("uri", "mongodb://127.0.0.1/bigdatadb.covid19") \
    .load()
df.printSchema()

# Database on Hive. IF NOT EXISTS makes re-runs idempotent — the plain
# CREATE DATABASE raised AnalysisException on every run after the first,
# even though the table write below already uses mode("overwrite").
spark.sql("CREATE DATABASE IF NOT EXISTS bigdatadb")
df.write.mode("overwrite").saveAsTable("bigdatadb.covid19")