本文共 3028 字,大约阅读时间需要 10 分钟。
本文提供一种用 Scala 把 JSON 串转换为 Hive 建表语句(CREATE TABLE)的方法。由于比较简单,只贴代码,不做解释。有问题可以留言探讨。
package com.gabry.hive import org.json4s._ import org.json4s.native.JsonMethods._ import scala.io.Source
/**
 * Derives a Hive `create table` statement from a sample JSON document.
 *
 * The JSON text is parsed with json4s and every node of the resulting AST is
 * mapped to a Hive type:
 *
 *  - `JString`            -> `string`
 *  - `JDouble`            -> `double`
 *  - `JDecimal`           -> `decimal`
 *  - `JInt`               -> `bigint`
 *  - `JBool`              -> `boolean`
 *  - `JObject`            -> `struct<name:type,...>`
 *  - `JArray`             -> `array<type of the first element>`
 *  - `JNull` / other nodes -> `string` (no type information is available)
 *
 * Example input:
 * {{{
 * { "people_type":1,"people":{"person_id": 5,"test_count": 5,"para":{"name":"jack","age":6}}}
 * }}}
 */
class Json2Hive {

  /** Type used when the JSON value carries no usable type information. */
  private val fallbackType = "string"

  /**
   * Maps one JSON value to the Hive type that can hold it.
   *
   * @param value the JSON AST node to describe
   * @return the Hive type expression, without any column name
   */
  private def hiveType(value: JValue): String = value match {
    case _: JString  => "string"
    case _: JDouble  => "double"
    case _: JDecimal => "decimal"
    case _: JInt     => "bigint"
    case _: JBool    => "boolean"
    case JArray(elements) =>
      // Hive arrays are homogeneous, so the first element decides the element
      // type; an empty array gives no hint and falls back to string.
      val elementType = elements.headOption.map(hiveType).getOrElse(fallbackType)
      s"array<$elementType>"
    case JObject(fields) =>
      // Inside struct<...> Hive separates a field name from its type with ':'.
      fields
        .map { case (name, fieldValue) => s"$name:${hiveType(fieldValue)}" }
        .mkString("struct<", ",", ">")
    case _ =>
      // JNull, JNothing and any node kind not listed above.
      fallbackType
  }

  /**
   * Builds the `create table` statement for a JSON object.
   *
   * Each top-level field of the object becomes one column (`name type`);
   * nested objects become `struct` columns and arrays become `array` columns.
   *
   * @param jsonStr   JSON text whose root is an object
   * @param tableName name of the Hive table to create
   * @return the `create table` statement
   * @throws IllegalArgumentException if the JSON root is not an object, since
   *                                  there would be no column names to emit
   */
  def toHive(jsonStr: String, tableName: String): String = parse(jsonStr) match {
    case JObject(fields) =>
      // At table level a column name and its type are separated by a space.
      val columns = fields
        .map { case (name, fieldValue) => s"$name ${hiveType(fieldValue)}" }
        .mkString(",")
      "create table %s ( %s )".format(tableName, columns)
    case other =>
      throw new IllegalArgumentException(
        s"JSON root must be an object to derive table columns, got ${other.getClass.getSimpleName}")
  }
}

/** Command-line entry point: `json2hive jsonFile hiveTableName`. */
object Json2Hive {
  val json2hive = new Json2Hive()

  /**
   * Reads the given file line by line and prints one `create table` statement
   * per non-blank line.
   *
   * A JSON string is awkward to pass as a command-line argument, so the input
   * is taken from a file instead. Sample line:
   * {{{
   * { "people_type":0,"people_num":0.1,"people":{"person_id": 5,"test_count": 5,"para":{"name":"jack","age":6}},"gender":1}
   * }}}
   *
   * @param args `args(0)` is the path of the JSON file (read as UTF-8),
   *             `args(1)` is the Hive table name
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // Stop here: continuing would fail on args(0) / args(1).
      println("usage : json2hive jsonFile hiveTableName")
      sys.exit(1)
    }
    val jsonFile = args(0)
    val hiveTableName = args(1)

    val file = Source.fromFile(jsonFile, "UTF-8")
    try {
      // Convert every JSON line of the file into its Hive table definition;
      // blank lines carry no document and are skipped.
      file.getLines()
        .filter(_.trim.nonEmpty)
        .foreach(line => println(json2hive.toHive(line, hiveTableName)))
    } finally {
      // Release the file handle even when a line fails to parse.
      file.close()
    }
  }
}
以下是测试结果
create table example ( people_type bigint,people_num double,people struct<person_id:bigint,test_count:bigint,para struct<name:string,age:bigint>>,gender bigint )
转载地址:http://mwjta.baihongyu.com/