有如下hive记录表records,记录车辆信息:
create table records{ id string,//记录编号 indate string,//过车记录时间 plate_no string,//车辆号牌 device_id int//经过的设备编号 } partitioned by(month string,day string) row format delimited fields terminated by ‘\t’ stored as ORC;
1.请使用HQL得到最近一个月内晚上(晚22点-早6点)出现记录最多的车辆号牌Top10及次数
SELECT
plate_no as `车牌号`,COUNT(1) as `次数`
from
records
where
SUBSTR(indate ,12,2)<6 or SUBSTR(indate,12,2)>22 and DATEDIFF(NOW(),indate)<=30
GROUP BY plate_no
ORDER BY `次数` desc
limit 10
;
2.请用spark RDD将上述表中indate、plate_no、device_id三个字段记录重复的数据只保留一条
package udftest
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.{DataTypes, StructType}
object test2 {
def main(args: Array[String]): Unit = {
Logger.getLogger("org").setLevel(Level.WARN)
val spark = SparkSession.builder()
.appName(this.getClass.getSimpleName)
.master("local[*]")
.getOrCreate()
import spark.sql
val schema = new StructType()
.add("id", DataTypes.StringType)
.add("indate", DataTypes.StringType)
.add("plate_no", DataTypes.StringType)
.add("device_id", DataTypes.IntegerType)
val df: DataFrame = spark.read.schema(schema).orc("/data/hive/records.orc")
val value: RDD[(String, Row)] = df.rdd.map(r=>(r.getString(1)+r.getString(2)+r.getInt(3),r))
val result: RDD[(String, Iterable[Row])] = value.groupByKey()
result.values.foreach(tp=>println(tp.head.toString()))
/*df.createTempView("records")
val result: DataFrame = sql(
"""
|
|select
| min(id) as id,
| indate,
| plate_no,
| device_id
|from records
|group by indate,plate_no,device_id
|""".stripMargin)
result.printSchema()
result.show(100)*/
}
}
3.有string A和string B ,分别由4096个随机的0或者1组成,样例为String A = “01010111001…110010”,String B = “1010110…100101”,现有公式double C(A个B诼位与的和)/(A中1的个数*B中1的个数),请用自己熟悉的语言实现出满足次公式的方法
package com.ALi.test;
public class test3 {
public static void main(String[] args) {
String A = "01010101010101";
String B = "01010101010101";
int[] intsA = toBinary(A);
int[] intsB = toBinary(B);
double c = toAnd(intsA, intsB) / (toSum(intsA) * toSum(intsB));
System.out.println(c);
}
public static int[] toBinary(String str) {//将二进制字符串转换为整型数组
char[] chars = str.toCharArray();
int[] ints = new int[chars.length];
for (int i = 0; i < chars.length; i++) {
ints[i] = (chars[i] - 48);
}
return ints;
}
public static double toAnd(int[] intsA, int[] intsB) {//按位与
double sum = 0;
for (int i = 0; i < intsA.length; i++) {
sum += (intsA[i] & intsB[i]);
}
return sum;
}
public static double toSum(int[] ints) {//按位求和
double sum = 0;
for (int i = 0; i < ints.length; i++) {
sum += ints[i];
}
return sum;
}
}

关注 易学在线 公众号
每日更新大量优质技术文档
第一时间获知最新上架课程
与众多大数据猿分享与交流
DOIT.EDU 资料来自网络收集整理,如有侵权请告知