最新面试题集锦(13)- 卓视智通

有如下hive记录表records,记录车辆信息:

create table records{ id string,//记录编号 indate string,//过车记录时间 plate_no string,//车辆号牌 device_id int//经过的设备编号 } partitioned by(month string,day string) row format delimited fields terminated by ‘\t’ stored as ORC;

1.请使用HQL得到最近一个月内晚上(晚22点-早6点)出现记录最多的车辆号牌Top10及次数

SELECT
plate_no as `车牌号`,COUNT(1) as `次数`
from 
records
where 
SUBSTR(indate ,12,2)<6 or SUBSTR(indate,12,2)>22 and DATEDIFF(NOW(),indate)<=30
GROUP BY plate_no
ORDER BY `次数` desc
limit 10
;

2.请用spark RDD将上述表中indate、plate_no、device_id三个字段记录重复的数据只保留一条

package udftest

import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.{DataTypes, StructType}


object test2 {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    val spark = SparkSession.builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      .getOrCreate()



    import spark.sql

    val schema = new StructType()
      .add("id", DataTypes.StringType)
      .add("indate", DataTypes.StringType)
      .add("plate_no", DataTypes.StringType)
      .add("device_id", DataTypes.IntegerType)

    val df: DataFrame = spark.read.schema(schema).orc("/data/hive/records.orc")

   val value: RDD[(String, Row)] = df.rdd.map(r=>(r.getString(1)+r.getString(2)+r.getInt(3),r))
    val result: RDD[(String, Iterable[Row])] = value.groupByKey()
    result.values.foreach(tp=>println(tp.head.toString()))




    /*df.createTempView("records")
    val result: DataFrame = sql(
      """
        |
        |select
        | min(id) as id,
        | indate,
        | plate_no,
        | device_id
        |from  records
        |group by indate,plate_no,device_id
        |""".stripMargin)

    result.printSchema()
    result.show(100)*/
  }
}

3.有string A和string B ,分别由4096个随机的0或者1组成,样例为String A = “01010111001…110010”,String B = “1010110…100101”,现有公式double C(A个B诼位与的和)/(A中1的个数*B中1的个数),请用自己熟悉的语言实现出满足次公式的方法

package com.ALi.test;

public class test3 {
    public static void main(String[] args) {
        String A = "01010101010101";
        String B = "01010101010101";
        int[] intsA = toBinary(A);
        int[] intsB = toBinary(B);
        double c = toAnd(intsA, intsB) / (toSum(intsA) * toSum(intsB));

        System.out.println(c);

    }

    public static int[] toBinary(String str) {//将二进制字符串转换为整型数组
        char[] chars = str.toCharArray();
        int[] ints = new int[chars.length];
        for (int i = 0; i < chars.length; i++) {
            ints[i] = (chars[i] - 48);
        }
        return ints;
    }

    public static double toAnd(int[] intsA, int[] intsB) {//按位与
        double sum = 0;
        for (int i = 0; i < intsA.length; i++) {
            sum += (intsA[i] & intsB[i]);
        }
        return sum;
    }

    public static double toSum(int[] ints) {//按位求和
        double sum = 0;
        for (int i = 0; i < ints.length; i++) {
            sum += ints[i];
        }
        return sum;
    }
}
最新面试题集锦(13)- 卓视智通

关注 易学在线 公众号

每日更新大量优质技术文档
第一时间获知最新上架课程
与众多大数据猿分享与交流

DOIT.EDU 资料来自网络收集整理,如有侵权请告知

(0)
打赏 微信扫一扫 微信扫一扫
上一篇 4月 30, 2020 11:58 上午
下一篇 5月 9, 2020 7:27 上午

相关推荐

wx