Hive Custom UDF Functions

==Author: YB-Chi==

1. Create a new Maven project in IDEA and add the `hive-exec` dependency:
```xml
<dependencies>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>2.1.1</version>
        <exclusions>
            <exclusion>
                <groupId>*</groupId>
                <artifactId>*</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
```
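An optional tweak, assuming the cluster supplies Hive's classes at runtime (which it normally does): mark `hive-exec` as `provided` so it is never bundled into the UDF jar. A minimal sketch of that variant, not part of the original setup:

```xml
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>2.1.1</version>
    <!-- provided: needed at compile time, supplied by the cluster at runtime -->
    <scope>provided</scope>
</dependency>
```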
2. Write the function class: extend `UDF` and implement an `evaluate` method:
```java
import org.apache.hadoop.hive.ql.exec.UDF;

public class split_distinct extends UDF {

    public String evaluate(String line, String split) {
        if (line == null || split == null) return null;
        StringBuilder sb = new StringBuilder();
        // Note: String.split() interprets the separator as a regex,
        // so metacharacters such as "|" must be escaped by the caller.
        String[] sp = line.split(split);
        for (String s : sp) {
            // indexOf() deduplicates by substring containment: a token
            // already contained in the buffer is skipped.
            if (s != null && sb.indexOf(s) < 0) {
                sb.append(s).append(",");
            }
        }
        // Guard against an empty buffer before stripping the trailing comma.
        if (sb.length() == 0) return "";
        return sb.deleteCharAt(sb.length() - 1).toString();
    }
}
```
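Before packaging, the logic can be sanity-checked with a plain `main` method, no Hive required. A minimal sketch (the class name `SplitDistinctTest` is made up for illustration):

```java
public class SplitDistinctTest {
    public static void main(String[] args) {
        split_distinct udf = new split_distinct();
        // The three duplicate MAC fragments collapse, so this prints:
        // [["C8:13:8B:81:32:C1"],["C8:13:8B:81:32:C2"]]
        System.out.println(udf.evaluate(
                "[[\"C8:13:8B:81:32:C1\"],[\"C8:13:8B:81:32:C1\"],"
                + "[\"C8:13:8B:81:32:C1\"],[\"C8:13:8B:81:32:C2\"]]",
                ","));
    }
}
```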
3. Run the Maven `package` goal, upload the plain jar (without bundled dependencies), and test it:
```sql
ADD JAR /home/module/testdata/HiveUDF-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION split_distinct AS 'split_distinct';
-- test
SELECT split_distinct('[["C8:13:8B:81:32:C1"],["C8:13:8B:81:32:C1"],["C8:13:8B:81:32:C1"],["C8:13:8B:81:32:C2"]]', ',');
```
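Given the `evaluate` logic above, the three duplicate fragments should collapse and the query should return `[["C8:13:8B:81:32:C1"],["C8:13:8B:81:32:C2"]]`.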
4. Once the test succeeds, register a permanent function:
```shell
# create the HDFS directory
hdfs dfs -mkdir -p /out/hive_udf_jars
# upload the jar
hdfs dfs -put HiveUDF-1.0-SNAPSHOT.jar /out/hive_udf_jars/
```

```sql
-- register the function
CREATE FUNCTION split_distinct AS 'split_distinct'
USING JAR 'hdfs://xbsafe102/out/hive_udf_jars/HiveUDF-1.0-SNAPSHOT.jar';
-- drop the function
DROP FUNCTION split_distinct;
```

**Note:** after updating the jar, the Hive or spark-sql client must be restarted!
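To confirm the registration took effect, the standard Hive statements below can be used as a quick verification sketch. Keep in mind that a permanent function is tied to the database it was created in, so from another database it is referenced as `db_name.split_distinct`:

```sql
SHOW FUNCTIONS;                    -- split_distinct should appear in the list
DESCRIBE FUNCTION split_distinct;  -- shows the class that backs the function
```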