A Hive UDF example (hands-on example)
DateExpand.java
-----------
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that annotates a date string of the form "yyyy-MM-dd" with the
 * month name and an approximate week-of-month label, e.g.
 * "2012-01-09" becomes "2012-01-09:Second week of :January 2012".
 */
public class DateExpand extends UDF{

// Month names indexed 1-12; slot 0 holds the fallback text so that an
// out-of-range month reproduces the original switch's default branch.
private static final String[] MONTHS = {
    "Invalid month", "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
};

/**
 * Expands a "yyyy-MM-dd" date into "<input>:<week description> <year>".
 *
 * @param t the input date text; null or empty input yields null
 * @return the expanded text, or the exception's string form if the input
 *         cannot be parsed (original error-reporting behaviour preserved)
 */
public Text evaluate(Text t){
    if (t == null || t.toString().length() <= 0) {
        return null;
    }
    try{
        String[] parts = t.toString().split("-");

        // Month lookup replaces the original 12-case switch.
        int month = Integer.parseInt(parts[1]);
        String monthString = (month >= 1 && month <= 12) ? MONTHS[month] : MONTHS[0];

        // Week-of-month: 1-7 -> first, 8-14 -> second, 15-21 -> third,
        // 22+ -> fourth, anything <= 0 -> invalid. Guard chain replaces the
        // original bitwise-& conditions and second switch. Output strings
        // (including their uneven spacing) are kept byte-for-byte.
        int day = Integer.parseInt(parts[2]);
        String dayString;
        if (day > 21) {
            dayString = "Fourth week of: " + monthString;
        } else if (day > 14) {
            dayString = "Third week of: " + monthString;
        } else if (day > 7) {
            dayString = "Second week of :" + monthString;
        } else if (day > 0) {
            dayString = "First week of:" + monthString;
        } else {
            dayString = "Invalid day specified";
        }

        return new Text(t.toString() + ":" + dayString + " " + parts[0]);
    }catch(Exception e)
    {
        // Original behaviour: surface the parse error as the cell value
        // rather than failing the query or returning null.
        return new Text(e.toString());
    }
}
}
________
1) Export the class (with its library dependencies) as DateExpand.jar to a desired location (for example, the Desktop)
2) go to $HIVE_HOME/bin
execute
$HIVE_HOME/bin>hive(hive shell)
3)in hive shell
hive>show databases;
4)hive>use default;
5)hive>show tables;(locate where sales table is present)
_______
6)create a DateExpand.q script
At $HIVE_HOME>gedit DateExpand.q (add the commands below, then save the file on the Desktop)
add jar /home/hadoop/Desktop/DateExpand.jar;
use default;
CREATE TEMPORARY FUNCTION dateexpand AS 'DateExpand';
insert overwrite directory '/user/hadoop/hivesalesout' select cust_id,prod_no,qty,dateexpand(datestring),sales_id from sales;
______
7) execute
go to $HIVE_HOME/bin>./hive -f /home/hadoop/Desktop/DateExpand.q
______
Data file of sales
587 87634 1 2012-01-09 34823
922 88734 1 2012-01-09 34824
433 99207 2 2012-01-09 34825
64 98243 1 2012-01-09 34826
922 77623 3 2012-01-09 34827
922 88734 24 2012-01-09 34828
331 282009 2 2012-01-09 34829
482 87634 1 2012-01-09 34830
3221 92387 15 2012-01-09 34831
452 282299 2 2012-01-09 34832
64 77624 17 2012-01-09 34833
895 88211 31 2012-01-09 34834
1993 92387 2 2012-01-09 34835
720 282009 2 2012-01-09 34836
830 282299 1 2012-01-09 34837
176 77623 1 2012-01-09 34838
128 88734 4 2012-01-09 34839
97 99202 1 2012-01-09 34840
322 99202 6 2012-01-09 34841
7 98243 1 2012-01-24 34842
11 77623 2 2012-01-24 34843
482 88734 1 2012-01-24 34844
3221 282009 1 2012-01-24 34845
452 99202 23 2012-01-24 34846
64 92387 4 2012-01-24 34847
895 282009 7 2012-01-24 34848
1993 92387 3 2012-01-24 34849
720 99207 1 2012-01-24 34850
102 98243 1 2012-01-24 34851
227 77623 1 2012-01-24 34852
323 99207 2 2012-01-24 34853
47 92387 1 2012-01-24 34854
431 87659 1 2012-01-24 34855
24 45641 5 2012-01-24 34856
19 88734 7 2012-01-24 34857
773 45641 4 2012-01-24 34858
647 45628 2 2012-01-24 34859
773 45628 10 2012-01-24 34860
Time taken: 0.111 seconds, Fetched: 38 row(s)
_____________________________
check the output in the directory of hdfs :/user/hadoop/hivesalesout
No comments:
Post a Comment