Problem statement: Sales data is stored in Sales.csv. Read the data and display the date column in a readable format by writing a UDF (user-defined function) in Pig.
The script is saved as convdate.pig on the Desktop.
_____________________________
convdate.pig
-- Register the JAR that contains the ConvDate UDF so the script can call it by name.
REGISTER /home/hadoop/Desktop/ConvDate.jar;

-- Load the comma-separated sales records; dt1 carries the sale date as text (e.g. 2012-01-09).
salesdt = LOAD '/home/hadoop/Desktop/Sales.csv' USING PigStorage(',')
          AS (cust_id:int, prod_num:int, quan_sold:int, dt1:chararray, sales_id:int);

-- Replace the raw date with the readable text produced by ConvDate. The generated
-- column is given an alias so later statements can refer to it by name instead of
-- by position.
dateexp = FOREACH salesdt GENERATE cust_id, prod_num, quan_sold, ConvDate(dt1) AS dateinfull, sales_id;

-- Write the converted records to the output location.
STORE dateexp INTO '/home/hadoop/Desktop/convdtout';
_______________________________________________
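Create a Java project (and, if you like, a package) and create a class named ConvDate as shown below. If you do place the class in a package, call it from the Pig script by its fully qualified name.
Add the JAR files that ConvDate.java depends on (here, the Pig JAR) to the build path.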
-------
import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.PigWarning;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.WrappedIOException;
/**
 * Pig UDF that describes a {@code yyyy-MM-dd} date string as a "week of month" label.
 *
 * <p>For the input tuple {@code (2012-01-09)} the result is
 * {@code (2012-01-09):Second week of :January2012:} — the rendered input tuple, the
 * week label with the month name, and the year, separated by colons.
 *
 * <p>Weeks are fixed seven-day buckets counted from the first of the month:
 * days 1-7 are the first week, 8-14 the second, 15-21 the third, and 22-31 the fourth.
 */
public class ConvDate extends EvalFunc<String> {

    /** Month names indexed by {@code month - 1}. */
    private static final String[] MONTH_NAMES = {
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };

    /** Largest day-of-month value accepted as valid, regardless of the month. */
    private static final int MAX_DAY_OF_MONTH = 31;

    /**
     * Converts the first field of {@code input} into the readable date text.
     *
     * @param input tuple whose first field is a date written as {@code yyyy-MM-dd}
     * @return the readable text, or {@code null} (after logging a UDF warning) when the
     *         tuple is null or empty, or its first field is null
     * @throws IOException if the first field cannot be read, or is not made of three
     *         {@code -}-separated parts with numeric month and day
     */
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() <= 0) {
            warn("invalid number of arguments to ConvDate", PigWarning.UDF_WARNING_1);
            return null;
        }

        // A missing date is a data problem in one record, not a reason to fail the job.
        Object field = input.get(0);
        if (field == null) {
            warn("null date passed to ConvDate", PigWarning.UDF_WARNING_1);
            return null;
        }

        try {
            // toString() rather than a (String) cast, so non-String field types work too.
            String[] parts = field.toString().split("-");
            int month = Integer.parseInt(parts[1]);
            int day = Integer.parseInt(parts[2]);

            // The prefix is the rendered tuple, e.g. "(2012-01-09)", not the bare field.
            return input.toString() + ":" + weekLabel(day, monthName(month)) + parts[0] + ":";
        } catch (RuntimeException e) {
            // Malformed dates surface as NumberFormatException or
            // ArrayIndexOutOfBoundsException; report them as an I/O failure with the cause.
            throw new IOException("Caught exception processing input row ", e);
        }
    }

    /** Returns the English name of {@code month} (1-12), or "Invalid month" otherwise. */
    private static String monthName(int month) {
        if (month < 1 || month > MONTH_NAMES.length) {
            return "Invalid month";
        }
        return MONTH_NAMES[month - 1];
    }

    /**
     * Returns the week-of-month label for {@code day} followed by {@code monthName}, or
     * "Invalid day specified" when {@code day} is outside 1-31.
     *
     * <p>The uneven spacing around the colons is kept as-is so existing output does not change.
     */
    private static String weekLabel(int day, String monthName) {
        if (day < 1 || day > MAX_DAY_OF_MONTH) {
            return "Invalid day specified";
        }
        if (day <= 7) {
            return "First week of:" + monthName;
        }
        if (day <= 14) {
            return "Second week of :" + monthName;
        }
        if (day <= 21) {
            return "Third week of: " + monthName;
        }
        return "Fourth week of: " + monthName;
    }
}
Export ConvDate.jar to the desired location (here, the Desktop).
To check the contents of the JAR, run:
>jar tf ConvDate.jar
-----------------------------------------------------------------------------------------------------------------------------
Go to the $PIG_HOME directory,
change to the bin directory,
and execute this command (adjust the script path if convdate.pig is stored elsewhere, e.g. on the Desktop):
$PIG_HOME/bin>./pig -x local $PIG_HOME/convdate.pig
___________________________________________________________________
Sales.csv file
587,87634,1,2012-01-09,34823
922,88734,1,2012-01-09,34824
433,99207,2,2012-01-09,34825
64,98243,1,2012-01-09,34826
922,77623,3,2012-01-09,34827
922,88734,24,2012-01-09,34828
331,282009,2,2012-01-09,34829
482,87634,1,2012-01-09,34830
3221,92387,15,2012-01-09,34831
452,282299,2,2012-01-09,34832
64,77624,17,2012-01-09,34833
895,88211,31,2012-01-09,34834
1993,92387,2,2012-01-09,34835
720,282009,2,2012-01-09,34836
830,282299,1,2012-01-09,34837
176,77623,1,2012-01-09,34838
128,88734,4,2012-01-09,34839
97,99202,1,2012-01-09,34840
322,99202,6,2012-01-09,34841
7,98243,1,2012-01-24,34842
11,77623,2,2012-01-24,34843
482,88734,1,2012-01-24,34844
3221,282009,1,2012-01-24,34845
452,99202,23,2012-01-24,34846
64,92387,4,2012-01-24,34847
895,282009,7,2012-01-24,34848
1993,92387,3,2012-01-24,34849
720,99207,1,2012-01-24,34850
102,98243,1,2012-01-24,34851
227,77623,1,2012-01-24,34852
323,99207,2,2012-01-24,34853
47,92387,1,2012-01-24,34854
431,87659,1,2012-01-24,34855
24,45641,5,2012-01-24,34856
19,88734,7,2012-01-24,34857
773,45641,4,2012-01-24,34858
647,45628,2,2012-01-24,34859
773,45628,10,2012-01-24,34860
_________________________________________________
Pig output:
587 87634 1 (2012-01-09):Second week of :January2012: 34823
922 88734 1 (2012-01-09):Second week of :January2012: 34824
433 99207 2 (2012-01-09):Second week of :January2012: 34825
64 98243 1 (2012-01-09):Second week of :January2012: 34826
922 77623 3 (2012-01-09):Second week of :January2012: 34827
922 88734 24 (2012-01-09):Second week of :January2012: 34828
331 282009 2 (2012-01-09):Second week of :January2012: 34829
482 87634 1 (2012-01-09):Second week of :January2012: 34830
3221 92387 15 (2012-01-09):Second week of :January2012: 34831
452 282299 2 (2012-01-09):Second week of :January2012: 34832
64 77624 17 (2012-01-09):Second week of :January2012: 34833
895 88211 31 (2012-01-09):Second week of :January2012: 34834
1993 92387 2 (2012-01-09):Second week of :January2012: 34835
720 282009 2 (2012-01-09):Second week of :January2012: 34836
830 282299 1 (2012-01-09):Second week of :January2012: 34837
176 77623 1 (2012-01-09):Second week of :January2012: 34838
128 88734 4 (2012-01-09):Second week of :January2012: 34839
97 99202 1 (2012-01-09):Second week of :January2012: 34840
322 99202 6 (2012-01-09):Second week of :January2012: 34841
7 98243 1 (2012-01-24):Fourth week of: January2012: 34842
11 77623 2 (2012-01-24):Fourth week of: January2012: 34843
482 88734 1 (2012-01-24):Fourth week of: January2012: 34844
3221 282009 1 (2012-01-24):Fourth week of: January2012: 34845
452 99202 23 (2012-01-24):Fourth week of: January2012: 34846
64 92387 4 (2012-01-24):Fourth week of: January2012: 34847
895 282009 7 (2012-01-24):Fourth week of: January2012: 34848
1993 92387 3 (2012-01-24):Fourth week of: January2012: 34849
720 99207 1 (2012-01-24):Fourth week of: January2012: 34850
102 98243 1 (2012-01-24):Fourth week of: January2012: 34851
227 77623 1 (2012-01-24):Fourth week of: January2012: 34852
323 99207 2 (2012-01-24):Fourth week of: January2012: 34853
47 92387 1 (2012-01-24):Fourth week of: January2012: 34854
431 87659 1 (2012-01-24):Fourth week of: January2012: 34855
24 45641 5 (2012-01-24):Fourth week of: January2012: 34856
19 88734 7 (2012-01-24):Fourth week of: January2012: 34857
773 45641 4 (2012-01-24):Fourth week of: January2012: 34858
647 45628 2 (2012-01-24):Fourth week of: January2012: 34859
773 45628 10 (2012-01-24):Fourth week of: January2012: 34860
No comments:
Post a Comment