A User Defined Function In Pig

 Problem statement: Sales data is stored in Sales.csv. Read the data and display the date column in a readable format by writing a UDF (user-defined function) in Pig.


Create the convdate.pig file on the Desktop.
_____________________________
convdate.pig

-- Make the ConvDate UDF available to this script.
REGISTER /home/hadoop/Desktop/ConvDate.jar;

-- Read the comma-separated sales records; the sale date (dt1) is kept as text.
sales = LOAD '/home/hadoop/Desktop/Sales.csv'
        USING PigStorage(',')
        AS (cust_id:int, prod_num:int, quan_sold:int, dt1:chararray, sales_id:int);

-- Replace the raw date with ConvDate's description; all other columns pass through.
converted = FOREACH sales
            GENERATE cust_id, prod_num, quan_sold, ConvDate(dt1), sales_id;

-- Write the converted records to the convdtout directory.
STORE converted INTO '/home/hadoop/Desktop/convdtout';

_______________________________________________
Create a Java project (and package) and add a class named ConvDate, as shown below.
Add the JAR files that ConvDate.java depends on (for example, the Pig JAR) to the project's build path.
-------
import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.PigWarning;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.WrappedIOException;


public class ConvDate extends EvalFunc<String>{

    @SuppressWarnings("deprecation")
    @Override
    public String exec(Tuple input) throws IOException {
       
        if (input == null || input.size() <= 0) {
            warn("invalid number of arguments to DEIDENTIFY", PigWarning.UDF_WARNING_1);
            //warn(null, null);
            return null;
        }
            try{
                String date= null;
                String inStr =(String) input.get(0);
                String[] tempDate =inStr.toString().split("-");
                int month =Integer.parseInt(tempDate[1].toString());
                String monthString;
                switch(month)
                {
                case 1:  monthString = "January";
                break;
                case 2:  monthString = "February";
                break;
                case 3:  monthString = "March";
                break;
                case 4:  monthString = "April";
                break;
                case 5:  monthString = "May";
                break;
                case 6:  monthString = "June";
                break;
                case 7:  monthString = "July";
                break;
                case 8:  monthString = "August";
                break;
                case 9:  monthString = "September";
                break;
                case 10: monthString = "October";
                break;
                case 11: monthString = "November";
                break;
                case 12: monthString = "December";
                break;
                default: monthString = "Invalid month";
                break;
                }
           
                int day =Integer.parseInt(tempDate[2].toString());
                String dayString =null;
                int week = 0;
                if (day >0 & day<=7){week =1;}
                if (day >7 & day<=14){week =2;}
                if (day >14 & day<=21){week =3;}
                if (day >21){week =4;}
                switch(week)
                {
                case 1:  dayString = "First week of:"+ monthString.toString();
                break;
                case 2:  dayString = "Second week of :"+ monthString.toString();
                break;
                case 3:  dayString ="Third week of:  "+ monthString.toString();
                break;
                case 4:  dayString = "Fourth week of: "+ monthString.toString();
                break;
                default: dayString = "Invalid day specified";
                break;
                }
                    date =input.toString()+":"+ dayString.toString()+ tempDate[0].toString()+":";       
                    return date;
            }catch(Exception e)
            {
                throw WrappedIOException.wrap(
                        "Caught exception processing input row ", e);
            }
        }

}

Export ConvDate.jar to the desired location (here, the Desktop).
Run "jar tf ConvDate.jar" to list its contents and check that the class was packaged.
-----------------------------------------------------------------------------------------------------------------------------

Go to the $PIG_HOME directory
and change to its bin directory.

Run this command:
$PIG_HOME/bin>./pig -x local /home/hadoop/Desktop/convdate.pig
___________________________________________________________________
Sales.csv file
587,87634,1,2012-01-09,34823
922,88734,1,2012-01-09,34824
433,99207,2,2012-01-09,34825
64,98243,1,2012-01-09,34826
922,77623,3,2012-01-09,34827
922,88734,24,2012-01-09,34828
331,282009,2,2012-01-09,34829
482,87634,1,2012-01-09,34830
3221,92387,15,2012-01-09,34831
452,282299,2,2012-01-09,34832
64,77624,17,2012-01-09,34833
895,88211,31,2012-01-09,34834
1993,92387,2,2012-01-09,34835
720,282009,2,2012-01-09,34836
830,282299,1,2012-01-09,34837
176,77623,1,2012-01-09,34838
128,88734,4,2012-01-09,34839
97,99202,1,2012-01-09,34840
322,99202,6,2012-01-09,34841
7,98243,1,2012-01-24,34842
11,77623,2,2012-01-24,34843
482,88734,1,2012-01-24,34844
3221,282009,1,2012-01-24,34845
452,99202,23,2012-01-24,34846
64,92387,4,2012-01-24,34847
895,282009,7,2012-01-24,34848
1993,92387,3,2012-01-24,34849
720,99207,1,2012-01-24,34850
102,98243,1,2012-01-24,34851
227,77623,1,2012-01-24,34852
323,99207,2,2012-01-24,34853
47,92387,1,2012-01-24,34854
431,87659,1,2012-01-24,34855
24,45641,5,2012-01-24,34856
19,88734,7,2012-01-24,34857
773,45641,4,2012-01-24,34858
647,45628,2,2012-01-24,34859
773,45628,10,2012-01-24,34860
_________________________________________________
Pig output (stored in /home/hadoop/Desktop/convdtout):
587    87634    1    (2012-01-09):Second week of :January2012:    34823
922    88734    1    (2012-01-09):Second week of :January2012:    34824
433    99207    2    (2012-01-09):Second week of :January2012:    34825
64    98243    1    (2012-01-09):Second week of :January2012:    34826
922    77623    3    (2012-01-09):Second week of :January2012:    34827
922    88734    24    (2012-01-09):Second week of :January2012:    34828
331    282009    2    (2012-01-09):Second week of :January2012:    34829
482    87634    1    (2012-01-09):Second week of :January2012:    34830
3221    92387    15    (2012-01-09):Second week of :January2012:    34831
452    282299    2    (2012-01-09):Second week of :January2012:    34832
64    77624    17    (2012-01-09):Second week of :January2012:    34833
895    88211    31    (2012-01-09):Second week of :January2012:    34834
1993    92387    2    (2012-01-09):Second week of :January2012:    34835
720    282009    2    (2012-01-09):Second week of :January2012:    34836
830    282299    1    (2012-01-09):Second week of :January2012:    34837
176    77623    1    (2012-01-09):Second week of :January2012:    34838
128    88734    4    (2012-01-09):Second week of :January2012:    34839
97    99202    1    (2012-01-09):Second week of :January2012:    34840
322    99202    6    (2012-01-09):Second week of :January2012:    34841
7    98243    1    (2012-01-24):Fourth week of: January2012:    34842
11    77623    2    (2012-01-24):Fourth week of: January2012:    34843
482    88734    1    (2012-01-24):Fourth week of: January2012:    34844
3221    282009    1    (2012-01-24):Fourth week of: January2012:    34845
452    99202    23    (2012-01-24):Fourth week of: January2012:    34846
64    92387    4    (2012-01-24):Fourth week of: January2012:    34847
895    282009    7    (2012-01-24):Fourth week of: January2012:    34848
1993    92387    3    (2012-01-24):Fourth week of: January2012:    34849
720    99207    1    (2012-01-24):Fourth week of: January2012:    34850
102    98243    1    (2012-01-24):Fourth week of: January2012:    34851
227    77623    1    (2012-01-24):Fourth week of: January2012:    34852
323    99207    2    (2012-01-24):Fourth week of: January2012:    34853
47    92387    1    (2012-01-24):Fourth week of: January2012:    34854
431    87659    1    (2012-01-24):Fourth week of: January2012:    34855
24    45641    5    (2012-01-24):Fourth week of: January2012:    34856
19    88734    7    (2012-01-24):Fourth week of: January2012:    34857
773    45641    4    (2012-01-24):Fourth week of: January2012:    34858
647    45628    2    (2012-01-24):Fourth week of: January2012:    34859
773    45628    10    (2012-01-24):Fourth week of: January2012:    34860

No comments:

Post a Comment