Cogroup in pig with nested foreach

 Cogroup in Pig with nested foreach (hands-on lab)

---------------------------------
-- Hands-on lab: COGROUP two relations on customer id, then use a nested
-- FOREACH to CROSS the two bags inside each group.

-- Sales records: one row per sale, keyed by the purchasing customer's id.
salrec = LOAD '/home/horton/Desktop/Sales.csv' USING PigStorage(',')
    AS (custid:long, prod_id:long, qty_pur:int, pur_date:datetime, sale_id:long);

-- Customer records: custid1 is the field matched against salrec.custid below.
custrec = LOAD '/home/horton/Desktop/Customer.csv' USING PigStorage(',')
    AS (fn:chararray, ln:chararray, status:chararray, ph:chararray, custid1:long, add:chararray);

-- Product records. NOTE(review): pd_rec is loaded but not referenced by any
-- later statement in this script; kept in case other lab steps rely on it.
pd_rec = LOAD '/home/horton/Desktop/Product.csv' USING PigStorage(',')
    AS (pdname:chararray, pddesc:chararray, pdcat:chararray, pdgty:long, pdid:long, pkwith:chararray);

-- COGROUP yields one tuple per distinct customer id:
--   (group, {bag of salrec tuples}, {bag of custrec tuples})
-- Either bag may be empty when the id appears in only one input.
-- NOTE(review): despite its name, slrec_pdrec combines sales and *customer*
-- records (not product records); the alias is left unchanged.
slrec_pdrec = COGROUP salrec BY custid, custrec BY custid1;

-- Nested FOREACH: within each group, pair every sales tuple with every
-- customer tuple and emit the resulting bag. A group whose salrec bag is
-- empty is expected to produce an empty crossed bag.
ns_rec = FOREACH slrec_pdrec {
    crossed = CROSS salrec, custrec;
    GENERATE crossed;
}

-- Fix: the output path literal was missing its closing quote, which made the
-- STORE statement unparseable. PigStorage() with no argument writes
-- tab-delimited fields.
STORE ns_rec INTO '/home/horton/Desktop/pigout2' USING PigStorage();

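Sample output (one row per customer id: the group key, the bag of matching sales records, and the bag of matching customer records, i.e. the layout of the cogrouped relation):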
7    {(7,98243,1,2012-01-24T00:00:00.000+05:30,34842)}    {(Allen,Jaskobec,A,416-299-0202,7,Bigcity|12345)}
11    {(11,77623,2,2012-01-24T00:00:00.000+05:30,34843)}    {(John,Jarkin,A,416-622-0991,11,Bigcity|12345)}
12    {}    {(Jillian,Panelo,A,905-498-8872,12,Mediumcity|67890)}
19    {(19,88734,7,2012-01-24T00:00:00.000+05:30,34857)}    {(Hubert,Banhent,A,416-526-8888,19,Bigcity|12345)}
24    {(24,45641,5,2012-01-24T00:00:00.000+05:30,34856)}    {(Roger,Getnet,I,416-309-9982,24,Bigcity|12345)}
37    {}    {(Franklin,Drill,A,647-309-2331,37,Smallcity|98765)}
43    {}    {(Michael,Junielle,A,416-209-9987,43,Bigcity|12345)}
47    {(47,92387,1,2012-01-24T00:00:00.000+05:30,34854)}    {(Cliff,Mello,A,416-210-9997,47,Bigcity|12345)}
54    {}    {(Jack,Merdec,A,905-216-0989,54,Mediumcity|67890)}
55    {}    {(John,Merdec,I,416-922-2331,55,Bigcity|12345)}
64    {(64,98243,1,2012-01-09T00:00:00.000+05:30,34826),(64,77624,17,2012-01-09T00:00:00.000+05:30,34833),(64,92387,4,2012-01-24T00:00:00.000+05:30,34847)}    {(Mello,Reynolds,A,905-245-4431,64,Mediumcity|67890)}
85    {}    {(Elizabeth,Metzer,A,416-322-9001,85,Bigcity|12345)}

No comments:

Post a Comment