Cogroup in pig with nested foreach (hands on lab)
---------------------------------
-- Load the sales, customer and product records from comma-separated files.
salrec = load '/home/horton/Desktop/Sales.csv' using PigStorage(',')
AS
(custid:long, prod_id:long, qty_pur:int, pur_date:datetime, sale_id:long);
custrec = load '/home/horton/Desktop/Customer.csv' using PigStorage(',')
AS
(fn:chararray, ln:chararray, status:chararray, ph:chararray, custid1:long, add:chararray);
-- NOTE: pd_rec is loaded but not referenced by any later statement in this script.
pd_rec = load '/home/horton/Desktop/Product.csv' using PigStorage(',')
AS
(pdname:chararray, pddesc:chararray, pdcat:chararray, pdgty:long, pdid:long, pkwith:chararray);
-- Cogroup sales and customers on customer id. Each output tuple is
-- (group, {salrec tuples}, {custrec tuples}); a bag is empty when that
-- relation has no rows for the key. Despite its name, this alias combines
-- sales with customer records, not product records.
slrec_pdrec = cogroup salrec by custid, custrec by custid1;
-- Nested foreach: within each group, cross the sales bag with the customer
-- bag so every sale is paired with the matching customer tuple.
ns_rec = foreach slrec_pdrec { crossed = cross salrec, custrec;
generate crossed;
}
-- Write the crossed bags out using the default (tab) delimiter.
STORE ns_rec into '/home/horton/Desktop/pigout2' using PigStorage();
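Sample output of the cogroup relation (slrec_pdrec), shown as: group key, bag of sales records, bag of customer records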
7 {(7,98243,1,2012-01-24T00:00:00.000+05:30,34842)} {(Allen,Jaskobec,A,416-299-0202,7,Bigcity|12345)}
11 {(11,77623,2,2012-01-24T00:00:00.000+05:30,34843)} {(John,Jarkin,A,416-622-0991,11,Bigcity|12345)}
12 {} {(Jillian,Panelo,A,905-498-8872,12,Mediumcity|67890)}
19 {(19,88734,7,2012-01-24T00:00:00.000+05:30,34857)} {(Hubert,Banhent,A,416-526-8888,19,Bigcity|12345)}
24 {(24,45641,5,2012-01-24T00:00:00.000+05:30,34856)} {(Roger,Getnet,I,416-309-9982,24,Bigcity|12345)}
37 {} {(Franklin,Drill,A,647-309-2331,37,Smallcity|98765)}
43 {} {(Michael,Junielle,A,416-209-9987,43,Bigcity|12345)}
47 {(47,92387,1,2012-01-24T00:00:00.000+05:30,34854)} {(Cliff,Mello,A,416-210-9997,47,Bigcity|12345)}
54 {} {(Jack,Merdec,A,905-216-0989,54,Mediumcity|67890)}
55 {} {(John,Merdec,I,416-922-2331,55,Bigcity|12345)}
64 {(64,98243,1,2012-01-09T00:00:00.000+05:30,34826),(64,77624,17,2012-01-09T00:00:00.000+05:30,34833),(64,92387,4,2012-01-24T00:00:00.000+05:30,34847)} {(Mello,Reynolds,A,905-245-4431,64,Mediumcity|67890)}
85 {} {(Elizabeth,Metzer,A,416-322-9001,85,Bigcity|12345)}
No comments:
Post a Comment