Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit 9b4b7e6

Browse files
nareshgundla authored and carsonwang committed
Support Multiple runs for scan/join/aggregation workloads (#290)
* To support multiple runs for scan/join/aggregation workloads * Corrected throughput per node * Reverted the throughput-per-node commit changes
1 parent 43f75af commit 9b4b7e6

1 file changed

Lines changed: 7 additions & 1 deletion

File tree

bin/functions/workload-functions.sh

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -420,8 +420,9 @@ set ${MAP_CONFIG_NAME}=$NUM_MAPS;
420420
set ${REDUCER_CONFIG_NAME}=$NUM_REDS;
421421
set hive.stats.autogather=false;
422422
${HIVE_SQL_COMPRESS_OPTS}
423-
423+
DROP TABLE IF EXISTS uservisits;
424424
CREATE EXTERNAL TABLE uservisits (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
425+
DROP TABLE IF EXISTS uservisits_aggre;
425426
CREATE EXTERNAL TABLE uservisits_aggre ( sourceIP STRING, sumAdRevenue DOUBLE) STORED AS SEQUENCEFILE LOCATION '$OUTPUT_HDFS/uservisits_aggre';
426427
INSERT OVERWRITE TABLE uservisits_aggre SELECT sourceIP, SUM(adRevenue) FROM uservisits GROUP BY sourceIP;
427428
EOF
@@ -442,8 +443,11 @@ set hive.stats.autogather=false;
442443
443444
${HIVE_SQL_COMPRESS_OPTS}
444445
446+
DROP TABLE IF EXISTS rankings;
445447
CREATE EXTERNAL TABLE rankings (pageURL STRING, pageRank INT, avgDuration INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/rankings';
448+
DROP TABLE IF EXISTS uservisits_copy;
446449
CREATE EXTERNAL TABLE uservisits_copy (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
450+
DROP TABLE IF EXISTS rankings_uservisits_join;
447451
CREATE EXTERNAL TABLE rankings_uservisits_join ( sourceIP STRING, avgPageRank DOUBLE, totalRevenue DOUBLE) STORED AS SEQUENCEFILE LOCATION '$OUTPUT_HDFS/rankings_uservisits_join';
448452
INSERT OVERWRITE TABLE rankings_uservisits_join SELECT sourceIP, avg(pageRank), sum(adRevenue) as totalRevenue FROM rankings R JOIN (SELECT sourceIP, destURL, adRevenue FROM uservisits_copy UV WHERE (datediff(UV.visitDate, '1999-01-01')>=0 AND datediff(UV.visitDate, '2000-01-01')<=0)) NUV ON (R.pageURL = NUV.destURL) group by sourceIP order by totalRevenue DESC;
449453
EOF
@@ -464,7 +468,9 @@ set hive.stats.autogather=false;
464468
465469
${HIVE_SQL_COMPRESS_OPTS}
466470
471+
DROP TABLE IF EXISTS uservisits;
467472
CREATE EXTERNAL TABLE uservisits (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
473+
DROP TABLE IF EXISTS uservisits_copy;
468474
CREATE EXTERNAL TABLE uservisits_copy (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$OUTPUT_HDFS/uservisits_copy';
469475
INSERT OVERWRITE TABLE uservisits_copy SELECT * FROM uservisits;
470476
EOF

0 commit comments

Comments
 (0)