#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Resolve the directory containing this script, derive the HiBench root
# from it, and load the shared benchmark configuration helpers
# (enter_bench, show_bannar, rmr-hdfs, run-hadoop-job, timestamp, ...).
current_dir=$(dirname "$0")
# '&&' (not ';') so a failed cd cannot silently make current_dir the
# caller's working directory.
current_dir=$(cd "$current_dir" && pwd)
root_dir=${current_dir}/../../../../..
workload_config=${root_dir}/conf/workloads/streaming/fixwindow.conf
. "${root_dir}/bin/functions/load-bench-config.sh"


# --- Seed dataset #1: web pages + user visits via the Hive data generator ---
enter_bench HadoopPrepareDatafile1 "${workload_config}" "${current_dir}"
show_bannar start

# Scale of the generated seed data.
PAGES=120000
USERVISITS=1000000

# Start from a clean HDFS directory; '|| true' because the directory
# does not exist on a first run and that is not an error.
rmr-hdfs "${STREAMING_DATA_DIR}" || true
echo -e "${On_Blue}Pages:${PAGES}, USERVISITS:${USERVISITS}${Color_Off}"

# Build the generator arguments as an array so every option remains a
# separate, correctly quoted word (no IFS word-splitting surprises).
OPTION=(-t hive
        -b "${STREAMING_DATA_DIR}"
        -n "${STREAMING_DATA1_NAME}"
        -m "${NUM_MAPS}"
        -r "${NUM_REDS}"
        -p "${PAGES}"
        -v "${USERVISITS}")

START_TIME=$(timestamp)
run-hadoop-job "${DATATOOLS}" HiBench.DataGen "${OPTION[@]}"
END_TIME=$(timestamp)
SIZE="0"   # no meaningful input size for a pure generator job

show_bannar finish
leave_bench


# --- Seed dataset #2: numeric samples via the Mahout KMeans generator ---
enter_bench HadoopPrepareDatafile2 "${workload_config}" "${current_dir}"
show_bannar start

# Start from a clean HDFS directory; ignore "not found" on a first run.
rmr-hdfs "${STREAMING_DATA2_SAMPLE_DIR}" || true

# Generator arguments as an array: each flag/value stays one word.
OPTION=(-sampleDir "${STREAMING_DATA2_SAMPLE_DIR}"
        -clusterDir "${STREAMING_DATA2_CLUSTER_DIR}"
        -numClusters 5
        -numSamples 3000000
        -samplesPerFile 600000
        -sampleDimension 20
        -textOutput)

# BUGFIX: START_TIME was never reset for this second job, so the elapsed
# time reported for HadoopPrepareDatafile2 was measured from the start
# of the *previous* job. Capture a fresh start timestamp here.
START_TIME=$(timestamp)
run-hadoop-job "${DATATOOLS}" org.apache.mahout.clustering.kmeans.GenKMeansDataset \
    -D hadoop.job.history.user.location="${STREAMING_DATA2_SAMPLE_DIR}" "${OPTION[@]}"
END_TIME=$(timestamp)
SIZE="0"   # consistent with dataset #1: no meaningful input size

show_bannar finish
leave_bench

0 commit comments