-
Notifications
You must be signed in to change notification settings - Fork 80
Expand file tree
/
Copy pathmetacode_derived_features_buildmodel.txt
More file actions
77 lines (70 loc) · 1.91 KB
/
metacode_derived_features_buildmodel.txt
File metadata and controls
77 lines (70 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/**
metacode_derived_features_buildmodel.txt
Script for building a machine learning model from derived features
DolphinDB Inc.
DolphinDB server version: 2.00.6 2022.05.09
Last modification time: 2022.08.31
*/
/**
Attention:
1. The variable `result` comes from the feature engineering calculation and must already exist in the session
2. The developer needs to install the XGBoost plugin in advance
3. There is one place in the script that needs to be modified according to the environment (modelSavePath)
*/
//import Xgboost plugin
// Best-effort plugin load: any exception raised by loadPlugin is printed
// and the script continues instead of aborting.
try {
    loadPlugin(getHomeDir() + "/plugins/xgboost/PluginXgboost.txt")
} catch(ex) {
    print(ex)
}
/**
part1: Configuration (no data is loaded here; the table `result` must already exist in the session)
modified location 1: modelSavePath
*/
// File path used later in this script by xgboost::saveModel (write) and
// xgboost::loadModel (read). Environment-specific: change it to a path that
// is valid on your server.
modelSavePath = "/hdd/hdd9/machineLearning/model/001.model"
/**
part2: data preprocessing
Attention: the variable result is from features engineering calculation
*/
// Keep only the rows for which isValid is true in every column of `result`.
result = result[rowAnd(each(isValid, values(result)))]
// The prediction target is the LogReturn0_realizedVolatility column of the filtered table.
label = result[`LogReturn0_realizedVolatility]
// Build the feature table on a copy so that `result` itself is left unchanged.
result_input = copy(result)
// Add SecurityID converted to INT as a feature column ...
update!(result_input, `SecurityID_int, int(result[`SecurityID]))
// ... then remove the original identifier, the timestamp and the target column.
dropColumns!(result_input, `SecurityID`DateTime`LogReturn0_realizedVolatility)
/**
part3: split data set
*/
/**
Split x by position into a leading training part and a trailing test part.
x         : object supporting size() and range indexing (the script passes a table and a vector)
testRatio : fraction of the elements that goes into the test part
Returns two values: x[0:k] and x[k:size(x)], where k = size(x) * (1 - testRatio) cast to INT.
The data is not shuffled; the original order is preserved.
*/
def trainTestSplit(x, testRatio) {
    total = size(x)
    // Index of the first test element, i.e. the number of training elements.
    trainCount = cast(total * (1 - testRatio), INT)
    return x[0:trainCount], x[trainCount:total]
}
// Split features and labels with the same ratio (0.3) so that the two
// training parts and the two test parts cover the same row positions.
// trainTestSplit slices by position: the leading part is used for training,
// the trailing part for testing.
Train_x, Test_x = trainTestSplit(result_input, 0.3)
Train_y, Test_y = trainTestSplit(label, 0.3)
/**
part4: set parameters and train, save model
*/
// Parameter dictionary passed as the third argument of xgboost::train.
// NOTE(review): key meanings follow the XGBoost parameter documentation - confirm
// against the plugin version in use.
params = {
objective: 'reg:squarederror',
colsample_bytree: 0.8,
subsample: 0.8,
min_child_weight: 1,
max_leaves:128,
eta: 0.1,
max_depth:10,
eval_metric : 'rmse'
}
// Train on the training split (labels first, then features).
// 500 is the fourth argument of xgboost::train - presumably the number of
// boosting rounds; confirm against the plugin documentation.
model_1 = xgboost::train(Train_y ,Train_x, params, 500)
// Write the trained model to modelSavePath.
xgboost::saveModel(model_1, modelSavePath)
/**
part5: load model and predict, use RMSPE to evaluate.
You can predict with either `model` (loaded back from modelSavePath) or `model_1` (the model still in memory).
*/
/**
Root mean squared percentage error.
a : reference (actual) values; used as the denominator of the relative error
b : values compared against a (the script passes predictions)
Returns sqrt( sum( ((a-b)\a)^2 ) \ size(a) ).
*/
def RMSPE(a, b) {
    // Element-wise relative error with respect to a.
    relErr = (a - b) \ a
    return sqrt(sum(relErr * relErr) \ size(a))
}
// Load the model back from modelSavePath.
model = xgboost::loadModel(modelSavePath)
// NOTE(review): the prediction uses the in-memory model_1; the reloaded `model`
// is not used below. Pass `model` as the first argument to evaluate the saved model.
y_pred = xgboost::predict(model_1 , Test_x)
// Print the RMSPE of the predictions against the test labels.
print('RMSPE='+RMSPE(Test_y, y_pred))